Skip to content

feat(healthcheck): add failing sections to report #7789

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions coderd/apidoc/docs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions coderd/apidoc/swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions coderd/coderd.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ type Options struct {
// AppSecurityKey is the crypto key used to sign and encrypt tokens related to
// workspace applications. It consists of both a signing and encryption key.
AppSecurityKey workspaceapps.SecurityKey
HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error)
HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report
HealthcheckTimeout time.Duration
HealthcheckRefresh time.Duration

Expand Down Expand Up @@ -266,7 +266,7 @@ func New(options *Options) *API {
options.TemplateScheduleStore.Store(&v)
}
if options.HealthcheckFunc == nil {
options.HealthcheckFunc = func(ctx context.Context, apiKey string) (*healthcheck.Report, error) {
options.HealthcheckFunc = func(ctx context.Context, apiKey string) *healthcheck.Report {
return healthcheck.Run(ctx, &healthcheck.ReportOptions{
AccessURL: options.AccessURL,
DERPMap: options.DERPMap.Clone(),
Expand Down
2 changes: 1 addition & 1 deletion coderd/coderdtest/coderdtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ type Options struct {
TrialGenerator func(context.Context, string) error
TemplateScheduleStore schedule.TemplateScheduleStore

HealthcheckFunc func(ctx context.Context, apiKey string) (*healthcheck.Report, error)
HealthcheckFunc func(ctx context.Context, apiKey string) *healthcheck.Report
HealthcheckTimeout time.Duration
HealthcheckRefresh time.Duration

Expand Down
8 changes: 3 additions & 5 deletions coderd/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,9 @@ func (api *API) debugDeploymentHealth(rw http.ResponseWriter, r *http.Request) {
ctx, cancel := context.WithTimeout(context.Background(), api.HealthcheckTimeout)
defer cancel()

report, err := api.HealthcheckFunc(ctx, apiKey)
if err == nil {
api.healthCheckCache.Store(report)
}
return report, err
report := api.HealthcheckFunc(ctx, apiKey)
api.healthCheckCache.Store(report)
return report, nil
})

select {
Expand Down
14 changes: 7 additions & 7 deletions coderd/debug_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ func TestDebugHealth(t *testing.T) {
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
sessionToken string
client = coderdtest.New(t, &coderdtest.Options{
HealthcheckFunc: func(_ context.Context, apiKey string) (*healthcheck.Report, error) {
HealthcheckFunc: func(_ context.Context, apiKey string) *healthcheck.Report {
assert.Equal(t, sessionToken, apiKey)
return &healthcheck.Report{}, nil
return &healthcheck.Report{}
},
})
_ = coderdtest.CreateFirstUser(t, client)
Expand All @@ -48,15 +48,15 @@ func TestDebugHealth(t *testing.T) {
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
client = coderdtest.New(t, &coderdtest.Options{
HealthcheckTimeout: time.Microsecond,
HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) {
HealthcheckFunc: func(context.Context, string) *healthcheck.Report {
t := time.NewTimer(time.Second)
defer t.Stop()

select {
case <-ctx.Done():
return nil, ctx.Err()
return &healthcheck.Report{}
case <-t.C:
return &healthcheck.Report{}, nil
return &healthcheck.Report{}
}
},
})
Expand All @@ -80,11 +80,11 @@ func TestDebugHealth(t *testing.T) {
client = coderdtest.New(t, &coderdtest.Options{
HealthcheckRefresh: time.Hour,
HealthcheckTimeout: time.Hour,
HealthcheckFunc: func(context.Context, string) (*healthcheck.Report, error) {
HealthcheckFunc: func(context.Context, string) *healthcheck.Report {
calls++
return &healthcheck.Report{
Time: time.Now(),
}, nil
}
},
})
_ = coderdtest.CreateFirstUser(t, client)
Expand Down
10 changes: 5 additions & 5 deletions coderd/healthcheck/derp.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import (
)

type DERPReport struct {
mu sync.Mutex
Healthy bool `json:"healthy"`

Regions map[int]*DERPRegionReport `json:"regions"`
Expand Down Expand Up @@ -78,6 +77,7 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
r.Regions = map[int]*DERPRegionReport{}

wg := &sync.WaitGroup{}
mu := sync.Mutex{}

wg.Add(len(opts.DERPMap.Regions))
for _, region := range opts.DERPMap.Regions {
Expand All @@ -97,19 +97,19 @@ func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {

regionReport.Run(ctx)

r.mu.Lock()
mu.Lock()
r.Regions[region.RegionID] = &regionReport
if !regionReport.Healthy {
r.Healthy = false
}
r.mu.Unlock()
mu.Unlock()
}()
}

ncLogf := func(format string, args ...interface{}) {
r.mu.Lock()
mu.Lock()
r.NetcheckLogs = append(r.NetcheckLogs, fmt.Sprintf(format, args...))
r.mu.Unlock()
mu.Unlock()
}
nc := &netcheck.Client{
PortMapper: portmapper.NewClient(tslogger.WithPrefix(ncLogf, "portmap: "), nil),
Expand Down
68 changes: 57 additions & 11 deletions coderd/healthcheck/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,24 @@ import (
"tailscale.com/tailcfg"
)

// Section names. Run appends the matching name to Report.FailingSections
// when that section of the report is not healthy.
const (
SectionDERP string = "DERP"
SectionAccessURL string = "AccessURL"
SectionWebsocket string = "Websocket"
)

// Checker produces the individual sections of a Report. Run calls each method
// to populate the corresponding section, and falls back to a default
// implementation when ReportOptions.Checker is nil. Tests can supply their
// own implementation to return canned section reports.
type Checker interface {
// DERP returns the DERP section of the report.
DERP(ctx context.Context, opts *DERPReportOptions) DERPReport
// AccessURL returns the access URL section of the report.
AccessURL(ctx context.Context, opts *AccessURLOptions) AccessURLReport
// Websocket returns the websocket section of the report.
Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport
}

type Report struct {
// Time is the time the report was generated at.
Time time.Time `json:"time"`
// Healthy is true if the report returns no errors.
Healthy bool `json:"healthy"`
Healthy bool `json:"healthy"`
FailingSections []string `json:"failing_sections"`

DERP DERPReport `json:"derp"`
AccessURL AccessURLReport `json:"access_url"`
Expand All @@ -28,12 +41,36 @@ type ReportOptions struct {
AccessURL *url.URL
Client *http.Client
APIKey string

Checker Checker
}

// defaultChecker is the Checker that Run installs when ReportOptions.Checker
// is nil. Each of its methods delegates to the Run method of the report type
// it returns.
type defaultChecker struct{}

// DERP runs the DERP check with the given options on a fresh DERPReport and
// returns the populated report by value.
func (defaultChecker) DERP(ctx context.Context, opts *DERPReportOptions) DERPReport {
	var derp DERPReport
	derp.Run(ctx, opts)
	return derp
}

// AccessURL runs the access URL check with the given options on a fresh
// AccessURLReport and returns the populated report by value.
func (defaultChecker) AccessURL(ctx context.Context, opts *AccessURLOptions) AccessURLReport {
	var accessURL AccessURLReport
	accessURL.Run(ctx, opts)
	return accessURL
}

func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
var report Report
// Websocket runs the websocket check with the given options on a fresh
// WebsocketReport and returns the populated report by value.
func (defaultChecker) Websocket(ctx context.Context, opts *WebsocketReportOptions) WebsocketReport {
	var websocket WebsocketReport
	websocket.Run(ctx, opts)
	return websocket
}

wg := &sync.WaitGroup{}
func Run(ctx context.Context, opts *ReportOptions) *Report {
var (
wg sync.WaitGroup
report Report
)

if opts.Checker == nil {
opts.Checker = defaultChecker{}
}

wg.Add(1)
go func() {
Expand All @@ -44,7 +81,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
}
}()

report.DERP.Run(ctx, &DERPReportOptions{
report.DERP = opts.Checker.DERP(ctx, &DERPReportOptions{
DERPMap: opts.DERPMap,
})
}()
Expand All @@ -58,7 +95,7 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
}
}()

report.AccessURL.Run(ctx, &AccessURLOptions{
report.AccessURL = opts.Checker.AccessURL(ctx, &AccessURLOptions{
AccessURL: opts.AccessURL,
Client: opts.Client,
})
Expand All @@ -72,16 +109,25 @@ func Run(ctx context.Context, opts *ReportOptions) (*Report, error) {
report.Websocket.Error = xerrors.Errorf("%v", err)
}
}()
report.Websocket.Run(ctx, &WebsocketReportOptions{

report.Websocket = opts.Checker.Websocket(ctx, &WebsocketReportOptions{
APIKey: opts.APIKey,
AccessURL: opts.AccessURL,
})
}()

wg.Wait()
report.Time = time.Now()
report.Healthy = report.DERP.Healthy &&
report.AccessURL.Healthy &&
report.Websocket.Healthy
return &report, nil
if !report.DERP.Healthy {
report.FailingSections = append(report.FailingSections, SectionDERP)
}
if !report.AccessURL.Healthy {
report.FailingSections = append(report.FailingSections, SectionAccessURL)
}
if !report.Websocket.Healthy {
report.FailingSections = append(report.FailingSections, SectionWebsocket)
}

report.Healthy = len(report.FailingSections) == 0
return &report
}
120 changes: 120 additions & 0 deletions coderd/healthcheck/healthcheck_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package healthcheck_test

import (
"context"
"testing"

"github.com/stretchr/testify/assert"

"github.com/coder/coder/coderd/healthcheck"
)

// testChecker is a stub passed as ReportOptions.Checker. It performs no real
// checks: each method returns the matching preconfigured report stored in
// these fields.
type testChecker struct {
DERPReport healthcheck.DERPReport
AccessURLReport healthcheck.AccessURLReport
WebsocketReport healthcheck.WebsocketReport
}

// DERP ignores its arguments and returns the canned DERPReport.
func (c *testChecker) DERP(context.Context, *healthcheck.DERPReportOptions) healthcheck.DERPReport {
return c.DERPReport
}

// AccessURL ignores its arguments and returns the canned AccessURLReport.
func (c *testChecker) AccessURL(context.Context, *healthcheck.AccessURLOptions) healthcheck.AccessURLReport {
return c.AccessURLReport
}

// Websocket ignores its arguments and returns the canned WebsocketReport.
func (c *testChecker) Websocket(context.Context, *healthcheck.WebsocketReportOptions) healthcheck.WebsocketReport {
return c.WebsocketReport
}

// TestHealthcheck checks that healthcheck.Run derives the overall Healthy flag
// and the FailingSections list from the per-section reports returned by the
// configured Checker, and that it passes those section reports through.
func TestHealthcheck(t *testing.T) {
	t.Parallel()

	// stub builds a checker whose three sections report the given health.
	stub := func(derp, accessURL, websocket bool) *testChecker {
		return &testChecker{
			DERPReport:      healthcheck.DERPReport{Healthy: derp},
			AccessURLReport: healthcheck.AccessURLReport{Healthy: accessURL},
			WebsocketReport: healthcheck.WebsocketReport{Healthy: websocket},
		}
	}

	type testCase struct {
		name            string
		checker         *testChecker
		healthy         bool
		failingSections []string
	}

	cases := []testCase{
		{
			name:            "OK",
			checker:         stub(true, true, true),
			healthy:         true,
			failingSections: nil,
		},
		{
			name:            "DERPFail",
			checker:         stub(false, true, true),
			healthy:         false,
			failingSections: []string{healthcheck.SectionDERP},
		},
		{
			name:            "AccessURLFail",
			checker:         stub(true, false, true),
			healthy:         false,
			failingSections: []string{healthcheck.SectionAccessURL},
		},
		{
			name:            "WebsocketFail",
			checker:         stub(true, true, false),
			healthy:         false,
			failingSections: []string{healthcheck.SectionWebsocket},
		},
		{
			// The zero-value checker reports every section as unhealthy.
			name:    "AllFail",
			checker: &testChecker{},
			healthy: false,
			failingSections: []string{
				healthcheck.SectionDERP,
				healthcheck.SectionAccessURL,
				healthcheck.SectionWebsocket,
			},
		},
	}

	for _, tc := range cases {
		tc := tc // capture the per-iteration value for the parallel subtest
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			opts := &healthcheck.ReportOptions{Checker: tc.checker}
			got := healthcheck.Run(context.Background(), opts)

			assert.Equal(t, tc.healthy, got.Healthy)
			assert.Equal(t, tc.failingSections, got.FailingSections)
			assert.Equal(t, tc.checker.DERPReport.Healthy, got.DERP.Healthy)
			assert.Equal(t, tc.checker.AccessURLReport.Healthy, got.AccessURL.Healthy)
			assert.Equal(t, tc.checker.WebsocketReport.Healthy, got.Websocket.Healthy)
			assert.NotZero(t, got.Time)
		})
	}
}
1 change: 1 addition & 0 deletions docs/api/debug.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
}
}
},
"failing_sections": ["string"],
"healthy": true,
"time": "string",
"websocket": {
Expand Down
Loading