Skip to content

Commit 5ea61dd

Browse files
committed
feat(coderd/healthcheck): add error codes for the rest of the owl
1 parent cdcad96 commit 5ea61dd

File tree

12 files changed

+161
-54
lines changed

12 files changed

+161
-54
lines changed

coderd/healthcheck/accessurl.go

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,10 @@ import (
77
"net/url"
88
"time"
99

10-
"golang.org/x/xerrors"
11-
1210
"github.com/coder/coder/v2/coderd/healthcheck/health"
1311
"github.com/coder/coder/v2/coderd/util/ptr"
1412
)
1513

16-
var (
17-
ErrAccessURLNotSet = "EACSURL01: Access URL not set"
18-
ErrAccessURLInvalid = "EACSURL02: Access URL invalid: "
19-
ErrAccessURLFetch = "EACSURL03: Failed to fetch /healthz: "
20-
ErrAccessURLNotOK = "EACSURL04: /healthz did not return 200 OK"
21-
)
22-
2314
// @typescript-generate AccessURLReport
2415
type AccessURLReport struct {
2516
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
@@ -46,7 +37,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
4637
r.Severity = health.SeverityOK
4738
r.Warnings = []string{}
4839
if opts.AccessURL == nil {
49-
r.Error = ptr.Ref(ErrAccessURLNotSet)
40+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLNotSet, "Access URL not set"))
5041
r.Severity = health.SeverityError
5142
return
5243
}
@@ -58,29 +49,29 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
5849

5950
accessURL, err := opts.AccessURL.Parse("/healthz")
6051
if err != nil {
61-
r.Error = convertError(xerrors.Errorf(ErrAccessURLInvalid+"parse healthz endpoint: %w", err))
52+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLInvalid, "parse healthz endpoint: %s", err))
6253
r.Severity = health.SeverityError
6354
return
6455
}
6556

6657
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
6758
if err != nil {
68-
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"create healthz request: %w", err))
59+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "create healthz request: %s", err))
6960
r.Severity = health.SeverityError
7061
return
7162
}
7263

7364
res, err := opts.Client.Do(req)
7465
if err != nil {
75-
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"get healthz endpoint: %w", err))
66+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "get healthz endpoint: %s", err))
7667
r.Severity = health.SeverityError
7768
return
7869
}
7970
defer res.Body.Close()
8071

8172
body, err := io.ReadAll(res.Body)
8273
if err != nil {
83-
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"read healthz response: %w", err))
74+
r.Error = ptr.Ref(health.Messagef(health.CodeAccessURLFetch, "read healthz response: %s", err))
8475
r.Severity = health.SeverityError
8576
return
8677
}
@@ -90,7 +81,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
9081
r.StatusCode = res.StatusCode
9182
if res.StatusCode != http.StatusOK {
9283
r.Severity = health.SeverityWarning
93-
r.Warnings = append(r.Warnings, ErrAccessURLNotOK)
84+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeAccessURLNotOK, "/healthz did not return 200 OK"))
9485
}
9586
r.HealthzResponse = string(body)
9687
}

coderd/healthcheck/accessurl_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func TestAccessURL(t *testing.T) {
6565
assert.Equal(t, 0, report.StatusCode)
6666
assert.Equal(t, "", report.HealthzResponse)
6767
require.NotNil(t, report.Error)
68-
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLNotSet)
68+
assert.Contains(t, *report.Error, health.CodeAccessURLNotSet)
6969
})
7070

7171
t.Run("ClientErr", func(t *testing.T) {
@@ -101,7 +101,7 @@ func TestAccessURL(t *testing.T) {
101101
assert.Equal(t, "", report.HealthzResponse)
102102
require.NotNil(t, report.Error)
103103
assert.Contains(t, *report.Error, expErr.Error())
104-
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLFetch)
104+
assert.Contains(t, *report.Error, health.CodeAccessURLFetch)
105105
})
106106

107107
t.Run("404", func(t *testing.T) {
@@ -131,7 +131,7 @@ func TestAccessURL(t *testing.T) {
131131
assert.Equal(t, string(resp), report.HealthzResponse)
132132
assert.Nil(t, report.Error)
133133
if assert.NotEmpty(t, report.Warnings) {
134-
assert.Contains(t, report.Warnings[0], healthcheck.ErrAccessURLNotOK)
134+
assert.Contains(t, report.Warnings[0], health.CodeAccessURLNotOK)
135135
}
136136
})
137137
}

coderd/healthcheck/database.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ import (
44
"context"
55
"time"
66

7-
"golang.org/x/exp/slices"
8-
"golang.org/x/xerrors"
9-
107
"github.com/coder/coder/v2/coderd/database"
118
"github.com/coder/coder/v2/coderd/healthcheck/health"
9+
"github.com/coder/coder/v2/coderd/util/ptr"
10+
11+
"golang.org/x/exp/slices"
1212
)
1313

1414
const (
@@ -50,8 +50,9 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
5050
for i := 0; i < pingCount; i++ {
5151
pong, err := opts.DB.Ping(ctx)
5252
if err != nil {
53-
r.Error = convertError(xerrors.Errorf("ping: %w", err))
53+
r.Error = ptr.Ref(health.Messagef(health.CodeDatabasePingFailed, "ping database: %s", err))
5454
r.Severity = health.SeverityError
55+
5556
return
5657
}
5758
pings = append(pings, pong)
@@ -64,6 +65,7 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
6465
r.LatencyMS = latency.Milliseconds()
6566
if r.LatencyMS >= r.ThresholdMS {
6667
r.Severity = health.SeverityWarning
68+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDatabasePingSlow, "median database ping above threshold"))
6769
}
6870
r.Healthy = true
6971
r.Reachable = true

coderd/healthcheck/derphealth/derp.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
133133
}
134134

135135
for _, w := range regionReport.Warnings {
136-
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", regionReport.Region.RegionName, w))
136+
r.Warnings = append(r.Warnings, w)
137137
}
138138
mu.Unlock()
139139
}()
@@ -199,7 +199,7 @@ func (r *RegionReport) Run(ctx context.Context) {
199199
}
200200

201201
for _, w := range nodeReport.Warnings {
202-
r.Warnings = append(r.Warnings, fmt.Sprintf("[%s] %s", nodeReport.Node.Name, w))
202+
r.Warnings = append(r.Warnings, w)
203203
}
204204
r.mu.Unlock()
205205
}()
@@ -224,7 +224,7 @@ func (r *RegionReport) Run(ctx context.Context) {
224224
} else if unhealthyNodes == 1 {
225225
// r.Healthy = true (by default)
226226
r.Severity = health.SeverityWarning
227-
r.Warnings = append(r.Warnings, oneNodeUnhealthy)
227+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPOneNodeUnhealthy, oneNodeUnhealthy))
228228
} else if unhealthyNodes > 1 {
229229
r.Healthy = false
230230

@@ -288,7 +288,7 @@ func (r *NodeReport) Run(ctx context.Context) {
288288
}
289289

290290
if r.UsesWebsocket {
291-
r.Warnings = append(r.Warnings, warningNodeUsesWebsocket)
291+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeDERPNodeUsesWebsocket, warningNodeUsesWebsocket))
292292
r.Severity = health.SeverityWarning
293293
}
294294
}

coderd/healthcheck/derphealth/derp_test.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@ func TestDERP(t *testing.T) {
127127

128128
assert.True(t, report.Healthy)
129129
assert.Equal(t, health.SeverityWarning, report.Severity)
130+
if assert.NotEmpty(t, report.Warnings) {
131+
assert.Contains(t, report.Warnings[0], health.CodeDERPOneNodeUnhealthy)
132+
}
130133
for _, region := range report.Regions {
131134
assert.True(t, region.Healthy)
132135
assert.True(t, region.NodeReports[0].Healthy)
@@ -230,7 +233,9 @@ func TestDERP(t *testing.T) {
230233

231234
assert.True(t, report.Healthy)
232235
assert.Equal(t, health.SeverityWarning, report.Severity)
233-
assert.NotEmpty(t, report.Warnings)
236+
if assert.NotEmpty(t, report.Warnings) {
237+
assert.Contains(t, report.Warnings[0], health.CodeDERPNodeUsesWebsocket)
238+
}
234239
for _, region := range report.Regions {
235240
assert.True(t, region.Healthy)
236241
assert.Equal(t, health.SeverityWarning, region.Severity)

coderd/healthcheck/health/model.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,37 @@
11
package health
22

3+
import (
4+
"fmt"
5+
"strings"
6+
)
7+
38
const (
49
SeverityOK Severity = "ok"
510
SeverityWarning Severity = "warning"
611
SeverityError Severity = "error"
12+
13+
// CodeUnknown is a catch-all health code when something unexpected goes wrong (for example, a panic).
14+
CodeUnknown Code = "EUNKNOWN"
15+
16+
CodeProxyUpdate Code = "EWP01"
17+
CodeProxyFetch Code = "EWP02"
18+
CodeProxyVersionMismatch Code = "EWP03"
19+
CodeProxyUnhealthy Code = "EWP04"
20+
21+
CodeDatabasePingFailed Code = "EDB01"
22+
CodeDatabasePingSlow Code = "EDB02"
23+
24+
CodeWebsocketDial Code = "EWS01"
25+
CodeWebsocketEcho Code = "EWS02"
26+
CodeWebsocketMsg Code = "EWS03"
27+
28+
CodeAccessURLNotSet Code = "EACS01"
29+
CodeAccessURLInvalid Code = "EACS02"
30+
CodeAccessURLFetch Code = "EACS03"
31+
CodeAccessURLNotOK Code = "EACS04"
32+
33+
CodeDERPNodeUsesWebsocket Code = `EDERP01`
34+
CodeDERPOneNodeUnhealthy Code = `EDERP02`
735
)
836

937
// @typescript-generate Severity
@@ -18,3 +46,17 @@ var severityRank = map[Severity]int{
1846
func (s Severity) Value() int {
1947
return severityRank[s]
2048
}
49+
50+
// Code is a stable identifier used to link to documentation.
// @typescript-generate Code
type Code string

// Messagef is a convenience function for formatting a healthcheck error
// message. The result has the form "<code>: <text>", where <text> is msg
// expanded with args according to the fmt.Sprintf rules.
func Messagef(code Code, msg string, args ...any) string {
	var sb strings.Builder
	sb.WriteString(string(code))
	sb.WriteString(": ")
	// Format straight into the builder rather than building an intermediate
	// string first. Writes to a strings.Builder never fail, so the returned
	// error is intentionally discarded.
	_, _ = fmt.Fprintf(&sb, msg, args...)
	return sb.String()
}

coderd/healthcheck/healthcheck.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package healthcheck
22

33
import (
44
"context"
5-
"fmt"
65
"sync"
76
"time"
87

@@ -104,7 +103,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
104103
defer wg.Done()
105104
defer func() {
106105
if err := recover(); err != nil {
107-
report.DERP.Error = ptr.Ref(fmt.Sprint(err))
106+
report.DERP.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "derp report panic: %s", err))
108107
}
109108
}()
110109

@@ -116,7 +115,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
116115
defer wg.Done()
117116
defer func() {
118117
if err := recover(); err != nil {
119-
report.AccessURL.Error = ptr.Ref(fmt.Sprint(err))
118+
report.AccessURL.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "access url report panic: %s", err))
120119
}
121120
}()
122121

@@ -128,7 +127,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
128127
defer wg.Done()
129128
defer func() {
130129
if err := recover(); err != nil {
131-
report.Websocket.Error = ptr.Ref(fmt.Sprint(err))
130+
report.Websocket.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "websocket report panic: %s", err))
132131
}
133132
}()
134133

@@ -140,7 +139,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
140139
defer wg.Done()
141140
defer func() {
142141
if err := recover(); err != nil {
143-
report.Database.Error = ptr.Ref(fmt.Sprint(err))
142+
report.Database.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "database report panic: %s", err))
144143
}
145144
}()
146145

@@ -152,7 +151,7 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
152151
defer wg.Done()
153152
defer func() {
154153
if err := recover(); err != nil {
155-
report.WorkspaceProxy.Error = ptr.Ref(fmt.Sprint(err))
154+
report.WorkspaceProxy.Error = ptr.Ref(health.Messagef(health.CodeUnknown, "proxy report panic: %s", err))
156155
}
157156
}()
158157

coderd/healthcheck/websocket.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"nhooyr.io/websocket"
1414

1515
"github.com/coder/coder/v2/coderd/healthcheck/health"
16+
"github.com/coder/coder/v2/coderd/util/ptr"
1617
)
1718

1819
type WebsocketReportOptions struct {
@@ -70,6 +71,7 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
7071
}
7172
if err != nil {
7273
r.Error = convertError(xerrors.Errorf("websocket dial: %w", err))
74+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketDial, "websocket dial: %s", err))
7375
r.Severity = health.SeverityError
7476
return
7577
}
@@ -79,26 +81,26 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
7981
msg := strconv.Itoa(i)
8082
err := c.Write(ctx, websocket.MessageText, []byte(msg))
8183
if err != nil {
82-
r.Error = convertError(xerrors.Errorf("write message: %w", err))
84+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketEcho, "write message: %s", err))
8385
r.Severity = health.SeverityError
8486
return
8587
}
8688

8789
ty, got, err := c.Read(ctx)
8890
if err != nil {
89-
r.Error = convertError(xerrors.Errorf("read message: %w", err))
91+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketEcho, "read message: %s", err))
9092
r.Severity = health.SeverityError
9193
return
9294
}
9395

9496
if ty != websocket.MessageText {
95-
r.Error = convertError(xerrors.Errorf("received incorrect message type: %v", ty))
97+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketMsg, "received incorrect message type: %v", ty))
9698
r.Severity = health.SeverityError
9799
return
98100
}
99101

100102
if string(got) != msg {
101-
r.Error = convertError(xerrors.Errorf("received incorrect message: wanted %q, got %q", msg, string(got)))
103+
r.Error = ptr.Ref(health.Messagef(health.CodeWebsocketMsg, "received incorrect message: wanted %q, got %q", msg, string(got)))
102104
r.Severity = health.SeverityError
103105
return
104106
}

coderd/healthcheck/websocket_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ func TestWebsocket(t *testing.T) {
6363
APIKey: "test",
6464
})
6565

66-
require.NotNil(t, wsReport.Error)
66+
if assert.NotNil(t, wsReport.Error) {
67+
assert.Contains(t, *wsReport.Error, health.CodeWebsocketDial)
68+
}
6769
require.Equal(t, health.SeverityError, wsReport.Severity)
6870
assert.Equal(t, wsReport.Body, "test error")
6971
assert.Equal(t, wsReport.Code, http.StatusBadRequest)

0 commit comments

Comments
 (0)