Skip to content

Commit cdcad96

Browse files
committed
feat(coderd/healthcheck): add access URL error codes and healthcheck doc
1 parent e9c12c3 commit cdcad96

File tree

3 files changed

+149
-30
lines changed

3 files changed

+149
-30
lines changed

coderd/healthcheck/accessurl.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ import (
1313
"github.com/coder/coder/v2/coderd/util/ptr"
1414
)
1515

16+
var (
17+
ErrAccessURLNotSet = "EACSURL01: Access URL not set"
18+
ErrAccessURLInvalid = "EACSURL02: Access URL invalid: "
19+
ErrAccessURLFetch = "EACSURL03: Failed to fetch /healthz: "
20+
ErrAccessURLNotOK = "EACSURL04: /healthz did not return 200 OK"
21+
)
22+
1623
// @typescript-generate AccessURLReport
1724
type AccessURLReport struct {
1825
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
@@ -39,7 +46,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
3946
r.Severity = health.SeverityOK
4047
r.Warnings = []string{}
4148
if opts.AccessURL == nil {
42-
r.Error = ptr.Ref("access URL is nil")
49+
r.Error = ptr.Ref(ErrAccessURLNotSet)
4350
r.Severity = health.SeverityError
4451
return
4552
}
@@ -51,29 +58,29 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
5158

5259
accessURL, err := opts.AccessURL.Parse("/healthz")
5360
if err != nil {
54-
r.Error = convertError(xerrors.Errorf("parse healthz endpoint: %w", err))
61+
r.Error = convertError(xerrors.Errorf(ErrAccessURLInvalid+"parse healthz endpoint: %w", err))
5562
r.Severity = health.SeverityError
5663
return
5764
}
5865

5966
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
6067
if err != nil {
61-
r.Error = convertError(xerrors.Errorf("create healthz request: %w", err))
68+
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"create healthz request: %w", err))
6269
r.Severity = health.SeverityError
6370
return
6471
}
6572

6673
res, err := opts.Client.Do(req)
6774
if err != nil {
68-
r.Error = convertError(xerrors.Errorf("get healthz endpoint: %w", err))
75+
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"get healthz endpoint: %w", err))
6976
r.Severity = health.SeverityError
7077
return
7178
}
7279
defer res.Body.Close()
7380

7481
body, err := io.ReadAll(res.Body)
7582
if err != nil {
76-
r.Error = convertError(xerrors.Errorf("read healthz response: %w", err))
83+
r.Error = convertError(xerrors.Errorf(ErrAccessURLFetch+"read healthz response: %w", err))
7784
r.Severity = health.SeverityError
7885
return
7986
}
@@ -83,6 +90,7 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
8390
r.StatusCode = res.StatusCode
8491
if res.StatusCode != http.StatusOK {
8592
r.Severity = health.SeverityWarning
93+
r.Warnings = append(r.Warnings, ErrAccessURLNotOK)
8694
}
8795
r.HealthzResponse = string(body)
8896
}

coderd/healthcheck/accessurl_test.go

Lines changed: 57 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"github.com/stretchr/testify/require"
1212
"golang.org/x/xerrors"
1313

14-
"github.com/coder/coder/v2/coderd/coderdtest"
1514
"github.com/coder/coder/v2/coderd/healthcheck"
1615
"github.com/coder/coder/v2/coderd/healthcheck/health"
1716
)
@@ -25,12 +24,17 @@ func TestAccessURL(t *testing.T) {
2524
var (
2625
ctx, cancel = context.WithCancel(context.Background())
2726
report healthcheck.AccessURLReport
28-
client = coderdtest.New(t, nil)
27+
resp = []byte("OK")
28+
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
29+
w.WriteHeader(http.StatusOK)
30+
_, _ = w.Write(resp)
31+
}))
2932
)
3033
defer cancel()
3134

3235
report.Run(ctx, &healthcheck.AccessURLReportOptions{
33-
AccessURL: client.URL,
36+
Client: srv.Client(),
37+
AccessURL: mustURL(t, srv.URL),
3438
})
3539

3640
assert.True(t, report.Healthy)
@@ -41,35 +45,27 @@ func TestAccessURL(t *testing.T) {
4145
assert.Nil(t, report.Error)
4246
})
4347

44-
t.Run("404", func(t *testing.T) {
48+
t.Run("NotSet", func(t *testing.T) {
4549
t.Parallel()
4650

4751
var (
4852
ctx, cancel = context.WithCancel(context.Background())
4953
report healthcheck.AccessURLReport
50-
resp = []byte("NOT OK")
51-
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
52-
w.WriteHeader(http.StatusNotFound)
53-
w.Write(resp)
54-
}))
5554
)
5655
defer cancel()
57-
defer srv.Close()
58-
59-
u, err := url.Parse(srv.URL)
60-
require.NoError(t, err)
6156

6257
report.Run(ctx, &healthcheck.AccessURLReportOptions{
63-
Client: srv.Client(),
64-
AccessURL: u,
58+
Client: nil, // defaults to http.DefaultClient
59+
AccessURL: nil,
6560
})
6661

6762
assert.False(t, report.Healthy)
68-
assert.True(t, report.Reachable)
69-
assert.Equal(t, health.SeverityWarning, report.Severity)
70-
assert.Equal(t, http.StatusNotFound, report.StatusCode)
71-
assert.Equal(t, string(resp), report.HealthzResponse)
72-
assert.Nil(t, report.Error)
63+
assert.False(t, report.Reachable)
64+
assert.Equal(t, health.SeverityError, report.Severity)
65+
assert.Equal(t, 0, report.StatusCode)
66+
assert.Equal(t, "", report.HealthzResponse)
67+
require.NotNil(t, report.Error)
68+
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLNotSet)
7369
})
7470

7571
t.Run("ClientErr", func(t *testing.T) {
@@ -81,7 +77,7 @@ func TestAccessURL(t *testing.T) {
8177
resp = []byte("OK")
8278
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
8379
w.WriteHeader(http.StatusOK)
84-
w.Write(resp)
80+
_, _ = w.Write(resp)
8581
}))
8682
client = srv.Client()
8783
)
@@ -93,12 +89,9 @@ func TestAccessURL(t *testing.T) {
9389
return nil, expErr
9490
})
9591

96-
u, err := url.Parse(srv.URL)
97-
require.NoError(t, err)
98-
9992
report.Run(ctx, &healthcheck.AccessURLReportOptions{
10093
Client: client,
101-
AccessURL: u,
94+
AccessURL: mustURL(t, srv.URL),
10295
})
10396

10497
assert.False(t, report.Healthy)
@@ -108,6 +101,38 @@ func TestAccessURL(t *testing.T) {
108101
assert.Equal(t, "", report.HealthzResponse)
109102
require.NotNil(t, report.Error)
110103
assert.Contains(t, *report.Error, expErr.Error())
104+
assert.Contains(t, *report.Error, healthcheck.ErrAccessURLFetch)
105+
})
106+
107+
t.Run("404", func(t *testing.T) {
108+
t.Parallel()
109+
110+
var (
111+
ctx, cancel = context.WithCancel(context.Background())
112+
report healthcheck.AccessURLReport
113+
resp = []byte("NOT OK")
114+
srv = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
115+
w.WriteHeader(http.StatusNotFound)
116+
_, _ = w.Write(resp)
117+
}))
118+
)
119+
defer cancel()
120+
defer srv.Close()
121+
122+
report.Run(ctx, &healthcheck.AccessURLReportOptions{
123+
Client: srv.Client(),
124+
AccessURL: mustURL(t, srv.URL),
125+
})
126+
127+
assert.False(t, report.Healthy)
128+
assert.True(t, report.Reachable)
129+
assert.Equal(t, health.SeverityWarning, report.Severity)
130+
assert.Equal(t, http.StatusNotFound, report.StatusCode)
131+
assert.Equal(t, string(resp), report.HealthzResponse)
132+
assert.Nil(t, report.Error)
133+
if assert.NotEmpty(t, report.Warnings) {
134+
assert.Contains(t, report.Warnings[0], healthcheck.ErrAccessURLNotOK)
135+
}
111136
})
112137
}
113138

@@ -116,3 +141,10 @@ type roundTripFunc func(r *http.Request) (*http.Response, error)
116141
func (rt roundTripFunc) RoundTrip(r *http.Request) (*http.Response, error) {
117142
return rt(r)
118143
}
144+
145+
func mustURL(t testing.TB, s string) *url.URL {
146+
t.Helper()
147+
u, err := url.Parse(s)
148+
require.NoError(t, err)
149+
return u
150+
}

docs/admin/healthcheck.md

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Deployment Health
2+
3+
Coder includes an operator-friendly deployment health page that provides a
4+
number of details about the health of your Coder deployment.
5+
6+
You can view it at `https://${CODER_URL}/health`, or you can alternatively view
7+
the [JSON response directly](../api/debug.md#debug-info-deployment-health).
8+
9+
The deployment health page is broken up into the following sections:
10+
11+
## Access URL
12+
13+
The Access URL section shows checks related to Coder's
14+
[access URL](./configure.md#access-url).
15+
16+
Coder will periodically send a GET request to `${CODER_ACCESS_URL}/healthz` and
17+
validate that the response is `200 OK`.
18+
19+
If there is an issue, you may see one of the following errors reported:
20+
21+
### <a name="EACSURL01">EACSURL01: Access URL not set</a>
22+
23+
**Problem:** No access URL has been configured.
24+
25+
**Solution:** Configure an [access URL](./configure.md#access-url) for Coder.
26+
27+
### <a name="EACSURL02">EACSURL02: Access URL invalid</a>
28+
29+
**Problem:** `${CODER_ACCESS_URL}/healthz` is not a valid URL.
30+
31+
**Solution:** Ensure that the access URL is a valid URL accepted by
32+
[`url.Parse`](https://pkg.go.dev/net/url#Parse).
33+
34+
### <a name="EACSURL03">EACSURL03: Failed to fetch /healthz</a>
35+
36+
**Problem:** Coder was unable to execute a GET request to
37+
`${CODER_ACCESS_URL}/healthz`.
38+
39+
This could be due to a number of reasons, including but not limited to:
40+
41+
- DNS lookup failure
42+
- A misconfigured firewall
43+
- A misconfigured reverse proxy
44+
- Invalid or expired SSL certificates
45+
46+
**Solution:** Investigate and resolve the root cause of the connection issue.
47+
48+
To troubleshoot further, you can log into the machine running Coder and attempt
49+
to run the following command:
50+
51+
```shell
52+
curl -v ${CODER_ACCESS_URL}/healthz
53+
```
54+
55+
The output of this command should aid further diagnosis.
56+
57+
### <a name="EACSURL04">EACSURL04: /healthz did not return 200 OK</a>
58+
59+
**Problem:** Coder was able to execute a GET request to
60+
`${CODER_ACCESS_URL}/healthz`, but the response code was not `200 OK` as
61+
expected.
62+
63+
This could mean, for instance, that:
64+
65+
- The request did not actually hit your Coder instance (potentially an incorrect
66+
DNS entry)
67+
- The request hit your Coder instance, but on an unexpected path (potentially a
68+
misconfigured reverse proxy)
69+
70+
**Solution:** Inspect the `HealthzResponse` in the health check output. This
71+
should give you a good indication of the root cause.
72+
73+
## Database
74+
75+
## DERP
76+
77+
## Websocket
78+
79+
## Workspace Proxy

0 commit comments

Comments
 (0)