Skip to content

Commit 102816e

Browse files
committed
implement provisioner daemon healthcheck
1 parent d257806 commit 102816e

File tree

3 files changed

+142
-76
lines changed

3 files changed

+142
-76
lines changed

coderd/healthcheck/health/model.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,9 @@ const (
3535
CodeDERPNodeUsesWebsocket Code = `EDERP01`
3636
CodeDERPOneNodeUnhealthy Code = `EDERP02`
3737

38-
CodeProvisionerDaemonsNoProvisionerDaemons Code = `EPD01`
39-
CodeProvisionerDaemonVersionOutOfDate Code = `EPD02`
40-
CodeProvisionerDaemonAPIMajorVersionNotAvailable Code = `EPD03`
41-
CodeProvisionerDaemonAPIMinorVersionNotAvailable Code = `EPD04`
38+
CodeProvisionerDaemonsNoProvisionerDaemons Code = `EPD01`
39+
CodeProvisionerDaemonVersionMismatch Code = `EPD02`
40+
CodeProvisionerDaemonAPIVersionIncompatible Code = `EPD03`
4241
)
4342

4443
// @typescript-generate Severity

coderd/healthcheck/provisioner.go

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,22 @@ package healthcheck
22

33
import (
44
"context"
5+
"time"
56

67
"golang.org/x/mod/semver"
78

9+
"github.com/coder/coder/v2/buildinfo"
810
"github.com/coder/coder/v2/coderd/database"
11+
"github.com/coder/coder/v2/coderd/database/dbauthz"
912
"github.com/coder/coder/v2/coderd/healthcheck/health"
13+
"github.com/coder/coder/v2/coderd/provisionerdserver"
14+
"github.com/coder/coder/v2/coderd/util/apiversion"
15+
"github.com/coder/coder/v2/coderd/util/ptr"
1016
"github.com/coder/coder/v2/codersdk"
1117
)
1218

13-
// @typescript-generate ProvisionerDaemonReport
14-
type ProvisionerDaemonReport struct {
19+
// @typescript-generate ProvisionerDaemonsReport
20+
type ProvisionerDaemonsReport struct {
1521
Severity health.Severity `json:"severity"`
1622
Warnings []health.Message `json:"warnings"`
1723
Dismissed bool `json:"dismissed"`
@@ -20,74 +26,95 @@ type ProvisionerDaemonReport struct {
2026
Provisioners []codersdk.ProvisionerDaemon
2127
}
2228

23-
// @typescript-generate ProvisionerDaemonReportOptions
24-
type ProvisionerDaemonReportOptions struct {
29+
// @typescript-generate ProvisionerDaemonsReportOptions
30+
type ProvisionerDaemonsReportOptions struct {
2531
CurrentVersion string
26-
CurrentAPIVersion string
32+
CurrentAPIVersion *apiversion.APIVersion
2733

2834
// ProvisionerDaemonsFn is a function that returns ProvisionerDaemons.
2935
// Satisfied by database.Store.ProvisionerDaemons
3036
ProvisionerDaemonsFn func(context.Context) ([]database.ProvisionerDaemon, error)
3137

38+
TimeNowFn func() time.Time
39+
StaleInterval time.Duration
40+
3241
Dismissed bool
3342
}
3443

35-
func (r *ProvisionerDaemonReport) Run(ctx context.Context, opts *ProvisionerDaemonReportOptions) {
44+
func (r *ProvisionerDaemonsReport) Run(ctx context.Context, opts *ProvisionerDaemonsReportOptions) {
3645
r.Severity = health.SeverityOK
3746
r.Warnings = make([]health.Message, 0)
3847
r.Dismissed = opts.Dismissed
48+
now := opts.TimeNowFn()
49+
if opts.StaleInterval == 0 {
50+
opts.StaleInterval = provisionerdserver.DefaultHeartbeatInterval * 3
51+
}
3952

4053
if opts.CurrentVersion == "" {
4154
r.Severity = health.SeverityError
42-
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Developer error: CurrentVersion is empty!"))
55+
r.Error = ptr.Ref("Developer error: CurrentVersion is empty!")
4356
return
4457
}
4558

46-
if opts.CurrentAPIVersion == "" {
59+
if opts.CurrentAPIVersion == nil {
4760
r.Severity = health.SeverityError
48-
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Developer error: CurrentAPIVersion is empty!"))
61+
r.Error = ptr.Ref("Developer error: CurrentAPIVersion is nil!")
4962
return
5063
}
5164

5265
if opts.ProvisionerDaemonsFn == nil {
5366
r.Severity = health.SeverityError
54-
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Developer error: ProvisionerDaemonsFn is nil!"))
67+
r.Error = ptr.Ref("Developer error: ProvisionerDaemonsFn is nil!")
5568
return
5669
}
5770

58-
daemons, err := opts.ProvisionerDaemonsFn(ctx)
71+
// nolint: gocritic // need an actor to fetch provisioner daemons
72+
daemons, err := opts.ProvisionerDaemonsFn(dbauthz.AsSystemRestricted(ctx))
5973
if err != nil {
6074
r.Severity = health.SeverityError
61-
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Unable to fetch provisioner daemons: %s", err.Error()))
75+
r.Error = ptr.Ref("error fetching provisioner daemons: " + err.Error())
6276
return
6377
}
6478

6579
if len(daemons) == 0 {
6680
r.Severity = health.SeverityError
67-
r.Warnings = append(r.Warnings, health.Messagef(health.CodeProvisionerDaemonsNoProvisionerDaemons, "No provisioner daemons found!"))
81+
r.Error = ptr.Ref("No provisioner daemons found!")
82+
return
6883
}
6984

7085
for _, daemon := range daemons {
86+
// Daemon never connected, skip.
87+
if !daemon.LastSeenAt.Valid {
88+
continue
89+
}
90+
// Daemon has gone away, skip.
91+
if now.Sub(daemon.LastSeenAt.Time) > (opts.StaleInterval) {
92+
continue
93+
}
7194
// For release versions, just check MAJOR.MINOR and ignore patch.
7295
if !semver.IsValid(daemon.Version) {
73-
r.Severity = health.SeverityWarning
96+
if r.Severity.Value() < health.SeverityWarning.Value() {
97+
r.Severity = health.SeverityWarning
98+
}
7499
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Provisioner daemon %q reports invalid version %q", opts.CurrentVersion, daemon.Version))
75-
} else if semver.Compare(semver.MajorMinor(opts.CurrentVersion), semver.MajorMinor(daemon.Version)) > 1 {
76-
r.Severity = health.SeverityWarning
77-
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Provisioner daemon %q has outdated version %q", daemon.Name, daemon.Version))
100+
} else if !buildinfo.VersionsMatch(opts.CurrentVersion, daemon.Version) {
101+
if r.Severity.Value() < health.SeverityWarning.Value() {
102+
r.Severity = health.SeverityWarning
103+
}
104+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeProvisionerDaemonVersionMismatch, "Provisioner daemon %q has outdated version %q", daemon.Name, daemon.Version))
78105
}
79106

80107
// Provisioner daemon API version follows different rules.
81-
// 1) Coderd must support the requested API major version.
82-
// 2) The requested API minor version must be less than or equal to that of Coderd.
83-
ourMaj := semver.Major(opts.CurrentVersion)
84-
theirMaj := semver.Major(daemon.APIVersion)
85-
if semver.Compare(ourMaj, theirMaj) != 0 {
86-
r.Severity = health.SeverityError
87-
r.Warnings = append(r.Warnings, health.Messagef("Provisioner daemon %q requested major API version %s but only %s is available", daemon.Name, theirMaj, ourMaj))
88-
} else if semver.Compare(semver.MajorMinor(opts.CurrentAPIVersion), semver.MajorMinor(daemon.APIVersion)) > 1 {
89-
r.Severity = health.SeverityWarning
90-
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Provisioner daemon %q requested API version %q but only %q is available", daemon.Name, daemon.Version, opts.CurrentAPIVersion))
108+
if _, _, err := apiversion.Parse(daemon.APIVersion); err != nil {
109+
if r.Severity.Value() < health.SeverityError.Value() {
110+
r.Severity = health.SeverityError
111+
}
112+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeUnknown, "Provisioner daemon %q reports invalid API version: %s", daemon.Name, err.Error()))
113+
} else if err := opts.CurrentAPIVersion.Validate(daemon.APIVersion); err != nil {
114+
if r.Severity.Value() < health.SeverityError.Value() {
115+
r.Severity = health.SeverityError
116+
}
117+
r.Warnings = append(r.Warnings, health.Messagef(health.CodeProvisionerDaemonAPIVersionIncompatible, "Provisioner daemon %q reports incompatible API version: %s", daemon.Name, err.Error()))
91118
}
92119
}
93120
}

coderd/healthcheck/provisioner_test.go

Lines changed: 85 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"database/sql"
66
"testing"
7+
"time"
78

89
"github.com/google/uuid"
910
"github.com/stretchr/testify/assert"
@@ -12,111 +13,139 @@ import (
1213
"github.com/coder/coder/v2/coderd/database/dbtime"
1314
"github.com/coder/coder/v2/coderd/healthcheck"
1415
"github.com/coder/coder/v2/coderd/healthcheck/health"
16+
"github.com/coder/coder/v2/coderd/util/apiversion"
17+
"github.com/coder/coder/v2/provisionersdk"
1518
)
1619

1720
func TestProvisionerDaemonReport(t *testing.T) {
1821
t.Parallel()
1922

20-
var ()
21-
2223
for _, tt := range []struct {
2324
name string
2425
currentVersion string
25-
currentAPIVersion string
26+
currentAPIVersion *apiversion.APIVersion
2627
provisionerDaemonsFn func(context.Context) ([]database.ProvisionerDaemon, error)
2728
expectedSeverity health.Severity
2829
expectedWarningCode health.Code
30+
expectedError string
2931
}{
3032
{
31-
name: "current version empty",
32-
currentVersion: "",
33-
expectedSeverity: health.SeverityError,
34-
expectedWarningCode: health.CodeUnknown,
33+
name: "current version empty",
34+
currentVersion: "",
35+
expectedSeverity: health.SeverityError,
36+
expectedError: "Developer error: CurrentVersion is empty",
3537
},
3638
{
37-
name: "current api version empty",
38-
currentVersion: "v1.2.3",
39-
currentAPIVersion: "",
40-
expectedSeverity: health.SeverityError,
41-
expectedWarningCode: health.CodeUnknown,
39+
name: "provisionerdaemonsfn nil",
40+
currentVersion: "v1.2.3",
41+
currentAPIVersion: provisionersdk.VersionCurrent,
42+
expectedSeverity: health.SeverityError,
43+
expectedError: "Developer error: ProvisionerDaemonsFn is nil",
4244
},
4345
{
44-
name: "provisionerdaemonsfn nil",
45-
currentVersion: "v1.2.3",
46-
currentAPIVersion: "v1.0",
47-
expectedSeverity: health.SeverityError,
48-
expectedWarningCode: health.CodeUnknown,
46+
name: "no daemons",
47+
currentVersion: "v1.2.3",
48+
currentAPIVersion: provisionersdk.VersionCurrent,
49+
provisionerDaemonsFn: fakeProvisionerDaemonsFn(),
50+
expectedSeverity: health.SeverityError,
51+
expectedError: "No provisioner daemons found!",
4952
},
5053
{
51-
name: "no daemons",
54+
name: "error fetching daemons",
5255
currentVersion: "v1.2.3",
53-
currentAPIVersion: "v1.0",
56+
currentAPIVersion: provisionersdk.VersionCurrent,
57+
provisionerDaemonsFn: fakeProvisionerDaemonsFnErr(assert.AnError),
5458
expectedSeverity: health.SeverityError,
55-
expectedWarningCode: health.CodeProvisionerDaemonsNoProvisionerDaemons,
56-
provisionerDaemonsFn: fakeProvisionerDaemonsFn(),
59+
expectedError: assert.AnError.Error(),
5760
},
5861
{
5962
name: "one daemon up to date",
6063
currentVersion: "v1.2.3",
61-
currentAPIVersion: "v1.0",
64+
currentAPIVersion: provisionersdk.VersionCurrent,
6265
expectedSeverity: health.SeverityOK,
63-
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "v1.0")),
66+
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "1.0")),
6467
},
6568
{
6669
name: "one daemon out of date",
6770
currentVersion: "v1.2.3",
68-
currentAPIVersion: "v1.0",
71+
currentAPIVersion: provisionersdk.VersionCurrent,
6972
expectedSeverity: health.SeverityWarning,
70-
expectedWarningCode: health.CodeProvisionerDaemonVersionOutOfDate,
71-
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-old", "v1.1.2", "v1.0")),
73+
expectedWarningCode: health.CodeProvisionerDaemonVersionMismatch,
74+
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-old", "v1.1.2", "1.0")),
7275
},
7376
{
7477
name: "major api version not available",
7578
currentVersion: "v1.2.3",
76-
currentAPIVersion: "v1.0",
79+
currentAPIVersion: provisionersdk.VersionCurrent,
7780
expectedSeverity: health.SeverityError,
78-
expectedWarningCode: health.CodeProvisionerDaemonAPIMajorVersionNotAvailable,
79-
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-new-major", "v1.2.3", "v2.0")),
81+
expectedWarningCode: health.CodeProvisionerDaemonAPIVersionIncompatible,
82+
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-new-major", "v1.2.3", "2.0")),
8083
},
8184
{
8285
name: "minor api version not available",
8386
currentVersion: "v1.2.3",
84-
currentAPIVersion: "v1.0",
85-
expectedSeverity: health.SeverityWarning,
86-
expectedWarningCode: health.CodeProvisionerDaemonAPIMinorVersionNotAvailable,
87-
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-new-minor", "v1.2.3", "v1.1")),
87+
currentAPIVersion: provisionersdk.VersionCurrent,
88+
expectedSeverity: health.SeverityError,
89+
expectedWarningCode: health.CodeProvisionerDaemonAPIVersionIncompatible,
90+
provisionerDaemonsFn: fakeProvisionerDaemonsFn(fakeProvisionerDaemon(t, "pd-new-minor", "v1.2.3", "1.1")),
91+
},
92+
{
93+
name: "api version backward compat",
94+
currentVersion: "v2.3.4",
95+
currentAPIVersion: apiversion.New([]int{2, 1}, 0),
96+
expectedSeverity: health.SeverityOK,
97+
provisionerDaemonsFn: fakeProvisionerDaemonsFn(
98+
fakeProvisionerDaemon(t, "pd-old-api", "v2.3.4", "1.0")),
8899
},
89100
{
90101
name: "one up to date, one out of date",
91102
currentVersion: "v1.2.3",
92-
currentAPIVersion: "v1.0",
103+
currentAPIVersion: provisionersdk.VersionCurrent,
93104
expectedSeverity: health.SeverityWarning,
94-
expectedWarningCode: health.CodeProvisionerDaemonVersionOutOfDate,
105+
expectedWarningCode: health.CodeProvisionerDaemonVersionMismatch,
95106
provisionerDaemonsFn: fakeProvisionerDaemonsFn(
96-
fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "v1.0"),
97-
fakeProvisionerDaemon(t, "pd-old", "v1.1.2", "v1.0")),
107+
fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "1.0"),
108+
fakeProvisionerDaemon(t, "pd-old", "v1.1.2", "1.0")),
98109
},
99110
{
100-
name: "one up to date, one newer",
101-
currentVersion: "v1.2.3",
102-
currentAPIVersion: "v1.0",
111+
name: "one up to date, one newer",
112+
currentVersion: "v1.2.3",
113+
currentAPIVersion: provisionersdk.VersionCurrent,
114+
expectedSeverity: health.SeverityWarning,
115+
expectedWarningCode: health.CodeProvisionerDaemonVersionMismatch,
116+
provisionerDaemonsFn: fakeProvisionerDaemonsFn(
117+
fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "1.0"),
118+
fakeProvisionerDaemon(t, "pd-new", "v2.3.4", "1.0")),
119+
},
120+
{
121+
name: "one up to date, one stale older",
122+
currentVersion: "v2.3.4",
123+
currentAPIVersion: provisionersdk.VersionCurrent,
103124
expectedSeverity: health.SeverityOK,
104125
provisionerDaemonsFn: fakeProvisionerDaemonsFn(
105-
fakeProvisionerDaemon(t, "pd-ok", "v1.2.3", "v1.0"),
106-
fakeProvisionerDaemon(t, "pd-new", "v2.3.4", "v1.0")),
126+
fakeProvisionerDaemonStale(t, "pd-ok", "v1.2.3", "0.9", dbtime.Now().Add(-5*time.Minute)),
127+
fakeProvisionerDaemon(t, "pd-new", "v2.3.4", "1.0")),
107128
},
108129
} {
109130
tt := tt
110131
t.Run(tt.name, func(t *testing.T) {
111132
t.Parallel()
112133

113-
var rpt healthcheck.ProvisionerDaemonReport
114-
var opts healthcheck.ProvisionerDaemonReportOptions
134+
var rpt healthcheck.ProvisionerDaemonsReport
135+
var opts healthcheck.ProvisionerDaemonsReportOptions
115136
opts.CurrentVersion = tt.currentVersion
116-
opts.CurrentAPIVersion = tt.currentAPIVersion
137+
if tt.currentAPIVersion == nil {
138+
opts.CurrentAPIVersion = provisionersdk.VersionCurrent
139+
} else {
140+
opts.CurrentAPIVersion = tt.currentAPIVersion
141+
}
117142
if tt.provisionerDaemonsFn != nil {
118143
opts.ProvisionerDaemonsFn = tt.provisionerDaemonsFn
119144
}
145+
now := dbtime.Now()
146+
opts.TimeNowFn = func() time.Time {
147+
return now
148+
}
120149

121150
rpt.Run(context.Background(), &opts)
122151

@@ -133,6 +162,9 @@ func TestProvisionerDaemonReport(t *testing.T) {
133162
} else {
134163
assert.Empty(t, rpt.Warnings)
135164
}
165+
if tt.expectedError != "" && assert.NotNil(t, rpt.Error) {
166+
assert.Contains(t, *rpt.Error, tt.expectedError)
167+
}
136168
})
137169
}
138170
}
@@ -163,3 +195,11 @@ func fakeProvisionerDaemonsFnErr(err error) func(context.Context) ([]database.Pr
163195
return nil, err
164196
}
165197
}
198+
199+
func fakeProvisionerDaemonStale(t *testing.T, name, version, apiVersion string, lastSeenAt time.Time) database.ProvisionerDaemon {
200+
t.Helper()
201+
d := fakeProvisionerDaemon(t, name, version, apiVersion)
202+
d.LastSeenAt.Valid = true
203+
d.LastSeenAt.Time = lastSeenAt
204+
return d
205+
}

0 commit comments

Comments
 (0)