Skip to content

Commit ff51035

Browse files
committed
Merge branch 'main' into mafredri/agent-connection-timeout-troubleshooting
2 parents 436238f + bf4a6fb commit ff51035

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+3731
-1651
lines changed

.github/workflows/mlc_config.json

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,12 @@
99
{
1010
"pattern": "developer.github.com"
1111
},
12+
{
13+
"pattern": "docs.github.com"
14+
},
15+
{
16+
"pattern": "support.google.com"
17+
},
1218
{
1319
"pattern": "tailscale.com"
1420
}

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -88,6 +88,7 @@
8888
"promptui",
8989
"protobuf",
9090
"provisionerd",
91+
"provisionerdserver",
9192
"provisionersdk",
9293
"ptty",
9394
"ptys",

agent/apphealth.go

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ import (
77
"time"
88

99
"golang.org/x/xerrors"
10+
"github.com/google/uuid"
1011

1112
"cdr.dev/slog"
1213
"github.com/coder/coder/codersdk"
@@ -31,9 +32,9 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
3132
}
3233

3334
hasHealthchecksEnabled := false
34-
health := make(map[string]codersdk.WorkspaceAppHealth, 0)
35+
health := make(map[uuid.UUID]codersdk.WorkspaceAppHealth, 0)
3536
for _, app := range apps {
36-
health[app.DisplayName] = app.Health
37+
health[app.ID] = app.Health
3738
if !hasHealthchecksEnabled && app.Health != codersdk.WorkspaceAppHealthDisabled {
3839
hasHealthchecksEnabled = true
3940
}
@@ -46,7 +47,7 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
4647

4748
// run a ticker for each app health check.
4849
var mu sync.RWMutex
49-
failures := make(map[string]int, 0)
50+
failures := make(map[uuid.UUID]int, 0)
5051
for _, nextApp := range apps {
5152
if !shouldStartTicker(nextApp) {
5253
continue
@@ -85,21 +86,21 @@ func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.Workspace
8586
}()
8687
if err != nil {
8788
mu.Lock()
88-
if failures[app.DisplayName] < int(app.Healthcheck.Threshold) {
89+
if failures[app.ID] < int(app.Healthcheck.Threshold) {
8990
// increment the failure count and keep status the same.
9091
// we will change it when we hit the threshold.
91-
failures[app.DisplayName]++
92+
failures[app.ID]++
9293
} else {
9394
// set to unhealthy if we hit the failure threshold.
9495
// we stop incrementing at the threshold to prevent the failure value from increasing forever.
95-
health[app.DisplayName] = codersdk.WorkspaceAppHealthUnhealthy
96+
health[app.ID] = codersdk.WorkspaceAppHealthUnhealthy
9697
}
9798
mu.Unlock()
9899
} else {
99100
mu.Lock()
100101
// we only need one successful health check to be considered healthy.
101-
health[app.DisplayName] = codersdk.WorkspaceAppHealthHealthy
102-
failures[app.DisplayName] = 0
102+
health[app.ID] = codersdk.WorkspaceAppHealthHealthy
103+
failures[app.ID] = 0
103104
mu.Unlock()
104105
}
105106

@@ -155,7 +156,7 @@ func shouldStartTicker(app codersdk.WorkspaceApp) bool {
155156
return app.Healthcheck.URL != "" && app.Healthcheck.Interval > 0 && app.Healthcheck.Threshold > 0
156157
}
157158

158-
func healthChanged(old map[string]codersdk.WorkspaceAppHealth, new map[string]codersdk.WorkspaceAppHealth) bool {
159+
func healthChanged(old map[uuid.UUID]codersdk.WorkspaceAppHealth, new map[uuid.UUID]codersdk.WorkspaceAppHealth) bool {
159160
for name, newValue := range new {
160161
oldValue, found := old[name]
161162
if !found {
@@ -169,8 +170,8 @@ func healthChanged(old map[string]codersdk.WorkspaceAppHealth, new map[string]co
169170
return false
170171
}
171172

172-
func copyHealth(h1 map[string]codersdk.WorkspaceAppHealth) map[string]codersdk.WorkspaceAppHealth {
173-
h2 := make(map[string]codersdk.WorkspaceAppHealth, 0)
173+
func copyHealth(h1 map[uuid.UUID]codersdk.WorkspaceAppHealth) map[uuid.UUID]codersdk.WorkspaceAppHealth {
174+
h2 := make(map[uuid.UUID]codersdk.WorkspaceAppHealth, 0)
174175
for k, v := range h1 {
175176
h2[k] = v
176177
}

agent/apphealth_test.go

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -27,12 +27,12 @@ func TestAppHealth(t *testing.T) {
2727
defer cancel()
2828
apps := []codersdk.WorkspaceApp{
2929
{
30-
DisplayName: "app1",
30+
Slug: "app1",
3131
Healthcheck: codersdk.Healthcheck{},
3232
Health: codersdk.WorkspaceAppHealthDisabled,
3333
},
3434
{
35-
DisplayName: "app2",
35+
Slug: "app2",
3636
Healthcheck: codersdk.Healthcheck{
3737
// URL: We don't set the URL for this test because the setup will
3838
// create a httptest server for us and set it for us.
@@ -69,7 +69,7 @@ func TestAppHealth(t *testing.T) {
6969
defer cancel()
7070
apps := []codersdk.WorkspaceApp{
7171
{
72-
DisplayName: "app2",
72+
Slug: "app2",
7373
Healthcheck: codersdk.Healthcheck{
7474
// URL: We don't set the URL for this test because the setup will
7575
// create a httptest server for us and set it for us.
@@ -102,7 +102,7 @@ func TestAppHealth(t *testing.T) {
102102
defer cancel()
103103
apps := []codersdk.WorkspaceApp{
104104
{
105-
DisplayName: "app2",
105+
Slug: "app2",
106106
Healthcheck: codersdk.Healthcheck{
107107
// URL: We don't set the URL for this test because the setup will
108108
// create a httptest server for us and set it for us.
@@ -137,7 +137,7 @@ func TestAppHealth(t *testing.T) {
137137
defer cancel()
138138
apps := []codersdk.WorkspaceApp{
139139
{
140-
DisplayName: "app2",
140+
Slug: "app2",
141141
Healthcheck: codersdk.Healthcheck{
142142
// URL: We don't set the URL for this test because the setup will
143143
// create a httptest server for us and set it for us.
@@ -185,9 +185,9 @@ func setupAppReporter(ctx context.Context, t *testing.T, apps []codersdk.Workspa
185185
}
186186
postWorkspaceAgentAppHealth := func(_ context.Context, req codersdk.PostWorkspaceAppHealthsRequest) error {
187187
mu.Lock()
188-
for name, health := range req.Healths {
188+
for id, health := range req.Healths {
189189
for i, app := range apps {
190-
if app.DisplayName != name {
190+
if app.ID != id {
191191
continue
192192
}
193193
app.Health = health

cli/agent.go

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ import (
44
"context"
55
"fmt"
66
"net/http"
7-
_ "net/http/pprof" //nolint: gosec
7+
"net/http/pprof"
88
"net/url"
99
"os"
1010
"path/filepath"
@@ -28,7 +28,6 @@ import (
2828
func workspaceAgent() *cobra.Command {
2929
var (
3030
auth string
31-
pprofEnabled bool
3231
pprofAddress string
3332
noReap bool
3433
)
@@ -82,15 +81,11 @@ func workspaceAgent() *cobra.Command {
8281
// Set a reasonable timeout so requests can't hang forever!
8382
client.HTTPClient.Timeout = 10 * time.Second
8483

85-
if pprofEnabled {
86-
srvClose := serveHandler(cmd.Context(), logger, nil, pprofAddress, "pprof")
87-
defer srvClose()
88-
} else {
89-
// If pprof wasn't enabled at startup, allow a
90-
// `kill -USR1 $agent_pid` to start it (on Unix).
91-
srvClose := agentStartPPROFOnUSR1(cmd.Context(), logger, pprofAddress)
92-
defer srvClose()
93-
}
84+
// Enable pprof handler
85+
// This prevents the pprof import from being accidentally deleted.
86+
_ = pprof.Handler
87+
pprofSrvClose := serveHandler(cmd.Context(), logger, nil, pprofAddress, "pprof")
88+
defer pprofSrvClose()
9489

9590
// exchangeToken returns a session token.
9691
// This is abstracted to allow for the same looping condition
@@ -177,7 +172,6 @@ func workspaceAgent() *cobra.Command {
177172
}
178173

179174
cliflag.StringVarP(cmd.Flags(), &auth, "auth", "", "CODER_AGENT_AUTH", "token", "Specify the authentication type to use for the agent")
180-
cliflag.BoolVarP(cmd.Flags(), &pprofEnabled, "pprof-enable", "", "CODER_AGENT_PPROF_ENABLE", false, "Enable serving pprof metrics on the address defined by --pprof-address.")
181175
cliflag.BoolVarP(cmd.Flags(), &noReap, "no-reap", "", "", false, "Do not start a process reaper.")
182176
cliflag.StringVarP(cmd.Flags(), &pprofAddress, "pprof-address", "", "CODER_AGENT_PPROF_ADDRESS", "127.0.0.1:6060", "The address to serve pprof.")
183177
return cmd

cli/agent_unix.go

Lines changed: 0 additions & 38 deletions
This file was deleted.

cli/agent_windows.go

Lines changed: 0 additions & 12 deletions
This file was deleted.

cli/deployment/config.go

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -151,12 +151,6 @@ func newConfig() *codersdk.DeploymentConfig {
151151
Flag: "in-memory",
152152
Hidden: true,
153153
},
154-
ProvisionerDaemons: &codersdk.DeploymentConfigField[int]{
155-
Name: "Provisioner Daemons",
156-
Usage: "Number of provisioner daemons to create on start. If builds are stuck in queued state for a long time, consider increasing this.",
157-
Flag: "provisioner-daemons",
158-
Default: 3,
159-
},
160154
PostgresURL: &codersdk.DeploymentConfigField[string]{
161155
Name: "Postgres Connection URL",
162156
Usage: "URL of a PostgreSQL database. If empty, PostgreSQL binaries will be downloaded from Maven (https://repo1.maven.org/maven2) and store all data in the config root. Access the built-in database with \"coder server postgres-builtin-url\".",
@@ -359,6 +353,20 @@ func newConfig() *codersdk.DeploymentConfig {
359353
Flag: "user-workspace-quota",
360354
Enterprise: true,
361355
},
356+
Provisioner: &codersdk.ProvisionerConfig{
357+
Daemons: &codersdk.DeploymentConfigField[int]{
358+
Name: "Provisioner Daemons",
359+
Usage: "Number of provisioner daemons to create on start. If builds are stuck in queued state for a long time, consider increasing this.",
360+
Flag: "provisioner-daemons",
361+
Default: 3,
362+
},
363+
ForceCancelInterval: &codersdk.DeploymentConfigField[time.Duration]{
364+
Name: "Force Cancel Interval",
365+
Usage: "Time to force cancel provisioning tasks that are stuck.",
366+
Flag: "provisioner-force-cancel-interval",
367+
Default: 10 * time.Minute,
368+
},
369+
},
362370
}
363371
}
364372

cli/deployment/config_test.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ func TestConfig(t *testing.T) {
4747
require.Equal(t, config.Pprof.Enable.Value, true)
4848
require.Equal(t, config.Prometheus.Address.Value, "hello-world")
4949
require.Equal(t, config.Prometheus.Enable.Value, true)
50-
require.Equal(t, config.ProvisionerDaemons.Value, 5)
50+
require.Equal(t, config.Provisioner.Daemons.Value, 5)
5151
require.Equal(t, config.SecureAuthCookie.Value, true)
5252
require.Equal(t, config.SSHKeygenAlgorithm.Value, "potato")
5353
require.Equal(t, config.Telemetry.Enable.Value, false)

0 commit comments

Comments
 (0)