Skip to content

Commit 1286898

Browse files
committed
feat(agent/agentcontainers): implement ignore customization for devcontainers
Fixes coder/internal#737
1 parent fcf9371 commit 1286898

File tree

3 files changed

+291
-43
lines changed

3 files changed

+291
-43
lines changed

agent/agentcontainers/api.go

Lines changed: 123 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ type API struct {
8383
recreateErrorTimes map[string]time.Time // By workspace folder.
8484
injectedSubAgentProcs map[string]subAgentProcess // By workspace folder.
8585
usingWorkspaceFolderName map[string]bool // By workspace folder.
86+
ignoredDevcontainers map[string]bool // By workspace folder. Tracks three states (true, false and not checked).
8687
asyncWg sync.WaitGroup
8788

8889
devcontainerLogSourceIDs map[string]uuid.UUID // By workspace folder.
@@ -276,6 +277,7 @@ func NewAPI(logger slog.Logger, options ...Option) *API {
276277
devcontainerNames: make(map[string]bool),
277278
knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer),
278279
configFileModifiedTimes: make(map[string]time.Time),
280+
ignoredDevcontainers: make(map[string]bool),
279281
recreateSuccessTimes: make(map[string]time.Time),
280282
recreateErrorTimes: make(map[string]time.Time),
281283
scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} },
@@ -804,6 +806,10 @@ func (api *API) getContainers() (codersdk.WorkspaceAgentListContainersResponse,
804806
if len(api.knownDevcontainers) > 0 {
805807
devcontainers = make([]codersdk.WorkspaceAgentDevcontainer, 0, len(api.knownDevcontainers))
806808
for _, dc := range api.knownDevcontainers {
809+
if api.ignoredDevcontainers[dc.WorkspaceFolder] {
810+
continue
811+
}
812+
807813
// Include the agent if it's running (we're iterating over
808814
// copies, so mutating is fine).
809815
if proc := api.injectedSubAgentProcs[dc.WorkspaceFolder]; proc.agent.ID != uuid.Nil {
@@ -1036,11 +1042,46 @@ func (api *API) markDevcontainerDirty(configPath string, modifiedAt time.Time) {
10361042
logger.Info(api.ctx, "marking devcontainer as dirty")
10371043
dc.Dirty = true
10381044
}
1045+
if api.ignoredDevcontainers[dc.WorkspaceFolder] {
1046+
logger.Debug(api.ctx, "clearing devcontainer ignored state")
1047+
delete(api.ignoredDevcontainers, dc.WorkspaceFolder) // Allow re-reading config.
1048+
}
10391049

10401050
api.knownDevcontainers[dc.WorkspaceFolder] = dc
10411051
}
10421052
}
10431053

1054+
// markDevcontainerIgnored marks a devcontainer as ignored. Must not be called
1055+
// with the API lock held.
1056+
func (api *API) markDevcontainerIgnored(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer, proc subAgentProcess) subAgentProcess {
1057+
logger := api.logger.With(
1058+
slog.F("devcontainer_id", dc.ID),
1059+
slog.F("devcontainer_name", dc.Name),
1060+
slog.F("workspace_folder", dc.WorkspaceFolder),
1061+
slog.F("config_path", dc.ConfigPath),
1062+
)
1063+
1064+
// We only allow ignore to be set in the root customization layer to
1065+
// prevent weird interactions with devcontainer features.
1066+
logger.Debug(ctx, "marking devcontainer as ignored")
1067+
proc.stop()
1068+
if proc.agent.ID != uuid.Nil {
1069+
// If we stop the subagent, we also need to delete it.
1070+
client := *api.subAgentClient.Load()
1071+
if err := client.Delete(ctx, proc.agent.ID); err != nil {
1072+
api.logger.Error(ctx, "delete subagent failed", slog.Error(err), slog.F("subagent_id", proc.agent.ID))
1073+
}
1074+
}
1075+
// Reset agent and containerID to force config re-reading if ignore is toggled.
1076+
proc.agent = SubAgent{}
1077+
proc.containerID = ""
1078+
api.mu.Lock()
1079+
api.ignoredDevcontainers[dc.WorkspaceFolder] = true
1080+
api.mu.Unlock()
1081+
1082+
return proc
1083+
}
1084+
10441085
// cleanupSubAgents removes subagents that are no longer managed by
10451086
// this agent. This is usually only run at startup to ensure a clean
10461087
// slate. This method has an internal timeout to prevent blocking
@@ -1092,6 +1133,10 @@ func (api *API) cleanupSubAgents(ctx context.Context) error {
10921133
// This method uses an internal timeout to prevent blocking indefinitely
10931134
// if something goes wrong with the injection.
10941135
func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer) (err error) {
1136+
if api.ignoredDevcontainers[dc.WorkspaceFolder] {
1137+
return nil
1138+
}
1139+
10951140
ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout)
10961141
defer cancel()
10971142

@@ -1113,6 +1158,29 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
11131158
maybeRecreateSubAgent := false
11141159
proc, injected := api.injectedSubAgentProcs[dc.WorkspaceFolder]
11151160
if injected {
1161+
if _, ignoreChecked := api.ignoredDevcontainers[dc.WorkspaceFolder]; !ignoreChecked {
1162+
// If ignore status has not yet been checked, or cleared by
1163+
// modifications to the devcontainer.json, we must read it
1164+
// to determine the current status. This can happen while
1165+
// the devcontainer subagent is already running or before
1166+
// we've had a chance to inject it.
1167+
//
1168+
// Note, for simplicity, we do not try to optimize to reduce
1169+
// ReadConfig calls here.
1170+
config, err := api.dccli.ReadConfig(ctx, dc.WorkspaceFolder, dc.ConfigPath, nil)
1171+
if err != nil {
1172+
return xerrors.Errorf("read devcontainer config: %w", err)
1173+
}
1174+
1175+
if config.Configuration.Customizations.Coder.Ignore {
1176+
api.mu.Unlock()
1177+
proc = api.markDevcontainerIgnored(ctx, dc, proc)
1178+
api.mu.Lock()
1179+
api.injectedSubAgentProcs[dc.WorkspaceFolder] = proc
1180+
return nil
1181+
}
1182+
}
1183+
11161184
if proc.containerID == container.ID && proc.ctx.Err() == nil {
11171185
// Same container and running, no need to reinject.
11181186
return nil
@@ -1131,7 +1199,8 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
11311199
// Container ID changed or the subagent process is not running,
11321200
// stop the existing subagent context to replace it.
11331201
proc.stop()
1134-
} else {
1202+
}
1203+
if proc.agent.OperatingSystem == "" {
11351204
// Set SubAgent defaults.
11361205
proc.agent.OperatingSystem = "linux" // Assuming Linux for devcontainers.
11371206
}
@@ -1188,48 +1257,6 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
11881257
proc.agent.Architecture = arch
11891258
}
11901259

1191-
agentBinaryPath, err := os.Executable()
1192-
if err != nil {
1193-
return xerrors.Errorf("get agent binary path: %w", err)
1194-
}
1195-
agentBinaryPath, err = filepath.EvalSymlinks(agentBinaryPath)
1196-
if err != nil {
1197-
return xerrors.Errorf("resolve agent binary path: %w", err)
1198-
}
1199-
1200-
// If we scripted this as a `/bin/sh` script, we could reduce these
1201-
// steps to one instruction, speeding up the injection process.
1202-
//
1203-
// Note: We use `path` instead of `filepath` here because we are
1204-
// working with Unix-style paths inside the container.
1205-
if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "mkdir", "-p", path.Dir(coderPathInsideContainer)); err != nil {
1206-
return xerrors.Errorf("create agent directory in container: %w", err)
1207-
}
1208-
1209-
if err := api.ccli.Copy(ctx, container.ID, agentBinaryPath, coderPathInsideContainer); err != nil {
1210-
return xerrors.Errorf("copy agent binary: %w", err)
1211-
}
1212-
1213-
logger.Info(ctx, "copied agent binary to container")
1214-
1215-
// Make sure the agent binary is executable so we can run it (the
1216-
// user doesn't matter since we're making it executable for all).
1217-
if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "0755", path.Dir(coderPathInsideContainer), coderPathInsideContainer); err != nil {
1218-
return xerrors.Errorf("set agent binary executable: %w", err)
1219-
}
1220-
1221-
// Attempt to add CAP_NET_ADMIN to the binary to improve network
1222-
// performance (optional, allow to fail). See `bootstrap_linux.sh`.
1223-
// TODO(mafredri): Disable for now until we can figure out why this
1224-
// causes the following error on some images:
1225-
//
1226-
// Image: mcr.microsoft.com/devcontainers/base:ubuntu
1227-
// Error: /.coder-agent/coder: Operation not permitted
1228-
//
1229-
// if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "setcap", "cap_net_admin+ep", coderPathInsideContainer); err != nil {
1230-
// logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err))
1231-
// }
1232-
12331260
subAgentConfig := proc.agent.CloneConfig(dc)
12341261
if proc.agent.ID == uuid.Nil || maybeRecreateSubAgent {
12351262
subAgentConfig.Architecture = arch
@@ -1246,6 +1273,7 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
12461273
}
12471274

12481275
var (
1276+
ignore bool
12491277
appsWithPossibleDuplicates []SubAgentApp
12501278
workspaceFolder = DevcontainerDefaultContainerWorkspaceFolder
12511279
)
@@ -1269,8 +1297,13 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
12691297
return err
12701298
}
12711299

1300+
ignore = config.Configuration.Customizations.Coder.Ignore
12721301
workspaceFolder = config.Workspace.WorkspaceFolder
12731302

1303+
if ignore {
1304+
return nil
1305+
}
1306+
12741307
// NOTE(DanielleMaywood):
12751308
// We only want to take an agent name specified in the root customization layer.
12761309
// This restricts the ability for a feature to specify the agent name. We may revisit
@@ -1317,6 +1350,11 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
13171350
api.logger.Error(ctx, "unable to read devcontainer config", slog.Error(err))
13181351
}
13191352

1353+
if ignore {
1354+
proc = api.markDevcontainerIgnored(ctx, dc, proc)
1355+
return nil
1356+
}
1357+
13201358
displayApps := make([]codersdk.DisplayApp, 0, len(displayAppsMap))
13211359
for app, enabled := range displayAppsMap {
13221360
if enabled {
@@ -1349,6 +1387,48 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
13491387
subAgentConfig.Directory = workspaceFolder
13501388
}
13511389

1390+
agentBinaryPath, err := os.Executable()
1391+
if err != nil {
1392+
return xerrors.Errorf("get agent binary path: %w", err)
1393+
}
1394+
agentBinaryPath, err = filepath.EvalSymlinks(agentBinaryPath)
1395+
if err != nil {
1396+
return xerrors.Errorf("resolve agent binary path: %w", err)
1397+
}
1398+
1399+
// If we scripted this as a `/bin/sh` script, we could reduce these
1400+
// steps to one instruction, speeding up the injection process.
1401+
//
1402+
// Note: We use `path` instead of `filepath` here because we are
1403+
// working with Unix-style paths inside the container.
1404+
if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "mkdir", "-p", path.Dir(coderPathInsideContainer)); err != nil {
1405+
return xerrors.Errorf("create agent directory in container: %w", err)
1406+
}
1407+
1408+
if err := api.ccli.Copy(ctx, container.ID, agentBinaryPath, coderPathInsideContainer); err != nil {
1409+
return xerrors.Errorf("copy agent binary: %w", err)
1410+
}
1411+
1412+
logger.Info(ctx, "copied agent binary to container")
1413+
1414+
// Make sure the agent binary is executable so we can run it (the
1415+
// user doesn't matter since we're making it executable for all).
1416+
if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "0755", path.Dir(coderPathInsideContainer), coderPathInsideContainer); err != nil {
1417+
return xerrors.Errorf("set agent binary executable: %w", err)
1418+
}
1419+
1420+
// Attempt to add CAP_NET_ADMIN to the binary to improve network
1421+
// performance (optional, allow to fail). See `bootstrap_linux.sh`.
1422+
// TODO(mafredri): Disable for now until we can figure out why this
1423+
// causes the following error on some images:
1424+
//
1425+
// Image: mcr.microsoft.com/devcontainers/base:ubuntu
1426+
// Error: /.coder-agent/coder: Operation not permitted
1427+
//
1428+
// if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "setcap", "cap_net_admin+ep", coderPathInsideContainer); err != nil {
1429+
// logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err))
1430+
// }
1431+
13521432
deleteSubAgent := proc.agent.ID != uuid.Nil && maybeRecreateSubAgent && !proc.agent.EqualConfig(subAgentConfig)
13531433
if deleteSubAgent {
13541434
logger.Debug(ctx, "deleting existing subagent for recreation", slog.F("agent_id", proc.agent.ID))

0 commit comments

Comments
 (0)