Skip to content

Commit 0c50774

Browse files
authored
fix: avoid missed logs when streaming startup logs (#8029)
* feat(coderd,agent): send startup log eof at the end * fix(coderd): fix edge case in startup log pubsub * fix(coderd): ensure startup logs are closed on lifecycle state change (fallback) * fix(codersdk): fix startup log channel shared memory bug * fix(site): remove the EOF log line
1 parent 247f8a9 commit 0c50774

28 files changed

+658
-131
lines changed

agent/agent.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,11 @@ func (a *agent) trackScriptLogs(ctx context.Context, reader io.Reader) (chan str
929929
if errors.As(err, &sdkErr) {
930930
if sdkErr.StatusCode() == http.StatusRequestEntityTooLarge {
931931
a.logger.Warn(ctx, "startup logs too large, dropping logs")
932+
// Always send the EOF even if logs overflow.
933+
if len(logsToSend) > 1 && logsToSend[len(logsToSend)-1].EOF {
934+
logsToSend = logsToSend[len(logsToSend)-1:]
935+
continue
936+
}
932937
break
933938
}
934939
}
@@ -978,6 +983,14 @@ func (a *agent) trackScriptLogs(ctx context.Context, reader io.Reader) (chan str
978983
Output: scanner.Text(),
979984
})
980985
}
986+
if err := scanner.Err(); err != nil {
987+
a.logger.Error(ctx, "scan startup logs", slog.Error(err))
988+
}
989+
queueLog(agentsdk.StartupLog{
990+
CreatedAt: database.Now(),
991+
Output: "",
992+
EOF: true,
993+
})
981994
defer close(logsFinished)
982995
logsFlushed.L.Lock()
983996
for {

agent/agent_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -905,8 +905,10 @@ func TestAgent_StartupScript(t *testing.T) {
905905
return len(got) > 0 && got[len(got)-1] == codersdk.WorkspaceAgentLifecycleReady
906906
}, testutil.WaitShort, testutil.IntervalMedium)
907907

908-
require.Len(t, client.getStartupLogs(), 1)
908+
require.Len(t, client.getStartupLogs(), 2)
909909
require.Equal(t, output, client.getStartupLogs()[0].Output)
910+
require.False(t, client.getStartupLogs()[0].EOF)
911+
require.True(t, client.getStartupLogs()[1].EOF)
910912
})
911913
// This ensures that even when coderd sends back that the startup
912914
// script has written too many lines it will still succeed!

coderd/apidoc/docs.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/apidoc/swagger.json

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbauthz/dbauthz.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,6 +1407,14 @@ func (q *querier) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg dat
14071407
return q.db.GetWorkspaceAgentStartupLogsAfter(ctx, arg)
14081408
}
14091409

1410+
func (q *querier) GetWorkspaceAgentStartupLogsEOF(ctx context.Context, agentID uuid.UUID) (bool, error) {
1411+
_, err := q.GetWorkspaceAgentByID(ctx, agentID)
1412+
if err != nil {
1413+
return false, err
1414+
}
1415+
return q.db.GetWorkspaceAgentStartupLogsEOF(ctx, agentID)
1416+
}
1417+
14101418
func (q *querier) GetWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
14111419
return q.db.GetWorkspaceAgentStats(ctx, createdAfter)
14121420
}

coderd/database/dbfake/dbfake.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2722,14 +2722,30 @@ func (q *fakeQuerier) GetWorkspaceAgentStartupLogsAfter(_ context.Context, arg d
27222722
if log.AgentID != arg.AgentID {
27232723
continue
27242724
}
2725-
if arg.CreatedAfter != 0 && log.ID < arg.CreatedAfter {
2725+
if arg.CreatedAfter != 0 && log.ID <= arg.CreatedAfter {
27262726
continue
27272727
}
27282728
logs = append(logs, log)
27292729
}
27302730
return logs, nil
27312731
}
27322732

2733+
func (q *fakeQuerier) GetWorkspaceAgentStartupLogsEOF(_ context.Context, agentID uuid.UUID) (bool, error) {
2734+
q.mutex.RLock()
2735+
defer q.mutex.RUnlock()
2736+
2737+
var lastLog database.WorkspaceAgentStartupLog
2738+
for _, log := range q.workspaceAgentLogs {
2739+
if log.AgentID != agentID {
2740+
continue
2741+
}
2742+
if log.ID > lastLog.ID {
2743+
lastLog = log
2744+
}
2745+
}
2746+
return lastLog.EOF, nil
2747+
}
2748+
27332749
func (q *fakeQuerier) GetWorkspaceAgentStats(_ context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
27342750
q.mutex.RLock()
27352751
defer q.mutex.RUnlock()
@@ -4013,7 +4029,7 @@ func (q *fakeQuerier) InsertWorkspaceAgentStartupLogs(_ context.Context, arg dat
40134029
defer q.mutex.Unlock()
40144030

40154031
logs := []database.WorkspaceAgentStartupLog{}
4016-
id := int64(1)
4032+
id := int64(0)
40174033
if len(q.workspaceAgentLogs) > 0 {
40184034
id = q.workspaceAgentLogs[len(q.workspaceAgentLogs)-1].ID
40194035
}
@@ -4026,6 +4042,7 @@ func (q *fakeQuerier) InsertWorkspaceAgentStartupLogs(_ context.Context, arg dat
40264042
CreatedAt: arg.CreatedAt[index],
40274043
Level: arg.Level[index],
40284044
Output: output,
4045+
EOF: arg.EOF[index],
40294046
})
40304047
outputLength += int32(len(output))
40314048
}

coderd/database/dbmetrics/dbmetrics.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dbmock/dbmock.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/dump.sql

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE workspace_agent_startup_logs DROP COLUMN eof;

0 commit comments

Comments
 (0)