Skip to content

Commit 61d4e3e

Browse files
committed
feat: add workspace agent stat reporting to telemetry
This aggregates stats periodically and sends them by agent ID to our telemetry server. It should help us identify which editors are primarily in use.
1 parent 9b2abf0 commit 61d4e3e

File tree

7 files changed

+237
-0
lines changed

7 files changed

+237
-0
lines changed

coderd/database/dbauthz/system.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,10 @@ func (q *querier) GetDeploymentWorkspaceAgentStats(ctx context.Context, createdA
284284
return q.db.GetDeploymentWorkspaceAgentStats(ctx, createdAfter)
285285
}
286286

287+
func (q *querier) GetWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
288+
return q.db.GetWorkspaceAgentStats(ctx, createdAfter)
289+
}
290+
287291
// GetDeploymentWorkspaceStats forwards the request to the wrapped database
// and hands its row and error back to the caller unchanged. No additional
// checks are performed in this method.
func (q *querier) GetDeploymentWorkspaceStats(ctx context.Context) (database.GetDeploymentWorkspaceStatsRow, error) {
	row, err := q.db.GetDeploymentWorkspaceStats(ctx)
	return row, err
}

coderd/database/dbfake/databasefake.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3707,6 +3707,79 @@ func (q *fakeQuerier) GetDeploymentWorkspaceStats(ctx context.Context) (database
37073707
return stat, nil
37083708
}
37093709

3710+
func (q *fakeQuerier) GetWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
3711+
q.mutex.RLock()
3712+
defer q.mutex.RUnlock()
3713+
3714+
agentStatsCreatedAfter := make([]database.WorkspaceAgentStat, 0)
3715+
for _, agentStat := range q.workspaceAgentStats {
3716+
if agentStat.CreatedAt.After(createdAfter) {
3717+
agentStatsCreatedAfter = append(agentStatsCreatedAfter, agentStat)
3718+
}
3719+
}
3720+
3721+
latestAgentStats := map[uuid.UUID]database.WorkspaceAgentStat{}
3722+
for _, agentStat := range q.workspaceAgentStats {
3723+
if agentStat.CreatedAt.After(createdAfter) {
3724+
latestAgentStats[agentStat.AgentID] = agentStat
3725+
}
3726+
}
3727+
3728+
statByAgent := map[uuid.UUID]database.GetWorkspaceAgentStatsRow{}
3729+
for _, agentStat := range latestAgentStats {
3730+
stat := statByAgent[agentStat.AgentID]
3731+
stat.SessionCountVSCode += agentStat.SessionCountVSCode
3732+
stat.SessionCountJetBrains += agentStat.SessionCountJetBrains
3733+
stat.SessionCountReconnectingPTY += agentStat.SessionCountReconnectingPTY
3734+
stat.SessionCountSSH += agentStat.SessionCountSSH
3735+
statByAgent[agentStat.AgentID] = stat
3736+
}
3737+
3738+
latenciesByAgent := map[uuid.UUID][]float64{}
3739+
minimumDateByAgent := map[uuid.UUID]time.Time{}
3740+
for _, agentStat := range agentStatsCreatedAfter {
3741+
if agentStat.ConnectionMedianLatencyMS <= 0 {
3742+
continue
3743+
}
3744+
stat := statByAgent[agentStat.AgentID]
3745+
minimumDate := minimumDateByAgent[agentStat.AgentID]
3746+
if agentStat.CreatedAt.Before(minimumDate) || minimumDate.IsZero() {
3747+
minimumDateByAgent[agentStat.AgentID] = agentStat.CreatedAt
3748+
}
3749+
stat.WorkspaceRxBytes += agentStat.RxBytes
3750+
stat.WorkspaceTxBytes += agentStat.TxBytes
3751+
statByAgent[agentStat.AgentID] = stat
3752+
latenciesByAgent[agentStat.AgentID] = append(latenciesByAgent[agentStat.AgentID], agentStat.ConnectionMedianLatencyMS)
3753+
}
3754+
3755+
tryPercentile := func(fs []float64, p float64) float64 {
3756+
if len(fs) == 0 {
3757+
return -1
3758+
}
3759+
sort.Float64s(fs)
3760+
return fs[int(float64(len(fs))*p/100)]
3761+
}
3762+
3763+
for _, stat := range statByAgent {
3764+
stat.AggregatedFrom = minimumDateByAgent[stat.AgentID]
3765+
statByAgent[stat.AgentID] = stat
3766+
3767+
latencies, ok := latenciesByAgent[stat.AgentID]
3768+
if !ok {
3769+
continue
3770+
}
3771+
stat.WorkspaceConnectionLatency50 = tryPercentile(latencies, 50)
3772+
stat.WorkspaceConnectionLatency95 = tryPercentile(latencies, 95)
3773+
statByAgent[stat.AgentID] = stat
3774+
}
3775+
3776+
stats := make([]database.GetWorkspaceAgentStatsRow, 0, len(statByAgent))
3777+
for _, agent := range statByAgent {
3778+
stats = append(stats, agent)
3779+
}
3780+
return stats, nil
3781+
}
3782+
37103783
func (q *fakeQuerier) UpdateWorkspaceTTLToBeWithinTemplateMax(_ context.Context, arg database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams) error {
37113784
if err := validateDatabaseType(arg); err != nil {
37123785
return err

coderd/database/querier.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries.sql.go

Lines changed: 82 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

coderd/database/queries/workspaceagentstats.sql

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,31 @@ WITH agent_stats AS (
7474
) AS a WHERE a.rn = 1
7575
)
7676
SELECT * FROM agent_stats, latest_agent_stats;
77+
78+
-- name: GetWorkspaceAgentStats :many
-- Returns one row per agent that reported at least one stat with a positive
-- connection_median_latency_ms after $1. Byte totals, the aggregation start
-- and the latency percentiles are computed over those stats; the session
-- counts come from the agent's most recent stat after $1.
WITH agent_stats AS (
	SELECT
		user_id,
		agent_id,
		workspace_id,
		template_id,
		MIN(created_at)::timestamptz AS aggregated_from,
		coalesce(SUM(rx_bytes), 0)::bigint AS workspace_rx_bytes,
		coalesce(SUM(tx_bytes), 0)::bigint AS workspace_tx_bytes,
		coalesce((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_50,
		coalesce((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_95
	FROM workspace_agent_stats
		-- The greater than 0 is to support legacy agents that don't report connection_median_latency_ms.
		WHERE workspace_agent_stats.created_at > $1 AND connection_median_latency_ms > 0 GROUP BY user_id, agent_id, workspace_id, template_id
), latest_agent_stats AS (
	SELECT
		-- agent_id is exposed so the final SELECT can pair each agent with its
		-- own session counts.
		a.agent_id,
		coalesce(SUM(session_count_vscode), 0)::bigint AS session_count_vscode,
		coalesce(SUM(session_count_ssh), 0)::bigint AS session_count_ssh,
		coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
		coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty
	FROM (
		SELECT *, ROW_NUMBER() OVER(PARTITION BY agent_id ORDER BY created_at DESC) AS rn
		FROM workspace_agent_stats WHERE created_at > $1
	) AS a WHERE a.rn = 1 GROUP BY a.user_id, a.agent_id, a.workspace_id, a.template_id
)
-- Join on agent_id instead of cross joining: without a join condition every
-- agent_stats row would be combined with every agent's session counts.
-- The column list is explicit so the result shape stays the same as before.
SELECT
	agent_stats.user_id,
	agent_stats.agent_id,
	agent_stats.workspace_id,
	agent_stats.template_id,
	agent_stats.aggregated_from,
	agent_stats.workspace_rx_bytes,
	agent_stats.workspace_tx_bytes,
	agent_stats.workspace_connection_latency_50,
	agent_stats.workspace_connection_latency_95,
	latest_agent_stats.session_count_vscode,
	latest_agent_stats.session_count_ssh,
	latest_agent_stats.session_count_jetbrains,
	latest_agent_stats.session_count_reconnecting_pty
FROM agent_stats
JOIN latest_agent_stats ON agent_stats.agent_id = latest_agent_stats.agent_id;

coderd/telemetry/telemetry.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,17 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
465465
}
466466
return nil
467467
})
468+
eg.Go(func() error {
469+
stats, err := r.options.Database.GetWorkspaceAgentStats(ctx, createdAfter)
470+
if err != nil {
471+
return xerrors.Errorf("get workspace agent stats: %w", err)
472+
}
473+
snapshot.WorkspaceAgentStats = make([]WorkspaceAgentStat, 0, len(stats))
474+
for _, stat := range stats {
475+
snapshot.WorkspaceAgentStats = append(snapshot.WorkspaceAgentStats, ConvertWorkspaceAgentStat(stat))
476+
}
477+
return nil
478+
})
468479

469480
err := eg.Wait()
470481
if err != nil {
@@ -564,6 +575,25 @@ func ConvertWorkspaceAgent(agent database.WorkspaceAgent) WorkspaceAgent {
564575
return snapAgent
565576
}
566577

578+
// ConvertWorkspaceAgentStat anonymizes a workspace agent stat.
579+
func ConvertWorkspaceAgentStat(stat database.GetWorkspaceAgentStatsRow) WorkspaceAgentStat {
580+
return WorkspaceAgentStat{
581+
UserID: stat.UserID,
582+
TemplateID: stat.TemplateID,
583+
WorkspaceID: stat.WorkspaceID,
584+
AgentID: stat.AgentID,
585+
AggregatedFrom: stat.AggregatedFrom,
586+
ConnectionLatency50: stat.WorkspaceConnectionLatency50,
587+
ConnectionLatency95: stat.WorkspaceConnectionLatency95,
588+
RxBytes: stat.WorkspaceRxBytes,
589+
TxBytes: stat.WorkspaceTxBytes,
590+
SessionCountVSCode: stat.SessionCountVSCode,
591+
SessionCountJetBrains: stat.SessionCountJetBrains,
592+
SessionCountReconnectingPTY: stat.SessionCountReconnectingPTY,
593+
SessionCountSSH: stat.SessionCountSSH,
594+
}
595+
}
596+
567597
// ConvertWorkspaceApp anonymizes a workspace app.
568598
func ConvertWorkspaceApp(app database.WorkspaceApp) WorkspaceApp {
569599
return WorkspaceApp{
@@ -666,6 +696,7 @@ type Snapshot struct {
666696
Workspaces []Workspace `json:"workspaces"`
667697
WorkspaceApps []WorkspaceApp `json:"workspace_apps"`
668698
WorkspaceAgents []WorkspaceAgent `json:"workspace_agents"`
699+
WorkspaceAgentStats []WorkspaceAgentStat `json:"workspace_agent_stats"`
669700
WorkspaceBuilds []WorkspaceBuild `json:"workspace_build"`
670701
WorkspaceResources []WorkspaceResource `json:"workspace_resources"`
671702
WorkspaceResourceMetadata []WorkspaceResourceMetadata `json:"workspace_resource_metadata"`
@@ -754,6 +785,22 @@ type WorkspaceAgent struct {
754785
ShutdownScript bool `json:"shutdown_script"`
755786
}
756787

788+
type WorkspaceAgentStat struct {
789+
UserID uuid.UUID `json:"user_id"`
790+
TemplateID uuid.UUID `json:"template_id"`
791+
WorkspaceID uuid.UUID `json:"workspace_id"`
792+
AggregatedFrom time.Time `json:"aggregated_from"`
793+
AgentID uuid.UUID `json:"agent_id"`
794+
RxBytes int64 `json:"rx_bytes"`
795+
TxBytes int64 `json:"tx_bytes"`
796+
ConnectionLatency50 float64 `json:"connection_latency_50"`
797+
ConnectionLatency95 float64 `json:"connection_latency_95"`
798+
SessionCountVSCode int64 `json:"session_count_vscode"`
799+
SessionCountJetBrains int64 `json:"session_count_jetbrains"`
800+
SessionCountReconnectingPTY int64 `json:"session_count_reconnecting_pty"`
801+
SessionCountSSH int64 `json:"session_count_ssh"`
802+
}
803+
757804
type WorkspaceApp struct {
758805
ID uuid.UUID `json:"id"`
759806
CreatedAt time.Time `json:"created_at"`

coderd/telemetry/telemetry_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ func TestTelemetry(t *testing.T) {
6767
_ = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
6868
Transition: database.WorkspaceTransitionStart,
6969
})
70+
_ = dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{})
7071
_, err = db.InsertLicense(ctx, database.InsertLicenseParams{
7172
UploadedAt: database.Now(),
7273
JWT: "",
@@ -86,6 +87,7 @@ func TestTelemetry(t *testing.T) {
8687
require.Len(t, snapshot.WorkspaceAgents, 1)
8788
require.Len(t, snapshot.WorkspaceBuilds, 1)
8889
require.Len(t, snapshot.WorkspaceResources, 1)
90+
require.Len(t, snapshot.WorkspaceAgentStats, 1)
8991
})
9092
t.Run("HashedEmail", func(t *testing.T) {
9193
t.Parallel()

0 commit comments

Comments
 (0)