Skip to content

feat: add workspace agent stat reporting to telemetry #6577

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions coderd/database/dbauthz/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,10 @@ func (q *querier) GetDeploymentWorkspaceAgentStats(ctx context.Context, createdA
return q.db.GetDeploymentWorkspaceAgentStats(ctx, createdAfter)
}

func (q *querier) GetWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
return q.db.GetWorkspaceAgentStats(ctx, createdAfter)
}

func (q *querier) GetDeploymentWorkspaceStats(ctx context.Context) (database.GetDeploymentWorkspaceStatsRow, error) {
return q.db.GetDeploymentWorkspaceStats(ctx)
}
Expand Down
73 changes: 73 additions & 0 deletions coderd/database/dbfake/databasefake.go
Original file line number Diff line number Diff line change
Expand Up @@ -3707,6 +3707,79 @@ func (q *fakeQuerier) GetDeploymentWorkspaceStats(ctx context.Context) (database
return stat, nil
}

func (q *fakeQuerier) GetWorkspaceAgentStats(_ context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()

agentStatsCreatedAfter := make([]database.WorkspaceAgentStat, 0)
for _, agentStat := range q.workspaceAgentStats {
if agentStat.CreatedAt.After(createdAfter) {
agentStatsCreatedAfter = append(agentStatsCreatedAfter, agentStat)
}
}

latestAgentStats := map[uuid.UUID]database.WorkspaceAgentStat{}
for _, agentStat := range q.workspaceAgentStats {
if agentStat.CreatedAt.After(createdAfter) {
latestAgentStats[agentStat.AgentID] = agentStat
}
}

statByAgent := map[uuid.UUID]database.GetWorkspaceAgentStatsRow{}
for _, agentStat := range latestAgentStats {
stat := statByAgent[agentStat.AgentID]
stat.SessionCountVSCode += agentStat.SessionCountVSCode
stat.SessionCountJetBrains += agentStat.SessionCountJetBrains
stat.SessionCountReconnectingPTY += agentStat.SessionCountReconnectingPTY
stat.SessionCountSSH += agentStat.SessionCountSSH
statByAgent[stat.AgentID] = stat
}

latenciesByAgent := map[uuid.UUID][]float64{}
minimumDateByAgent := map[uuid.UUID]time.Time{}
for _, agentStat := range agentStatsCreatedAfter {
if agentStat.ConnectionMedianLatencyMS <= 0 {
continue
}
stat := statByAgent[agentStat.AgentID]
minimumDate := minimumDateByAgent[agentStat.AgentID]
if agentStat.CreatedAt.Before(minimumDate) || minimumDate.IsZero() {
minimumDateByAgent[agentStat.AgentID] = agentStat.CreatedAt
}
stat.WorkspaceRxBytes += agentStat.RxBytes
stat.WorkspaceTxBytes += agentStat.TxBytes
statByAgent[agentStat.AgentID] = stat
latenciesByAgent[agentStat.AgentID] = append(latenciesByAgent[agentStat.AgentID], agentStat.ConnectionMedianLatencyMS)
}

tryPercentile := func(fs []float64, p float64) float64 {
if len(fs) == 0 {
return -1
}
sort.Float64s(fs)
return fs[int(float64(len(fs))*p/100)]
}

for _, stat := range statByAgent {
stat.AggregatedFrom = minimumDateByAgent[stat.AgentID]
statByAgent[stat.AgentID] = stat

latencies, ok := latenciesByAgent[stat.AgentID]
if !ok {
continue
}
stat.WorkspaceConnectionLatency50 = tryPercentile(latencies, 50)
stat.WorkspaceConnectionLatency95 = tryPercentile(latencies, 95)
statByAgent[stat.AgentID] = stat
}

stats := make([]database.GetWorkspaceAgentStatsRow, 0, len(statByAgent))
for _, agent := range statByAgent {
stats = append(stats, agent)
}
return stats, nil
}

func (q *fakeQuerier) UpdateWorkspaceTTLToBeWithinTemplateMax(_ context.Context, arg database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams) error {
if err := validateDatabaseType(arg); err != nil {
return err
Expand Down
1 change: 1 addition & 0 deletions coderd/database/querier.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 82 additions & 0 deletions coderd/database/queries.sql.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions coderd/database/queries/workspaceagentstats.sql
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,31 @@ WITH agent_stats AS (
) AS a WHERE a.rn = 1
)
SELECT * FROM agent_stats, latest_agent_stats;

-- name: GetWorkspaceAgentStats :many
WITH agent_stats AS (
SELECT
user_id,
agent_id,
workspace_id,
template_id,
MIN(created_at)::timestamptz AS aggregated_from,
coalesce(SUM(rx_bytes), 0)::bigint AS workspace_rx_bytes,
coalesce(SUM(tx_bytes), 0)::bigint AS workspace_tx_bytes,
coalesce((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_50,
coalesce((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_95
FROM workspace_agent_stats
-- The greater than 0 is to support legacy agents that don't report connection_median_latency_ms.
WHERE workspace_agent_stats.created_at > $1 AND connection_median_latency_ms > 0 GROUP BY user_id, agent_id, workspace_id, template_id
), latest_agent_stats AS (
SELECT
coalesce(SUM(session_count_vscode), 0)::bigint AS session_count_vscode,
coalesce(SUM(session_count_ssh), 0)::bigint AS session_count_ssh,
coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty
FROM (
SELECT *, ROW_NUMBER() OVER(PARTITION BY agent_id ORDER BY created_at DESC) AS rn
FROM workspace_agent_stats WHERE created_at > $1
) AS a WHERE a.rn = 1 GROUP BY a.user_id, a.agent_id, a.workspace_id, a.template_id
)
SELECT * FROM agent_stats, latest_agent_stats;
47 changes: 47 additions & 0 deletions coderd/telemetry/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,17 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
}
return nil
})
eg.Go(func() error {
stats, err := r.options.Database.GetWorkspaceAgentStats(ctx, createdAfter)
if err != nil {
return xerrors.Errorf("get workspace agent stats: %w", err)
}
snapshot.WorkspaceAgentStats = make([]WorkspaceAgentStat, 0, len(stats))
for _, stat := range stats {
snapshot.WorkspaceAgentStats = append(snapshot.WorkspaceAgentStats, ConvertWorkspaceAgentStat(stat))
}
return nil
})

err := eg.Wait()
if err != nil {
Expand Down Expand Up @@ -564,6 +575,25 @@ func ConvertWorkspaceAgent(agent database.WorkspaceAgent) WorkspaceAgent {
return snapAgent
}

// ConvertWorkspaceAgentStat anonymizes a workspace agent stat.
func ConvertWorkspaceAgentStat(stat database.GetWorkspaceAgentStatsRow) WorkspaceAgentStat {
return WorkspaceAgentStat{
UserID: stat.UserID,
TemplateID: stat.TemplateID,
WorkspaceID: stat.WorkspaceID,
AgentID: stat.AgentID,
AggregatedFrom: stat.AggregatedFrom,
ConnectionLatency50: stat.WorkspaceConnectionLatency50,
ConnectionLatency95: stat.WorkspaceConnectionLatency95,
RxBytes: stat.WorkspaceRxBytes,
TxBytes: stat.WorkspaceTxBytes,
SessionCountVSCode: stat.SessionCountVSCode,
SessionCountJetBrains: stat.SessionCountJetBrains,
SessionCountReconnectingPTY: stat.SessionCountReconnectingPTY,
SessionCountSSH: stat.SessionCountSSH,
}
}

// ConvertWorkspaceApp anonymizes a workspace app.
func ConvertWorkspaceApp(app database.WorkspaceApp) WorkspaceApp {
return WorkspaceApp{
Expand Down Expand Up @@ -666,6 +696,7 @@ type Snapshot struct {
Workspaces []Workspace `json:"workspaces"`
WorkspaceApps []WorkspaceApp `json:"workspace_apps"`
WorkspaceAgents []WorkspaceAgent `json:"workspace_agents"`
WorkspaceAgentStats []WorkspaceAgentStat `json:"workspace_agent_stats"`
WorkspaceBuilds []WorkspaceBuild `json:"workspace_build"`
WorkspaceResources []WorkspaceResource `json:"workspace_resources"`
WorkspaceResourceMetadata []WorkspaceResourceMetadata `json:"workspace_resource_metadata"`
Expand Down Expand Up @@ -754,6 +785,22 @@ type WorkspaceAgent struct {
ShutdownScript bool `json:"shutdown_script"`
}

type WorkspaceAgentStat struct {
UserID uuid.UUID `json:"user_id"`
TemplateID uuid.UUID `json:"template_id"`
WorkspaceID uuid.UUID `json:"workspace_id"`
AggregatedFrom time.Time `json:"aggregated_from"`
AgentID uuid.UUID `json:"agent_id"`
RxBytes int64 `json:"rx_bytes"`
TxBytes int64 `json:"tx_bytes"`
ConnectionLatency50 float64 `json:"connection_latency_50"`
ConnectionLatency95 float64 `json:"connection_latency_95"`
SessionCountVSCode int64 `json:"session_count_vscode"`
SessionCountJetBrains int64 `json:"session_count_jetbrains"`
SessionCountReconnectingPTY int64 `json:"session_count_reconnecting_pty"`
SessionCountSSH int64 `json:"session_count_ssh"`
}

type WorkspaceApp struct {
ID uuid.UUID `json:"id"`
CreatedAt time.Time `json:"created_at"`
Expand Down
2 changes: 2 additions & 0 deletions coderd/telemetry/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func TestTelemetry(t *testing.T) {
_ = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
Transition: database.WorkspaceTransitionStart,
})
_ = dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{})
_, err = db.InsertLicense(ctx, database.InsertLicenseParams{
UploadedAt: database.Now(),
JWT: "",
Expand All @@ -86,6 +87,7 @@ func TestTelemetry(t *testing.T) {
require.Len(t, snapshot.WorkspaceAgents, 1)
require.Len(t, snapshot.WorkspaceBuilds, 1)
require.Len(t, snapshot.WorkspaceResources, 1)
require.Len(t, snapshot.WorkspaceAgentStats, 1)
})
t.Run("HashedEmail", func(t *testing.T) {
t.Parallel()
Expand Down