Skip to content

chore!: Standardize prometheus time metrics to seconds #5709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions coderd/httpmw/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@ import (
"github.com/prometheus/client_golang/prometheus/promauto"
)

// durationToFloatMs converts d to a float64 count of milliseconds.
// Duration.Milliseconds yields an integer, so any sub-millisecond part of d
// is dropped before the conversion to float64.
func durationToFloatMs(d time.Duration) float64 {
return float64(d.Milliseconds())
}

func Prometheus(register prometheus.Registerer) func(http.Handler) http.Handler {
factory := promauto.With(register)
requestsProcessed := factory.NewCounterVec(prometheus.CounterOpts{
Expand All @@ -30,34 +26,34 @@ func Prometheus(register prometheus.Registerer) func(http.Handler) http.Handler
Namespace: "coderd",
Subsystem: "api",
Name: "concurrent_requests",
Help: "The number of concurrent API requests",
Help: "The number of concurrent API requests.",
})
websocketsConcurrent := factory.NewGauge(prometheus.GaugeOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "concurrent_websockets",
Help: "The total number of concurrent API websockets",
Help: "The total number of concurrent API websockets.",
})
websocketsDist := factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "websocket_durations_ms",
Help: "Websocket duration distribution of requests in milliseconds",
Name: "websocket_durations_seconds",
Help: "Websocket duration distribution of requests in seconds.",
Buckets: []float64{
durationToFloatMs(01 * time.Millisecond),
durationToFloatMs(01 * time.Second),
durationToFloatMs(01 * time.Minute),
durationToFloatMs(01 * time.Hour),
durationToFloatMs(15 * time.Hour),
durationToFloatMs(30 * time.Hour),
0.001, // 1ms
1,
60, // 1 minute
60 * 60, // 1 hour
60 * 60 * 15, // 15 hours
60 * 60 * 30, // 30 hours
},
}, []string{"path"})
requestsDist := factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "api",
Name: "request_latencies_ms",
Help: "Latency distribution of requests in milliseconds",
Buckets: []float64{1, 5, 10, 25, 50, 100, 500, 1000, 5000, 10000, 30000},
Name: "request_latencies_seconds",
Help: "Latency distribution of requests in seconds.",
Buckets: []float64{0.001, 0.005, 0.010, 0.025, 0.050, 0.100, 0.500, 1, 5, 10, 30},
}, []string{"method", "path"})

return func(next http.Handler) http.Handler {
Expand Down Expand Up @@ -98,7 +94,7 @@ func Prometheus(register prometheus.Registerer) func(http.Handler) http.Handler
statusStr := strconv.Itoa(sw.Status)

requestsProcessed.WithLabelValues(statusStr, method, path).Inc()
dist.WithLabelValues(distOpts...).Observe(float64(time.Since(start)) / 1e6)
dist.WithLabelValues(distOpts...).Observe(time.Since(start).Seconds())
})
}
}
17 changes: 12 additions & 5 deletions docs/admin/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ The environment variable `CODER_PROMETHEUS_ENABLE` will be enabled automatically
| Name | Type | Description | Labels |
| -------------------------------------------- | --------- | ------------------------------------------------------------------ | ---------------------- |
| `coderd_api_active_users_duration_hour` | gauge | The number of users that have been active within the last hour. | |
| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests | |
| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets | |
| `coderd_api_request_latencies_ms` | histogram | Latency distribution of requests in milliseconds | `method` `path` |
| `coderd_api_concurrent_requests` | gauge | The number of concurrent API requests. | |
| `coderd_api_concurrent_websockets` | gauge | The total number of concurrent API websockets. | |
| `coderd_api_request_latencies_seconds` | histogram | Latency distribution of requests in seconds. | `method` `path` |
| `coderd_api_requests_processed_total` | counter | The total number of processed API requests | `code` `method` `path` |
| `coderd_api_websocket_durations_ms` | histogram | Websocket duration distribution of requests in milliseconds | `path` |
| `coderd_api_websocket_durations_seconds` | histogram | Websocket duration distribution of requests in seconds. | `path` |
| `coderd_api_workspace_latest_build_total` | gauge | The latest workspace builds with a status. | `status` |
| `coderd_provisionerd_job_timings_ms` | histogram | The provisioner job time duration. | `provisioner` `status` |
| `coderd_provisionerd_job_timings_seconds` | histogram | The provisioner job time duration in seconds. | `provisioner` `status` |
| `coderd_provisionerd_jobs_current` | gauge | The number of currently running provisioner jobs. | `provisioner` |
| `go_gc_duration_seconds` | summary | A summary of the pause duration of garbage collection cycles. | |
| `go_goroutines` | gauge | Number of goroutines that currently exist. | |
Expand Down Expand Up @@ -67,6 +67,13 @@ The environment variable `CODER_PROMETHEUS_ENABLE` will be enabled automatically
| `go_memstats_stack_sys_bytes` | gauge | Number of bytes obtained from system for stack allocator. | |
| `go_memstats_sys_bytes` | gauge | Number of bytes obtained from system. | |
| `go_threads` | gauge | Number of OS threads created. | |
| `process_cpu_seconds_total` | counter | Total user and system CPU time spent in seconds. | |
| `process_max_fds` | gauge | Maximum number of open file descriptors. | |
| `process_open_fds` | gauge | Number of open file descriptors. | |
| `process_resident_memory_bytes` | gauge | Resident memory size in bytes. | |
| `process_start_time_seconds` | gauge | Start time of the process since unix epoch in seconds. | |
| `process_virtual_memory_bytes` | gauge | Virtual memory size in bytes. | |
| `process_virtual_memory_max_bytes` | gauge | Maximum amount of virtual memory available in bytes. | |
| `promhttp_metric_handler_requests_in_flight` | gauge | Current number of scrapes being served. | |
| `promhttp_metric_handler_requests_total` | counter | Total number of scrapes by HTTP status code. | `code` |

Expand Down
23 changes: 10 additions & 13 deletions provisionerd/provisionerd.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,6 @@ type Metrics struct {

func NewMetrics(reg prometheus.Registerer) Metrics {
auto := promauto.With(reg)
durationToFloatMs := func(d time.Duration) float64 {
return float64(d.Milliseconds())
}

return Metrics{
Runner: runner.Metrics{
Expand All @@ -147,17 +144,17 @@ func NewMetrics(reg prometheus.Registerer) Metrics {
JobTimings: auto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "provisionerd",
Name: "job_timings_ms",
Help: "The provisioner job time duration.",
Name: "job_timings_seconds",
Help: "The provisioner job time duration in seconds.",
Buckets: []float64{
durationToFloatMs(1 * time.Second),
durationToFloatMs(10 * time.Second),
durationToFloatMs(30 * time.Second),
durationToFloatMs(1 * time.Minute),
durationToFloatMs(5 * time.Minute),
durationToFloatMs(10 * time.Minute),
durationToFloatMs(30 * time.Minute),
durationToFloatMs(1 * time.Hour),
1, // 1s
10,
30,
60, // 1min
60 * 5,
60 * 10,
60 * 30, // 30min
60 * 60, // 1hr
},
}, []string{"provisioner", "status"}),
},
Expand Down
2 changes: 1 addition & 1 deletion provisionerd/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func (r *Runner) Run() {
}

concurrentGauge.Dec()
r.metrics.JobTimings.WithLabelValues(r.job.Provisioner, status).Observe(float64(time.Since(start).Milliseconds()))
r.metrics.JobTimings.WithLabelValues(r.job.Provisioner, status).Observe(time.Since(start).Seconds())
}()

r.mutex.Lock()
Expand Down
Loading