fix(oohelperd): metrics improvements after design review (#903)

This diff updates the metrics according to https://github.com/ooni/probe/issues/2183#issuecomment-1230327725
This commit is contained in:
Simone Basso 2022-08-29 16:36:46 +02:00 committed by GitHub
parent ffc2527fc5
commit 8c855ca597
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 24 deletions

View File

@ -48,38 +48,37 @@ var _ http.Handler = &handler{}
func (h *handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
metricRequestsInflight.Inc()
defer metricRequestsInflight.Dec()
metricRequestsTotal.Inc()
w.Header().Add("Server", fmt.Sprintf(
"oohelperd/%s ooniprobe-engine/%s", version.Version, version.Version,
))
if req.Method != "POST" {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
metricRequestsCount.WithLabelValues("400", "bad_request_method").Inc()
w.WriteHeader(400)
return
}
reader := &io.LimitedReader{R: req.Body, N: h.MaxAcceptableBody}
data, err := netxlite.ReadAllContext(req.Context(), reader)
if err != nil {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
metricRequestsCount.WithLabelValues("400", "request_body_too_large").Inc()
w.WriteHeader(400)
return
}
var creq ctrlRequest
if err := json.Unmarshal(data, &creq); err != nil {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
metricRequestsCount.WithLabelValues("400", "cannot_unmarshal_request_body").Inc()
w.WriteHeader(400)
return
}
started := time.Now()
cresp, err := measure(req.Context(), h, &creq)
elapsed := time.Since(started)
metricMeasurementTime.Observe(float64(elapsed.Seconds()))
metricWCTaskDurationSeconds.Observe(float64(elapsed.Seconds()))
if err != nil {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
metricRequestsCount.WithLabelValues("400", "measurement_failed").Inc()
w.WriteHeader(400)
return
}
metricRequestsByStatusCode.WithLabelValues("200").Inc()
metricRequestsCount.WithLabelValues("200", "ok").Inc()
// We assume that the following call cannot fail because it's a
// clearly-serializable data structure.
data, err = json.Marshal(cresp)

View File

@ -3,6 +3,8 @@ package main
//
// Metrics definitions
//
// See https://github.com/ooni/probe/issues/2183#issuecomment-1230327725
//
import (
"github.com/prometheus/client_golang/prometheus"
@ -10,30 +12,26 @@ import (
)
var (
// metricRequestsTotal counts the total number of requests
metricRequestsTotal = promauto.NewCounter(prometheus.CounterOpts{
Name: "oohelperd_requests_total",
Help: "The total number of processed requests",
})
// metricRequestsCount counts the number of requests we served.
metricRequestsCount = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "oohelperd_requests_count",
Help: "Total number of processed requests",
}, []string{"code", "reason"})
// metricRequestsByStatusCode counts the number of requests that
// have returned a given status code to the caller.
metricRequestsByStatusCode = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "oohelperd_requests_by_status_code",
Help: "Total number of processed requests by status code",
}, []string{"code"})
// metricRequestsInflight counts the number of requests currently inflight.
// metricRequestsInflight gauges the number of requests currently inflight.
metricRequestsInflight = promauto.NewGauge(prometheus.GaugeOpts{
Name: "oohelperd_requests_inflight",
Name: "oohelperd_requests_inflight_gauge",
Help: "The number or requests currently inflight",
})
// metricMeasurementTime summarizes the time to perform a measurement.
metricMeasurementTime = promauto.NewSummary(prometheus.SummaryOpts{
Name: "oohelperd_measurement_time",
// metricWCTaskDurationSeconds summarizes the duration of the web connectivity measurement task.
metricWCTaskDurationSeconds = promauto.NewSummary(prometheus.SummaryOpts{
Name: "oohelperd_wctask_duration_seconds",
Help: "Summarizes the time to perform a test-helper measurement (in seconds)",
// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
//
// TODO(bassosimone,FedericoCeratto): investigate whether using
// a shorter-than-10m observation interval is better for us
Objectives: map[float64]float64{
0.25: 0.010, // 0.240 <= φ <= 0.260
0.5: 0.010, // 0.490 <= φ <= 0.510