From 8c855ca59765b4c2834991412140f5a82ca2625d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 29 Aug 2022 16:36:46 +0200 Subject: [PATCH] fix(oohelperd): metrics improvements after design review (#903) This diff updates the metrics according to https://github.com/ooni/probe/issues/2183#issuecomment-1230327725 --- internal/cmd/oohelperd/handler.go | 13 ++++++------- internal/cmd/oohelperd/metrics.go | 32 +++++++++++++++---------------- 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/internal/cmd/oohelperd/handler.go b/internal/cmd/oohelperd/handler.go index 494a0b6..4b3d343 100644 --- a/internal/cmd/oohelperd/handler.go +++ b/internal/cmd/oohelperd/handler.go @@ -48,38 +48,37 @@ var _ http.Handler = &handler{} func (h *handler) ServeHTTP(w http.ResponseWriter, req *http.Request) { metricRequestsInflight.Inc() defer metricRequestsInflight.Dec() - metricRequestsTotal.Inc() w.Header().Add("Server", fmt.Sprintf( "oohelperd/%s ooniprobe-engine/%s", version.Version, version.Version, )) if req.Method != "POST" { - metricRequestsByStatusCode.WithLabelValues("400").Inc() + metricRequestsCount.WithLabelValues("400", "bad_request_method").Inc() w.WriteHeader(400) return } reader := &io.LimitedReader{R: req.Body, N: h.MaxAcceptableBody} data, err := netxlite.ReadAllContext(req.Context(), reader) if err != nil { - metricRequestsByStatusCode.WithLabelValues("400").Inc() + metricRequestsCount.WithLabelValues("400", "request_body_too_large").Inc() w.WriteHeader(400) return } var creq ctrlRequest if err := json.Unmarshal(data, &creq); err != nil { - metricRequestsByStatusCode.WithLabelValues("400").Inc() + metricRequestsCount.WithLabelValues("400", "cannot_unmarshal_request_body").Inc() w.WriteHeader(400) return } started := time.Now() cresp, err := measure(req.Context(), h, &creq) elapsed := time.Since(started) - metricMeasurementTime.Observe(float64(elapsed.Seconds())) + metricWCTaskDurationSeconds.Observe(float64(elapsed.Seconds())) if err != nil { - metricRequestsByStatusCode.WithLabelValues("400").Inc() + metricRequestsCount.WithLabelValues("400", "measurement_failed").Inc() w.WriteHeader(400) return } - metricRequestsByStatusCode.WithLabelValues("200").Inc() + metricRequestsCount.WithLabelValues("200", "ok").Inc() // We assume that the following call cannot fail because it's a // clearly-serializable data structure. data, err = json.Marshal(cresp) diff --git a/internal/cmd/oohelperd/metrics.go b/internal/cmd/oohelperd/metrics.go index 13fb01e..73325ce 100644 --- a/internal/cmd/oohelperd/metrics.go +++ b/internal/cmd/oohelperd/metrics.go @@ -3,6 +3,8 @@ package main // // Metrics definitions // +// See https://github.com/ooni/probe/issues/2183#issuecomment-1230327725 +// import ( "github.com/prometheus/client_golang/prometheus" @@ -10,30 +12,26 @@ import ( ) var ( - // metricRequestsTotal counts the total number of requests - metricRequestsTotal = promauto.NewCounter(prometheus.CounterOpts{ - Name: "oohelperd_requests_total", - Help: "The total number of processed requests", - }) + // metricRequestsCount counts the number of requests we served. + metricRequestsCount = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "oohelperd_requests_count", + Help: "Total number of processed requests", + }, []string{"code", "reason"}) - // metricRequestsByStatusCode counts the number of requests that - // have returned a given status code to the caller. - metricRequestsByStatusCode = promauto.NewCounterVec(prometheus.CounterOpts{ - Name: "oohelperd_requests_by_status_code", - Help: "Total number of processed requests by status code", - }, []string{"code"}) - - // metricRequestsInflight counts the number of requests currently inflight. + // metricRequestsInflight gauges the number of requests currently inflight. metricRequestsInflight = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "oohelperd_requests_inflight", + Name: "oohelperd_requests_inflight_gauge", Help: "The number or requests currently inflight", }) - // metricMeasurementTime summarizes the time to perform a measurement. - metricMeasurementTime = promauto.NewSummary(prometheus.SummaryOpts{ - Name: "oohelperd_measurement_time", + // metricWCTaskDurationSeconds summarizes the duration of the web connectivity measurement task. + metricWCTaskDurationSeconds = promauto.NewSummary(prometheus.SummaryOpts{ + Name: "oohelperd_wctask_duration_seconds", Help: "Summarizes the time to perform a test-helper measurement (in seconds)", // See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/ + // + // TODO(bassosimone,FedericoCeratto): investigate whether using + // a shorter-than-10m observation interval is better for us Objectives: map[float64]float64{ 0.25: 0.010, // 0.240 <= φ <= 0.260 0.5: 0.010, // 0.490 <= φ <= 0.510