fix(oohelperd): metrics improvements after design review (#903)
This diff updates the metrics according to https://github.com/ooni/probe/issues/2183#issuecomment-1230327725
This commit is contained in:
parent
ffc2527fc5
commit
8c855ca597
|
@ -48,38 +48,37 @@ var _ http.Handler = &handler{}
|
|||
func (h *handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
metricRequestsInflight.Inc()
|
||||
defer metricRequestsInflight.Dec()
|
||||
metricRequestsTotal.Inc()
|
||||
w.Header().Add("Server", fmt.Sprintf(
|
||||
"oohelperd/%s ooniprobe-engine/%s", version.Version, version.Version,
|
||||
))
|
||||
if req.Method != "POST" {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
metricRequestsCount.WithLabelValues("400", "bad_request_method").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
reader := &io.LimitedReader{R: req.Body, N: h.MaxAcceptableBody}
|
||||
data, err := netxlite.ReadAllContext(req.Context(), reader)
|
||||
if err != nil {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
metricRequestsCount.WithLabelValues("400", "request_body_too_large").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
var creq ctrlRequest
|
||||
if err := json.Unmarshal(data, &creq); err != nil {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
metricRequestsCount.WithLabelValues("400", "cannot_unmarshal_request_body").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
started := time.Now()
|
||||
cresp, err := measure(req.Context(), h, &creq)
|
||||
elapsed := time.Since(started)
|
||||
metricMeasurementTime.Observe(float64(elapsed.Seconds()))
|
||||
metricWCTaskDurationSeconds.Observe(float64(elapsed.Seconds()))
|
||||
if err != nil {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
metricRequestsCount.WithLabelValues("400", "measurement_failed").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
metricRequestsByStatusCode.WithLabelValues("200").Inc()
|
||||
metricRequestsCount.WithLabelValues("200", "ok").Inc()
|
||||
// We assume that the following call cannot fail because it's a
|
||||
// clearly-serializable data structure.
|
||||
data, err = json.Marshal(cresp)
|
||||
|
|
|
@ -3,6 +3,8 @@ package main
|
|||
//
|
||||
// Metrics definitions
|
||||
//
|
||||
// See https://github.com/ooni/probe/issues/2183#issuecomment-1230327725
|
||||
//
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
@ -10,30 +12,26 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
// metricRequestsTotal counts the total number of requests
|
||||
metricRequestsTotal = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Name: "oohelperd_requests_total",
|
||||
Help: "The total number of processed requests",
|
||||
})
|
||||
// metricRequestsCount counts the number of requests we served.
|
||||
metricRequestsCount = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oohelperd_requests_count",
|
||||
Help: "Total number of processed requests",
|
||||
}, []string{"code", "reason"})
|
||||
|
||||
// metricRequestsByStatusCode counts the number of requests that
|
||||
// have returned a given status code to the caller.
|
||||
metricRequestsByStatusCode = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oohelperd_requests_by_status_code",
|
||||
Help: "Total number of processed requests by status code",
|
||||
}, []string{"code"})
|
||||
|
||||
// metricRequestsInflight counts the number of requests currently inflight.
|
||||
// metricRequestsInflight gauges the number of requests currently inflight.
|
||||
metricRequestsInflight = promauto.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "oohelperd_requests_inflight",
|
||||
Name: "oohelperd_requests_inflight_gauge",
|
||||
Help: "The number or requests currently inflight",
|
||||
})
|
||||
|
||||
// metricMeasurementTime summarizes the time to perform a measurement.
|
||||
metricMeasurementTime = promauto.NewSummary(prometheus.SummaryOpts{
|
||||
Name: "oohelperd_measurement_time",
|
||||
// metricWCTaskDurationSeconds summarizes the duration of the web connectivity measurement task.
|
||||
metricWCTaskDurationSeconds = promauto.NewSummary(prometheus.SummaryOpts{
|
||||
Name: "oohelperd_wctask_duration_seconds",
|
||||
Help: "Summarizes the time to perform a test-helper measurement (in seconds)",
|
||||
// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
|
||||
//
|
||||
// TODO(bassosimone,FedericoCeratto): investigate whether using
|
||||
// a shorter-than-10m observation interval is better for us
|
||||
Objectives: map[float64]float64{
|
||||
0.25: 0.010, // 0.240 <= φ <= 0.260
|
||||
0.5: 0.010, // 0.490 <= φ <= 0.510
|
||||
|
|
Loading…
Reference in New Issue
Block a user