feat(oohelperd): add prometheus metrics (#897)
Closes https://github.com/ooni/probe/issues/2183. While there, avoid exposing nil values for optional fields of the THResponse struct (i.e., "ip_info" and "tls_handshake"). Also, fix `measurexlite`'s `OperationLogger` test and make it deterministic rather than racy.
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/ooni/probe-cli/v3/internal/atomicx"
|
||||
"github.com/ooni/probe-cli/v3/internal/model"
|
||||
@@ -45,29 +46,40 @@ var _ http.Handler = &handler{}
|
||||
|
||||
// ServeHTTP implements http.Handler.ServeHTTP.
|
||||
func (h *handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
|
||||
metricRequestsInflight.Inc()
|
||||
defer metricRequestsInflight.Dec()
|
||||
metricRequestsTotal.Inc()
|
||||
w.Header().Add("Server", fmt.Sprintf(
|
||||
"oohelperd/%s ooniprobe-engine/%s", version.Version, version.Version,
|
||||
))
|
||||
if req.Method != "POST" {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
reader := &io.LimitedReader{R: req.Body, N: h.MaxAcceptableBody}
|
||||
data, err := netxlite.ReadAllContext(req.Context(), reader)
|
||||
if err != nil {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
var creq ctrlRequest
|
||||
if err := json.Unmarshal(data, &creq); err != nil {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
started := time.Now()
|
||||
cresp, err := measure(req.Context(), h, &creq)
|
||||
elapsed := time.Since(started)
|
||||
metricMeasurementTime.Observe(float64(elapsed.Seconds()))
|
||||
if err != nil {
|
||||
metricRequestsByStatusCode.WithLabelValues("400").Inc()
|
||||
w.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
metricRequestsByStatusCode.WithLabelValues("200").Inc()
|
||||
// We assume that the following call cannot fail because it's a
|
||||
// clearly-serializable data structure.
|
||||
data, err = json.Marshal(cresp)
|
||||
|
||||
@@ -14,12 +14,13 @@ import (
|
||||
"github.com/ooni/probe-cli/v3/internal/model"
|
||||
"github.com/ooni/probe-cli/v3/internal/netxlite"
|
||||
"github.com/ooni/probe-cli/v3/internal/runtimex"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
const maxAcceptableBody = 1 << 24
|
||||
|
||||
var (
|
||||
endpoint = flag.String("endpoint", "127.0.0.1:8080", "Endpoint where to listen")
|
||||
endpoint = flag.String("endpoint", "127.0.0.1:8080", "API endpoint")
|
||||
srvAddr = make(chan string, 1) // with buffer
|
||||
srvCancel context.CancelFunc
|
||||
srvCtx context.Context
|
||||
@@ -49,6 +50,7 @@ func main() {
|
||||
true: log.DebugLevel,
|
||||
false: log.InfoLevel,
|
||||
}
|
||||
prometheus := flag.String("prometheus", "127.0.0.1:9091", "Prometheus endpoint")
|
||||
debug := flag.Bool("debug", false, "Toggle debug mode")
|
||||
flag.Parse()
|
||||
log.SetLevel(logmap[*debug])
|
||||
@@ -75,8 +77,13 @@ func main() {
|
||||
srvAddr <- listener.Addr().String()
|
||||
srvWg.Add(1)
|
||||
go srv.Serve(listener)
|
||||
promMux := http.NewServeMux()
|
||||
promMux.Handle("/metrics", promhttp.Handler())
|
||||
promSrv := &http.Server{Addr: *prometheus, Handler: promMux}
|
||||
go promSrv.ListenAndServe()
|
||||
<-srvCtx.Done()
|
||||
shutdown(srv)
|
||||
shutdown(promSrv)
|
||||
listener.Close()
|
||||
srvWg.Done()
|
||||
}
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
package main
|
||||
|
||||
//
|
||||
// Metrics definitions
|
||||
//
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
)
|
||||
|
||||
var (
|
||||
// metricRequestsTotal counts the total number of requests
|
||||
metricRequestsTotal = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Name: "oohelperd_requests_total",
|
||||
Help: "The total number of processed requests",
|
||||
})
|
||||
|
||||
// metricRequestsByStatusCode counts the number of requests that
|
||||
// have returned a given status code to the caller.
|
||||
metricRequestsByStatusCode = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "oohelperd_requests_by_status_code",
|
||||
Help: "Total number of processed requests by status code",
|
||||
}, []string{"code"})
|
||||
|
||||
// metricRequestsInflight counts the number of requests currently inflight.
|
||||
metricRequestsInflight = promauto.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "oohelperd_requests_inflight",
|
||||
Help: "The number or requests currently inflight",
|
||||
})
|
||||
|
||||
// metricMeasurementTime summarizes the time to perform a measurement.
|
||||
metricMeasurementTime = promauto.NewSummary(prometheus.SummaryOpts{
|
||||
Name: "oohelperd_measurement_time",
|
||||
Help: "Summarizes the time to perform a test-helper measurement (in seconds)",
|
||||
// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
|
||||
Objectives: map[float64]float64{
|
||||
0.25: 0.010, // 0.240 <= φ <= 0.260
|
||||
0.5: 0.010, // 0.490 <= φ <= 0.510
|
||||
0.75: 0.010, // 0.740 <= φ <= 0.760
|
||||
0.9: 0.010, // 0.899 <= φ <= 0.901
|
||||
0.99: 0.001, // 0.989 <= φ <= 0.991
|
||||
},
|
||||
})
|
||||
)
|
||||
Reference in New Issue
Block a user