feat(oohelperd): add prometheus metrics (#897)

Closes https://github.com/ooni/probe/issues/2183

While there, avoid exposing nil values for optional fields of the
THResponse struct (i.e., "ip_info" and "tls_handshake").

While there, fix `measurexlite`'s `OperationLogger` test
and make it deterministic rather than racy.
This commit is contained in:
Simone Basso
2022-08-28 23:54:22 +02:00
committed by GitHub
parent 4241ee4bc1
commit dcdd8fb712
7 changed files with 98 additions and 5 deletions
+12
View File
@@ -9,6 +9,7 @@ import (
"fmt"
"io"
"net/http"
"time"
"github.com/ooni/probe-cli/v3/internal/atomicx"
"github.com/ooni/probe-cli/v3/internal/model"
@@ -45,29 +46,40 @@ var _ http.Handler = &handler{}
// ServeHTTP implements http.Handler.ServeHTTP.
func (h *handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
metricRequestsInflight.Inc()
defer metricRequestsInflight.Dec()
metricRequestsTotal.Inc()
w.Header().Add("Server", fmt.Sprintf(
"oohelperd/%s ooniprobe-engine/%s", version.Version, version.Version,
))
if req.Method != "POST" {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
w.WriteHeader(400)
return
}
reader := &io.LimitedReader{R: req.Body, N: h.MaxAcceptableBody}
data, err := netxlite.ReadAllContext(req.Context(), reader)
if err != nil {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
w.WriteHeader(400)
return
}
var creq ctrlRequest
if err := json.Unmarshal(data, &creq); err != nil {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
w.WriteHeader(400)
return
}
started := time.Now()
cresp, err := measure(req.Context(), h, &creq)
elapsed := time.Since(started)
metricMeasurementTime.Observe(float64(elapsed.Seconds()))
if err != nil {
metricRequestsByStatusCode.WithLabelValues("400").Inc()
w.WriteHeader(400)
return
}
metricRequestsByStatusCode.WithLabelValues("200").Inc()
// We assume that the following call cannot fail because it's a
// clearly-serializable data structure.
data, err = json.Marshal(cresp)
+8 -1
View File
@@ -14,12 +14,13 @@ import (
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
"github.com/ooni/probe-cli/v3/internal/runtimex"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
const maxAcceptableBody = 1 << 24
var (
endpoint = flag.String("endpoint", "127.0.0.1:8080", "Endpoint where to listen")
endpoint = flag.String("endpoint", "127.0.0.1:8080", "API endpoint")
srvAddr = make(chan string, 1) // with buffer
srvCancel context.CancelFunc
srvCtx context.Context
@@ -49,6 +50,7 @@ func main() {
true: log.DebugLevel,
false: log.InfoLevel,
}
prometheus := flag.String("prometheus", "127.0.0.1:9091", "Prometheus endpoint")
debug := flag.Bool("debug", false, "Toggle debug mode")
flag.Parse()
log.SetLevel(logmap[*debug])
@@ -75,8 +77,13 @@ func main() {
srvAddr <- listener.Addr().String()
srvWg.Add(1)
go srv.Serve(listener)
promMux := http.NewServeMux()
promMux.Handle("/metrics", promhttp.Handler())
promSrv := &http.Server{Addr: *prometheus, Handler: promMux}
go promSrv.ListenAndServe()
<-srvCtx.Done()
shutdown(srv)
shutdown(promSrv)
listener.Close()
srvWg.Done()
}
+45
View File
@@ -0,0 +1,45 @@
package main
//
// Metrics definitions
//
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
var (
	// metricRequestsTotal counts every request reaching the handler,
	// regardless of the status code eventually returned.
	metricRequestsTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "oohelperd_requests_total",
		Help: "The total number of processed requests",
	})

	// metricRequestsByStatusCode counts the number of requests that
	// have returned a given status code to the caller. The status code
	// is carried by the "code" label (e.g., "200" or "400").
	metricRequestsByStatusCode = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "oohelperd_requests_by_status_code",
		Help: "Total number of processed requests by status code",
	}, []string{"code"})

	// metricRequestsInflight tracks the number of requests currently inflight:
	// it is incremented when a request starts and decremented when it ends.
	metricRequestsInflight = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "oohelperd_requests_inflight",
		Help: "The number of requests currently inflight",
	})

	// metricMeasurementTime summarizes the time to perform a measurement,
	// expressed in seconds.
	//
	// NOTE(review): the metric name carries no unit suffix, while the
	// Prometheus naming guidance recommends one (i.e., "_seconds"); the
	// name is left unchanged here to avoid renaming an exposed metric.
	metricMeasurementTime = promauto.NewSummary(prometheus.SummaryOpts{
		Name: "oohelperd_measurement_time",
		Help: "Summarizes the time to perform a test-helper measurement (in seconds)",
		// Each entry maps a target quantile to its allowed absolute error, so
		// the reported value lies within the rank interval [φ-ε, φ+ε].
		//
		// See https://grafana.com/blog/2022/03/01/how-summary-metrics-work-in-prometheus/
		Objectives: map[float64]float64{
			0.25: 0.010, // 0.240 <= φ <= 0.260
			0.5:  0.010, // 0.490 <= φ <= 0.510
			0.75: 0.010, // 0.740 <= φ <= 0.760
			0.9:  0.010, // 0.890 <= φ <= 0.910
			0.99: 0.001, // 0.989 <= φ <= 0.991
		},
	})
)
+2 -2
View File
@@ -72,7 +72,7 @@ func TestNewOperationLogger(t *testing.T) {
}
const maxwait = 100 * time.Microsecond
ol := newOperationLogger(maxwait, logger, "antani%d", 0)
time.Sleep(4 * ol.maxwait)
ol.wg.Wait() // wait for the message to be emitted
ol.Stop(nil)
if len(lines) != 2 {
t.Fatal("unexpected number of lines")
@@ -100,7 +100,7 @@ func TestNewOperationLogger(t *testing.T) {
}
const maxwait = 100 * time.Microsecond
ol := newOperationLogger(maxwait, logger, "antani%d", 0)
time.Sleep(4 * ol.maxwait)
ol.wg.Wait() // wait for the message to be emitted
ol.Stop(io.EOF)
if len(lines) != 2 {
t.Fatal("unexpected number of lines")
+2 -2
View File
@@ -78,8 +78,8 @@ const (
// THResponse is the response from the control service.
type THResponse struct {
TCPConnect map[string]THTCPConnectResult `json:"tcp_connect"`
TLSHandshake map[string]THTLSHandshakeResult `json:"tls_handshake"`
TLSHandshake map[string]THTLSHandshakeResult `json:"tls_handshake,omitempty"`
HTTPRequest THHTTPRequestResult `json:"http_request"`
DNS THDNSResult `json:"dns"`
IPInfo map[string]*THIPInfo `json:"ip_info"`
IPInfo map[string]*THIPInfo `json:"ip_info,omitempty"`
}