ooni-probe-cli/internal/measurex/measurement.go
Simone Basso 399d2f65da
feat(measurex): refactored measurement library (#528)
This commit introduces a measurement library that consists of
refactored code from earlier websteps experiments.

I am not going to add tests for the time being, because this library
is still a bit in flux, as we finalize websteps.

I will soon though commit documentation explaining in detail how
to use it, which is currently at https://github.com/ooni/probe-cli/pull/506
and adds a new directory to internal/tutorial.

The core idea of this measurement library is to allow two
measurement modes:

1. tracing, which is what we're currently doing now, and the
tutorial shows how we can rewrite the measurement part of web
connectivity with measurex using less code. Under a tracing
approach, we construct a normal http.Client that however has
tracing configured, we gather events for resolve, connect, TLS
handshake, QUIC handshake, HTTP round trip, etc. and then we
try to make sense of what happened from the events stream;

2. step-by-step, which is what websteps does, and basically
means that after each operation you immediately write into
a Measurement structure its results and immediately draw the
conclusions on what seems odd (which later may become an
anomaly if we see what the test helper measured).

This library is also such that it produces a data format
compatible with the current OONI spec.

This work is part of https://github.com/ooni/probe/issues/1733.
2021-09-30 01:24:08 +02:00

265 lines
7.5 KiB
Go

package measurex
import (
"net"
"net/http"
"net/url"
"time"
)
//
// Measurement
//
// Here we define the fundamental measurement types
// produced by this package.
//
// URLMeasurement is the measurement of a whole URL. It contains
// a bunch of measurements detailing each measurement step.
//
// The RedirectURLs and TotalRuntime fields are tagged `json:"-"` and
// are therefore never emitted when this structure is serialized to JSON.
type URLMeasurement struct {
// URL is the URL we're measuring.
URL string `json:"url"`
// DNS contains all the DNS related measurements.
DNS []*DNSMeasurement `json:"dns"`
// Endpoints contains a measurement for each endpoint
// that we discovered via DNS or TH.
Endpoints []*HTTPEndpointMeasurement `json:"endpoints"`
// RedirectURLs contains the URLs we should fetch next
// if we choose to follow redirections. It is filled by
// fillRedirects and is not serialized to JSON.
RedirectURLs []string `json:"-"`
// TH is the measurement collected by the TH (test helper). It
// is omitted from the JSON when it is nil.
TH interface{} `json:"th,omitempty"`
// TotalRuntime is the total time to measure this URL. It
// is not serialized to JSON.
TotalRuntime time.Duration `json:"-"`
// DNSRuntime is the time to run all DNS checks.
DNSRuntime time.Duration `json:"x_dns_runtime"`
// THRuntime is the total time to invoke all test helpers.
THRuntime time.Duration `json:"x_th_runtime"`
// EpntsRuntime is the total time to check all the endpoints.
EpntsRuntime time.Duration `json:"x_epnts_runtime"`
}
// fillRedirects walks every endpoint of a complete URLMeasurement and
// appends to the RedirectURLs field the string form of each redirect
// Location found in the endpoint's HTTPRedirect list. A location that
// was already seen during this call is appended only once; locations
// are stored in order of first occurrence.
func (m *URLMeasurement) fillRedirects() {
	seen := make(map[string]bool)
	for _, endpoint := range m.Endpoints {
		for _, ev := range endpoint.HTTPRedirect {
			target := ev.Location.String()
			if !seen[target] {
				seen[target] = true
				m.RedirectURLs = append(m.RedirectURLs, target)
			}
		}
	}
}
// Measurement groups all the events that have the same MeasurementID. This
// data format is not compatible with the OONI data format.
//
// Each field is a list of events of one kind. The Close and HTTPRedirect
// lists are tagged `json:"-"` and are never serialized; every other
// list is tagged `omitempty` and is left out of the JSON when empty.
type Measurement struct {
// Connect contains all the connect operations.
Connect []*NetworkEvent `json:"connect,omitempty"`
// ReadWrite contains all the read and write operations.
ReadWrite []*NetworkEvent `json:"read_write,omitempty"`
// Close contains all the close operations (not serialized to JSON).
Close []*NetworkEvent `json:"-"`
// TLSHandshake contains all the TLS handshakes.
TLSHandshake []*TLSHandshakeEvent `json:"tls_handshake,omitempty"`
// QUICHandshake contains all the QUIC handshakes.
QUICHandshake []*QUICHandshakeEvent `json:"quic_handshake,omitempty"`
// LookupHost contains all the host lookups.
LookupHost []*DNSLookupEvent `json:"lookup_host,omitempty"`
// LookupHTTPSSvc contains all the HTTPSSvc lookups.
LookupHTTPSSvc []*DNSLookupEvent `json:"lookup_httpssvc,omitempty"`
// DNSRoundTrip contains all the DNS round trips.
DNSRoundTrip []*DNSRoundTripEvent `json:"dns_round_trip,omitempty"`
// HTTPRoundTrip contains all the HTTP round trips.
HTTPRoundTrip []*HTTPRoundTripEvent `json:"http_round_trip,omitempty"`
// HTTPRedirect contains all the redirections (not serialized to JSON).
HTTPRedirect []*HTTPRedirectEvent `json:"-"`
}
// DNSMeasurement is a DNS measurement. It pairs the domain being
// measured with the events collected while measuring it.
type DNSMeasurement struct {
// Domain is the domain this measurement refers to.
Domain string `json:"domain"`
// A DNSMeasurement is a Measurement. The pointer is embedded, so the
// Measurement fields (e.g., LookupHost) are accessible directly on a
// DNSMeasurement; accessing them panics if this pointer is nil.
*Measurement
}
// allEndpointsForDomain collects every endpoint this measurement
// knows about for the given domain: first the TCP endpoints, then
// the QUIC endpoints.
//
// Arguments:
//
// - domain is the domain we want to connect to;
//
// - port is the port for the endpoint.
//
// The returned slice is nil when no endpoint matches.
func (m *DNSMeasurement) allEndpointsForDomain(domain, port string) (out []*Endpoint) {
	tcp := m.allTCPEndpoints(domain, port)
	quic := m.allQUICEndpoints(domain, port)
	out = append(out, tcp...)
	return append(out, quic...)
}
// AllEndpointsForDomain merges the endpoints for the given domain and
// port found in each of the supplied DNSMeasurements (in argument
// order), passes the merged list through removeDuplicateEndpoints and
// returns the result. The returned error is always nil.
func AllEndpointsForDomain(domain, port string, meas ...*DNSMeasurement) ([]*Endpoint, error) {
	var merged []*Endpoint
	for idx := range meas {
		merged = append(merged, meas[idx].allEndpointsForDomain(domain, port)...)
	}
	return removeDuplicateEndpoints(merged...), nil
}
// allTCPEndpoints returns a TCP endpoint for each IP address resolved
// by the LookupHost events whose Domain equals the given domain. Entries
// that do not parse as IP addresses are ignored.
func (m *DNSMeasurement) allTCPEndpoints(domain, port string) (out []*Endpoint) {
	for _, lookup := range m.LookupHost {
		if lookup.Domain != domain {
			continue
		}
		for _, candidate := range lookup.Addrs() {
			// Only keep real IP addresses: this skips CNAME
			// entries courtesy the WCTH.
			if ip := net.ParseIP(candidate); ip != nil {
				out = append(out, m.newEndpoint(candidate, port, NetworkTCP))
			}
		}
	}
	return out
}
// allQUICEndpoints returns a QUIC endpoint for each address listed by
// the LookupHTTPSSvc events whose Domain equals the given domain and
// that report HTTP/3 support. Unlike allTCPEndpoints, addresses are
// used as-is, without checking that they parse as IP addresses.
func (m *DNSMeasurement) allQUICEndpoints(domain, port string) (out []*Endpoint) {
	for _, lookup := range m.LookupHTTPSSvc {
		// The domain check comes first so that SupportsHTTP3 is
		// only consulted for entries about the requested domain.
		if lookup.Domain != domain || !lookup.SupportsHTTP3() {
			continue
		}
		for _, candidate := range lookup.Addrs() {
			out = append(out, m.newEndpoint(candidate, port, NetworkQUIC))
		}
	}
	return out
}
// newEndpoint builds an Endpoint for the given network whose Address
// is addr and port joined with net.JoinHostPort.
func (m *DNSMeasurement) newEndpoint(addr, port string, network EndpointNetwork) *Endpoint {
	epnt := &Endpoint{}
	epnt.Network = network
	epnt.Address = net.JoinHostPort(addr, port)
	return epnt
}
// allHTTPEndpointsForURL builds an HTTPEndpoint for each endpoint in
// this measurement that matches the hostname of the given URL.
//
// Arguments:
//
// - URL is the URL for which we want endpoints;
//
// - headers are the headers to use.
//
// The port is obtained through PortFromURL; when that fails its error
// is returned along with a nil list. QUIC endpoints are only included
// when the URL scheme is "https". Each returned HTTPEndpoint uses the
// URL hostname as both Domain and SNI.
func (m *DNSMeasurement) allHTTPEndpointsForURL(
	URL *url.URL, headers http.Header) ([]*HTTPEndpoint, error) {
	domain := URL.Hostname()
	port, err := PortFromURL(URL)
	if err != nil {
		return nil, err
	}
	isHTTPS := URL.Scheme == "https"
	var result []*HTTPEndpoint
	for _, candidate := range m.allEndpointsForDomain(domain, port) {
		if candidate.Network == NetworkQUIC && !isHTTPS {
			continue // we'll only use QUIC with HTTPS
		}
		result = append(result, &HTTPEndpoint{
			Domain:  domain,
			Network: candidate.Network,
			Address: candidate.Address,
			SNI:     domain,
			ALPN:    ALPNForHTTPEndpoint(candidate.Network),
			URL:     URL,
			Header:  headers,
		})
	}
	return result, nil
}
// AllEndpointsForURL is like AllHTTPEndpointsForURL but returns
// simple Endpoints rather than HTTPEndpoints. It invokes
// AllHTTPEndpointsForURL with empty headers and keeps only the
// Network and Address of each result. Any error is returned as-is
// along with a nil list.
func AllEndpointsForURL(URL *url.URL, meas ...*DNSMeasurement) ([]*Endpoint, error) {
	httpEpnts, err := AllHTTPEndpointsForURL(URL, http.Header{}, meas...)
	if err != nil {
		return nil, err
	}
	var plain []*Endpoint
	for idx := range httpEpnts {
		plain = append(plain, &Endpoint{
			Network: httpEpnts[idx].Network,
			Address: httpEpnts[idx].Address,
		})
	}
	return plain, nil
}
// AllHTTPEndpointsForURL merges the HTTP endpoints for the given URL
// found in each of the supplied DNSMeasurements (in argument order),
// passes the merged list through removeDuplicateHTTPEndpoints and
// returns the result. This call may fail if we cannot determine the
// port from the URL, in which case we stop at the first failure and
// return its error with a nil list. You MUST supply the headers you
// want to use for measuring.
func AllHTTPEndpointsForURL(URL *url.URL,
	headers http.Header, meas ...*DNSMeasurement) ([]*HTTPEndpoint, error) {
	var collected []*HTTPEndpoint
	for idx := range meas {
		found, err := meas[idx].allHTTPEndpointsForURL(URL, headers)
		if err != nil {
			return nil, err
		}
		collected = append(collected, found...)
	}
	return removeDuplicateHTTPEndpoints(collected...), nil
}
// EndpointMeasurement is an endpoint measurement. It pairs the
// network and address of an endpoint with the events collected
// while measuring it.
type EndpointMeasurement struct {
// Network is the network of this endpoint.
Network EndpointNetwork `json:"network"`
// Address is the address of this endpoint.
Address string `json:"address"`
// An EndpointMeasurement is a Measurement. The pointer is embedded,
// so the Measurement fields are accessible directly on this type.
*Measurement
}
// HTTPEndpointMeasurement is an HTTP endpoint measurement. It has the
// same Network, Address and embedded Measurement as EndpointMeasurement,
// plus the URL the measurement refers to.
type HTTPEndpointMeasurement struct {
// URL is the URL this measurement refers to.
URL string `json:"url"`
// Network is the network of this endpoint.
Network EndpointNetwork `json:"network"`
// Address is the address of this endpoint.
Address string `json:"address"`
// An HTTPEndpointMeasurement is a Measurement. The pointer is embedded,
// so the Measurement fields (e.g., HTTPRedirect) are accessible
// directly on this type.
*Measurement
}