From df0e099b739593a10ff4d5bbe5aa17bb8b90cd58 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Sun, 28 Aug 2022 13:49:24 +0200 Subject: [PATCH] feat(oohelperd): follow (and record) TH and probe endpoints (#890) This diff introduces the following `oohelperd` enhancements: 1. measure both IP addresses resolved by the TH and IP addresses resolved by the probe; 2. when the URL scheme is http and there's no explicit port, measure both 80 and 443 (which will pay off big once we introduce support for optionally performing TLS handshakes); 3. include information about the probe and TH IP addresses into the results: who resolved each IP address, whether an address is a bogon, the ASN associated to an address. This diff is part of https://github.com/ooni/probe/issues/2237 --- internal/cmd/oohelperd/handler_test.go | 2 +- internal/cmd/oohelperd/ipinfo.go | 98 +++++++ internal/cmd/oohelperd/ipinfo_test.go | 240 ++++++++++++++++++ internal/cmd/oohelperd/main_test.go | 2 +- internal/cmd/oohelperd/measure.go | 17 +- internal/cmd/oohelperd/tcpconnect.go | 34 ++- .../experiment/webconnectivity/control.go | 29 +++ 7 files changed, 401 insertions(+), 21 deletions(-) create mode 100644 internal/cmd/oohelperd/ipinfo.go create mode 100644 internal/cmd/oohelperd/ipinfo_test.go diff --git a/internal/cmd/oohelperd/handler_test.go b/internal/cmd/oohelperd/handler_test.go index 6613e1f..ac37ae1 100644 --- a/internal/cmd/oohelperd/handler_test.go +++ b/internal/cmd/oohelperd/handler_test.go @@ -51,7 +51,7 @@ const requestWithoutDomainName = `{ ] }` -func TestWorkingAsIntended(t *testing.T) { +func TestHandlerWorkingAsIntended(t *testing.T) { handler := &handler{ MaxAcceptableBody: 1 << 24, NewClient: func() model.HTTPClient { diff --git a/internal/cmd/oohelperd/ipinfo.go b/internal/cmd/oohelperd/ipinfo.go new file mode 100644 index 0000000..b542174 --- /dev/null +++ b/internal/cmd/oohelperd/ipinfo.go @@ -0,0 +1,98 @@ +package main + +// +// Generates IP and endpoint information. +// + +import ( + "net" + "net/url" + "sort" + "strings" + + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/engine/geolocate" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// newIPInfo creates an IP to IPInfo mapping from addresses resolved +// by the probe (inside [creq]) or the TH (inside [addrs]). +func newIPInfo(creq *ctrlRequest, addrs []string) map[string]*webconnectivity.ControlIPInfo { + discoveredby := make(map[string]int64) + for _, epnt := range creq.TCPConnect { + addr, _, err := net.SplitHostPort(epnt) + if err != nil || net.ParseIP(addr) == nil { + continue + } + discoveredby[addr] |= webconnectivity.ControlIPInfoFlagResolvedByProbe + } + for _, addr := range addrs { + if net.ParseIP(addr) != nil { + discoveredby[addr] |= webconnectivity.ControlIPInfoFlagResolvedByTH + } + } + ipinfo := make(map[string]*webconnectivity.ControlIPInfo) + for addr, flags := range discoveredby { + if netxlite.IsBogon(addr) { // note: we already excluded non-IP addrs above + flags |= webconnectivity.ControlIPInfoFlagIsBogon + } + asn, _, _ := geolocate.LookupASN(addr) // AS0 on failure + ipinfo[addr] = &webconnectivity.ControlIPInfo{ + ASN: int64(asn), + Flags: flags, + } + } + return ipinfo +} + +// endpointInfo contains info about an endpoint to measure +type endpointInfo struct { + // Addr is the address to measure + Addr string + + // Epnt is the endpoint to measure + Epnt string +} + +// ipInfoToEndpoints takes in input the [ipinfo] returned by newIPInfo +// and the [URL] provided by the probe to generate the list of endpoints +// to measure. We choose ports as follows: +// +// 1. if the input URL contains a port, we use such a port; +// +// 2. if the input URL scheme is "https", we choose port 443; +// +// 3. if the input URL scheme is "http", we use both 443 and 80, which +// allows us to include in the measurement information useful to determine +// whether an IP address is valid for a domain; +// +// 4. otherwise, we don't generate any endpoint to measure. +func ipInfoToEndpoints(URL *url.URL, ipinfo map[string]*webconnectivity.ControlIPInfo) []endpointInfo { + var ports []string + if port := URL.Port(); port != "" { + ports = []string{port} // as documented + } else if URL.Scheme == "https" { + ports = []string{"443"} // as documented + } else if URL.Scheme == "http" { + ports = []string{"80", "443"} // as documented + } + out := []endpointInfo{} + for addr, info := range ipinfo { + if (info.Flags & webconnectivity.ControlIPInfoFlagIsBogon) != 0 { + continue // as documented + } + for _, port := range ports { + epnt := net.JoinHostPort(addr, port) + out = append(out, endpointInfo{ + Addr: addr, + Epnt: epnt, + }) + } + } + // sort the output to make testing work deterministically since iterating + // a map in golang isn't guaranteed to return ordered keys + sort.SliceStable(out, func(i, j int) bool { + return strings.Compare(out[i].Epnt, out[j].Epnt) < 0 + }) + return out +} diff --git a/internal/cmd/oohelperd/ipinfo_test.go b/internal/cmd/oohelperd/ipinfo_test.go new file mode 100644 index 0000000..ca2c902 --- /dev/null +++ b/internal/cmd/oohelperd/ipinfo_test.go @@ -0,0 +1,240 @@ +package main + +import ( + "net/url" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" +) + +func Test_newIPInfo(t *testing.T) { + type args struct { + creq *ctrlRequest + addrs []string + } + tests := []struct { + name string + args args + want map[string]*webconnectivity.ControlIPInfo + }{{ + name: "with empty input", + args: args{ + creq: &webconnectivity.ControlRequest{ + HTTPRequest: "", + HTTPRequestHeaders: map[string][]string{}, + TCPConnect: []string{}, + }, + addrs: []string{}, + }, + want: map[string]*webconnectivity.ControlIPInfo{}, + }, { + name: "typical case with also bogons", + args: args{ + creq: &webconnectivity.ControlRequest{ + HTTPRequest: "", + HTTPRequestHeaders: map[string][]string{}, + TCPConnect: []string{ + "10.0.0.1:443", + "8.8.8.8:443", + }, + }, + addrs: []string{ + "8.8.8.8", + "8.8.4.4", + }, + }, + want: map[string]*webconnectivity.ControlIPInfo{ + "10.0.0.1": { + ASN: 0, + Flags: webconnectivity.ControlIPInfoFlagIsBogon | webconnectivity.ControlIPInfoFlagResolvedByProbe, + }, + "8.8.8.8": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByProbe | webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + "8.8.4.4": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + }, + }, { + name: "with invalid endpoint", + args: args{ + creq: &webconnectivity.ControlRequest{ + HTTPRequest: "", + HTTPRequestHeaders: map[string][]string{}, + TCPConnect: []string{ + "1.2.3.4", + }, + }, + addrs: []string{}, + }, + want: map[string]*webconnectivity.ControlIPInfo{}, + }, { + name: "with invalid IP addr", + args: args{ + creq: &webconnectivity.ControlRequest{ + HTTPRequest: "", + HTTPRequestHeaders: map[string][]string{}, + TCPConnect: []string{ + "dns.google:443", + }, + }, + addrs: []string{}, + }, + want: map[string]*webconnectivity.ControlIPInfo{}, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := newIPInfo(tt.args.creq, tt.args.addrs) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Fatal(diff) + } + }) + } +} + +func Test_ipInfoToEndpoints(t *testing.T) { + type args struct { + URL *url.URL + ipinfo map[string]*webconnectivity.ControlIPInfo + } + tests := []struct { + name string + args args + want []endpointInfo + }{{ + name: "with nil map and empty URL", + args: args{ + URL: &url.URL{}, + ipinfo: nil, + }, + want: []endpointInfo{}, + }, { + name: "with empty map and empty URL", + args: args{ + URL: &url.URL{}, + ipinfo: map[string]*webconnectivity.ControlIPInfo{}, + }, + want: []endpointInfo{}, + }, { + name: "with http scheme, bogons, and and no port", + args: args{ + URL: &url.URL{ + Scheme: "http", + }, + ipinfo: map[string]*webconnectivity.ControlIPInfo{ + "10.0.0.1": { + ASN: 0, + Flags: webconnectivity.ControlIPInfoFlagIsBogon | webconnectivity.ControlIPInfoFlagResolvedByProbe, + }, + "8.8.8.8": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByProbe | webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + "8.8.4.4": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + }, + }, + want: []endpointInfo{{ + Addr: "8.8.4.4", + Epnt: "8.8.4.4:443", + }, { + Addr: "8.8.4.4", + Epnt: "8.8.4.4:80", + }, { + Addr: "8.8.8.8", + Epnt: "8.8.8.8:443", + }, { + Addr: "8.8.8.8", + Epnt: "8.8.8.8:80", + }}, + }, { + name: "with bogons and explicit port", + args: args{ + URL: &url.URL{ + Host: "dns.google:5432", + }, + ipinfo: map[string]*webconnectivity.ControlIPInfo{ + "10.0.0.1": { + ASN: 0, + Flags: webconnectivity.ControlIPInfoFlagIsBogon | webconnectivity.ControlIPInfoFlagResolvedByProbe, + }, + "8.8.8.8": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByProbe | webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + "8.8.4.4": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + }, + }, + want: []endpointInfo{{ + Addr: "8.8.4.4", + Epnt: "8.8.4.4:5432", + }, { + Addr: "8.8.8.8", + Epnt: "8.8.8.8:5432", + }}, + }, { + name: "with addresses and some bogons, no port, and unknown scheme", + args: args{ + URL: &url.URL{}, + ipinfo: map[string]*webconnectivity.ControlIPInfo{ + "10.0.0.1": { + ASN: 0, + Flags: webconnectivity.ControlIPInfoFlagIsBogon | webconnectivity.ControlIPInfoFlagResolvedByProbe, + }, + "8.8.8.8": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByProbe | webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + "8.8.4.4": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + }, + }, + want: []endpointInfo{}, + }, { + name: "with addresses and some bogons, no port, and https scheme", + args: args{ + URL: &url.URL{ + Scheme: "https", + }, + ipinfo: map[string]*webconnectivity.ControlIPInfo{ + "10.0.0.1": { + ASN: 0, + Flags: webconnectivity.ControlIPInfoFlagIsBogon | webconnectivity.ControlIPInfoFlagResolvedByProbe, + }, + "8.8.8.8": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByProbe | webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + "8.8.4.4": { + ASN: 15169, + Flags: webconnectivity.ControlIPInfoFlagResolvedByTH, + }, + }, + }, + want: []endpointInfo{{ + Addr: "8.8.4.4", + Epnt: "8.8.4.4:443", + }, { + Addr: "8.8.8.8", + Epnt: "8.8.8.8:443", + }}, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ipInfoToEndpoints(tt.args.URL, tt.args.ipinfo) + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Fatal(diff) + } + }) + } +} diff --git a/internal/cmd/oohelperd/main_test.go b/internal/cmd/oohelperd/main_test.go index 54aab25..001a684 100644 --- a/internal/cmd/oohelperd/main_test.go +++ b/internal/cmd/oohelperd/main_test.go @@ -14,7 +14,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/runtimex" ) -func TestWorkAsIntended(t *testing.T) { +func TestMainWorkingAsIntended(t *testing.T) { // let the kernel pick a random free port *endpoint = "127.0.0.1:0" diff --git a/internal/cmd/oohelperd/measure.go b/internal/cmd/oohelperd/measure.go index 887d2a0..6b277cb 100644 --- a/internal/cmd/oohelperd/measure.go +++ b/internal/cmd/oohelperd/measure.go @@ -47,7 +47,7 @@ func measure(ctx context.Context, config *handler, creq *ctrlRequest) (*ctrlResp wg.Wait() // start assembling the response - cresp := new(ctrlResponse) + cresp := &ctrlResponse{} select { case cresp.DNS = <-dnsch: default: @@ -59,12 +59,17 @@ func measure(ctx context.Context, config *handler, creq *ctrlRequest) (*ctrlResp } } - // tcpconnect: start - tcpconnch := make(chan tcpResultPair, len(creq.TCPConnect)) - for _, endpoint := range creq.TCPConnect { + // obtain IP info and figure out the endpoints measurement plan + cresp.IPInfo = newIPInfo(creq, cresp.DNS.Addrs) + endpoints := ipInfoToEndpoints(URL, cresp.IPInfo) + + // tcpconnect: start over all the endpoints + tcpconnch := make(chan *tcpResultPair, len(endpoints)) + for _, endpoint := range endpoints { wg.Add(1) go tcpDo(ctx, &tcpConfig{ - Endpoint: endpoint, + Address: endpoint.Addr, + Endpoint: endpoint.Epnt, NewDialer: config.NewDialer, Out: tcpconnch, Wg: wg, @@ -93,7 +98,7 @@ Loop: for { select { case tcpconn := <-tcpconnch: - cresp.TCPConnect[tcpconn.Endpoint] = tcpconn.Result + cresp.TCPConnect[tcpconn.Endpoint] = tcpconn.TCP default: break Loop } diff --git a/internal/cmd/oohelperd/tcpconnect.go b/internal/cmd/oohelperd/tcpconnect.go index b2df17b..a0e5adb 100644 --- a/internal/cmd/oohelperd/tcpconnect.go +++ b/internal/cmd/oohelperd/tcpconnect.go @@ -10,6 +10,7 @@ import ( "time" "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/measurexlite" "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netxlite" ) @@ -19,15 +20,21 @@ type ctrlTCPResult = webconnectivity.ControlTCPConnectResult // tcpResultPair contains the endpoint and the corresponding result. type tcpResultPair struct { + // Address is the IP address we measured. + Address string + // Endpoint is the endpoint we measured. Endpoint string - // Result contains the results. - Result ctrlTCPResult + // TCP contains the TCP results. + TCP ctrlTCPResult } // tcpConfig configures the TCP connect check. type tcpConfig struct { + // Address is the MANDATORY address to measure. + Address string + // Endpoint is the MANDATORY endpoint to connect to. Endpoint string @@ -35,7 +42,7 @@ type tcpConfig struct { NewDialer func() model.Dialer // Out is the MANDATORY where we'll post the TCP measurement results. - Out chan tcpResultPair + Out chan *tcpResultPair // Wg is MANDATORY and is used to sync with the parent. Wg *sync.WaitGroup @@ -47,19 +54,20 @@ func tcpDo(ctx context.Context, config *tcpConfig) { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() defer config.Wg.Done() + out := &tcpResultPair{ + Address: config.Address, + Endpoint: config.Endpoint, + TCP: webconnectivity.ControlTCPConnectResult{}, + } + defer func() { + config.Out <- out + }() dialer := config.NewDialer() defer dialer.CloseIdleConnections() conn, err := dialer.DialContext(ctx, "tcp", config.Endpoint) - if conn != nil { - conn.Close() - } - config.Out <- tcpResultPair{ - Endpoint: config.Endpoint, - Result: ctrlTCPResult{ - Failure: tcpMapFailure(newfailure(err)), - Status: err == nil, - }, - } + out.TCP.Failure = tcpMapFailure(newfailure(err)) + out.TCP.Status = err == nil + measurexlite.MaybeClose(conn) } // tcpMapFailure attempts to map netxlite failures to the strings diff --git a/internal/engine/experiment/webconnectivity/control.go b/internal/engine/experiment/webconnectivity/control.go index a0819ff..91089ea 100644 --- a/internal/engine/experiment/webconnectivity/control.go +++ b/internal/engine/experiment/webconnectivity/control.go @@ -33,6 +33,11 @@ type ControlHTTPRequestResult struct { StatusCode int64 `json:"status_code"` } +// TODO(bassosimone): ASNs and FillASNs are private implementation details of v0.4 +// that are actually ~annoying because we are mixing the data model with fields used +// by just the v0.4 client implementation. We should avoid repeating this mistake +// when implementing v0.5 of the client. + // ControlDNSResult is the result of the DNS lookup // performed by the control vantage point. type ControlDNSResult struct { @@ -41,11 +46,35 @@ type ControlDNSResult struct { ASNs []int64 `json:"-"` // not visible from the JSON } +// ControlIPInfo contains information about IP addresses resolved either +// by the probe or by the TH and processed by the TH. +type ControlIPInfo struct { + // ASN contains the address' AS number. + ASN int64 `json:"asn"` + + // Flags contains flags describing this address. + Flags int64 `json:"flags"` +} + +const ( + // ControlIPInfoFlagResolvedByProbe indicates that the probe has + // resolved this IP address. + ControlIPInfoFlagResolvedByProbe = 1 << iota + + // ControlIPInfoFlagResolvedByTH indicates that the test helper + // has resolved this IP address. + ControlIPInfoFlagResolvedByTH + + // ControlIPInfoFlagIsBogon indicates that the address is a bogon + ControlIPInfoFlagIsBogon +) + // ControlResponse is the response from the control service. type ControlResponse struct { TCPConnect map[string]ControlTCPConnectResult `json:"tcp_connect"` HTTPRequest ControlHTTPRequestResult `json:"http_request"` DNS ControlDNSResult `json:"dns"` + IPInfo map[string]*ControlIPInfo `json:"ip_info"` } // Control performs the control request and returns the response.