From fc19c9901acf5c7df45139885b79f603f6a9adb3 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 23 Mar 2021 16:46:46 +0100 Subject: [PATCH] fix(webconnectivity): expose network events (#258) * fix(webconnectivity): expose network events By not exposing network events in webconnectivity, we are missing several interesting, explanatory data points. This diff fixes the issue by: 1. enriching the definition of network events to include extra data useful for performing (manual) data analysis; 2. adding a tags field to network events such that we can add tags to specific events and understand where they come from; 3. exposing all the (tagged) network events that happen when running a webconnectivity experiment. See https://github.com/ooni/probe-engine/issues/1157. * progress * more work towards landing this diff * Apply suggestions from code review --- .../experiment/webconnectivity/connects.go | 4 +- .../experiment/webconnectivity/dnslookup.go | 5 ++- .../experiment/webconnectivity/httpget.go | 3 ++ .../webconnectivity/webconnectivity.go | 45 +++++++++++++++++-- .../webconnectivity/webconnectivity_test.go | 2 +- internal/engine/netx/archival/archival.go | 24 +++++----- 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/internal/engine/experiment/webconnectivity/connects.go b/internal/engine/experiment/webconnectivity/connects.go index 4f047d2..1500a95 100644 --- a/internal/engine/experiment/webconnectivity/connects.go +++ b/internal/engine/experiment/webconnectivity/connects.go @@ -3,6 +3,7 @@ package webconnectivity import ( "context" "net/url" + "time" "github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter" "github.com/ooni/probe-cli/v3/internal/engine/model" @@ -10,6 +11,7 @@ import ( // ConnectsConfig contains the config for Connects type ConnectsConfig struct { + Begin time.Time Session model.ExperimentSession TargetURL *url.URL URLGetterURLs []string @@ -28,7 +30,7 @@ type ConnectsResult struct { // check whether the resolved endpoints are reachable. func Connects(ctx context.Context, config ConnectsConfig) (out ConnectsResult) { out.AllKeys = []urlgetter.TestKeys{} - multi := urlgetter.Multi{Session: config.Session} + multi := urlgetter.Multi{Begin: config.Begin, Session: config.Session} inputs := []urlgetter.MultiInput{} for _, url := range config.URLGetterURLs { inputs = append(inputs, urlgetter.MultiInput{ diff --git a/internal/engine/experiment/webconnectivity/dnslookup.go b/internal/engine/experiment/webconnectivity/dnslookup.go index 6d0a324..e5624d1 100644 --- a/internal/engine/experiment/webconnectivity/dnslookup.go +++ b/internal/engine/experiment/webconnectivity/dnslookup.go @@ -5,6 +5,7 @@ import ( "fmt" "net/url" "sort" + "time" "github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter" "github.com/ooni/probe-cli/v3/internal/engine/model" @@ -12,6 +13,7 @@ import ( // DNSLookupConfig contains settings for the DNS lookup. type DNSLookupConfig struct { + Begin time.Time Session model.ExperimentSession URL *url.URL } @@ -27,7 +29,8 @@ type DNSLookupResult struct { func DNSLookup(ctx context.Context, config DNSLookupConfig) (out DNSLookupResult) { target := fmt.Sprintf("dnslookup://%s", config.URL.Hostname()) config.Session.Logger().Infof("%s...", target) - result, err := urlgetter.Getter{Session: config.Session, Target: target}.Get(ctx) + result, err := urlgetter.Getter{ + Begin: config.Begin, Session: config.Session, Target: target}.Get(ctx) out.Addrs = make(map[string]int64) for _, query := range result.Queries { for _, answer := range query.Answers { diff --git a/internal/engine/experiment/webconnectivity/httpget.go b/internal/engine/experiment/webconnectivity/httpget.go index 35200ca..0c06d84 100644 --- a/internal/engine/experiment/webconnectivity/httpget.go +++ b/internal/engine/experiment/webconnectivity/httpget.go @@ -6,6 +6,7 @@ import ( "net" "net/url" "strings" + "time" "github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter" "github.com/ooni/probe-cli/v3/internal/engine/model" @@ -14,6 +15,7 @@ import ( // HTTPGetConfig contains the config for HTTPGet type HTTPGetConfig struct { Addresses []string + Begin time.Time Session model.ExperimentSession TargetURL *url.URL } @@ -54,6 +56,7 @@ func HTTPGet(ctx context.Context, config HTTPGetConfig) (out HTTPGetResult) { config.Session.Logger().Infof("GET %s...", target) domain := config.TargetURL.Hostname() result, err := urlgetter.Getter{ + Begin: config.Begin, Config: urlgetter.Config{ DNSCache: HTTPGetMakeDNSCache(domain, addresses), }, diff --git a/internal/engine/experiment/webconnectivity/webconnectivity.go b/internal/engine/experiment/webconnectivity/webconnectivity.go index 7f76ed4..4b68cfa 100644 --- a/internal/engine/experiment/webconnectivity/webconnectivity.go +++ b/internal/engine/experiment/webconnectivity/webconnectivity.go @@ -19,7 +19,7 @@ import ( const ( testName = "web_connectivity" - testVersion = "0.3.0" + testVersion = "0.4.0" ) // Config contains the experiment config. @@ -32,6 +32,15 @@ type TestKeys struct { Retries *int64 `json:"retries"` // unused SOCKSProxy *string `json:"socksproxy"` // unused + // For now mostly TCP/TLS "connect" experiment but we are + // considering adding more events. An open question is + // currently how to properly tag these events so that it + // is rather obvious where they come from. + // + // See https://github.com/ooni/probe/issues/1413. + NetworkEvents []archival.NetworkEvent `json:"network_events"` + TLSHandshakes []archival.TLSHandshake `json:"tls_handshakes"` + // DNS experiment Queries []archival.DNSQueryEntry `json:"queries"` DNSExperimentFailure *string `json:"dns_experiment_failure"` @@ -42,7 +51,7 @@ type TestKeys struct { ControlRequest ControlRequest `json:"-"` Control ControlResponse `json:"control"` - // TCP connect experiment + // TCP/TLS "connect" experiment TCPConnect []archival.TCPConnectEntry `json:"tcp_connect"` TCPConnectSuccesses int `json:"-"` TCPConnectAttempts int `json:"-"` @@ -90,6 +99,19 @@ var ( ErrUnsupportedInput = errors.New("unsupported input scheme") ) +// Tags describing the section of this experiment in which +// the data has been collected. +const ( + // DNSExperimentTag is a tag indicating the DNS experiment. + DNSExperimentTag = "dns_experiment" + + // TCPTLSExperimentTag is a tag indicating the connect experiment. + TCPTLSExperimentTag = "tcptls_experiment" + + // HTTPExperimentTag is a tag indicating the HTTP experiment. + HTTPExperimentTag = "http_experiment" +) + // Run implements ExperimentMeasurer.Run. func (m Measurer) Run( ctx context.Context, @@ -129,7 +151,9 @@ func (m Measurer) Run( "backend": testhelper, } // 2. perform the DNS lookup step - dnsResult := DNSLookup(ctx, DNSLookupConfig{Session: sess, URL: URL}) + dnsResult := DNSLookup(ctx, DNSLookupConfig{ + Begin: measurement.MeasurementStartTimeSaved, + Session: sess, URL: URL}) tk.Queries = append(tk.Queries, dnsResult.TestKeys.Queries...) tk.DNSExperimentFailure = dnsResult.Failure epnts := NewEndpoints(URL, dnsResult.Addresses()) @@ -152,7 +176,13 @@ func (m Measurer) Run( sess.Logger().Infof("DNS analysis result: %+v", internal.StringPointerToString( tk.DNSAnalysisResult.DNSConsistency)) // 5. perform TCP/TLS connects + // + // TODO(bassosimone): here we should also follow the IP addresses + // returned by the control experiment. + // + // See https://github.com/ooni/probe/issues/1414 connectsResult := Connects(ctx, ConnectsConfig{ + Begin: measurement.MeasurementStartTimeSaved, Session: sess, TargetURL: URL, URLGetterURLs: epnts.URLs(), @@ -164,12 +194,21 @@ func (m Measurer) Run( // sad that we're storing analysis result inside the measurement tk.TCPConnect = append(tk.TCPConnect, ComputeTCPBlocking( tcpkeys.TCPConnect, tk.Control.TCPConnect)...) + for _, ev := range tcpkeys.NetworkEvents { + ev.Tags = []string{TCPTLSExperimentTag} + tk.NetworkEvents = append(tk.NetworkEvents, ev) + } + for _, ev := range tcpkeys.TLSHandshakes { + ev.Tags = []string{TCPTLSExperimentTag} + tk.TLSHandshakes = append(tk.TLSHandshakes, ev) + } } tk.TCPConnectAttempts = connectsResult.Total tk.TCPConnectSuccesses = connectsResult.Successes // 6. perform HTTP/HTTPS measurement httpResult := HTTPGet(ctx, HTTPGetConfig{ Addresses: dnsResult.Addresses(), + Begin: measurement.MeasurementStartTimeSaved, Session: sess, TargetURL: URL, }) diff --git a/internal/engine/experiment/webconnectivity/webconnectivity_test.go b/internal/engine/experiment/webconnectivity/webconnectivity_test.go index 8a04618..f61ba0b 100644 --- a/internal/engine/experiment/webconnectivity/webconnectivity_test.go +++ b/internal/engine/experiment/webconnectivity/webconnectivity_test.go @@ -21,7 +21,7 @@ func TestNewExperimentMeasurer(t *testing.T) { if measurer.ExperimentName() != "web_connectivity" { t.Fatal("unexpected name") } - if measurer.ExperimentVersion() != "0.3.0" { + if measurer.ExperimentVersion() != "0.4.0" { t.Fatal("unexpected version") } } diff --git a/internal/engine/netx/archival/archival.go b/internal/engine/netx/archival/archival.go index ec1fdac..67ef5f8 100644 --- a/internal/engine/netx/archival/archival.go +++ b/internal/engine/netx/archival/archival.go @@ -463,17 +463,20 @@ func (qtype dnsQueryType) makequeryentry(begin time.Time, ev trace.Event) DNSQue } } -// NetworkEvent is a network event. +// NetworkEvent is a network event. It contains all the possible fields +// and most fields are optional. They are only added when it makes sense +// for them to be there _and_ we have data to show. type NetworkEvent struct { - Address string `json:"address,omitempty"` - ConnID int64 `json:"conn_id,omitempty"` - DialID int64 `json:"dial_id,omitempty"` - Failure *string `json:"failure"` - NumBytes int64 `json:"num_bytes,omitempty"` - Operation string `json:"operation"` - Proto string `json:"proto,omitempty"` - T float64 `json:"t"` - TransactionID int64 `json:"transaction_id,omitempty"` + Address string `json:"address,omitempty"` + ConnID int64 `json:"conn_id,omitempty"` + DialID int64 `json:"dial_id,omitempty"` + Failure *string `json:"failure"` + NumBytes int64 `json:"num_bytes,omitempty"` + Operation string `json:"operation"` + Proto string `json:"proto,omitempty"` + T float64 `json:"t"` + Tags []string `json:"tags,omitempty"` + TransactionID int64 `json:"transaction_id,omitempty"` } // NewNetworkEventsList returns a list of DNS queries. @@ -547,6 +550,7 @@ type TLSHandshake struct { PeerCertificates []MaybeBinaryValue `json:"peer_certificates"` ServerName string `json:"server_name"` T float64 `json:"t"` + Tags []string `json:"tags"` TLSVersion string `json:"tls_version"` TransactionID int64 `json:"transaction_id,omitempty"` }