From 3766ab272194f701459d64eb48e6b660c03bcc06 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 5 Sep 2022 11:35:48 +0200 Subject: [PATCH] feat(webconnectivity@v0.5): use TLS info from TH (#933) This diff modifies webconnectivity@v0.5 to take decisions regarding TLS blocking by using the response from the TH rather than using questionable heuristics based on inspecting the TLSHandshake list alone. This change should improve correctness _when_ we're using the improved TH, which is currently used for 50% of the probes. See https://github.com/ooni/probe/issues/2257 While there, modify `control.go` to specify which control is being used. --- .../webconnectivity/analysiscore.go | 1 + .../webconnectivity/analysishttpcore.go | 136 ++++-------------- .../webconnectivity/analysistcpip.go | 12 +- .../experiment/webconnectivity/analysistls.go | 50 +++++++ .../experiment/webconnectivity/control.go | 11 +- .../experiment/webconnectivity/measurer.go | 2 +- .../experiment/webconnectivity/testkeys.go | 4 + 7 files changed, 98 insertions(+), 118 deletions(-) create mode 100644 internal/experiment/webconnectivity/analysistls.go diff --git a/internal/experiment/webconnectivity/analysiscore.go b/internal/experiment/webconnectivity/analysiscore.go index 1081dd6..1ed20bb 100644 --- a/internal/experiment/webconnectivity/analysiscore.go +++ b/internal/experiment/webconnectivity/analysiscore.go @@ -90,6 +90,7 @@ func (tk *TestKeys) analysisToplevel(logger model.Logger) { // these functions compute the value of XBlockingFlags tk.analysisDNSToplevel(logger) tk.analysisTCPIPToplevel(logger) + tk.analysisTLSToplevel(logger) tk.analysisHTTPToplevel(logger) // now, let's determine .Accessible and .Blocking diff --git a/internal/experiment/webconnectivity/analysishttpcore.go b/internal/experiment/webconnectivity/analysishttpcore.go index 51ab235..91ea2ff 100644 --- a/internal/experiment/webconnectivity/analysishttpcore.go +++ b/internal/experiment/webconnectivity/analysishttpcore.go @@ -5,8 +5,6 @@ package webconnectivity // import ( - "net/url" - "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netxlite" ) @@ -20,8 +18,6 @@ import ( // // This results in possibly setting these XBlockingFlags: // -// - analysisFlagTLSBlocking -// // - analysisFlagHTTPBlocking // // - analysisFlagHTTPDiff @@ -29,117 +25,45 @@ import ( // In websteps fashion, we don't stop at the first failure, rather we // process all the available data and evaluate all possible errors. func (tk *TestKeys) analysisHTTPToplevel(logger model.Logger) { - // don't perform any analysis without TH data - if tk.Control == nil || tk.ControlRequest == nil { - return - } - ctrl := tk.Control.HTTPRequest - - // don't perform any analysis if the TH's HTTP measurement failed - if ctrl.Failure != nil { - return - } - - // determine whether the original URL was HTTPS - origURL, err := url.Parse(tk.ControlRequest.HTTPRequest) - if err != nil { - return // this seeems like a bug? - } - isHTTPS := origURL.Scheme == "https" - - // determine whether we had any TLS handshake issue and, in such a case, - // declare that we had a case of "http-failure" through TLS. - // - // Note that this would eventually count as an "http-failure" for .Blocking - // because Web Connectivity did not have a concept of TLS based blocking. - if tk.hasWellKnownTLSHandshakeIssues(isHTTPS, logger) { - tk.BlockingFlags |= analysisFlagTLSBlocking - // continue processing - } - - // determine whether we had well known cleartext HTTP round trip issues - // and, in such a case, declare we had an "http-failure". - if tk.hasWellKnownHTTPRoundTripIssues(logger) { - tk.BlockingFlags |= analysisFlagHTTPBlocking - // continue processing - } - // if we don't have any request to check, there's not much more we // can actually do here, so let's just return. if len(tk.Requests) <= 0 { return } - - // if the request has failed in any other way, we don't know. By convention, the first - // entry in the tk.Requests array is the last entry that was measured. finalRequest := tk.Requests[0] - if finalRequest.Failure != nil { + tk.HTTPExperimentFailure = finalRequest.Failure + + // don't perform any futher analysis without TH data + if tk.Control == nil || tk.ControlRequest == nil { + return + } + ctrl := tk.Control.HTTPRequest + + // don't perform any analysis if the TH's HTTP measurement failed because + // performing more precise mapping is a job for the pipeline. + if ctrl.Failure != nil { + return + } + + // flag cases of known HTTP failures + if failure := finalRequest.Failure; failure != nil { + switch *failure { + case netxlite.FailureConnectionReset, + netxlite.FailureGenericTimeoutError, + netxlite.FailureEOFError: + tk.BlockingFlags |= analysisFlagHTTPBlocking + logger.Warnf( + "HTTP: endpoint %s is blocked (see #%d): %s", + finalRequest.Address, + finalRequest.TransactionID, + *failure, + ) + default: + // leave this case for ooni/pipeline + } return } // fallback to the HTTP diff algo. tk.analysisHTTPDiff(logger, finalRequest, &ctrl) } - -// hasWellKnownTLSHandshakeIssues returns true in case we observed -// a set of well-known issues during the TLS handshake. -func (tk *TestKeys) hasWellKnownTLSHandshakeIssues(isHTTPS bool, logger model.Logger) (result bool) { - // TODO(bassosimone): we should return TLS information in the TH - // such that we can perform a TCP-like check. For now, instead, we - // only perform comparison when the initial URL was HTTPS. Given - // that we unconditionally check for HTTPS even when the URL is HTTP, - // we cannot blindly treat all TLS errors as blocking. A website - // may just not have HTTPS. While in the obvious cases we will see - // certificate errors, in some cases it may actually timeout. - if isHTTPS { - for _, thx := range tk.TLSHandshakes { - fail := thx.Failure - if fail == nil { - continue // this handshake succeded, so skip it - } - switch *fail { - case netxlite.FailureConnectionReset, - netxlite.FailureGenericTimeoutError, - netxlite.FailureEOFError, - netxlite.FailureSSLInvalidHostname, - netxlite.FailureSSLInvalidCertificate, - netxlite.FailureSSLUnknownAuthority: - logger.Warnf( - "TLS: endpoint %s fails with %s (see #%d)", - thx.Address, *fail, thx.TransactionID, - ) - result = true // flip the result but continue looping so we print them all - default: - // check next handshake - } - } - } - return -} - -// hasWellKnownHTTPRoundTripIssues checks whether any HTTP round -// trip failed in a well-known suspicious way -func (tk *TestKeys) hasWellKnownHTTPRoundTripIssues(logger model.Logger) (result bool) { - for _, rtx := range tk.Requests { - fail := rtx.Failure - if fail == nil { - // This one succeded, so skip it. Note that, in principle, we know - // the fist entry is the last request occurred, but I really do not - // want to embed this bad assumption in one extra place! - continue - } - switch *fail { - case netxlite.FailureConnectionReset, - netxlite.FailureGenericTimeoutError, - netxlite.FailureEOFError: - logger.Warnf( - "TLS: endpoint %s fails with %s (see #%d)", - "N/A", *fail, rtx.TransactionID, // TODO(bassosimone): implement - ) - result = true // flip the result but continue looping so we print them all - default: - // check next round trip - } - } - return -} diff --git a/internal/experiment/webconnectivity/analysistcpip.go b/internal/experiment/webconnectivity/analysistcpip.go index 24f6086..4a901df 100644 --- a/internal/experiment/webconnectivity/analysistcpip.go +++ b/internal/experiment/webconnectivity/analysistcpip.go @@ -33,11 +33,6 @@ func (tk *TestKeys) analysisTCPIPToplevel(logger model.Logger) { isfalse = false ) - // TODO(bassosimone): the TH should measure also some of the IP addrs it discovered - // and the probe did not discover to improve the analysis. Otherwise, the probe - // is fooled by the TH also failing for countries that return random IP addresses - // that are actually not working. Yet, ooni/data would definitely see this. - // walk the list of probe results and compare with TH results for _, entry := range tk.TCPConnect { // skip successful entries @@ -75,7 +70,12 @@ func (tk *TestKeys) analysisTCPIPToplevel(logger model.Logger) { // for the pipeline rather than for the probe. continue } - logger.Warnf("TCP/IP: endpoint %s is blocked (see #%d)", epnt, entry.TransactionID) + logger.Warnf( + "TCP/IP: endpoint %s is blocked (see #%d): %s", + epnt, + entry.TransactionID, + *failure, + ) entry.Status.Blocked = &istrue tk.BlockingFlags |= analysisFlagTCPIPBlocking } diff --git a/internal/experiment/webconnectivity/analysistls.go b/internal/experiment/webconnectivity/analysistls.go new file mode 100644 index 0000000..6e3c5a9 --- /dev/null +++ b/internal/experiment/webconnectivity/analysistls.go @@ -0,0 +1,50 @@ +package webconnectivity + +// +// TLS analysis +// + +import "github.com/ooni/probe-cli/v3/internal/model" + +// analysisTLSToplevel is the toplevel analysis function for TLS. +// +// This algorithm aims to flag the TLS endpoints that failed unreasonably +// compared to what the TH has observed for the same endpoints. +func (tk *TestKeys) analysisTLSToplevel(logger model.Logger) { + // if we don't have a control result, do nothing. + if tk.Control == nil || len(tk.Control.TLSHandshake) <= 0 { + return + } + + // walk the list of probe results and compare with TH results + for _, entry := range tk.TLSHandshakes { + // skip successful entries + failure := entry.Failure + if failure == nil { + continue // did not fail + } + epnt := entry.Address + + // TODO(bassosimone,kelmenhorst): if, in the future, we choose to + // adapt this code to QUIC, we need to remember to treat EHOSTUNREACH + // and ENETUNREACH specially when the IP address is IPv6. + + // obtain the corresponding endpoint + ctrl, found := tk.Control.TLSHandshake[epnt] + if !found { + continue // only the probe tested this, so hard to say anything... + } + if ctrl.Failure != nil { + // If the TH failed as well, don't set XBlockingFlags. Performing + // precise error mapping should be a job for the pipeline. + continue + } + logger.Warnf( + "TLS: endpoint %s is blocked (see #%d): %s", + epnt, + entry.TransactionID, + *failure, + ) + tk.BlockingFlags |= analysisFlagTLSBlocking + } +} diff --git a/internal/experiment/webconnectivity/control.go b/internal/experiment/webconnectivity/control.go index 4a2b3ab..33eae4c 100644 --- a/internal/experiment/webconnectivity/control.go +++ b/internal/experiment/webconnectivity/control.go @@ -100,12 +100,13 @@ func (c *Control) Run(parentCtx context.Context) { } c.TestKeys.SetControlRequest(creq) - // TODO(bassosimone): the current TH will not perform TLS measurements for - // 443 endpoints. However, we should modify the TH to do that, such that we're - // able to be more confident about TLS measurements results. - // create logger for this operation - ol := measurexlite.NewOperationLogger(c.Logger, "control for %s", creq.HTTPRequest) + ol := measurexlite.NewOperationLogger( + c.Logger, + "control for %s using %s", + creq.HTTPRequest, + c.THAddr, + ) // create an API client clnt := (&httpx.APIClientTemplate{ diff --git a/internal/experiment/webconnectivity/measurer.go b/internal/experiment/webconnectivity/measurer.go index 2a1795d..e5b4e96 100644 --- a/internal/experiment/webconnectivity/measurer.go +++ b/internal/experiment/webconnectivity/measurer.go @@ -36,7 +36,7 @@ func (m *Measurer) ExperimentName() string { // ExperimentVersion implements model.ExperimentMeasurer. func (m *Measurer) ExperimentVersion() string { - return "0.5.2" + return "0.5.3" } // Run implements model.ExperimentMeasurer. diff --git a/internal/experiment/webconnectivity/testkeys.go b/internal/experiment/webconnectivity/testkeys.go index 267c9a4..813cee3 100644 --- a/internal/experiment/webconnectivity/testkeys.go +++ b/internal/experiment/webconnectivity/testkeys.go @@ -66,6 +66,10 @@ type TestKeys struct { // the TH's DNS results and the probe's DNS results. DNSConsistency string `json:"dns_consistency"` + // HTTPExperimentFailure indicates whether there was a failure in + // the final HTTP request that we recorded. + HTTPExperimentFailure *string `json:"http_experiment_failure"` + // BlockingFlags contains blocking flags. BlockingFlags int64 `json:"x_blocking_flags"`