feat(webconnectivity@v0.5): use TLS info from TH (#933)

This diff modifies webconnectivity@v0.5 to take decisions regarding
TLS blocking by using the response from the TH rather than using
questionable heuristics based on inspecting the TLSHandshake list
alone. This change should improve correctness _when_ we're using
the improved TH, which is currently used for 50% of the probes.

See https://github.com/ooni/probe/issues/2257

While there, modify `control.go` to specify which control is being used.
This commit is contained in:
Simone Basso 2022-09-05 11:35:48 +02:00 committed by GitHub
parent 34dc029b33
commit 3766ab2721
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 98 additions and 118 deletions

View File

@ -90,6 +90,7 @@ func (tk *TestKeys) analysisToplevel(logger model.Logger) {
// these functions compute the value of XBlockingFlags
tk.analysisDNSToplevel(logger)
tk.analysisTCPIPToplevel(logger)
tk.analysisTLSToplevel(logger)
tk.analysisHTTPToplevel(logger)
// now, let's determine .Accessible and .Blocking

View File

@ -5,8 +5,6 @@ package webconnectivity
//
import (
"net/url"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
@ -20,8 +18,6 @@ import (
//
// This results in possibly setting these XBlockingFlags:
//
// - analysisFlagTLSBlocking
//
// - analysisFlagHTTPBlocking
//
// - analysisFlagHTTPDiff
@ -29,117 +25,45 @@ import (
// In websteps fashion, we don't stop at the first failure, rather we
// process all the available data and evaluate all possible errors.
func (tk *TestKeys) analysisHTTPToplevel(logger model.Logger) {
// don't perform any analysis without TH data
if tk.Control == nil || tk.ControlRequest == nil {
return
}
ctrl := tk.Control.HTTPRequest
// don't perform any analysis if the TH's HTTP measurement failed
if ctrl.Failure != nil {
return
}
// determine whether the original URL was HTTPS
origURL, err := url.Parse(tk.ControlRequest.HTTPRequest)
if err != nil {
return // this seeems like a bug?
}
isHTTPS := origURL.Scheme == "https"
// determine whether we had any TLS handshake issue and, in such a case,
// declare that we had a case of "http-failure" through TLS.
//
// Note that this would eventually count as an "http-failure" for .Blocking
// because Web Connectivity did not have a concept of TLS based blocking.
if tk.hasWellKnownTLSHandshakeIssues(isHTTPS, logger) {
tk.BlockingFlags |= analysisFlagTLSBlocking
// continue processing
}
// determine whether we had well known cleartext HTTP round trip issues
// and, in such a case, declare we had an "http-failure".
if tk.hasWellKnownHTTPRoundTripIssues(logger) {
tk.BlockingFlags |= analysisFlagHTTPBlocking
// continue processing
}
// if we don't have any request to check, there's not much more we
// can actually do here, so let's just return.
if len(tk.Requests) <= 0 {
return
}
// if the request has failed in any other way, we don't know. By convention, the first
// entry in the tk.Requests array is the last entry that was measured.
finalRequest := tk.Requests[0]
if finalRequest.Failure != nil {
tk.HTTPExperimentFailure = finalRequest.Failure
// don't perform any futher analysis without TH data
if tk.Control == nil || tk.ControlRequest == nil {
return
}
ctrl := tk.Control.HTTPRequest
// don't perform any analysis if the TH's HTTP measurement failed because
// performing more precise mapping is a job for the pipeline.
if ctrl.Failure != nil {
return
}
// flag cases of known HTTP failures
if failure := finalRequest.Failure; failure != nil {
switch *failure {
case netxlite.FailureConnectionReset,
netxlite.FailureGenericTimeoutError,
netxlite.FailureEOFError:
tk.BlockingFlags |= analysisFlagHTTPBlocking
logger.Warnf(
"HTTP: endpoint %s is blocked (see #%d): %s",
finalRequest.Address,
finalRequest.TransactionID,
*failure,
)
default:
// leave this case for ooni/pipeline
}
return
}
// fallback to the HTTP diff algo.
tk.analysisHTTPDiff(logger, finalRequest, &ctrl)
}
// hasWellKnownTLSHandshakeIssues returns true in case we observed
// a set of well-known issues during the TLS handshake.
func (tk *TestKeys) hasWellKnownTLSHandshakeIssues(isHTTPS bool, logger model.Logger) (result bool) {
// TODO(bassosimone): we should return TLS information in the TH
// such that we can perform a TCP-like check. For now, instead, we
// only perform comparison when the initial URL was HTTPS. Given
// that we unconditionally check for HTTPS even when the URL is HTTP,
// we cannot blindly treat all TLS errors as blocking. A website
// may just not have HTTPS. While in the obvious cases we will see
// certificate errors, in some cases it may actually timeout.
if isHTTPS {
for _, thx := range tk.TLSHandshakes {
fail := thx.Failure
if fail == nil {
continue // this handshake succeded, so skip it
}
switch *fail {
case netxlite.FailureConnectionReset,
netxlite.FailureGenericTimeoutError,
netxlite.FailureEOFError,
netxlite.FailureSSLInvalidHostname,
netxlite.FailureSSLInvalidCertificate,
netxlite.FailureSSLUnknownAuthority:
logger.Warnf(
"TLS: endpoint %s fails with %s (see #%d)",
thx.Address, *fail, thx.TransactionID,
)
result = true // flip the result but continue looping so we print them all
default:
// check next handshake
}
}
}
return
}
// hasWellKnownHTTPRoundTripIssues checks whether any HTTP round
// trip failed in a well-known suspicious way
func (tk *TestKeys) hasWellKnownHTTPRoundTripIssues(logger model.Logger) (result bool) {
for _, rtx := range tk.Requests {
fail := rtx.Failure
if fail == nil {
// This one succeded, so skip it. Note that, in principle, we know
// the fist entry is the last request occurred, but I really do not
// want to embed this bad assumption in one extra place!
continue
}
switch *fail {
case netxlite.FailureConnectionReset,
netxlite.FailureGenericTimeoutError,
netxlite.FailureEOFError:
logger.Warnf(
"TLS: endpoint %s fails with %s (see #%d)",
"N/A", *fail, rtx.TransactionID, // TODO(bassosimone): implement
)
result = true // flip the result but continue looping so we print them all
default:
// check next round trip
}
}
return
}

View File

@ -33,11 +33,6 @@ func (tk *TestKeys) analysisTCPIPToplevel(logger model.Logger) {
isfalse = false
)
// TODO(bassosimone): the TH should measure also some of the IP addrs it discovered
// and the probe did not discover to improve the analysis. Otherwise, the probe
// is fooled by the TH also failing for countries that return random IP addresses
// that are actually not working. Yet, ooni/data would definitely see this.
// walk the list of probe results and compare with TH results
for _, entry := range tk.TCPConnect {
// skip successful entries
@ -75,7 +70,12 @@ func (tk *TestKeys) analysisTCPIPToplevel(logger model.Logger) {
// for the pipeline rather than for the probe.
continue
}
logger.Warnf("TCP/IP: endpoint %s is blocked (see #%d)", epnt, entry.TransactionID)
logger.Warnf(
"TCP/IP: endpoint %s is blocked (see #%d): %s",
epnt,
entry.TransactionID,
*failure,
)
entry.Status.Blocked = &istrue
tk.BlockingFlags |= analysisFlagTCPIPBlocking
}

View File

@ -0,0 +1,50 @@
package webconnectivity
//
// TLS analysis
//
import "github.com/ooni/probe-cli/v3/internal/model"
// analysisTLSToplevel is the toplevel analysis function for TLS.
//
// This algorithm aims to flag the TLS endpoints that failed unreasonably
// compared to what the TH has observed for the same endpoints.
func (tk *TestKeys) analysisTLSToplevel(logger model.Logger) {
// if we don't have a control result, do nothing.
if tk.Control == nil || len(tk.Control.TLSHandshake) <= 0 {
return
}
// walk the list of probe results and compare with TH results
for _, entry := range tk.TLSHandshakes {
// skip successful entries
failure := entry.Failure
if failure == nil {
continue // did not fail
}
epnt := entry.Address
// TODO(bassosimone,kelmenhorst): if, in the future, we choose to
// adapt this code to QUIC, we need to remember to treat EHOSTUNREACH
// and ENETUNREACH specially when the IP address is IPv6.
// obtain the corresponding endpoint
ctrl, found := tk.Control.TLSHandshake[epnt]
if !found {
continue // only the probe tested this, so hard to say anything...
}
if ctrl.Failure != nil {
// If the TH failed as well, don't set XBlockingFlags. Performing
// precise error mapping should be a job for the pipeline.
continue
}
logger.Warnf(
"TLS: endpoint %s is blocked (see #%d): %s",
epnt,
entry.TransactionID,
*failure,
)
tk.BlockingFlags |= analysisFlagTLSBlocking
}
}

View File

@ -100,12 +100,13 @@ func (c *Control) Run(parentCtx context.Context) {
}
c.TestKeys.SetControlRequest(creq)
// TODO(bassosimone): the current TH will not perform TLS measurements for
// 443 endpoints. However, we should modify the TH to do that, such that we're
// able to be more confident about TLS measurements results.
// create logger for this operation
ol := measurexlite.NewOperationLogger(c.Logger, "control for %s", creq.HTTPRequest)
ol := measurexlite.NewOperationLogger(
c.Logger,
"control for %s using %s",
creq.HTTPRequest,
c.THAddr,
)
// create an API client
clnt := (&httpx.APIClientTemplate{

View File

@ -36,7 +36,7 @@ func (m *Measurer) ExperimentName() string {
// ExperimentVersion implements model.ExperimentMeasurer.
func (m *Measurer) ExperimentVersion() string {
return "0.5.2"
return "0.5.3"
}
// Run implements model.ExperimentMeasurer.

View File

@ -66,6 +66,10 @@ type TestKeys struct {
// the TH's DNS results and the probe's DNS results.
DNSConsistency string `json:"dns_consistency"`
// HTTPExperimentFailure indicates whether there was a failure in
// the final HTTP request that we recorded.
HTTPExperimentFailure *string `json:"http_experiment_failure"`
// BlockingFlags contains blocking flags.
BlockingFlags int64 `json:"x_blocking_flags"`