ooni-probe-cli/internal/experiment/webconnectivity/control.go
Simone Basso c2ea0b4704
feat(webconnectivity): try all the available THs (#980)
We introduce a fork of internal/httpx, named internal/httpapi, where there is a clear split between the concept of an API endpoint (such as https://0.th.ooni.org/) and of an API descriptor (such as using `GET` to access /api/v1/test-list/url).

Additionally, httpapi allows us to create a SequenceCaller that tries to call a given API descriptor using multiple API endpoints. The SequenceCaller will stop once an endpoint works or when all the available endpoints have been tried unsuccessfully.

The definition of "success" is the following: we consider "failure" any error that occurs during the HTTP round trip or when reading the response body. We DO NOT consider "failure" errors (1) when parsing the input URL; (2) when the server returns >= 400; (3) when the server returns a string that does not parse as valid JSON. The idea of this classification of failures is that we ONLY want to retry when we see what looks like a network error that may be caused by (collateral or targeted) censorship.

We take advantage of the availability of this new package and we refactor web_connectivity@v0.4 and web_connectivity@v0.5 to use a SequenceCaller for calling the web connectivity TH API. This means that we will now try all the available THs advertised by the backend rather than just selecting and using the first one provided by the backend.

Because this diff is designed to be backported to the `release/3.16` branch, we have omitted additional changes to always use httpapi where we are currently using httpx. Yet, to remind ourselves about the need to do that, we have deprecated the httpx package. We will rewrite all the code currently using httpx to use httpapi as part of future work.

It is also worth noting that httpapi will allow us to refactor the backend code such that (1) we remove code to select a backend URL endpoint at the beginning and (2) we try several endpoints. The design of the code is such that we can add to the mix some endpoints whose `http.Client` is a special client that uses a tunnel. This will allow backend queries to fall back automatically.

Closes https://github.com/ooni/probe/issues/2353.

Related to https://github.com/ooni/probe/issues/1519.
2022-11-21 16:28:53 +01:00

180 lines
5.6 KiB
Go

package webconnectivity
import (
"context"
"net"
"net/url"
"sync"
"time"
"github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity"
"github.com/ooni/probe-cli/v3/internal/httpapi"
"github.com/ooni/probe-cli/v3/internal/measurexlite"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
"github.com/ooni/probe-cli/v3/internal/runtimex"
)
// EndpointMeasurementsStarter is used by Control to start extra
// measurements using new IP addrs discovered by the TH.
//
// Control.maybeStartExtraMeasurements calls startCleartextFlows first and
// startSecureFlows second, passing both the same priority selector and the
// same list of addresses that only the TH knew about.
type EndpointMeasurementsStarter interface {
// startCleartextFlows starts a TCP measurement flow for each IP addr. The [ps]
// argument determines whether this flow will be allowed to fetch the webpage.
//
// NOTE(review): Control forwards its OPTIONAL PrioSelector field as [ps], so
// implementations presumably need to tolerate a nil value — confirm.
startCleartextFlows(ctx context.Context, ps *prioritySelector, addresses []DNSEntry)
// startSecureFlows is like startCleartextFlows but for HTTPS.
startSecureFlows(ctx context.Context, ps *prioritySelector, addresses []DNSEntry)
}
// Control issues a Control request and saves the results
// inside of the experiment's TestKeys.
//
// Use Start to run the task in a background goroutine tracked by
// WaitGroup, or Run to execute it synchronously.
//
// The zero value of this structure IS NOT valid and you MUST initialize
// all the fields marked as MANDATORY before using this structure.
type Control struct {
// Addresses contains the MANDATORY addresses we've looked up. Run uses
// them to build the list of TCP endpoints included in the control request,
// and they are also the baseline against which the addresses returned by
// the TH are compared to find the ones that are new.
Addresses []string
// ExtraMeasurementsStarter is MANDATORY and allows this struct to
// start additional measurements using new TH-discovered addrs.
ExtraMeasurementsStarter EndpointMeasurementsStarter
// Logger is the MANDATORY logger to use.
Logger model.Logger
// PrioSelector is the OPTIONAL priority selector to use to determine
// whether we will be allowed to fetch the webpage. It is forwarded
// unchanged to the ExtraMeasurementsStarter methods.
PrioSelector *prioritySelector
// TestKeys is MANDATORY and contains the TestKeys. Run stores here the
// control request, then either the control failure or the control
// response along with the test helper that produced it.
TestKeys *TestKeys
// Session is the MANDATORY session to use. Run takes from it the HTTP
// client and the user agent used to talk to the test helpers.
Session model.ExperimentSession
// TestHelpers is the MANDATORY list of test helpers. Run hands the whole
// list to an httpapi sequence caller and records which entry answered.
TestHelpers []model.OOAPIService
// URL is the MANDATORY URL we are measuring. Its string form is sent as
// the HTTP request to replicate and, when it has an explicit port, only
// that port is used to build the TCP endpoints.
URL *url.URL
// WaitGroup is the MANDATORY wait group this task belongs to. Start
// increments it and marks it done once Run has returned.
WaitGroup *sync.WaitGroup
}
// Start registers this task with the WaitGroup and then executes Run in
// a background goroutine. The WaitGroup is marked done once Run returns,
// so the parent can wait for completion. The ctx argument is passed
// to Run unchanged.
func (c *Control) Start(ctx context.Context) {
	wg := c.WaitGroup
	wg.Add(1)
	go func() {
		// deferred so the parent is notified regardless of how Run exits
		defer wg.Done()
		c.Run(ctx)
	}()
}
// Run executes the control task synchronously. It builds the control
// request, stores it into the TestKeys, submits it to the configured test
// helpers (THs) through an httpapi sequence caller, and saves either the
// failure or the response into the TestKeys. On success it also records
// which TH answered and hands the addresses resolved by the TH over to
// maybeStartExtraMeasurements.
//
// The parentCtx argument bounds the exchange with the THs, which is further
// limited by an internal timeout. Extra measurements are started using
// parentCtx itself, so they are not subject to that internal timeout.
func (c *Control) Run(parentCtx context.Context) {
	// the exchange with the THs must complete within this deadline
	const timeout = 30 * time.Second
	opCtx, cancel := context.WithTimeout(parentCtx, timeout)
	defer cancel()

	// a custom port in the URL replaces the default pair of 443 and 80
	ports := []string{"443", "80"}
	if custom := c.URL.Port(); custom != "" {
		ports = []string{custom}
	}

	// combine each address with each port; the slice stays nil when
	// there are no addresses to test
	var tcpEndpoints []string
	for _, addr := range c.Addresses {
		for _, port := range ports {
			tcpEndpoints = append(tcpEndpoints, net.JoinHostPort(addr, port))
		}
	}

	// assemble the control request and save it into the test keys
	request := &webconnectivity.ControlRequest{
		HTTPRequest: c.URL.String(),
		HTTPRequestHeaders: map[string][]string{
			"Accept":          {model.HTTPHeaderAccept},
			"Accept-Language": {model.HTTPHeaderAcceptLanguage},
			"User-Agent":      {model.HTTPHeaderUserAgent},
		},
		TCPConnect: tcpEndpoints,
	}
	c.TestKeys.SetControlRequest(request)

	// start logging this operation
	opLogger := measurexlite.NewOperationLogger(
		c.Logger,
		"control for %s using %+v",
		request.HTTPRequest,
		c.TestHelpers,
	)

	// describe the API call and the endpoints the sequence caller will use
	descriptor := httpapi.MustNewPOSTJSONWithJSONResponseDescriptor(
		c.Logger, "/", request,
	).WithBodyLogging(true)
	thEndpoints := httpapi.NewEndpointList(
		c.Session.DefaultHTTPClient(),
		c.Session.UserAgent(),
		c.TestHelpers...,
	)
	caller := httpapi.NewSequenceCaller(descriptor, thEndpoints...)

	// perform the call and wait for the outcome
	var response webconnectivity.ControlResponse
	thIndex, err := caller.CallWithJSONResponse(opCtx, &response)
	if err != nil {
		// always store and log a wrapped error
		err = netxlite.NewTopLevelGenericErrWrapper(err)
		c.TestKeys.SetControlFailure(err)
		opLogger.Stop(err)
		return
	}

	// the call succeeded: store the response
	c.TestKeys.SetControl(&response)
	opLogger.Stop(nil)

	// remember which TH gave us the response
	runtimex.Assert(thIndex >= 0 && thIndex < len(c.TestHelpers), "idx out of bounds")
	c.TestKeys.setTestHelper(&c.TestHelpers[thIndex])

	// if the TH returned addresses we were not previously aware
	// of, make sure we also measure them
	c.maybeStartExtraMeasurements(parentCtx, response.DNS.Addrs)
}
// maybeStartExtraMeasurements starts new background measurements for the
// IP addresses that the TH discovered and that are not in c.Addresses.
//
// The ctx argument is forwarded to the flows being started. The thAddrs
// argument contains the addresses returned by the TH.
//
// The TH-only addresses are deduplicated and kept in the order in which
// they appear in thAddrs, so the logged list and the order in which flows
// are started do not change from run to run. Each address is wrapped in
// a DNSEntry with zero flags.
//
// The log message is emitted and both starter methods are invoked even
// when there are no TH-only addresses.
func (c *Control) maybeStartExtraMeasurements(ctx context.Context, thAddrs []string) {
	// addresses to skip: initially the ones discovered by the probe, which
	// have already been tested, then also the TH-only ones we have seen
	seen := make(map[string]struct{}, len(c.Addresses)+len(thAddrs))
	for _, addr := range c.Addresses {
		seen[addr] = struct{}{}
	}

	// walk the TH addresses in order (rather than ranging over a map) to
	// obtain a deterministic list of TH-only addresses
	var (
		thOnlyAddrs []string
		thOnly      []DNSEntry
	)
	for _, addr := range thAddrs {
		if _, found := seen[addr]; found {
			continue // known to the probe or duplicate TH entry
		}
		seen[addr] = struct{}{}
		thOnlyAddrs = append(thOnlyAddrs, addr)
		thOnly = append(thOnly, DNSEntry{
			Addr:  addr,
			Flags: 0, // neither system, nor udp, nor doh
		})
	}
	c.Logger.Infof("additional addrs discovered by the TH: %+v", thOnlyAddrs)

	// Start extra measurements for TH-only addresses.
	c.ExtraMeasurementsStarter.startCleartextFlows(ctx, c.PrioSelector, thOnly)
	c.ExtraMeasurementsStarter.startSecureFlows(ctx, c.PrioSelector, thOnly)
}