ooni-probe-cli/internal/engine/experiment/urlgetter/getter.go
Simone Basso 85664f1e31
feat(torsf): collect tor logs, select rendezvous method, count bytes (#683)
This diff contains significant improvements over the previous
implementation of the torsf experiment.

We add support for configuring different rendezvous methods after
the convo at https://github.com/ooni/probe/issues/2004. In doing
that, I've tried to use a terminology that is consistent with the
names being actually used by tor developers.

In terms of what to do next, this diff basically instruments
torsf to always rendezvous using domain fronting. Yet, it's also
possible to change the rendezvous method from the command line,
when using miniooni, which allows to experiment a bit more. In the
same vein, by default we use a persistent tor datadir, but it's
also possible to use a temporary datadir using the cmdline.

Here's how a generic invocation of `torsf` looks like:

```bash
./miniooni -O DisablePersistentDatadir=true \
           -O RendezvousMethod=amp \
           -O DisableProgress=true \
           torsf
```

(The default is `DisablePersistentDatadir=false` and
`RendezvousMethod=domain_fronting`.)

With this implementation, we can start measuring whether snowflake
and tor together can boostrap, which seems the most important thing
to focus on at the beginning. Understanding why the bootstrap most
often does not converge with a temporary datadir on Android devices
remains instead an open problem for now. (I'll also update the
relevant issues or create new issues after commit this.)

We also address some methodology improvements that were proposed
in https://github.com/ooni/probe/issues/1686. Namely:

1. we record the tor version;

2. we include the bootstrap percentage by reading the logs;

3. we set the anomaly key correctly;

4. we measure the bytes send and received (by `tor` not by `snowflake`, since
doing it for snowflake seems more complex at this stage).

What remains to be done is the possibility of including Snowflake
events into the measurement, which is not possible until the new
improvements at common/event in snowflake.git are included into a
tagged version of snowflake itself. (I'll make sure to mention
this aspect to @cohosh in https://github.com/ooni/probe/issues/2004.)
2022-02-07 17:05:36 +01:00

149 lines
4.3 KiB
Go

package urlgetter
import (
"context"
"io/ioutil"
"net/url"
"time"
"github.com/ooni/probe-cli/v3/internal/engine/netx/archival"
"github.com/ooni/probe-cli/v3/internal/engine/netx/trace"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
"github.com/ooni/probe-cli/v3/internal/tunnel"
)
// The Getter gets the specified target in the context of the
// given session and with the specified config.
//
// Other OONI experiment should use the Getter to factor code when
// the Getter implements the operations they wanna perform.
type Getter struct {
// Begin is the time when the experiment begun. If you do not
// set this field, every target is measured independently.
Begin time.Time
// Config contains settings for this run. If not set, then
// we will use the default config.
Config Config
// Session is the session for this run. This field must
// be set otherwise the code will panic.
Session model.ExperimentSession
// Target is the thing to measure in this run. This field must
// be set otherwise the code won't know what to do.
Target string
// testIOUtilTempDir allows us to mock ioutil.TempDir
testIOUtilTempDir func(dir, pattern string) (string, error)
}
// Get performs the action described by g using the given context
// and returning the test keys and eventually an error
func (g Getter) Get(ctx context.Context) (TestKeys, error) {
if g.Config.Timeout > 0 {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, g.Config.Timeout)
defer cancel()
}
if g.Begin.IsZero() {
g.Begin = time.Now()
}
saver := new(trace.Saver)
tk, err := g.get(ctx, saver)
// Make sure we have an operation in cases where we fail before
// hitting our httptransport that does error wrapping.
if err != nil {
err = netxlite.NewTopLevelGenericErrWrapper(err)
}
tk.FailedOperation = archival.NewFailedOperation(err)
tk.Failure = archival.NewFailure(err)
events := saver.Read()
tk.Queries = append(tk.Queries, archival.NewDNSQueriesList(g.Begin, events)...)
tk.NetworkEvents = append(
tk.NetworkEvents, archival.NewNetworkEventsList(g.Begin, events)...,
)
tk.Requests = append(
tk.Requests, archival.NewRequestList(g.Begin, events)...,
)
if len(tk.Requests) > 0 {
// OONI's convention is that the last request appears first
tk.HTTPResponseStatus = tk.Requests[0].Response.Code
tk.HTTPResponseBody = tk.Requests[0].Response.Body.Value
tk.HTTPResponseLocations = tk.Requests[0].Response.Locations
}
tk.TCPConnect = append(
tk.TCPConnect, archival.NewTCPConnectList(g.Begin, events)...,
)
tk.TLSHandshakes = append(
tk.TLSHandshakes, archival.NewTLSHandshakesList(g.Begin, events)...,
)
return tk, err
}
// ioutilTempDir calls either g.testIOUtilTempDir or ioutil.TempDir
func (g Getter) ioutilTempDir(dir, pattern string) (string, error) {
if g.testIOUtilTempDir != nil {
return g.testIOUtilTempDir(dir, pattern)
}
return ioutil.TempDir(dir, pattern)
}
func (g Getter) get(ctx context.Context, saver *trace.Saver) (TestKeys, error) {
tk := TestKeys{
Agent: "redirect",
Tunnel: g.Config.Tunnel,
}
if g.Config.DNSCache != "" {
tk.DNSCache = []string{g.Config.DNSCache}
}
if g.Config.NoFollowRedirects {
tk.Agent = "agent"
}
// start tunnel
var proxyURL *url.URL
if g.Config.Tunnel != "" {
// Every new instance of the tunnel goes into a separate
// directory within the temporary directory. Calling
// Session.Close will delete such a directory.
tundir, err := g.ioutilTempDir(g.Session.TempDir(), "urlgetter-tunnel-")
if err != nil {
return tk, err
}
tun, _, err := tunnel.Start(ctx, &tunnel.Config{
Name: g.Config.Tunnel,
Session: g.Session,
TorArgs: g.Session.TorArgs(),
TorBinary: g.Session.TorBinary(),
TunnelDir: tundir,
})
if err != nil {
return tk, err
}
tk.BootstrapTime = tun.BootstrapTime().Seconds()
proxyURL = tun.SOCKS5ProxyURL()
tk.SOCKSProxy = proxyURL.String()
defer tun.Stop()
}
// create configuration
configurer := Configurer{
Config: g.Config,
Logger: g.Session.Logger(),
ProxyURL: proxyURL,
Saver: saver,
}
configuration, err := configurer.NewConfiguration()
if err != nil {
return tk, err
}
defer configuration.CloseIdleConnections()
// run the measurement
runner := Runner{
Config: g.Config,
HTTPConfig: configuration.HTTPConfig,
Target: g.Target,
}
return tk, runner.Run(ctx)
}