feat: re-implement the vanilla_tor experiment (#718)
This diff re-implements the vanilla_tor experiment. This experiment was part of the ooni/probe-legacy implementation. The reference issue is https://github.com/ooni/probe/issues/803. We didn't consider the possible improvements mentioned by the https://github.com/ooni/probe/issues/803#issuecomment-598715694 comment, which means we'll need to create a follow-up issue for them. We will then decide whether, when, and how to implement those follow-up measurements either into `vanilla_tor` or into the existing `tor` experiment. This novel `vanilla_tor` implementation emits test_keys that are mostly compatible with the original implementation, however: 1. the `timeout` is a `float64` rather than integer (but the default timeout is an integer, so there are no JSON-visible changes); 2. the `tor_log` string is gone and replaced by the `tor_logs` list of strings, which contains the same information; 3. the definition of `error` has been augmented to include the case in which there is an unknown error; 4. the implementation of vanilla_tor mirrors closely the one of torsf and we have taken steps to make the two implementations as comparable as possible in terms of the generated JSON measurement. The main reason why we replaced `tor_log` with `tor_logs` are: 1. that `torsf` already used that; 2. that reading the JSON is easier with this implementation compared to an implementation where all logs are into the same string. If one is processing the new data format using Python, then it will not be difficult convert `tor_log` to `tor_logs`. In any case, because we extract the most interesting fields (e.g., the percentage of the bootstrap where tor fails), it seems that logs are probably more useful as something you want to read in edge cases (I guess). Also, because we want `torsf` and `vanilla_tor` to have similar JSONs, we renamed `torsf`'s `default_timeout` to `timeout`. This change has little to none real-world impact, because no stable version of OONI Probe has ever shipped a `torsf` producing the `default_timeout` field. Regarding the structure of this diff, we have: 1. factored code to parse tor logs into a separate package; 2. implemented `vanilla_tor` as a stripped down `torsf` and added further changes to ensure compatibility with the previous `vanilla_tor`'s data format; 3. improved `torsf` to merge back the changes in `vanilla_tor`, so the two data formats of the two experiments are as similar as possible. We believe producing as similar as possible data formats helps anyone who's reading measurements generated by both experiments. We have retained/introduced `vanilla_tor`'s `error` field, which is not very useful when one has a more precise failure but is still what `vanilla_tor` used to emit, so it makes sense to also have this field. In addition to changing the implementation, we also updated the specs. As part of our future work, we may want to consider factoring the common code of these two experiments into the same underlying support library.
This commit is contained in:
Vendored
+31
@@ -0,0 +1,31 @@
|
||||
Feb 04 15:04:29.000 [notice] Tor 0.4.6.9 opening new log file.
|
||||
Feb 04 15:04:29.360 [notice] We compiled with OpenSSL 101010cf: OpenSSL 1.1.1l FIPS 24 Aug 2021 and we are running with OpenSSL 101010cf: 1.1.1l. These two versions should be binary compatible.
|
||||
Feb 04 15:04:29.363 [notice] Tor 0.4.6.9 running on Linux with Libevent 2.1.12-stable, OpenSSL 1.1.1l, Zlib 1.2.11, Liblzma 5.2.5, Libzstd 1.5.2 and Glibc 2.34 as libc.
|
||||
Feb 04 15:04:29.363 [notice] Tor can't help you if you use it wrong! Learn how to be safe at https://www.torproject.org/download/download#warning
|
||||
Feb 04 15:04:29.363 [warn] Tor was compiled with zstd 1.5.1, but is running with zstd 1.5.2. For safety, we'll avoid using advanced zstd functionality.
|
||||
Feb 04 15:04:29.363 [notice] Read configuration file "/home/sbs/.miniooni/tunnel/torsf/tor/torrc-2981077975".
|
||||
Feb 04 15:04:29.366 [notice] Opening Control listener on 127.0.0.1:0
|
||||
Feb 04 15:04:29.367 [notice] Control listener listening on port 41423.
|
||||
Feb 04 15:04:29.367 [notice] Opened Control listener connection (ready) on 127.0.0.1:41423
|
||||
Feb 04 15:04:29.367 [notice] DisableNetwork is set. Tor will not make or accept non-control network connections. Shutting down all existing connections.
|
||||
Feb 04 15:04:29.000 [notice] Parsing GEOIP IPv4 file /usr/share/tor/geoip.
|
||||
Feb 04 15:04:29.000 [notice] Parsing GEOIP IPv6 file /usr/share/tor/geoip6.
|
||||
Feb 04 15:04:29.000 [notice] Bootstrapped 0% (starting): Starting
|
||||
Feb 04 15:04:29.000 [notice] Starting with guard context "bridges"
|
||||
Feb 04 15:04:29.000 [notice] new bridge descriptor 'flakey4' (cached): $2B280B23E1107BB62ABFC40DDCC8824814F80A72~flakey4 [1zOHpg+FxqQfi/6jDLtCpHHqBTH8gjYmCKXkus1D5Ko] at 192.0.2.3
|
||||
Feb 04 15:04:29.000 [notice] Delaying directory fetches: DisableNetwork is set.
|
||||
Feb 04 15:04:29.000 [notice] New control connection opened from 127.0.0.1.
|
||||
Feb 04 15:04:29.000 [notice] Opening Socks listener on 127.0.0.1:0
|
||||
Feb 04 15:04:29.000 [notice] Socks listener listening on port 42089.
|
||||
Feb 04 15:04:29.000 [notice] Opened Socks listener connection (ready) on 127.0.0.1:42089
|
||||
Feb 04 15:04:29.000 [notice] Tor 0.4.6.9 opening log file.
|
||||
Feb 04 15:04:29.000 [notice] Bootstrapped 1% (conn_pt): Connecting to pluggable transport
|
||||
Feb 04 15:04:30.000 [notice] Bootstrapped 2% (conn_done_pt): Connected to pluggable transport
|
||||
Feb 04 15:04:30.000 [notice] Bootstrapped 10% (conn_done): Connected to a relay
|
||||
Feb 04 15:06:20.000 [notice] Bootstrapped 14% (handshake): Handshaking with a relay
|
||||
Feb 04 15:06:24.000 [notice] Bootstrapped 15% (handshake_done): Handshake with a relay done
|
||||
Feb 04 15:06:24.000 [notice] Bootstrapped 75% (enough_dirinfo): Loaded enough directory info to build circuits
|
||||
Feb 04 15:06:24.000 [notice] Bootstrapped 95% (circuit_create): Establishing a Tor circuit
|
||||
Feb 04 15:06:26.000 [notice] new bridge descriptor 'flakey4' (fresh): $2B280B23E1107BB62ABFC40DDCC8824814F80A72~flakey4 [1zOHpg+FxqQfi/6jDLtCpHHqBTH8gjYmCKXkus1D5Ko] at 192.0.2.3
|
||||
Feb 04 15:06:39.000 [notice] Bootstrapped 100% (done): Done
|
||||
Feb 04 15:06:39.000 [notice] Catching signal TERM, exiting cleanly.
|
||||
@@ -0,0 +1,105 @@
|
||||
// Package torlogs contains code to read tor logs.
|
||||
package torlogs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
|
||||
"github.com/ooni/probe-cli/v3/internal/model"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrEmptyLogFilePath indicates that the log file path is empty.
|
||||
ErrEmptyLogFilePath = errors.New("torlogs: empty log file path")
|
||||
|
||||
// ErrCannotReadLogFile indicates we cannot read the log file.
|
||||
ErrCannotReadLogFile = errors.New("torlogs: cannot read the log file")
|
||||
|
||||
// ErrNoBootstrapLogs indicates we could not find any bootstrap log in the log file.
|
||||
ErrNoBootstrapLogs = errors.New("torlogs: no bootstrap logs")
|
||||
|
||||
// ErrCannotFindSubmatches indicates we cannot find submatches.
|
||||
ErrCannotFindSubmatches = errors.New("torlogs: cannot find submatches")
|
||||
)
|
||||
|
||||
// torBootstrapRegexp helps to extract progress info from logs.
|
||||
//
|
||||
// See https://regex101.com/r/Do07qd/1.
|
||||
var torBootstrapRegexp = regexp.MustCompile(
|
||||
`^[A-Za-z0-9.: ]+ \[notice\] Bootstrapped ([0-9]+)% \(([A-Za-z_]+)\): ([A-Za-z0-9 ]+)$`)
|
||||
|
||||
// ReadBootstrapLogs reads tor logs from the given file and
|
||||
// returns a list of bootstrap-related logs.
|
||||
func ReadBootstrapLogs(logFilePath string) ([]string, error) {
|
||||
// Implementation note:
|
||||
//
|
||||
// Tor is know to be good software that does not break its output
|
||||
// unnecessarily and that does not include PII into its logs unless
|
||||
// explicitly asked to. This fact gives me confidence that we can
|
||||
// safely include this subset of the logs into the results.
|
||||
//
|
||||
// On this note, I think it's safe to include timestamps from the
|
||||
// logs into the output, since we have a timestamp for the whole
|
||||
// experiment already, so we don't leak much more by also including
|
||||
// the Tor proper timestamps into the results.
|
||||
if logFilePath == "" {
|
||||
return nil, ErrEmptyLogFilePath
|
||||
}
|
||||
data, err := os.ReadFile(logFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %s", ErrCannotReadLogFile, err.Error())
|
||||
}
|
||||
var out []string
|
||||
for _, bline := range bytes.Split(data, []byte("\n")) {
|
||||
if torBootstrapRegexp.Match(bline) {
|
||||
out = append(out, string(bline))
|
||||
}
|
||||
}
|
||||
if len(out) <= 0 {
|
||||
return nil, ErrNoBootstrapLogs
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// ReadBootstrapLogsOrWarn is like ReadBootstrapLogs except that it does
|
||||
// not return an error on failure, rather it emits a warning.
|
||||
func ReadBootstrapLogsOrWarn(logger model.Logger, logFilePath string) []string {
|
||||
logs, err := ReadBootstrapLogs(logFilePath)
|
||||
if err != nil {
|
||||
logger.Warnf("%s", err.Error())
|
||||
return nil
|
||||
}
|
||||
return logs
|
||||
}
|
||||
|
||||
// BootstrapInfo contains info extracted from a bootstrap log line.
|
||||
type BootstrapInfo struct {
|
||||
// Progress is the progress (between 0 and 100)
|
||||
Progress int64
|
||||
|
||||
// Tag is the machine readable description of the bootstrap state.
|
||||
Tag string
|
||||
|
||||
// Summary is the human readable summary.
|
||||
Summary string
|
||||
}
|
||||
|
||||
// ParseBootstrapLogLine takes in input a bootstrap log line and returns
|
||||
// in output the components of such a log line.
|
||||
func ParseBootstrapLogLine(logLine string) (*BootstrapInfo, error) {
|
||||
values := torBootstrapRegexp.FindStringSubmatch(logLine)
|
||||
if len(values) != 4 {
|
||||
return nil, ErrCannotFindSubmatches
|
||||
}
|
||||
progress, _ := strconv.ParseInt(values[1], 10, 64)
|
||||
bi := &BootstrapInfo{
|
||||
Progress: progress,
|
||||
Tag: values[2],
|
||||
Summary: values[3],
|
||||
}
|
||||
return bi, nil
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
package torlogs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/ooni/probe-cli/v3/internal/model"
|
||||
"github.com/ooni/probe-cli/v3/internal/model/mocks"
|
||||
)
|
||||
|
||||
func TestReadBootstrapLogs(t *testing.T) {
|
||||
t.Run("with empty file path", func(t *testing.T) {
|
||||
out, err := ReadBootstrapLogs("")
|
||||
if !errors.Is(err, ErrEmptyLogFilePath) {
|
||||
t.Fatal("unexpected err", err)
|
||||
}
|
||||
if len(out) > 0 {
|
||||
t.Fatal("expected no tor logs")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("with nonexistent file path", func(t *testing.T) {
|
||||
out, err := ReadBootstrapLogs(filepath.Join("testdata", "nonexistent"))
|
||||
if !errors.Is(err, ErrCannotReadLogFile) {
|
||||
t.Fatal("unexpected err", err)
|
||||
}
|
||||
if len(out) != 0 {
|
||||
t.Fatal("expected no tor logs")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("with existing file path not containing bootstrap logs", func(t *testing.T) {
|
||||
out, err := ReadBootstrapLogs(filepath.Join("testdata", "empty.log"))
|
||||
if !errors.Is(err, ErrNoBootstrapLogs) {
|
||||
t.Fatal("unexpected err", err)
|
||||
}
|
||||
if len(out) != 0 {
|
||||
t.Fatal("expected no tor logs")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("with existing file path containing bootstrap logs", func(t *testing.T) {
|
||||
out, err := ReadBootstrapLogs(filepath.Join("testdata", "tor.log"))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if count := len(out); count != 9 {
|
||||
t.Fatal("unexpected number of tor logs", count)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestReadBootstrapLogsOrWarn(t *testing.T) {
|
||||
t.Run("on success", func(t *testing.T) {
|
||||
filename := filepath.Join("testdata", "tor.log")
|
||||
logs := ReadBootstrapLogsOrWarn(model.DiscardLogger, filename)
|
||||
if count := len(logs); count != 9 {
|
||||
t.Fatal("unexpected number of tor logs", count)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("on failure", func(t *testing.T) {
|
||||
var called bool
|
||||
logger := &mocks.Logger{
|
||||
MockWarnf: func(format string, v ...interface{}) {
|
||||
called = true
|
||||
},
|
||||
}
|
||||
filename := filepath.Join("testdata", "empty.log")
|
||||
logs := ReadBootstrapLogsOrWarn(logger, filename)
|
||||
if !called {
|
||||
t.Fatal("not called")
|
||||
}
|
||||
if len(logs) != 0 {
|
||||
t.Fatal("expected no tor logs")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseBootstrapLogLine(t *testing.T) {
|
||||
type args struct {
|
||||
logLine string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want *BootstrapInfo
|
||||
wantErr error
|
||||
}{{
|
||||
name: "with empty string",
|
||||
args: args{
|
||||
logLine: "",
|
||||
},
|
||||
want: nil,
|
||||
wantErr: ErrCannotFindSubmatches,
|
||||
}, {
|
||||
name: "with correct line",
|
||||
args: args{
|
||||
logLine: "May 10 09:19:28.000 [notice] Bootstrapped 80% (ap_conn): Connecting to a relay to build circuits",
|
||||
},
|
||||
want: &BootstrapInfo{
|
||||
Progress: 80,
|
||||
Tag: "ap_conn",
|
||||
Summary: "Connecting to a relay to build circuits",
|
||||
},
|
||||
wantErr: nil,
|
||||
}}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := ParseBootstrapLogLine(tt.args.logLine)
|
||||
if !errors.Is(err, tt.wantErr) {
|
||||
t.Fatal("unexpected err", err)
|
||||
}
|
||||
if diff := cmp.Diff(tt.want, got); diff != "" {
|
||||
t.Fatal(diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user