diff --git a/internal/engine/netx/archival/archival.go b/internal/engine/netx/archival/archival.go index 3655b5e..c2aa62a 100644 --- a/internal/engine/netx/archival/archival.go +++ b/internal/engine/netx/archival/archival.go @@ -5,8 +5,6 @@ package archival import ( "crypto/x509" - "encoding/base64" - "encoding/json" "errors" "net" "net/http" @@ -14,7 +12,6 @@ import ( "strconv" "strings" "time" - "unicode/utf8" "github.com/ooni/probe-cli/v3/internal/engine/geolocate" "github.com/ooni/probe-cli/v3/internal/engine/netx/trace" @@ -22,59 +19,32 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" ) -// ExtSpec describes a data format extension -type ExtSpec struct { - Name string // extension name - V int64 // extension version -} - -// AddTo adds the current ExtSpec to the specified measurement -func (spec ExtSpec) AddTo(m *model.Measurement) { - if m.Extensions == nil { - m.Extensions = make(map[string]int64) - } - m.Extensions[spec.Name] = spec.V -} - -var ( - // ExtDNS is the version of df-002-dnst.md - ExtDNS = ExtSpec{Name: "dnst", V: 0} - - // ExtNetevents is the version of df-008-netevents.md - ExtNetevents = ExtSpec{Name: "netevents", V: 0} - - // ExtHTTP is the version of df-001-httpt.md - ExtHTTP = ExtSpec{Name: "httpt", V: 0} - - // ExtTCPConnect is the version of df-005-tcpconnect.md - ExtTCPConnect = ExtSpec{Name: "tcpconnect", V: 0} - - // ExtTLSHandshake is the version of df-006-tlshandshake.md - ExtTLSHandshake = ExtSpec{Name: "tlshandshake", V: 0} - - // ExtTunnel is the version of df-009-tunnel.md - ExtTunnel = ExtSpec{Name: "tunnel", V: 0} +// Compatibility types +type ( + ExtSpec = model.ArchivalExtSpec + TCPConnectEntry = model.ArchivalTCPConnectResult + TCPConnectStatus = model.ArchivalTCPConnectStatus + MaybeBinaryValue = model.ArchivalMaybeBinaryData + DNSQueryEntry = model.ArchivalDNSLookupResult + DNSAnswerEntry = model.ArchivalDNSAnswer + TLSHandshake = model.ArchivalTLSOrQUICHandshakeResult + HTTPBody = model.ArchivalHTTPBody + 
HTTPHeader = model.ArchivalHTTPHeader + RequestEntry = model.ArchivalHTTPRequestResult + HTTPRequest = model.ArchivalHTTPRequest + HTTPResponse = model.ArchivalHTTPResponse + NetworkEvent = model.ArchivalNetworkEvent ) -// TCPConnectStatus contains the TCP connect status. -// -// The Blocked field breaks the separation between measurement and analysis -// we have been enforcing for quite some time now. It is a legacy from the -// Web Connectivity experiment and it should be here because of that. -type TCPConnectStatus struct { - Blocked *bool `json:"blocked,omitempty"` // Web Connectivity only - Failure *string `json:"failure"` - Success bool `json:"success"` -} - -// TCPConnectEntry contains one of the entries that are part -// of the "tcp_connect" key of a OONI report. -type TCPConnectEntry struct { - IP string `json:"ip"` - Port int `json:"port"` - Status TCPConnectStatus `json:"status"` - T float64 `json:"t"` -} +// Compatibility variables +var ( + ExtDNS = model.ArchivalExtDNS + ExtNetevents = model.ArchivalExtNetevents + ExtHTTP = model.ArchivalExtHTTP + ExtTCPConnect = model.ArchivalExtTCPConnect + ExtTLSHandshake = model.ArchivalExtTLSHandshake + ExtTunnel = model.ArchivalExtTunnel +) // NewTCPConnectList creates a new TCPConnectList func NewTCPConnectList(begin time.Time, events []trace.Event) []TCPConnectEntry { @@ -134,161 +104,6 @@ func NewFailedOperation(err error) *string { return &s } -// HTTPTor contains Tor information -type HTTPTor struct { - ExitIP *string `json:"exit_ip"` - ExitName *string `json:"exit_name"` - IsTor bool `json:"is_tor"` -} - -// MaybeBinaryValue is a possibly binary string. We use this helper class -// to define a custom JSON encoder that allows us to choose the proper -// representation depending on whether the Value field is valid UTF-8 or not. 
-type MaybeBinaryValue struct { - Value string -} - -// MarshalJSON marshals a string-like to JSON following the OONI spec that -// says that UTF-8 content is represened as string and non-UTF-8 content is -// instead represented using `{"format":"base64","data":"..."}`. -func (hb MaybeBinaryValue) MarshalJSON() ([]byte, error) { - if utf8.ValidString(hb.Value) { - return json.Marshal(hb.Value) - } - er := make(map[string]string) - er["format"] = "base64" - er["data"] = base64.StdEncoding.EncodeToString([]byte(hb.Value)) - return json.Marshal(er) -} - -// UnmarshalJSON is the opposite of MarshalJSON. -func (hb *MaybeBinaryValue) UnmarshalJSON(d []byte) error { - if err := json.Unmarshal(d, &hb.Value); err == nil { - return nil - } - er := make(map[string]string) - if err := json.Unmarshal(d, &er); err != nil { - return err - } - if v, ok := er["format"]; !ok || v != "base64" { - return errors.New("missing or invalid format field") - } - if _, ok := er["data"]; !ok { - return errors.New("missing data field") - } - b64, err := base64.StdEncoding.DecodeString(er["data"]) - if err != nil { - return err - } - hb.Value = string(b64) - return nil -} - -// HTTPBody is an HTTP body. As an implementation note, this type must be -// an alias for the MaybeBinaryValue type, otherwise the specific serialisation -// mechanism implemented by MaybeBinaryValue is not working. -type HTTPBody = MaybeBinaryValue - -// HTTPHeader is a single HTTP header. -type HTTPHeader struct { - Key string - Value MaybeBinaryValue -} - -// MarshalJSON marshals a single HTTP header to a tuple where the first -// element is a string and the second element is maybe-binary data. 
-func (hh HTTPHeader) MarshalJSON() ([]byte, error) { - if utf8.ValidString(hh.Value.Value) { - return json.Marshal([]string{hh.Key, hh.Value.Value}) - } - value := make(map[string]string) - value["format"] = "base64" - value["data"] = base64.StdEncoding.EncodeToString([]byte(hh.Value.Value)) - return json.Marshal([]interface{}{hh.Key, value}) -} - -// UnmarshalJSON is the opposite of MarshalJSON. -func (hh *HTTPHeader) UnmarshalJSON(d []byte) error { - var pair []interface{} - if err := json.Unmarshal(d, &pair); err != nil { - return err - } - if len(pair) != 2 { - return errors.New("unexpected pair length") - } - key, ok := pair[0].(string) - if !ok { - return errors.New("the key is not a string") - } - value, ok := pair[1].(string) - if !ok { - mapvalue, ok := pair[1].(map[string]interface{}) - if !ok { - return errors.New("the value is neither a string nor a map[string]interface{}") - } - if _, ok := mapvalue["format"]; !ok { - return errors.New("missing format") - } - if v, ok := mapvalue["format"].(string); !ok || v != "base64" { - return errors.New("invalid format") - } - if _, ok := mapvalue["data"]; !ok { - return errors.New("missing data field") - } - v, ok := mapvalue["data"].(string) - if !ok { - return errors.New("the data field is not a string") - } - b64, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - value = string(b64) - } - hh.Key, hh.Value = key, MaybeBinaryValue{Value: value} - return nil -} - -// HTTPRequest contains an HTTP request. -// -// Headers are a map in Web Connectivity data format but -// we have added support for a list since January 2020. 
-type HTTPRequest struct { - Body HTTPBody `json:"body"` - BodyIsTruncated bool `json:"body_is_truncated"` - HeadersList []HTTPHeader `json:"headers_list"` - Headers map[string]MaybeBinaryValue `json:"headers"` - Method string `json:"method"` - Tor HTTPTor `json:"tor"` - Transport string `json:"x_transport"` - URL string `json:"url"` -} - -// HTTPResponse contains an HTTP response. -// -// Headers are a map in Web Connectivity data format but -// we have added support for a list since January 2020. -type HTTPResponse struct { - Body HTTPBody `json:"body"` - BodyIsTruncated bool `json:"body_is_truncated"` - Code int64 `json:"code"` - HeadersList []HTTPHeader `json:"headers_list"` - Headers map[string]MaybeBinaryValue `json:"headers"` - - // The following fields are not serialised but are useful to simplify - // analysing the measurements in telegram, whatsapp, etc. - Locations []string `json:"-"` -} - -// RequestEntry is one of the entries that are part of -// the "requests" key of a OONI report. 
-type RequestEntry struct { - Failure *string `json:"failure"` - Request HTTPRequest `json:"request"` - Response HTTPResponse `json:"response"` - T float64 `json:"t"` -} - func addheaders( source http.Header, destList *[]HTTPHeader, @@ -361,30 +176,6 @@ func newRequestList(begin time.Time, events []trace.Event) []RequestEntry { return out } -// DNSAnswerEntry is the answer to a DNS query -type DNSAnswerEntry struct { - ASN int64 `json:"asn,omitempty"` - ASOrgName string `json:"as_org_name,omitempty"` - AnswerType string `json:"answer_type"` - Hostname string `json:"hostname,omitempty"` - IPv4 string `json:"ipv4,omitempty"` - IPv6 string `json:"ipv6,omitempty"` - TTL *uint32 `json:"ttl"` -} - -// DNSQueryEntry is a DNS query with possibly an answer -type DNSQueryEntry struct { - Answers []DNSAnswerEntry `json:"answers"` - Engine string `json:"engine"` - Failure *string `json:"failure"` - Hostname string `json:"hostname"` - QueryType string `json:"query_type"` - ResolverHostname *string `json:"resolver_hostname"` - ResolverPort *string `json:"resolver_port"` - ResolverAddress string `json:"resolver_address"` - T float64 `json:"t"` -} - type dnsQueryType string // NewDNSQueriesList returns a list of DNS queries. @@ -454,19 +245,6 @@ func (qtype dnsQueryType) makequeryentry(begin time.Time, ev trace.Event) DNSQue } } -// NetworkEvent is a network event. It contains all the possible fields -// and most fields are optional. They are only added when it makes sense -// for them to be there _and_ we have data to show. -type NetworkEvent struct { - Address string `json:"address,omitempty"` - Failure *string `json:"failure"` - NumBytes int64 `json:"num_bytes,omitempty"` - Operation string `json:"operation"` - Proto string `json:"proto,omitempty"` - T float64 `json:"t"` - Tags []string `json:"tags,omitempty"` -} - // NewNetworkEventsList returns a list of DNS queries. 
func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent { var out []NetworkEvent @@ -528,19 +306,6 @@ func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent return out } -// TLSHandshake contains TLS handshake data -type TLSHandshake struct { - CipherSuite string `json:"cipher_suite"` - Failure *string `json:"failure"` - NegotiatedProtocol string `json:"negotiated_protocol"` - NoTLSVerify bool `json:"no_tls_verify"` - PeerCertificates []MaybeBinaryValue `json:"peer_certificates"` - ServerName string `json:"server_name"` - T float64 `json:"t"` - Tags []string `json:"tags"` - TLSVersion string `json:"tls_version"` -} - // NewTLSHandshakesList creates a new TLSHandshakesList func NewTLSHandshakesList(begin time.Time, events []trace.Event) []TLSHandshake { var out []TLSHandshake diff --git a/internal/engine/netx/archival/archival_test.go b/internal/engine/netx/archival/archival_test.go index 058da72..5a5e630 100644 --- a/internal/engine/netx/archival/archival_test.go +++ b/internal/engine/netx/archival/archival_test.go @@ -14,7 +14,6 @@ import ( "github.com/gorilla/websocket" "github.com/ooni/probe-cli/v3/internal/engine/netx/archival" "github.com/ooni/probe-cli/v3/internal/engine/netx/trace" - "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netxlite" ) @@ -566,352 +565,6 @@ func TestNewTLSHandshakesList(t *testing.T) { } } -func TestExtSpec_AddTo(t *testing.T) { - m := new(model.Measurement) - archival.ExtDNS.AddTo(m) - expected := map[string]int64{"dnst": 0} - if d := cmp.Diff(m.Extensions, expected); d != "" { - t.Fatal(d) - } -} - -var binaryInput = []uint8{ - 0x57, 0xe5, 0x79, 0xfb, 0xa6, 0xbb, 0x0d, 0xbc, 0xce, 0xbd, 0xa7, 0xa0, - 0xba, 0xa4, 0x78, 0x78, 0x12, 0x59, 0xee, 0x68, 0x39, 0xa4, 0x07, 0x98, - 0xc5, 0x3e, 0xbc, 0x55, 0xcb, 0xfe, 0x34, 0x3c, 0x7e, 0x1b, 0x5a, 0xb3, - 0x22, 0x9d, 0xc1, 0x2d, 0x6e, 0xca, 0x5b, 0xf1, 0x10, 0x25, 0x47, 0x1e, - 0x44, 0xe2, 0x2d, 0x60, 0x08, 
0xea, 0xb0, 0x0a, 0xcc, 0x05, 0x48, 0xa0, - 0xf5, 0x78, 0x38, 0xf0, 0xdb, 0x3f, 0x9d, 0x9f, 0x25, 0x6f, 0x89, 0x00, - 0x96, 0x93, 0xaf, 0x43, 0xac, 0x4d, 0xc9, 0xac, 0x13, 0xdb, 0x22, 0xbe, - 0x7a, 0x7d, 0xd9, 0x24, 0xa2, 0x52, 0x69, 0xd8, 0x89, 0xc1, 0xd1, 0x57, - 0xaa, 0x04, 0x2b, 0xa2, 0xd8, 0xb1, 0x19, 0xf6, 0xd5, 0x11, 0x39, 0xbb, - 0x80, 0xcf, 0x86, 0xf9, 0x5f, 0x9d, 0x8c, 0xab, 0xf5, 0xc5, 0x74, 0x24, - 0x3a, 0xa2, 0xd4, 0x40, 0x4e, 0xd7, 0x10, 0x1f, -} - -var encodedBinaryInput = []byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`) - -func TestMaybeBinaryValue_MarshalJSON(t *testing.T) { - type fields struct { - Value string - } - tests := []struct { - name string - fields fields - want []byte - wantErr bool - }{{ - name: "with string input", - fields: fields{ - Value: "antani", - }, - want: []byte(`"antani"`), - wantErr: false, - }, { - name: "with binary input", - fields: fields{ - Value: string(binaryInput), - }, - want: encodedBinaryInput, - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hb := archival.MaybeBinaryValue{ - Value: tt.fields.Value, - } - got, err := hb.MarshalJSON() - if (err != nil) != tt.wantErr { - t.Errorf("MaybeBinaryValue.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Error(cmp.Diff(got, tt.want)) - } - }) - } -} - -func TestMaybeBinaryValue_UnmarshalJSON(t *testing.T) { - type fields struct { - WantValue string - } - type args struct { - d []byte - } - tests := []struct { - name string - fields fields - args args - wantErr bool - }{{ - name: "with string input", - fields: fields{ - WantValue: "xo", - }, - args: args{d: []byte(`"xo"`)}, - wantErr: false, - }, { - name: "with nil input", - fields: fields{ - WantValue: "", - }, - args: args{d: nil}, - wantErr: true, - }, { 
- name: "with missing/invalid format", - fields: fields{ - WantValue: "", - }, - args: args{d: []byte(`{"format": "foo"}`)}, - wantErr: true, - }, { - name: "with missing data", - fields: fields{ - WantValue: "", - }, - args: args{d: []byte(`{"format": "base64"}`)}, - wantErr: true, - }, { - name: "with invalid base64 data", - fields: fields{ - WantValue: "", - }, - args: args{d: []byte(`{"format": "base64", "data": "x"}`)}, - wantErr: true, - }, { - name: "with valid base64 data", - fields: fields{ - WantValue: string(binaryInput), - }, - args: args{d: encodedBinaryInput}, - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hb := &archival.MaybeBinaryValue{} - if err := hb.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr { - t.Errorf("MaybeBinaryValue.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - } - if d := cmp.Diff(tt.fields.WantValue, hb.Value); d != "" { - t.Error(d) - } - }) - } -} - -func TestHTTPHeader_MarshalJSON(t *testing.T) { - type fields struct { - Key string - Value archival.MaybeBinaryValue - } - tests := []struct { - name string - fields fields - want []byte - wantErr bool - }{{ - name: "with string value", - fields: fields{ - Key: "Content-Type", - Value: archival.MaybeBinaryValue{ - Value: "text/plain", - }, - }, - want: []byte(`["Content-Type","text/plain"]`), - wantErr: false, - }, { - name: "with binary value", - fields: fields{ - Key: "Content-Type", - Value: archival.MaybeBinaryValue{ - Value: string(binaryInput), - }, - }, - want: []byte(`["Content-Type",` + string(encodedBinaryInput) + `]`), - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hh := archival.HTTPHeader{ - Key: tt.fields.Key, - Value: tt.fields.Value, - } - got, err := hh.MarshalJSON() - if (err != nil) != tt.wantErr { - t.Errorf("HTTPHeader.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Error(cmp.Diff(got, 
tt.want)) - } - }) - } -} - -func TestHTTPHeader_UnmarshalJSON(t *testing.T) { - type fields struct { - WantKey string - WantValue archival.MaybeBinaryValue - } - type args struct { - d []byte - } - tests := []struct { - name string - fields fields - args args - wantErr bool - }{{ - name: "with invalid input", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`{}`), - }, - wantErr: true, - }, { - name: "with unexpected number of items", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`[]`), - }, - wantErr: true, - }, { - name: "with first item not being a string", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`[0,0]`), - }, - wantErr: true, - }, { - name: "with both items being a string", - fields: fields{ - WantKey: "x", - WantValue: archival.MaybeBinaryValue{ - Value: "y", - }, - }, - args: args{ - d: []byte(`["x","y"]`), - }, - wantErr: false, - }, { - name: "with second item not being a map[string]interface{}", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",[]]`), - }, - wantErr: true, - }, { - name: "with missing format key in second item", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{}]`), - }, - wantErr: true, - }, { - name: "with format value not being base64", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":1}]`), - }, - wantErr: true, - }, { - name: "with missing data field", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":"base64"}]`), - }, - wantErr: true, - }, { - name: "with data not being a 
string", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":"base64","data":1}]`), - }, - wantErr: true, - }, { - name: "with data not being base64", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":"base64","data":"xx"}]`), - }, - wantErr: true, - }, { - name: "with correctly encoded base64 data", - fields: fields{ - WantKey: "x", - WantValue: archival.MaybeBinaryValue{ - Value: string(binaryInput), - }, - }, - args: args{ - d: []byte(`["x",` + string(encodedBinaryInput) + `]`), - }, - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hh := &archival.HTTPHeader{} - if err := hh.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr { - t.Errorf("HTTPHeader.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - } - expect := &archival.HTTPHeader{ - Key: tt.fields.WantKey, - Value: tt.fields.WantValue, - } - if d := cmp.Diff(hh, expect); d != "" { - t.Error(d) - } - }) - } -} - func TestNewFailure(t *testing.T) { type args struct { err error diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 0ce0b31..7dacf47 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -26,7 +26,7 @@ func WrapResolver(begin time.Time, db WritableDB, r model.Resolver) model.Resolv } // NewResolverSystem creates a system resolver and then wraps -// it using the WrapResolver function/ +// it using the WrapResolver function. 
func (mx *Measurer) NewResolverSystem(db WritableDB, logger model.Logger) model.Resolver { return mx.WrapResolver(db, netxlite.NewResolverStdlib(logger)) } diff --git a/internal/model/archival.go b/internal/model/archival.go new file mode 100644 index 0000000..b140433 --- /dev/null +++ b/internal/model/archival.go @@ -0,0 +1,311 @@ +package model + +import ( + "encoding/base64" + "encoding/json" + "errors" + "unicode/utf8" +) + +// +// Archival format for individual measurement results +// such as TCP connect, TLS handshake, DNS lookup. +// +// These types end up inside the TestKeys field of an +// OONI measurement (see measurement.go). +// +// See https://github.com/ooni/spec/tree/master/data-formats. +// + +// +// Data format extension specification +// + +// ArchivalExtSpec describes a data format extension +type ArchivalExtSpec struct { + Name string // extension name + V int64 // extension version +} + +// AddTo adds the current ExtSpec to the specified measurement +func (spec ArchivalExtSpec) AddTo(m *Measurement) { + if m.Extensions == nil { + m.Extensions = make(map[string]int64) + } + m.Extensions[spec.Name] = spec.V +} + +var ( + // ArchivalExtDNS is the version of df-002-dnst.md + ArchivalExtDNS = ArchivalExtSpec{Name: "dnst", V: 0} + + // ArchivalExtNetevents is the version of df-008-netevents.md + ArchivalExtNetevents = ArchivalExtSpec{Name: "netevents", V: 0} + + // ArchivalExtHTTP is the version of df-001-httpt.md + ArchivalExtHTTP = ArchivalExtSpec{Name: "httpt", V: 0} + + // ArchivalExtTCPConnect is the version of df-005-tcpconnect.md + ArchivalExtTCPConnect = ArchivalExtSpec{Name: "tcpconnect", V: 0} + + // ArchivalExtTLSHandshake is the version of df-006-tlshandshake.md + ArchivalExtTLSHandshake = ArchivalExtSpec{Name: "tlshandshake", V: 0} + + // ArchivalExtTunnel is the version of df-009-tunnel.md + ArchivalExtTunnel = ArchivalExtSpec{Name: "tunnel", V: 0} +) + +// +// Base types +// + +// ArchivalMaybeBinaryData is a possibly binary string. 
We use this helper class +// to define a custom JSON encoder that allows us to choose the proper +// representation depending on whether the Value field is valid UTF-8 or not. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata +type ArchivalMaybeBinaryData struct { + Value string +} + +// MarshalJSON marshals a string-like to JSON following the OONI spec that +// says that UTF-8 content is represented as string and non-UTF-8 content is +// instead represented using `{"format":"base64","data":"..."}`. +func (hb ArchivalMaybeBinaryData) MarshalJSON() ([]byte, error) { + if utf8.ValidString(hb.Value) { + return json.Marshal(hb.Value) + } + er := make(map[string]string) + er["format"] = "base64" + er["data"] = base64.StdEncoding.EncodeToString([]byte(hb.Value)) + return json.Marshal(er) +} + +// UnmarshalJSON is the opposite of MarshalJSON. +func (hb *ArchivalMaybeBinaryData) UnmarshalJSON(d []byte) error { + if err := json.Unmarshal(d, &hb.Value); err == nil { + return nil + } + er := make(map[string]string) + if err := json.Unmarshal(d, &er); err != nil { + return err + } + if v, ok := er["format"]; !ok || v != "base64" { + return errors.New("missing or invalid format field") + } + if _, ok := er["data"]; !ok { + return errors.New("missing data field") + } + b64, err := base64.StdEncoding.DecodeString(er["data"]) + if err != nil { + return err + } + hb.Value = string(b64) + return nil +} + +// +// DNS lookup +// + +// ArchivalDNSLookupResult is the result of a DNS lookup. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-002-dnst.md. 
+type ArchivalDNSLookupResult struct { + Answers []ArchivalDNSAnswer `json:"answers"` + Engine string `json:"engine"` + Failure *string `json:"failure"` + Hostname string `json:"hostname"` + QueryType string `json:"query_type"` + ResolverHostname *string `json:"resolver_hostname"` + ResolverPort *string `json:"resolver_port"` + ResolverAddress string `json:"resolver_address"` + T float64 `json:"t"` +} + +// ArchivalDNSAnswer is a DNS answer. +type ArchivalDNSAnswer struct { + ASN int64 `json:"asn,omitempty"` + ASOrgName string `json:"as_org_name,omitempty"` + AnswerType string `json:"answer_type"` + Hostname string `json:"hostname,omitempty"` + IPv4 string `json:"ipv4,omitempty"` + IPv6 string `json:"ipv6,omitempty"` + TTL *uint32 `json:"ttl"` +} + +// +// TCP connect +// + +// ArchivalTCPConnectResult contains the result of a TCP connect. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-005-tcpconnect.md. +type ArchivalTCPConnectResult struct { + IP string `json:"ip"` + Port int `json:"port"` + Status ArchivalTCPConnectStatus `json:"status"` + T float64 `json:"t"` +} + +// ArchivalTCPConnectStatus is the status of ArchivalTCPConnectResult. +type ArchivalTCPConnectStatus struct { + Blocked *bool `json:"blocked,omitempty"` + Failure *string `json:"failure"` + Success bool `json:"success"` +} + +// +// TLS or QUIC handshake +// + +// ArchivalTLSOrQUICHandshakeResult is the result of a TLS or QUIC handshake. 
+// +// See https://github.com/ooni/spec/blob/master/data-formats/df-006-tlshandshake.md +type ArchivalTLSOrQUICHandshakeResult struct { + CipherSuite string `json:"cipher_suite"` + Failure *string `json:"failure"` + NegotiatedProtocol string `json:"negotiated_protocol"` + NoTLSVerify bool `json:"no_tls_verify"` + PeerCertificates []ArchivalMaybeBinaryData `json:"peer_certificates"` + ServerName string `json:"server_name"` + T float64 `json:"t"` + Tags []string `json:"tags"` + TLSVersion string `json:"tls_version"` +} + +// +// HTTP +// + +// ArchivalHTTPRequestResult is the result of sending an HTTP request. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md. +type ArchivalHTTPRequestResult struct { + Failure *string `json:"failure"` + Request ArchivalHTTPRequest `json:"request"` + Response ArchivalHTTPResponse `json:"response"` + T float64 `json:"t"` +} + +// ArchivalHTTPRequest contains an HTTP request. +// +// Headers are a map in Web Connectivity data format but +// we have added support for a list since January 2020. +type ArchivalHTTPRequest struct { + Body ArchivalHTTPBody `json:"body"` + BodyIsTruncated bool `json:"body_is_truncated"` + HeadersList []ArchivalHTTPHeader `json:"headers_list"` + Headers map[string]ArchivalMaybeBinaryData `json:"headers"` + Method string `json:"method"` + Tor ArchivalHTTPTor `json:"tor"` + Transport string `json:"x_transport"` + URL string `json:"url"` +} + +// ArchivalHTTPResponse contains an HTTP response. +// +// Headers are a map in Web Connectivity data format but +// we have added support for a list since January 2020. 
+type ArchivalHTTPResponse struct { + Body ArchivalHTTPBody `json:"body"` + BodyIsTruncated bool `json:"body_is_truncated"` + Code int64 `json:"code"` + HeadersList []ArchivalHTTPHeader `json:"headers_list"` + Headers map[string]ArchivalMaybeBinaryData `json:"headers"` + + // The following fields are not serialised but are useful to simplify + // analysing the measurements in telegram, whatsapp, etc. + Locations []string `json:"-"` +} + +// ArchivalHTTPBody is an HTTP body. As an implementation note, this type must +// be an alias for the ArchivalMaybeBinaryData type, otherwise the specific serialisation +// mechanism implemented by ArchivalMaybeBinaryData does not work. +type ArchivalHTTPBody = ArchivalMaybeBinaryData + +// ArchivalHTTPHeader is a single HTTP header. +type ArchivalHTTPHeader struct { + Key string + Value ArchivalMaybeBinaryData +} + +// MarshalJSON marshals a single HTTP header to a tuple where the first +// element is a string and the second element is maybe-binary data. +func (hh ArchivalHTTPHeader) MarshalJSON() ([]byte, error) { + if utf8.ValidString(hh.Value.Value) { + return json.Marshal([]string{hh.Key, hh.Value.Value}) + } + value := make(map[string]string) + value["format"] = "base64" + value["data"] = base64.StdEncoding.EncodeToString([]byte(hh.Value.Value)) + return json.Marshal([]interface{}{hh.Key, value}) +} + +// UnmarshalJSON is the opposite of MarshalJSON.
+func (hh *ArchivalHTTPHeader) UnmarshalJSON(d []byte) error { + var pair []interface{} + if err := json.Unmarshal(d, &pair); err != nil { + return err + } + if len(pair) != 2 { + return errors.New("unexpected pair length") + } + key, ok := pair[0].(string) + if !ok { + return errors.New("the key is not a string") + } + value, ok := pair[1].(string) + if !ok { + mapvalue, ok := pair[1].(map[string]interface{}) + if !ok { + return errors.New("the value is neither a string nor a map[string]interface{}") + } + if _, ok := mapvalue["format"]; !ok { + return errors.New("missing format") + } + if v, ok := mapvalue["format"].(string); !ok || v != "base64" { + return errors.New("invalid format") + } + if _, ok := mapvalue["data"]; !ok { + return errors.New("missing data field") + } + v, ok := mapvalue["data"].(string) + if !ok { + return errors.New("the data field is not a string") + } + b64, err := base64.StdEncoding.DecodeString(v) + if err != nil { + return err + } + value = string(b64) + } + hh.Key, hh.Value = key, ArchivalMaybeBinaryData{Value: value} + return nil +} + +// ArchivalHTTPTor contains Tor information. +type ArchivalHTTPTor struct { + ExitIP *string `json:"exit_ip"` + ExitName *string `json:"exit_name"` + IsTor bool `json:"is_tor"` +} + +// +// NetworkEvent +// + +// ArchivalNetworkEvent is a network event. It contains all the possible fields +// and most fields are optional. They are only added when it makes sense +// for them to be there _and_ we have data to show. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-008-netevents.md. 
+type ArchivalNetworkEvent struct { + Address string `json:"address,omitempty"` + Failure *string `json:"failure"` + NumBytes int64 `json:"num_bytes,omitempty"` + Operation string `json:"operation"` + Proto string `json:"proto,omitempty"` + T float64 `json:"t"` + Tags []string `json:"tags,omitempty"` +} diff --git a/internal/model/archival_test.go b/internal/model/archival_test.go new file mode 100644 index 0000000..eec139a --- /dev/null +++ b/internal/model/archival_test.go @@ -0,0 +1,310 @@ +package model + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/ooni/probe-cli/v3/internal/fakefill" +) + +func TestArchivalExtSpec(t *testing.T) { + t.Run("AddTo", func(t *testing.T) { + m := &Measurement{} + ArchivalExtDNS.AddTo(m) + expected := map[string]int64{"dnst": 0} + if d := cmp.Diff(m.Extensions, expected); d != "" { + t.Fatal(d) + } + }) +} + +// we use this value below to test we can handle binary data +var archivalBinaryInput = []uint8{ + 0x57, 0xe5, 0x79, 0xfb, 0xa6, 0xbb, 0x0d, 0xbc, 0xce, 0xbd, 0xa7, 0xa0, + 0xba, 0xa4, 0x78, 0x78, 0x12, 0x59, 0xee, 0x68, 0x39, 0xa4, 0x07, 0x98, + 0xc5, 0x3e, 0xbc, 0x55, 0xcb, 0xfe, 0x34, 0x3c, 0x7e, 0x1b, 0x5a, 0xb3, + 0x22, 0x9d, 0xc1, 0x2d, 0x6e, 0xca, 0x5b, 0xf1, 0x10, 0x25, 0x47, 0x1e, + 0x44, 0xe2, 0x2d, 0x60, 0x08, 0xea, 0xb0, 0x0a, 0xcc, 0x05, 0x48, 0xa0, + 0xf5, 0x78, 0x38, 0xf0, 0xdb, 0x3f, 0x9d, 0x9f, 0x25, 0x6f, 0x89, 0x00, + 0x96, 0x93, 0xaf, 0x43, 0xac, 0x4d, 0xc9, 0xac, 0x13, 0xdb, 0x22, 0xbe, + 0x7a, 0x7d, 0xd9, 0x24, 0xa2, 0x52, 0x69, 0xd8, 0x89, 0xc1, 0xd1, 0x57, + 0xaa, 0x04, 0x2b, 0xa2, 0xd8, 0xb1, 0x19, 0xf6, 0xd5, 0x11, 0x39, 0xbb, + 0x80, 0xcf, 0x86, 0xf9, 0x5f, 0x9d, 0x8c, 0xab, 0xf5, 0xc5, 0x74, 0x24, + 0x3a, 0xa2, 0xd4, 0x40, 0x4e, 0xd7, 0x10, 0x1f, +} + +// we use this value below to test we can handle binary data +var archivalEncodedBinaryInput = 
[]byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`) + +func TestMaybeBinaryValue(t *testing.T) { + t.Run("MarshalJSON", func(t *testing.T) { + tests := []struct { + name string // subtest name passed to t.Run + input string // string stored in Value before marshalling + want []byte // exact JSON bytes MarshalJSON must return + wantErr bool // whether MarshalJSON must return an error + }{{ + name: "with string input", + input: "antani", + want: []byte(`"antani"`), + wantErr: false, + }, { + name: "with binary input", + input: string(archivalBinaryInput), + want: archivalEncodedBinaryInput, + wantErr: false, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hb := ArchivalMaybeBinaryData{ + Value: tt.input, + } + got, err := hb.MarshalJSON() + if (err != nil) != tt.wantErr { + t.Fatalf("ArchivalMaybeBinaryData.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr) + } + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Fatal(diff) + } + }) + } + }) + + t.Run("UnmarshalJSON", func(t *testing.T) { + tests := []struct { + name string // subtest name passed to t.Run + input []byte // raw JSON handed to UnmarshalJSON + want string // expected Value after decoding (empty on failure) + wantErr bool // whether UnmarshalJSON must return an error + }{{ + name: "with string input", + input: []byte(`"xo"`), + want: "xo", + wantErr: false, + }, { + name: "with nil input", + input: nil, + want: "", + wantErr: true, + }, { + name: "with missing/invalid format", + input: []byte(`{"format": "foo"}`), + want: "", + wantErr: true, + }, { + name: "with missing data", + input: []byte(`{"format": "base64"}`), + want: "", + wantErr: true, + }, { + name: "with invalid base64 data", + input: []byte(`{"format": "base64", "data": "x"}`), + want: "", + wantErr: true, + }, { + name: "with valid base64 data", + input: archivalEncodedBinaryInput, + want: string(archivalBinaryInput), + wantErr: false, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hb := 
&ArchivalMaybeBinaryData{} + if err := hb.UnmarshalJSON(tt.input); (err != nil) != tt.wantErr { + t.Fatalf("ArchivalMaybeBinaryData.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) + } + if d := cmp.Diff(tt.want, hb.Value); d != "" { + t.Fatal(d) + } + }) + } + }) +} + +func TestHTTPHeader(t *testing.T) { + t.Run("MarshalJSON", func(t *testing.T) { + tests := []struct { + name string // subtest name passed to t.Run + input ArchivalHTTPHeader // header to marshal + want []byte // exact JSON bytes MarshalJSON must return + wantErr bool // whether MarshalJSON must return an error + }{{ + name: "with string value", + input: ArchivalHTTPHeader{ + Key: "Content-Type", + Value: ArchivalMaybeBinaryData{ + Value: "text/plain", + }, + }, + want: []byte(`["Content-Type","text/plain"]`), + wantErr: false, + }, { + name: "with binary value", + input: ArchivalHTTPHeader{ + Key: "Content-Type", + Value: ArchivalMaybeBinaryData{ + Value: string(archivalBinaryInput), + }, + }, + want: []byte(`["Content-Type",` + string(archivalEncodedBinaryInput) + `]`), + wantErr: false, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := tt.input.MarshalJSON() + if (err != nil) != tt.wantErr { + t.Fatalf("ArchivalHTTPHeader.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr) + } + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Fatal(diff) + } + }) + } + }) + + t.Run("UnmarshalJSON", func(t *testing.T) { + tests := []struct { + name string // subtest name passed to t.Run + input []byte // raw JSON handed to UnmarshalJSON + want ArchivalHTTPHeader // expected header after decoding (zero value on failure) + wantErr bool // whether UnmarshalJSON must return an error + }{{ + name: "with invalid input", + input: []byte(`{}`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{Value: ""}, + }, + wantErr: true, + }, { + name: "with unexpected number of items", + input: []byte(`[]`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{Value: ""}, + }, + wantErr: true, + }, { + name: "with first item not being a string", + input: []byte(`[0,0]`), + want: 
ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{Value: ""}, + }, + wantErr: true, + }, { + name: "with both items being a string", + input: []byte(`["x","y"]`), + want: ArchivalHTTPHeader{ + Key: "x", + Value: ArchivalMaybeBinaryData{ + Value: "y", + }, + }, + wantErr: false, + }, { + name: "with second item not being a map[string]interface{}", + input: []byte(`["x",[]]`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{ + Value: "", + }, + }, + wantErr: true, + }, { + name: "with missing format key in second item", + input: []byte(`["x",{}]`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{ + Value: "", + }, + }, + wantErr: true, + }, { + name: "with format value not being base64", + input: []byte(`["x",{"format":1}]`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{ + Value: "", + }, + }, + wantErr: true, + }, { + name: "with missing data field", + input: []byte(`["x",{"format":"base64"}]`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{ + Value: "", + }, + }, + wantErr: true, + }, { + name: "with data not being a string", + input: []byte(`["x",{"format":"base64","data":1}]`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{ + Value: "", + }, + }, + wantErr: true, + }, { + name: "with data not being base64", + input: []byte(`["x",{"format":"base64","data":"xx"}]`), + want: ArchivalHTTPHeader{ + Key: "", + Value: ArchivalMaybeBinaryData{ + Value: "", + }, + }, + wantErr: true, + }, { + name: "with correctly encoded base64 data", + input: []byte(`["x",` + string(archivalEncodedBinaryInput) + `]`), + want: ArchivalHTTPHeader{ + Key: "x", + Value: ArchivalMaybeBinaryData{ + Value: string(archivalBinaryInput), + }, + }, + wantErr: false, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hh := &ArchivalHTTPHeader{} + if err := hh.UnmarshalJSON(tt.input); (err != nil) != tt.wantErr { + 
t.Fatalf("ArchivalHTTPHeader.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) + } + if diff := cmp.Diff(&tt.want, hh); diff != "" { + t.Error(diff) + } + }) + } + }) +} + +func TestHTTPBody(t *testing.T) { + // Implementation note: the content is always going to be the same + // even if we modify the implementation to become: + // + // type ArchivalHTTPBody ArchivalMaybeBinaryData + // + // instead of the correct: + // + // type ArchivalHTTPBody = ArchivalMaybeBinaryData + // + // However, cmp.Diff also takes into account the data type. Hence, if + // we make a mistake and apply the above change (which will in turn + // break correct JSON serialization), then this test will fail. + var body ArchivalHTTPBody + ff := &fakefill.Filler{} + ff.Fill(&body) + data := ArchivalMaybeBinaryData(body) + if diff := cmp.Diff(body, data); diff != "" { + t.Fatal(diff) + } +}