From 730373cc7576edd7cb215f9db55881ff0a75e63f Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 10 Jan 2022 11:25:52 +0100 Subject: [PATCH] refactor: move i/netx/archival structs to i/model (#659) We recently started moving core data structures inside of the internal/model package as detailed in https://github.com/ooni/probe/issues/1885. The chief reason to do that is to have a set of fundamental shared data types to help us rationalize the codebase. This specific diff moves internal/engine/netx/archival's core data types inside the internal/model package. While there, it also refactors the existing tests to improve their quality. We also added an extra test to ensure `ArchivalHTTPBody` is an alias for `ArchivalMaybeBinaryData`, which is required to ensure the custom JSON serialization process works for it. We're doing this because both internal/engine/netx/archival and internal/measurex define their own archival data structures. We developed measurex using its own structures because it allowed us to iterate more quickly. Now that we have sketched out measurex, the time has come to consolidate. My overall aim is to spend a few more hours this week on engineering measurex. This is preliminary work before we finish up both measurex and websteps. We described this cleanup in https://github.com/ooni/probe/issues/1957. 
--- internal/engine/netx/archival/archival.go | 283 ++------------ .../engine/netx/archival/archival_test.go | 347 ------------------ internal/measurex/resolver.go | 2 +- internal/model/archival.go | 311 ++++++++++++++++ internal/model/archival_test.go | 310 ++++++++++++++++ 5 files changed, 646 insertions(+), 607 deletions(-) create mode 100644 internal/model/archival.go create mode 100644 internal/model/archival_test.go diff --git a/internal/engine/netx/archival/archival.go b/internal/engine/netx/archival/archival.go index 3655b5e..c2aa62a 100644 --- a/internal/engine/netx/archival/archival.go +++ b/internal/engine/netx/archival/archival.go @@ -5,8 +5,6 @@ package archival import ( "crypto/x509" - "encoding/base64" - "encoding/json" "errors" "net" "net/http" @@ -14,7 +12,6 @@ import ( "strconv" "strings" "time" - "unicode/utf8" "github.com/ooni/probe-cli/v3/internal/engine/geolocate" "github.com/ooni/probe-cli/v3/internal/engine/netx/trace" @@ -22,59 +19,32 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" ) -// ExtSpec describes a data format extension -type ExtSpec struct { - Name string // extension name - V int64 // extension version -} - -// AddTo adds the current ExtSpec to the specified measurement -func (spec ExtSpec) AddTo(m *model.Measurement) { - if m.Extensions == nil { - m.Extensions = make(map[string]int64) - } - m.Extensions[spec.Name] = spec.V -} - -var ( - // ExtDNS is the version of df-002-dnst.md - ExtDNS = ExtSpec{Name: "dnst", V: 0} - - // ExtNetevents is the version of df-008-netevents.md - ExtNetevents = ExtSpec{Name: "netevents", V: 0} - - // ExtHTTP is the version of df-001-httpt.md - ExtHTTP = ExtSpec{Name: "httpt", V: 0} - - // ExtTCPConnect is the version of df-005-tcpconnect.md - ExtTCPConnect = ExtSpec{Name: "tcpconnect", V: 0} - - // ExtTLSHandshake is the version of df-006-tlshandshake.md - ExtTLSHandshake = ExtSpec{Name: "tlshandshake", V: 0} - - // ExtTunnel is the version of df-009-tunnel.md - ExtTunnel = 
ExtSpec{Name: "tunnel", V: 0} +// Compatibility types +type ( + ExtSpec = model.ArchivalExtSpec + TCPConnectEntry = model.ArchivalTCPConnectResult + TCPConnectStatus = model.ArchivalTCPConnectStatus + MaybeBinaryValue = model.ArchivalMaybeBinaryData + DNSQueryEntry = model.ArchivalDNSLookupResult + DNSAnswerEntry = model.ArchivalDNSAnswer + TLSHandshake = model.ArchivalTLSOrQUICHandshakeResult + HTTPBody = model.ArchivalHTTPBody + HTTPHeader = model.ArchivalHTTPHeader + RequestEntry = model.ArchivalHTTPRequestResult + HTTPRequest = model.ArchivalHTTPRequest + HTTPResponse = model.ArchivalHTTPResponse + NetworkEvent = model.ArchivalNetworkEvent ) -// TCPConnectStatus contains the TCP connect status. -// -// The Blocked field breaks the separation between measurement and analysis -// we have been enforcing for quite some time now. It is a legacy from the -// Web Connectivity experiment and it should be here because of that. -type TCPConnectStatus struct { - Blocked *bool `json:"blocked,omitempty"` // Web Connectivity only - Failure *string `json:"failure"` - Success bool `json:"success"` -} - -// TCPConnectEntry contains one of the entries that are part -// of the "tcp_connect" key of a OONI report. 
-type TCPConnectEntry struct { - IP string `json:"ip"` - Port int `json:"port"` - Status TCPConnectStatus `json:"status"` - T float64 `json:"t"` -} +// Compatibility variables +var ( + ExtDNS = model.ArchivalExtDNS + ExtNetevents = model.ArchivalExtNetevents + ExtHTTP = model.ArchivalExtHTTP + ExtTCPConnect = model.ArchivalExtTCPConnect + ExtTLSHandshake = model.ArchivalExtTLSHandshake + ExtTunnel = model.ArchivalExtTunnel +) // NewTCPConnectList creates a new TCPConnectList func NewTCPConnectList(begin time.Time, events []trace.Event) []TCPConnectEntry { @@ -134,161 +104,6 @@ func NewFailedOperation(err error) *string { return &s } -// HTTPTor contains Tor information -type HTTPTor struct { - ExitIP *string `json:"exit_ip"` - ExitName *string `json:"exit_name"` - IsTor bool `json:"is_tor"` -} - -// MaybeBinaryValue is a possibly binary string. We use this helper class -// to define a custom JSON encoder that allows us to choose the proper -// representation depending on whether the Value field is valid UTF-8 or not. -type MaybeBinaryValue struct { - Value string -} - -// MarshalJSON marshals a string-like to JSON following the OONI spec that -// says that UTF-8 content is represened as string and non-UTF-8 content is -// instead represented using `{"format":"base64","data":"..."}`. -func (hb MaybeBinaryValue) MarshalJSON() ([]byte, error) { - if utf8.ValidString(hb.Value) { - return json.Marshal(hb.Value) - } - er := make(map[string]string) - er["format"] = "base64" - er["data"] = base64.StdEncoding.EncodeToString([]byte(hb.Value)) - return json.Marshal(er) -} - -// UnmarshalJSON is the opposite of MarshalJSON. 
-func (hb *MaybeBinaryValue) UnmarshalJSON(d []byte) error { - if err := json.Unmarshal(d, &hb.Value); err == nil { - return nil - } - er := make(map[string]string) - if err := json.Unmarshal(d, &er); err != nil { - return err - } - if v, ok := er["format"]; !ok || v != "base64" { - return errors.New("missing or invalid format field") - } - if _, ok := er["data"]; !ok { - return errors.New("missing data field") - } - b64, err := base64.StdEncoding.DecodeString(er["data"]) - if err != nil { - return err - } - hb.Value = string(b64) - return nil -} - -// HTTPBody is an HTTP body. As an implementation note, this type must be -// an alias for the MaybeBinaryValue type, otherwise the specific serialisation -// mechanism implemented by MaybeBinaryValue is not working. -type HTTPBody = MaybeBinaryValue - -// HTTPHeader is a single HTTP header. -type HTTPHeader struct { - Key string - Value MaybeBinaryValue -} - -// MarshalJSON marshals a single HTTP header to a tuple where the first -// element is a string and the second element is maybe-binary data. -func (hh HTTPHeader) MarshalJSON() ([]byte, error) { - if utf8.ValidString(hh.Value.Value) { - return json.Marshal([]string{hh.Key, hh.Value.Value}) - } - value := make(map[string]string) - value["format"] = "base64" - value["data"] = base64.StdEncoding.EncodeToString([]byte(hh.Value.Value)) - return json.Marshal([]interface{}{hh.Key, value}) -} - -// UnmarshalJSON is the opposite of MarshalJSON. 
-func (hh *HTTPHeader) UnmarshalJSON(d []byte) error { - var pair []interface{} - if err := json.Unmarshal(d, &pair); err != nil { - return err - } - if len(pair) != 2 { - return errors.New("unexpected pair length") - } - key, ok := pair[0].(string) - if !ok { - return errors.New("the key is not a string") - } - value, ok := pair[1].(string) - if !ok { - mapvalue, ok := pair[1].(map[string]interface{}) - if !ok { - return errors.New("the value is neither a string nor a map[string]interface{}") - } - if _, ok := mapvalue["format"]; !ok { - return errors.New("missing format") - } - if v, ok := mapvalue["format"].(string); !ok || v != "base64" { - return errors.New("invalid format") - } - if _, ok := mapvalue["data"]; !ok { - return errors.New("missing data field") - } - v, ok := mapvalue["data"].(string) - if !ok { - return errors.New("the data field is not a string") - } - b64, err := base64.StdEncoding.DecodeString(v) - if err != nil { - return err - } - value = string(b64) - } - hh.Key, hh.Value = key, MaybeBinaryValue{Value: value} - return nil -} - -// HTTPRequest contains an HTTP request. -// -// Headers are a map in Web Connectivity data format but -// we have added support for a list since January 2020. -type HTTPRequest struct { - Body HTTPBody `json:"body"` - BodyIsTruncated bool `json:"body_is_truncated"` - HeadersList []HTTPHeader `json:"headers_list"` - Headers map[string]MaybeBinaryValue `json:"headers"` - Method string `json:"method"` - Tor HTTPTor `json:"tor"` - Transport string `json:"x_transport"` - URL string `json:"url"` -} - -// HTTPResponse contains an HTTP response. -// -// Headers are a map in Web Connectivity data format but -// we have added support for a list since January 2020. 
-type HTTPResponse struct { - Body HTTPBody `json:"body"` - BodyIsTruncated bool `json:"body_is_truncated"` - Code int64 `json:"code"` - HeadersList []HTTPHeader `json:"headers_list"` - Headers map[string]MaybeBinaryValue `json:"headers"` - - // The following fields are not serialised but are useful to simplify - // analysing the measurements in telegram, whatsapp, etc. - Locations []string `json:"-"` -} - -// RequestEntry is one of the entries that are part of -// the "requests" key of a OONI report. -type RequestEntry struct { - Failure *string `json:"failure"` - Request HTTPRequest `json:"request"` - Response HTTPResponse `json:"response"` - T float64 `json:"t"` -} - func addheaders( source http.Header, destList *[]HTTPHeader, @@ -361,30 +176,6 @@ func newRequestList(begin time.Time, events []trace.Event) []RequestEntry { return out } -// DNSAnswerEntry is the answer to a DNS query -type DNSAnswerEntry struct { - ASN int64 `json:"asn,omitempty"` - ASOrgName string `json:"as_org_name,omitempty"` - AnswerType string `json:"answer_type"` - Hostname string `json:"hostname,omitempty"` - IPv4 string `json:"ipv4,omitempty"` - IPv6 string `json:"ipv6,omitempty"` - TTL *uint32 `json:"ttl"` -} - -// DNSQueryEntry is a DNS query with possibly an answer -type DNSQueryEntry struct { - Answers []DNSAnswerEntry `json:"answers"` - Engine string `json:"engine"` - Failure *string `json:"failure"` - Hostname string `json:"hostname"` - QueryType string `json:"query_type"` - ResolverHostname *string `json:"resolver_hostname"` - ResolverPort *string `json:"resolver_port"` - ResolverAddress string `json:"resolver_address"` - T float64 `json:"t"` -} - type dnsQueryType string // NewDNSQueriesList returns a list of DNS queries. @@ -454,19 +245,6 @@ func (qtype dnsQueryType) makequeryentry(begin time.Time, ev trace.Event) DNSQue } } -// NetworkEvent is a network event. It contains all the possible fields -// and most fields are optional. 
They are only added when it makes sense -// for them to be there _and_ we have data to show. -type NetworkEvent struct { - Address string `json:"address,omitempty"` - Failure *string `json:"failure"` - NumBytes int64 `json:"num_bytes,omitempty"` - Operation string `json:"operation"` - Proto string `json:"proto,omitempty"` - T float64 `json:"t"` - Tags []string `json:"tags,omitempty"` -} - // NewNetworkEventsList returns a list of DNS queries. func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent { var out []NetworkEvent @@ -528,19 +306,6 @@ func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent return out } -// TLSHandshake contains TLS handshake data -type TLSHandshake struct { - CipherSuite string `json:"cipher_suite"` - Failure *string `json:"failure"` - NegotiatedProtocol string `json:"negotiated_protocol"` - NoTLSVerify bool `json:"no_tls_verify"` - PeerCertificates []MaybeBinaryValue `json:"peer_certificates"` - ServerName string `json:"server_name"` - T float64 `json:"t"` - Tags []string `json:"tags"` - TLSVersion string `json:"tls_version"` -} - // NewTLSHandshakesList creates a new TLSHandshakesList func NewTLSHandshakesList(begin time.Time, events []trace.Event) []TLSHandshake { var out []TLSHandshake diff --git a/internal/engine/netx/archival/archival_test.go b/internal/engine/netx/archival/archival_test.go index 058da72..5a5e630 100644 --- a/internal/engine/netx/archival/archival_test.go +++ b/internal/engine/netx/archival/archival_test.go @@ -14,7 +14,6 @@ import ( "github.com/gorilla/websocket" "github.com/ooni/probe-cli/v3/internal/engine/netx/archival" "github.com/ooni/probe-cli/v3/internal/engine/netx/trace" - "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netxlite" ) @@ -566,352 +565,6 @@ func TestNewTLSHandshakesList(t *testing.T) { } } -func TestExtSpec_AddTo(t *testing.T) { - m := new(model.Measurement) - archival.ExtDNS.AddTo(m) - expected := 
map[string]int64{"dnst": 0} - if d := cmp.Diff(m.Extensions, expected); d != "" { - t.Fatal(d) - } -} - -var binaryInput = []uint8{ - 0x57, 0xe5, 0x79, 0xfb, 0xa6, 0xbb, 0x0d, 0xbc, 0xce, 0xbd, 0xa7, 0xa0, - 0xba, 0xa4, 0x78, 0x78, 0x12, 0x59, 0xee, 0x68, 0x39, 0xa4, 0x07, 0x98, - 0xc5, 0x3e, 0xbc, 0x55, 0xcb, 0xfe, 0x34, 0x3c, 0x7e, 0x1b, 0x5a, 0xb3, - 0x22, 0x9d, 0xc1, 0x2d, 0x6e, 0xca, 0x5b, 0xf1, 0x10, 0x25, 0x47, 0x1e, - 0x44, 0xe2, 0x2d, 0x60, 0x08, 0xea, 0xb0, 0x0a, 0xcc, 0x05, 0x48, 0xa0, - 0xf5, 0x78, 0x38, 0xf0, 0xdb, 0x3f, 0x9d, 0x9f, 0x25, 0x6f, 0x89, 0x00, - 0x96, 0x93, 0xaf, 0x43, 0xac, 0x4d, 0xc9, 0xac, 0x13, 0xdb, 0x22, 0xbe, - 0x7a, 0x7d, 0xd9, 0x24, 0xa2, 0x52, 0x69, 0xd8, 0x89, 0xc1, 0xd1, 0x57, - 0xaa, 0x04, 0x2b, 0xa2, 0xd8, 0xb1, 0x19, 0xf6, 0xd5, 0x11, 0x39, 0xbb, - 0x80, 0xcf, 0x86, 0xf9, 0x5f, 0x9d, 0x8c, 0xab, 0xf5, 0xc5, 0x74, 0x24, - 0x3a, 0xa2, 0xd4, 0x40, 0x4e, 0xd7, 0x10, 0x1f, -} - -var encodedBinaryInput = []byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`) - -func TestMaybeBinaryValue_MarshalJSON(t *testing.T) { - type fields struct { - Value string - } - tests := []struct { - name string - fields fields - want []byte - wantErr bool - }{{ - name: "with string input", - fields: fields{ - Value: "antani", - }, - want: []byte(`"antani"`), - wantErr: false, - }, { - name: "with binary input", - fields: fields{ - Value: string(binaryInput), - }, - want: encodedBinaryInput, - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hb := archival.MaybeBinaryValue{ - Value: tt.fields.Value, - } - got, err := hb.MarshalJSON() - if (err != nil) != tt.wantErr { - t.Errorf("MaybeBinaryValue.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Error(cmp.Diff(got, tt.want)) - } - }) - } -} - 
-func TestMaybeBinaryValue_UnmarshalJSON(t *testing.T) { - type fields struct { - WantValue string - } - type args struct { - d []byte - } - tests := []struct { - name string - fields fields - args args - wantErr bool - }{{ - name: "with string input", - fields: fields{ - WantValue: "xo", - }, - args: args{d: []byte(`"xo"`)}, - wantErr: false, - }, { - name: "with nil input", - fields: fields{ - WantValue: "", - }, - args: args{d: nil}, - wantErr: true, - }, { - name: "with missing/invalid format", - fields: fields{ - WantValue: "", - }, - args: args{d: []byte(`{"format": "foo"}`)}, - wantErr: true, - }, { - name: "with missing data", - fields: fields{ - WantValue: "", - }, - args: args{d: []byte(`{"format": "base64"}`)}, - wantErr: true, - }, { - name: "with invalid base64 data", - fields: fields{ - WantValue: "", - }, - args: args{d: []byte(`{"format": "base64", "data": "x"}`)}, - wantErr: true, - }, { - name: "with valid base64 data", - fields: fields{ - WantValue: string(binaryInput), - }, - args: args{d: encodedBinaryInput}, - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hb := &archival.MaybeBinaryValue{} - if err := hb.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr { - t.Errorf("MaybeBinaryValue.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - } - if d := cmp.Diff(tt.fields.WantValue, hb.Value); d != "" { - t.Error(d) - } - }) - } -} - -func TestHTTPHeader_MarshalJSON(t *testing.T) { - type fields struct { - Key string - Value archival.MaybeBinaryValue - } - tests := []struct { - name string - fields fields - want []byte - wantErr bool - }{{ - name: "with string value", - fields: fields{ - Key: "Content-Type", - Value: archival.MaybeBinaryValue{ - Value: "text/plain", - }, - }, - want: []byte(`["Content-Type","text/plain"]`), - wantErr: false, - }, { - name: "with binary value", - fields: fields{ - Key: "Content-Type", - Value: archival.MaybeBinaryValue{ - Value: string(binaryInput), - }, - }, - 
want: []byte(`["Content-Type",` + string(encodedBinaryInput) + `]`), - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hh := archival.HTTPHeader{ - Key: tt.fields.Key, - Value: tt.fields.Value, - } - got, err := hh.MarshalJSON() - if (err != nil) != tt.wantErr { - t.Errorf("HTTPHeader.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - return - } - if !reflect.DeepEqual(got, tt.want) { - t.Error(cmp.Diff(got, tt.want)) - } - }) - } -} - -func TestHTTPHeader_UnmarshalJSON(t *testing.T) { - type fields struct { - WantKey string - WantValue archival.MaybeBinaryValue - } - type args struct { - d []byte - } - tests := []struct { - name string - fields fields - args args - wantErr bool - }{{ - name: "with invalid input", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`{}`), - }, - wantErr: true, - }, { - name: "with unexpected number of items", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`[]`), - }, - wantErr: true, - }, { - name: "with first item not being a string", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`[0,0]`), - }, - wantErr: true, - }, { - name: "with both items being a string", - fields: fields{ - WantKey: "x", - WantValue: archival.MaybeBinaryValue{ - Value: "y", - }, - }, - args: args{ - d: []byte(`["x","y"]`), - }, - wantErr: false, - }, { - name: "with second item not being a map[string]interface{}", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",[]]`), - }, - wantErr: true, - }, { - name: "with missing format key in second item", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{}]`), - }, - wantErr: true, - }, { - name: 
"with format value not being base64", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":1}]`), - }, - wantErr: true, - }, { - name: "with missing data field", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":"base64"}]`), - }, - wantErr: true, - }, { - name: "with data not being a string", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":"base64","data":1}]`), - }, - wantErr: true, - }, { - name: "with data not being base64", - fields: fields{ - WantKey: "", - WantValue: archival.MaybeBinaryValue{ - Value: "", - }, - }, - args: args{ - d: []byte(`["x",{"format":"base64","data":"xx"}]`), - }, - wantErr: true, - }, { - name: "with correctly encoded base64 data", - fields: fields{ - WantKey: "x", - WantValue: archival.MaybeBinaryValue{ - Value: string(binaryInput), - }, - }, - args: args{ - d: []byte(`["x",` + string(encodedBinaryInput) + `]`), - }, - wantErr: false, - }} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - hh := &archival.HTTPHeader{} - if err := hh.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr { - t.Errorf("HTTPHeader.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr) - } - expect := &archival.HTTPHeader{ - Key: tt.fields.WantKey, - Value: tt.fields.WantValue, - } - if d := cmp.Diff(hh, expect); d != "" { - t.Error(d) - } - }) - } -} - func TestNewFailure(t *testing.T) { type args struct { err error diff --git a/internal/measurex/resolver.go b/internal/measurex/resolver.go index 0ce0b31..7dacf47 100644 --- a/internal/measurex/resolver.go +++ b/internal/measurex/resolver.go @@ -26,7 +26,7 @@ func WrapResolver(begin time.Time, db WritableDB, r model.Resolver) model.Resolv } // NewResolverSystem creates a system resolver and then wraps -// it using the 
WrapResolver function/ +// it using the WrapResolver function. func (mx *Measurer) NewResolverSystem(db WritableDB, logger model.Logger) model.Resolver { return mx.WrapResolver(db, netxlite.NewResolverStdlib(logger)) } diff --git a/internal/model/archival.go b/internal/model/archival.go new file mode 100644 index 0000000..b140433 --- /dev/null +++ b/internal/model/archival.go @@ -0,0 +1,311 @@ +package model + +import ( + "encoding/base64" + "encoding/json" + "errors" + "unicode/utf8" +) + +// +// Archival format for individual measurement results +// such as TCP connect, TLS handshake, DNS lookup. +// +// These types end up inside the TestKeys field of an +// OONI measurement (see measurement.go). +// +// See https://github.com/ooni/spec/tree/master/data-formats. +// + +// +// Data format extension specification +// + +// ArchivalExtSpec describes a data format extension +type ArchivalExtSpec struct { + Name string // extension name + V int64 // extension version +} + +// AddTo adds the current ExtSpec to the specified measurement +func (spec ArchivalExtSpec) AddTo(m *Measurement) { + if m.Extensions == nil { + m.Extensions = make(map[string]int64) + } + m.Extensions[spec.Name] = spec.V +} + +var ( + // ArchivalExtDNS is the version of df-002-dnst.md + ArchivalExtDNS = ArchivalExtSpec{Name: "dnst", V: 0} + + // ArchivalExtNetevents is the version of df-008-netevents.md + ArchivalExtNetevents = ArchivalExtSpec{Name: "netevents", V: 0} + + // ArchivalExtHTTP is the version of df-001-httpt.md + ArchivalExtHTTP = ArchivalExtSpec{Name: "httpt", V: 0} + + // ArchivalExtTCPConnect is the version of df-005-tcpconnect.md + ArchivalExtTCPConnect = ArchivalExtSpec{Name: "tcpconnect", V: 0} + + // ArchivalExtTLSHandshake is the version of df-006-tlshandshake.md + ArchivalExtTLSHandshake = ArchivalExtSpec{Name: "tlshandshake", V: 0} + + // ArchivalExtTunnel is the version of df-009-tunnel.md + ArchivalExtTunnel = ArchivalExtSpec{Name: "tunnel", V: 0} +) + +// +// Base types 
+// + +// ArchivalMaybeBinaryData is a possibly binary string. We use this helper class +// to define a custom JSON encoder that allows us to choose the proper +// representation depending on whether the Value field is valid UTF-8 or not. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata +type ArchivalMaybeBinaryData struct { + Value string +} + +// MarshalJSON marshals a string-like to JSON following the OONI spec that +// says that UTF-8 content is represented as string and non-UTF-8 content is +// instead represented using `{"format":"base64","data":"..."}`. +func (hb ArchivalMaybeBinaryData) MarshalJSON() ([]byte, error) { + if utf8.ValidString(hb.Value) { + return json.Marshal(hb.Value) + } + er := make(map[string]string) + er["format"] = "base64" + er["data"] = base64.StdEncoding.EncodeToString([]byte(hb.Value)) + return json.Marshal(er) +} + +// UnmarshalJSON is the opposite of MarshalJSON. +func (hb *ArchivalMaybeBinaryData) UnmarshalJSON(d []byte) error { + if err := json.Unmarshal(d, &hb.Value); err == nil { + return nil + } + er := make(map[string]string) + if err := json.Unmarshal(d, &er); err != nil { + return err + } + if v, ok := er["format"]; !ok || v != "base64" { + return errors.New("missing or invalid format field") + } + if _, ok := er["data"]; !ok { + return errors.New("missing data field") + } + b64, err := base64.StdEncoding.DecodeString(er["data"]) + if err != nil { + return err + } + hb.Value = string(b64) + return nil +} + +// +// DNS lookup +// + +// ArchivalDNSLookupResult is the result of a DNS lookup. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-002-dnst.md. 
+type ArchivalDNSLookupResult struct { + Answers []ArchivalDNSAnswer `json:"answers"` + Engine string `json:"engine"` + Failure *string `json:"failure"` + Hostname string `json:"hostname"` + QueryType string `json:"query_type"` + ResolverHostname *string `json:"resolver_hostname"` + ResolverPort *string `json:"resolver_port"` + ResolverAddress string `json:"resolver_address"` + T float64 `json:"t"` +} + +// ArchivalDNSAnswer is a DNS answer. +type ArchivalDNSAnswer struct { + ASN int64 `json:"asn,omitempty"` + ASOrgName string `json:"as_org_name,omitempty"` + AnswerType string `json:"answer_type"` + Hostname string `json:"hostname,omitempty"` + IPv4 string `json:"ipv4,omitempty"` + IPv6 string `json:"ipv6,omitempty"` + TTL *uint32 `json:"ttl"` +} + +// +// TCP connect +// + +// ArchivalTCPConnectResult contains the result of a TCP connect. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-005-tcpconnect.md. +type ArchivalTCPConnectResult struct { + IP string `json:"ip"` + Port int `json:"port"` + Status ArchivalTCPConnectStatus `json:"status"` + T float64 `json:"t"` +} + +// ArchivalTCPConnectStatus is the status of ArchivalTCPConnectResult. +type ArchivalTCPConnectStatus struct { + Blocked *bool `json:"blocked,omitempty"` + Failure *string `json:"failure"` + Success bool `json:"success"` +} + +// +// TLS or QUIC handshake +// + +// ArchivalTLSOrQUICHandshakeResult is the result of a TLS or QUIC handshake. 
+// +// See https://github.com/ooni/spec/blob/master/data-formats/df-006-tlshandshake.md +type ArchivalTLSOrQUICHandshakeResult struct { + CipherSuite string `json:"cipher_suite"` + Failure *string `json:"failure"` + NegotiatedProtocol string `json:"negotiated_protocol"` + NoTLSVerify bool `json:"no_tls_verify"` + PeerCertificates []ArchivalMaybeBinaryData `json:"peer_certificates"` + ServerName string `json:"server_name"` + T float64 `json:"t"` + Tags []string `json:"tags"` + TLSVersion string `json:"tls_version"` +} + +// +// HTTP +// + +// ArchivalHTTPRequestResult is the result of sending an HTTP request. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md. +type ArchivalHTTPRequestResult struct { + Failure *string `json:"failure"` + Request ArchivalHTTPRequest `json:"request"` + Response ArchivalHTTPResponse `json:"response"` + T float64 `json:"t"` +} + +// ArchivalHTTPRequest contains an HTTP request. +// +// Headers are a map in Web Connectivity data format but +// we have added support for a list since January 2020. +type ArchivalHTTPRequest struct { + Body ArchivalHTTPBody `json:"body"` + BodyIsTruncated bool `json:"body_is_truncated"` + HeadersList []ArchivalHTTPHeader `json:"headers_list"` + Headers map[string]ArchivalMaybeBinaryData `json:"headers"` + Method string `json:"method"` + Tor ArchivalHTTPTor `json:"tor"` + Transport string `json:"x_transport"` + URL string `json:"url"` +} + +// ArchivalHTTPResponse contains an HTTP response. +// +// Headers are a map in Web Connectivity data format but +// we have added support for a list since January 2020. 
+type ArchivalHTTPResponse struct { + Body ArchivalHTTPBody `json:"body"` + BodyIsTruncated bool `json:"body_is_truncated"` + Code int64 `json:"code"` + HeadersList []ArchivalHTTPHeader `json:"headers_list"` + Headers map[string]ArchivalMaybeBinaryData `json:"headers"` + + // The following fields are not serialised but are useful to simplify + // analysing the measurements in telegram, whatsapp, etc. + Locations []string `json:"-"` +} + +// ArchivalHTTPBody is an HTTP body. As an implementation note, this type must +// be an alias for the ArchivalMaybeBinaryData type, otherwise the custom JSON +// serialisation implemented by ArchivalMaybeBinaryData does not apply to it. +type ArchivalHTTPBody = ArchivalMaybeBinaryData + +// ArchivalHTTPHeader is a single HTTP header. +type ArchivalHTTPHeader struct { + Key string + Value ArchivalMaybeBinaryData +} + +// MarshalJSON marshals a single HTTP header to a tuple where the first +// element is a string and the second element is maybe-binary data. +func (hh ArchivalHTTPHeader) MarshalJSON() ([]byte, error) { + if utf8.ValidString(hh.Value.Value) { + return json.Marshal([]string{hh.Key, hh.Value.Value}) + } + value := make(map[string]string) + value["format"] = "base64" + value["data"] = base64.StdEncoding.EncodeToString([]byte(hh.Value.Value)) + return json.Marshal([]interface{}{hh.Key, value}) +} + +// UnmarshalJSON is the opposite of MarshalJSON. 
+func (hh *ArchivalHTTPHeader) UnmarshalJSON(d []byte) error { + var pair []interface{} + if err := json.Unmarshal(d, &pair); err != nil { + return err + } + if len(pair) != 2 { + return errors.New("unexpected pair length") + } + key, ok := pair[0].(string) + if !ok { + return errors.New("the key is not a string") + } + value, ok := pair[1].(string) + if !ok { + mapvalue, ok := pair[1].(map[string]interface{}) + if !ok { + return errors.New("the value is neither a string nor a map[string]interface{}") + } + if _, ok := mapvalue["format"]; !ok { + return errors.New("missing format") + } + if v, ok := mapvalue["format"].(string); !ok || v != "base64" { + return errors.New("invalid format") + } + if _, ok := mapvalue["data"]; !ok { + return errors.New("missing data field") + } + v, ok := mapvalue["data"].(string) + if !ok { + return errors.New("the data field is not a string") + } + b64, err := base64.StdEncoding.DecodeString(v) + if err != nil { + return err + } + value = string(b64) + } + hh.Key, hh.Value = key, ArchivalMaybeBinaryData{Value: value} + return nil +} + +// ArchivalHTTPTor contains Tor information. +type ArchivalHTTPTor struct { + ExitIP *string `json:"exit_ip"` + ExitName *string `json:"exit_name"` + IsTor bool `json:"is_tor"` +} + +// +// NetworkEvent +// + +// ArchivalNetworkEvent is a network event. It contains all the possible fields +// and most fields are optional. They are only added when it makes sense +// for them to be there _and_ we have data to show. +// +// See https://github.com/ooni/spec/blob/master/data-formats/df-008-netevents.md. 
+type ArchivalNetworkEvent struct {
+	Address   string   `json:"address,omitempty"`
+	Failure   *string  `json:"failure"` // no omitempty: a nil pointer is emitted as null
+	NumBytes  int64    `json:"num_bytes,omitempty"`
+	Operation string   `json:"operation"`
+	Proto     string   `json:"proto,omitempty"`
+	T         float64  `json:"t"`
+	Tags      []string `json:"tags,omitempty"`
+}
diff --git a/internal/model/archival_test.go b/internal/model/archival_test.go
new file mode 100644
index 0000000..eec139a
--- /dev/null
+++ b/internal/model/archival_test.go
@@ -0,0 +1,310 @@
+package model
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/ooni/probe-cli/v3/internal/fakefill"
+)
+
+// TestArchivalExtSpec checks that calling AddTo on ArchivalExtDNS with an
+// empty Measurement leaves its Extensions map equal to {"dnst": 0}.
+func TestArchivalExtSpec(t *testing.T) {
+	t.Run("AddTo", func(t *testing.T) {
+		m := &Measurement{}
+		ArchivalExtDNS.AddTo(m)
+		expected := map[string]int64{"dnst": 0}
+		if d := cmp.Diff(m.Extensions, expected); d != "" {
+			t.Fatal(d)
+		}
+	})
+}
+
+// archivalBinaryInput is the raw binary fixture: we use this value below
+// to test we can handle binary data.
+var archivalBinaryInput = []uint8{
+	0x57, 0xe5, 0x79, 0xfb, 0xa6, 0xbb, 0x0d, 0xbc, 0xce, 0xbd, 0xa7, 0xa0,
+	0xba, 0xa4, 0x78, 0x78, 0x12, 0x59, 0xee, 0x68, 0x39, 0xa4, 0x07, 0x98,
+	0xc5, 0x3e, 0xbc, 0x55, 0xcb, 0xfe, 0x34, 0x3c, 0x7e, 0x1b, 0x5a, 0xb3,
+	0x22, 0x9d, 0xc1, 0x2d, 0x6e, 0xca, 0x5b, 0xf1, 0x10, 0x25, 0x47, 0x1e,
+	0x44, 0xe2, 0x2d, 0x60, 0x08, 0xea, 0xb0, 0x0a, 0xcc, 0x05, 0x48, 0xa0,
+	0xf5, 0x78, 0x38, 0xf0, 0xdb, 0x3f, 0x9d, 0x9f, 0x25, 0x6f, 0x89, 0x00,
+	0x96, 0x93, 0xaf, 0x43, 0xac, 0x4d, 0xc9, 0xac, 0x13, 0xdb, 0x22, 0xbe,
+	0x7a, 0x7d, 0xd9, 0x24, 0xa2, 0x52, 0x69, 0xd8, 0x89, 0xc1, 0xd1, 0x57,
+	0xaa, 0x04, 0x2b, 0xa2, 0xd8, 0xb1, 0x19, 0xf6, 0xd5, 0x11, 0x39, 0xbb,
+	0x80, 0xcf, 0x86, 0xf9, 0x5f, 0x9d, 0x8c, 0xab, 0xf5, 0xc5, 0x74, 0x24,
+	0x3a, 0xa2, 0xd4, 0x40, 0x4e, 0xd7, 0x10, 0x1f,
+}
+
+// archivalEncodedBinaryInput is the JSON document the tests below expect
+// when marshalling archivalBinaryInput (an object with a base64 "data"
+// field and "format" set to "base64"); the same bytes are also fed back
+// to the unmarshalling tests.
+var archivalEncodedBinaryInput = []byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`)
+
+// TestMaybeBinaryValue runs table-driven subtests for the MarshalJSON and
+// UnmarshalJSON methods of ArchivalMaybeBinaryData.
+func TestMaybeBinaryValue(t *testing.T) {
+	t.Run("MarshalJSON", func(t *testing.T) {
+		tests := []struct {
+			name    string // test name
+			input   string // value to marshal
+			want    []byte // expected result
+			wantErr bool   // whether we expect an error
+		}{{
+			name:    "with string input",
+			input:   "antani",
+			want:    []byte(`"antani"`),
+			wantErr: false,
+		}, {
+			name:    "with binary input",
+			input:   string(archivalBinaryInput),
+			want:    archivalEncodedBinaryInput,
+			wantErr: false,
+		}}
+		for _, tt := range tests {
+			t.Run(tt.name, func(t *testing.T) {
+				hb := ArchivalMaybeBinaryData{
+					Value: tt.input,
+				}
+				got, err := hb.MarshalJSON()
+				if (err != nil) != tt.wantErr {
+					t.Fatalf("ArchivalMaybeBinaryData.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
+				}
+				if diff := cmp.Diff(tt.want, got); diff != "" {
+					t.Fatal(diff)
+				}
+			})
+		}
+	})
+
+	t.Run("UnmarshalJSON", func(t *testing.T) {
+		tests := []struct {
+			name    string // test name
+			input   []byte // value to unmarshal
+			want    string // expected result
+			wantErr bool   // whether we want an error
+		}{{
+			name:    "with string input",
+			input:   []byte(`"xo"`),
+			want:    "xo",
+			wantErr: false,
+		}, {
+			name:    "with nil input",
+			input:   nil,
+			want:    "",
+			wantErr: true,
+		}, {
+			name:    "with missing/invalid format",
+			input:   []byte(`{"format": "foo"}`),
+			want:    "",
+			wantErr: true,
+		}, {
+			name:    "with missing data",
+			input:   []byte(`{"format": "base64"}`),
+			want:    "",
+			wantErr: true,
+		}, {
+			name:    "with invalid base64 data",
+			input:   []byte(`{"format": "base64", "data": "x"}`),
+			want:    "",
+			wantErr: true,
+		}, {
+			name:    "with valid base64 data",
+			input:   archivalEncodedBinaryInput,
+			want:    string(archivalBinaryInput),
+			wantErr: false,
+		}}
+		for _, tt := range tests {
+			t.Run(tt.name, func(t *testing.T) {
+				hb := &ArchivalMaybeBinaryData{}
+				if err := hb.UnmarshalJSON(tt.input); (err != nil) != tt.wantErr {
+					t.Fatalf("ArchivalMaybeBinaryData.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
+				}
+				// On failure the test cases expect Value to still be empty.
+				if d := cmp.Diff(tt.want, hb.Value); d != "" {
+					t.Fatal(d)
+				}
+			})
+		}
+	})
+}
+
+// TestHTTPHeader runs table-driven subtests for the MarshalJSON and
+// UnmarshalJSON methods of ArchivalHTTPHeader.
+func TestHTTPHeader(t *testing.T) {
+	t.Run("MarshalJSON", func(t *testing.T) {
+		tests := []struct {
+			name    string             // test name
+			input   ArchivalHTTPHeader // what to marshal
+			want    []byte             // expected data
+			wantErr bool               // whether we expect an error
+		}{{
+			name: "with string value",
+			input: ArchivalHTTPHeader{
+				Key: "Content-Type",
+				Value: ArchivalMaybeBinaryData{
+					Value: "text/plain",
+				},
+			},
+			want:    []byte(`["Content-Type","text/plain"]`),
+			wantErr: false,
+		}, {
+			name: "with binary value",
+			input: ArchivalHTTPHeader{
+				Key: "Content-Type",
+				Value: ArchivalMaybeBinaryData{
+					Value: string(archivalBinaryInput),
+				},
+			},
+			want:    []byte(`["Content-Type",` + string(archivalEncodedBinaryInput) + `]`),
+			wantErr: false,
+		}}
+		for _, tt := range tests {
+			t.Run(tt.name, func(t *testing.T) {
+				got, err := tt.input.MarshalJSON()
+				if (err != nil) != tt.wantErr {
+					t.Fatalf("ArchivalHTTPHeader.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
+				}
+				if diff := cmp.Diff(tt.want, got); diff != "" {
+					t.Fatal(diff)
+				}
+			})
+		}
+	})
+
+	t.Run("UnmarshalJSON", func(t *testing.T) {
+		// Every failing case below expects the zero ArchivalHTTPHeader,
+		// i.e., the receiver must not be modified when an error occurs.
+		tests := []struct {
+			name    string             // test name
+			input   []byte             // input for the test
+			want    ArchivalHTTPHeader // expected output
+			wantErr bool               // whether we want an error
+		}{{
+			name:  "with invalid input",
+			input: []byte(`{}`),
+			want: ArchivalHTTPHeader{
+				Key:   "",
+				Value: ArchivalMaybeBinaryData{Value: ""},
+			},
+			wantErr: true,
+		}, {
+			name:  "with unexpected number of items",
+			input: []byte(`[]`),
+			want: ArchivalHTTPHeader{
+				Key:   "",
+				Value: ArchivalMaybeBinaryData{Value: ""},
+			},
+			wantErr: true,
+		}, {
+			name:  "with first item not being a string",
+			input: []byte(`[0,0]`),
+			want: ArchivalHTTPHeader{
+				Key:   "",
+				Value: ArchivalMaybeBinaryData{Value: ""},
+			},
+			wantErr: true,
+		}, {
+			name:  "with both items being a string",
+			input: []byte(`["x","y"]`),
+			want: ArchivalHTTPHeader{
+				Key: "x",
+				Value: ArchivalMaybeBinaryData{
+					Value: "y",
+				},
+			},
+			wantErr: false,
+		}, {
+			name:  "with second item not being a map[string]interface{}",
+			input: []byte(`["x",[]]`),
+			want: ArchivalHTTPHeader{
+				Key: "",
+				Value: ArchivalMaybeBinaryData{
+					Value: "",
+				},
+			},
+			wantErr: true,
+		}, {
+			name:  "with missing format key in second item",
+			input: []byte(`["x",{}]`),
+			want: ArchivalHTTPHeader{
+				Key: "",
+				Value: ArchivalMaybeBinaryData{
+					Value: "",
+				},
+			},
+			wantErr: true,
+		}, {
+			name:  "with format value not being base64",
+			input: []byte(`["x",{"format":1}]`),
+			want: ArchivalHTTPHeader{
+				Key: "",
+				Value: ArchivalMaybeBinaryData{
+					Value: "",
+				},
+			},
+			wantErr: true,
+		}, {
+			name:  "with missing data field",
+			input: []byte(`["x",{"format":"base64"}]`),
+			want: ArchivalHTTPHeader{
+				Key: "",
+				Value: ArchivalMaybeBinaryData{
+					Value: "",
+				},
+			},
+			wantErr: true,
+		}, {
+			name:  "with data not being a string",
+			input: []byte(`["x",{"format":"base64","data":1}]`),
+			want: ArchivalHTTPHeader{
+				Key: "",
+				Value: ArchivalMaybeBinaryData{
+					Value: "",
+				},
+			},
+			wantErr: true,
+		}, {
+			name:  "with data not being base64",
+			input: []byte(`["x",{"format":"base64","data":"xx"}]`),
+			want: ArchivalHTTPHeader{
+				Key: "",
+				Value: ArchivalMaybeBinaryData{
+					Value: "",
+				},
+			},
+			wantErr: true,
+		}, {
+			name:  "with correctly encoded base64 data",
+			input: []byte(`["x",` + string(archivalEncodedBinaryInput) + `]`),
+			want: ArchivalHTTPHeader{
+				Key: "x",
+				Value: ArchivalMaybeBinaryData{
+					Value: string(archivalBinaryInput),
+				},
+			},
+			wantErr: false,
+		}}
+		for _, tt := range tests {
+			t.Run(tt.name, func(t *testing.T) {
+				hh := &ArchivalHTTPHeader{}
+				if err := hh.UnmarshalJSON(tt.input); (err != nil) != tt.wantErr {
+					t.Fatalf("ArchivalHTTPHeader.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
+				}
+				if diff := cmp.Diff(&tt.want, hh); diff != "" {
+					t.Error(diff)
+				}
+			})
+		}
+	})
+}
+
+// TestHTTPBody ensures ArchivalHTTPBody is an alias for (and not a new
+// type defined from) ArchivalMaybeBinaryData.
+func TestHTTPBody(t *testing.T) {
+	// Implementation note: the content is always going to be the same
+	// even if we modify the implementation to become:
+	//
+	//     type ArchivalHTTPBody ArchivalMaybeBinaryData
+	//
+	// instead of the correct:
+	//
+	//     type ArchivalHTTPBody = ArchivalMaybeBinaryData
+	//
+	// However, cmp.Diff also takes into account the data type. Hence, if
+	// we make a mistake and apply the above change (which will in turn
+	// break correct JSON serialization), then this test will fail.
+	var body ArchivalHTTPBody
+	ff := &fakefill.Filler{}
+	ff.Fill(&body)
+	data := ArchivalMaybeBinaryData(body)
+	if diff := cmp.Diff(body, data); diff != "" {
+		t.Fatal(diff)
+	}
+}