refactor: move i/netx/archival structs to i/model (#659)

We recently started moving core data structures inside of the
internal/model package as detailed in https://github.com/ooni/probe/issues/1885.

The chief reason to do that is to have a set of fundamental
shared data types to help us rationalize the codebase.

This specific diff moves internal/netx/archival's core data types
inside the internal/model package. While there, it also refactors the
existing tests to improve their quality. Additionally, we also added
an extra test to ensure `ArchivalHTTPBody` is an alias for
`ArchivalMaybeBinaryData`, which is required to ensure the
custom JSON serialization process works for it.

We're doing that because both internal/netx/archival and
internal/measurex define their own archival data structures.

We developed measurex using its own structures because that
allowed us to iterate more quickly. Now that we have sketched
out measurex, the time has come to consolidate.

My overall aim is to spend a few more hours this week on
engineering measurex. This work is preliminary work before
we finish up both measurex and websteps.

We described this cleanup in https://github.com/ooni/probe/issues/1957.
This commit is contained in:
Simone Basso 2022-01-10 11:25:52 +01:00 committed by GitHub
parent 554ae47c5a
commit 730373cc75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 646 additions and 607 deletions

View File

@ -5,8 +5,6 @@ package archival
import (
"crypto/x509"
"encoding/base64"
"encoding/json"
"errors"
"net"
"net/http"
@ -14,7 +12,6 @@ import (
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/ooni/probe-cli/v3/internal/engine/geolocate"
"github.com/ooni/probe-cli/v3/internal/engine/netx/trace"
@ -22,59 +19,32 @@ import (
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
// ExtSpec identifies a data format extension by name and version.
type ExtSpec struct {
	// Name is the extension name; AddTo uses it as the map key.
	Name string

	// V is the extension version; AddTo stores it as the map value.
	V int64
}

// AddTo records this extension in m.Extensions, allocating the map
// first when the measurement does not have one yet. An entry already
// stored under the same name is overwritten.
func (spec ExtSpec) AddTo(m *model.Measurement) {
	extensions := m.Extensions
	if extensions == nil {
		extensions = make(map[string]int64)
		m.Extensions = extensions
	}
	extensions[spec.Name] = spec.V
}
var (
// ExtDNS is the version of df-002-dnst.md
ExtDNS = ExtSpec{Name: "dnst", V: 0}
// ExtNetevents is the version of df-008-netevents.md
ExtNetevents = ExtSpec{Name: "netevents", V: 0}
// ExtHTTP is the version of df-001-httpt.md
ExtHTTP = ExtSpec{Name: "httpt", V: 0}
// ExtTCPConnect is the version of df-005-tcpconnect.md
ExtTCPConnect = ExtSpec{Name: "tcpconnect", V: 0}
// ExtTLSHandshake is the version of df-006-tlshandshake.md
ExtTLSHandshake = ExtSpec{Name: "tlshandshake", V: 0}
// ExtTunnel is the version of df-009-tunnel.md
ExtTunnel = ExtSpec{Name: "tunnel", V: 0}
// Compatibility types
type (
ExtSpec = model.ArchivalExtSpec
TCPConnectEntry = model.ArchivalTCPConnectResult
TCPConnectStatus = model.ArchivalTCPConnectStatus
MaybeBinaryValue = model.ArchivalMaybeBinaryData
DNSQueryEntry = model.ArchivalDNSLookupResult
DNSAnswerEntry = model.ArchivalDNSAnswer
TLSHandshake = model.ArchivalTLSOrQUICHandshakeResult
HTTPBody = model.ArchivalHTTPBody
HTTPHeader = model.ArchivalHTTPHeader
RequestEntry = model.ArchivalHTTPRequestResult
HTTPRequest = model.ArchivalHTTPRequest
HTTPResponse = model.ArchivalHTTPResponse
NetworkEvent = model.ArchivalNetworkEvent
)
// TCPConnectStatus contains the TCP connect status.
//
// The Blocked field breaks the separation between measurement and analysis
// we have been enforcing for quite some time now. It is a legacy from the
// Web Connectivity experiment and it should be here because of that.
//
// JSON encoding: a nil Blocked is omitted (omitempty), whereas a nil
// Failure is emitted as null because its tag has no omitempty.
type TCPConnectStatus struct {
Blocked *bool `json:"blocked,omitempty"` // Web Connectivity only
Failure *string `json:"failure"`
Success bool `json:"success"`
}
// TCPConnectEntry contains one of the entries that are part
// of the "tcp_connect" key of a OONI report.
//
// No field uses omitempty, so all four keys ("ip", "port", "status"
// and "t") are always present in the JSON output.
type TCPConnectEntry struct {
IP string `json:"ip"`
Port int `json:"port"`
Status TCPConnectStatus `json:"status"`
T float64 `json:"t"`
}
// Compatibility variables: each one is initialized from the extension
// spec with the matching name in the model package, so existing code can
// keep using the previous identifiers.
var (
ExtDNS = model.ArchivalExtDNS
ExtNetevents = model.ArchivalExtNetevents
ExtHTTP = model.ArchivalExtHTTP
ExtTCPConnect = model.ArchivalExtTCPConnect
ExtTLSHandshake = model.ArchivalExtTLSHandshake
ExtTunnel = model.ArchivalExtTunnel
)
// NewTCPConnectList creates a new TCPConnectList
func NewTCPConnectList(begin time.Time, events []trace.Event) []TCPConnectEntry {
@ -134,161 +104,6 @@ func NewFailedOperation(err error) *string {
return &s
}
// HTTPTor contains Tor information.
//
// ExitIP and ExitName are pointers whose tags have no omitempty, so nil
// values are emitted as JSON null rather than being left out.
type HTTPTor struct {
ExitIP *string `json:"exit_ip"`
ExitName *string `json:"exit_name"`
IsTor bool `json:"is_tor"`
}
// MaybeBinaryValue wraps a string that may contain arbitrary bytes. It
// exists to carry the custom JSON encoding below, which picks the
// representation according to whether Value is valid UTF-8.
type MaybeBinaryValue struct {
	// Value holds the raw content, which is not required to be valid UTF-8.
	Value string
}

// MarshalJSON follows the OONI spec: UTF-8 content is represented as a
// JSON string and non-UTF-8 content is instead represented using
// `{"format":"base64","data":"..."}`.
func (hb MaybeBinaryValue) MarshalJSON() ([]byte, error) {
	if !utf8.ValidString(hb.Value) {
		return json.Marshal(map[string]string{
			"format": "base64",
			"data":   base64.StdEncoding.EncodeToString([]byte(hb.Value)),
		})
	}
	return json.Marshal(hb.Value)
}

// UnmarshalJSON is the opposite of MarshalJSON. It first tries to decode
// d as a plain JSON string and, when that fails, as the base64 object
// emitted by MarshalJSON. Value is only replaced on success.
func (hb *MaybeBinaryValue) UnmarshalJSON(d []byte) error {
	// Decode straight into Value: on failure the decoder leaves it as is.
	if json.Unmarshal(d, &hb.Value) == nil {
		return nil
	}
	var wrapper map[string]string
	if err := json.Unmarshal(d, &wrapper); err != nil {
		return err
	}
	if format, found := wrapper["format"]; !found || format != "base64" {
		return errors.New("missing or invalid format field")
	}
	encoded, found := wrapper["data"]
	if !found {
		return errors.New("missing data field")
	}
	decoded, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return err
	}
	hb.Value = string(decoded)
	return nil
}
// HTTPBody is an HTTP body. As an implementation note, this type must be
// an alias for the MaybeBinaryValue type, otherwise the specific serialisation
// mechanism implemented by MaybeBinaryValue is not working: a defined type
// (`type HTTPBody MaybeBinaryValue`, without the `=`) would not inherit the
// MarshalJSON and UnmarshalJSON methods.
type HTTPBody = MaybeBinaryValue
// HTTPHeader is a single HTTP header, serialized as a two-element JSON
// array holding the key followed by the value.
type HTTPHeader struct {
	// Key is the header name.
	Key string

	// Value is the header value, which is not required to be valid UTF-8.
	Value MaybeBinaryValue
}

// MarshalJSON emits the header as a tuple: the key as a string, then the
// value either as a string (valid UTF-8) or as a
// `{"format":"base64","data":"..."}` object (anything else).
func (hh HTTPHeader) MarshalJSON() ([]byte, error) {
	raw := hh.Value.Value
	if !utf8.ValidString(raw) {
		encoded := map[string]string{
			"format": "base64",
			"data":   base64.StdEncoding.EncodeToString([]byte(raw)),
		}
		return json.Marshal([]interface{}{hh.Key, encoded})
	}
	return json.Marshal([]string{hh.Key, raw})
}

// UnmarshalJSON is the opposite of MarshalJSON. The receiver is only
// modified once the whole tuple has been validated and decoded.
func (hh *HTTPHeader) UnmarshalJSON(d []byte) error {
	var tuple []interface{}
	if err := json.Unmarshal(d, &tuple); err != nil {
		return err
	}
	if len(tuple) != 2 {
		return errors.New("unexpected pair length")
	}
	name, isString := tuple[0].(string)
	if !isString {
		return errors.New("the key is not a string")
	}
	var content string
	switch second := tuple[1].(type) {
	case string:
		content = second
	case map[string]interface{}:
		format, found := second["format"]
		if !found {
			return errors.New("missing format")
		}
		if name, isString := format.(string); !isString || name != "base64" {
			return errors.New("invalid format")
		}
		data, found := second["data"]
		if !found {
			return errors.New("missing data field")
		}
		encoded, isString := data.(string)
		if !isString {
			return errors.New("the data field is not a string")
		}
		decoded, err := base64.StdEncoding.DecodeString(encoded)
		if err != nil {
			return err
		}
		content = string(decoded)
	default:
		return errors.New("the value is neither a string nor a map[string]interface{}")
	}
	hh.Key, hh.Value = name, MaybeBinaryValue{Value: content}
	return nil
}
// HTTPRequest contains an HTTP request.
//
// Headers are a map in Web Connectivity data format but
// we have added support for a list since January 2020.
//
// Both representations are part of the struct: HeadersList is emitted
// as "headers_list" and Headers as "headers". No field uses omitempty,
// so nil HeadersList and Headers are emitted as JSON null.
type HTTPRequest struct {
Body HTTPBody `json:"body"`
BodyIsTruncated bool `json:"body_is_truncated"`
HeadersList []HTTPHeader `json:"headers_list"`
Headers map[string]MaybeBinaryValue `json:"headers"`
Method string `json:"method"`
Tor HTTPTor `json:"tor"`
Transport string `json:"x_transport"`
URL string `json:"url"`
}
// HTTPResponse contains an HTTP response.
//
// Headers are a map in Web Connectivity data format but
// we have added support for a list since January 2020.
//
// Both representations are part of the struct: HeadersList is emitted
// as "headers_list" and Headers as "headers".
type HTTPResponse struct {
Body HTTPBody `json:"body"`
BodyIsTruncated bool `json:"body_is_truncated"`
Code int64 `json:"code"`
HeadersList []HTTPHeader `json:"headers_list"`
Headers map[string]MaybeBinaryValue `json:"headers"`
// The following fields are not serialised but are useful to simplify
// analysing the measurements in telegram, whatsapp, etc.
// (the `json:"-"` tag excludes Locations from encoding and decoding).
Locations []string `json:"-"`
}
// RequestEntry is one of the entries that are part of
// the "requests" key of a OONI report.
//
// Failure has no omitempty, so a nil pointer is emitted as JSON null.
type RequestEntry struct {
Failure *string `json:"failure"`
Request HTTPRequest `json:"request"`
Response HTTPResponse `json:"response"`
T float64 `json:"t"`
}
func addheaders(
source http.Header,
destList *[]HTTPHeader,
@ -361,30 +176,6 @@ func newRequestList(begin time.Time, events []trace.Event) []RequestEntry {
return out
}
// DNSAnswerEntry is the answer to a DNS query.
//
// JSON encoding: ASN, ASOrgName, Hostname, IPv4 and IPv6 are omitted when
// they hold their zero value (omitempty); AnswerType is always emitted;
// a nil TTL is emitted as null.
type DNSAnswerEntry struct {
ASN int64 `json:"asn,omitempty"`
ASOrgName string `json:"as_org_name,omitempty"`
AnswerType string `json:"answer_type"`
Hostname string `json:"hostname,omitempty"`
IPv4 string `json:"ipv4,omitempty"`
IPv6 string `json:"ipv6,omitempty"`
TTL *uint32 `json:"ttl"`
}
// DNSQueryEntry is a DNS query with possibly an answer.
//
// No field uses omitempty, so every key is always emitted: a nil Answers
// slice and nil Failure, ResolverHostname and ResolverPort pointers are
// encoded as JSON null.
type DNSQueryEntry struct {
Answers []DNSAnswerEntry `json:"answers"`
Engine string `json:"engine"`
Failure *string `json:"failure"`
Hostname string `json:"hostname"`
QueryType string `json:"query_type"`
ResolverHostname *string `json:"resolver_hostname"`
ResolverPort *string `json:"resolver_port"`
ResolverAddress string `json:"resolver_address"`
T float64 `json:"t"`
}
type dnsQueryType string
// NewDNSQueriesList returns a list of DNS queries.
@ -454,19 +245,6 @@ func (qtype dnsQueryType) makequeryentry(begin time.Time, ev trace.Event) DNSQue
}
}
// NetworkEvent is a network event. It contains all the possible fields
// and most fields are optional. They are only added when it makes sense
// for them to be there _and_ we have data to show.
//
// In terms of JSON encoding, the optional fields are the ones tagged with
// omitempty (Address, NumBytes, Proto and Tags); Failure, Operation and T
// are always emitted, with a nil Failure encoded as null.
type NetworkEvent struct {
Address string `json:"address,omitempty"`
Failure *string `json:"failure"`
NumBytes int64 `json:"num_bytes,omitempty"`
Operation string `json:"operation"`
Proto string `json:"proto,omitempty"`
T float64 `json:"t"`
Tags []string `json:"tags,omitempty"`
}
// NewNetworkEventsList returns a list of network events.
func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent {
var out []NetworkEvent
@ -528,19 +306,6 @@ func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent
return out
}
// TLSHandshake contains TLS handshake data.
//
// No field uses omitempty, so every key is always emitted; nil
// PeerCertificates and Tags slices are encoded as JSON null. Each peer
// certificate is a MaybeBinaryValue and is therefore serialized by that
// type's MarshalJSON.
type TLSHandshake struct {
CipherSuite string `json:"cipher_suite"`
Failure *string `json:"failure"`
NegotiatedProtocol string `json:"negotiated_protocol"`
NoTLSVerify bool `json:"no_tls_verify"`
PeerCertificates []MaybeBinaryValue `json:"peer_certificates"`
ServerName string `json:"server_name"`
T float64 `json:"t"`
Tags []string `json:"tags"`
TLSVersion string `json:"tls_version"`
}
// NewTLSHandshakesList creates a new TLSHandshakesList
func NewTLSHandshakesList(begin time.Time, events []trace.Event) []TLSHandshake {
var out []TLSHandshake

View File

@ -14,7 +14,6 @@ import (
"github.com/gorilla/websocket"
"github.com/ooni/probe-cli/v3/internal/engine/netx/archival"
"github.com/ooni/probe-cli/v3/internal/engine/netx/trace"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
@ -566,352 +565,6 @@ func TestNewTLSHandshakesList(t *testing.T) {
}
}
// TestExtSpec_AddTo checks that AddTo registers the extension on a
// measurement whose Extensions map has not been allocated yet.
func TestExtSpec_AddTo(t *testing.T) {
	measurement := &model.Measurement{}
	archival.ExtDNS.AddTo(measurement)
	want := map[string]int64{"dnst": 0}
	if diff := cmp.Diff(measurement.Extensions, want); diff != "" {
		t.Fatal(diff)
	}
}
// binaryInput is a fixed byte sequence that is not valid UTF-8 (its second
// byte, 0xe5, starts a multi-byte sequence that 0x79 does not continue).
// The tests below use it to exercise the base64 code path.
var binaryInput = []uint8{
0x57, 0xe5, 0x79, 0xfb, 0xa6, 0xbb, 0x0d, 0xbc, 0xce, 0xbd, 0xa7, 0xa0,
0xba, 0xa4, 0x78, 0x78, 0x12, 0x59, 0xee, 0x68, 0x39, 0xa4, 0x07, 0x98,
0xc5, 0x3e, 0xbc, 0x55, 0xcb, 0xfe, 0x34, 0x3c, 0x7e, 0x1b, 0x5a, 0xb3,
0x22, 0x9d, 0xc1, 0x2d, 0x6e, 0xca, 0x5b, 0xf1, 0x10, 0x25, 0x47, 0x1e,
0x44, 0xe2, 0x2d, 0x60, 0x08, 0xea, 0xb0, 0x0a, 0xcc, 0x05, 0x48, 0xa0,
0xf5, 0x78, 0x38, 0xf0, 0xdb, 0x3f, 0x9d, 0x9f, 0x25, 0x6f, 0x89, 0x00,
0x96, 0x93, 0xaf, 0x43, 0xac, 0x4d, 0xc9, 0xac, 0x13, 0xdb, 0x22, 0xbe,
0x7a, 0x7d, 0xd9, 0x24, 0xa2, 0x52, 0x69, 0xd8, 0x89, 0xc1, 0xd1, 0x57,
0xaa, 0x04, 0x2b, 0xa2, 0xd8, 0xb1, 0x19, 0xf6, 0xd5, 0x11, 0x39, 0xbb,
0x80, 0xcf, 0x86, 0xf9, 0x5f, 0x9d, 0x8c, 0xab, 0xf5, 0xc5, 0x74, 0x24,
0x3a, 0xa2, 0xd4, 0x40, 0x4e, 0xd7, 0x10, 0x1f,
}
// encodedBinaryInput is the JSON the tests below expect when marshalling
// binaryInput: a base64 object whose keys appear in sorted order ("data"
// before "format").
var encodedBinaryInput = []byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`)
// TestMaybeBinaryValue_MarshalJSON checks that valid UTF-8 is marshalled
// as a plain JSON string and that binary data is marshalled as a base64
// object.
func TestMaybeBinaryValue_MarshalJSON(t *testing.T) {
	cases := []struct {
		name    string // subtest name
		value   string // value to marshal
		want    []byte // expected JSON
		wantErr bool   // whether MarshalJSON must fail
	}{
		{name: "with string input", value: "antani", want: []byte(`"antani"`)},
		{name: "with binary input", value: string(binaryInput), want: encodedBinaryInput},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := archival.MaybeBinaryValue{Value: tc.value}.MarshalJSON()
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("MaybeBinaryValue.MarshalJSON() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Error(cmp.Diff(got, tc.want))
		})
	}
}
func TestMaybeBinaryValue_UnmarshalJSON(t *testing.T) {
type fields struct {
WantValue string
}
type args struct {
d []byte
}
tests := []struct {
name string
fields fields
args args
wantErr bool
}{{
name: "with string input",
fields: fields{
WantValue: "xo",
},
args: args{d: []byte(`"xo"`)},
wantErr: false,
}, {
name: "with nil input",
fields: fields{
WantValue: "",
},
args: args{d: nil},
wantErr: true,
}, {
name: "with missing/invalid format",
fields: fields{
WantValue: "",
},
args: args{d: []byte(`{"format": "foo"}`)},
wantErr: true,
}, {
name: "with missing data",
fields: fields{
WantValue: "",
},
args: args{d: []byte(`{"format": "base64"}`)},
wantErr: true,
}, {
name: "with invalid base64 data",
fields: fields{
WantValue: "",
},
args: args{d: []byte(`{"format": "base64", "data": "x"}`)},
wantErr: true,
}, {
name: "with valid base64 data",
fields: fields{
WantValue: string(binaryInput),
},
args: args{d: encodedBinaryInput},
wantErr: false,
}}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hb := &archival.MaybeBinaryValue{}
if err := hb.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr {
t.Errorf("MaybeBinaryValue.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
}
if d := cmp.Diff(tt.fields.WantValue, hb.Value); d != "" {
t.Error(d)
}
})
}
}
// TestHTTPHeader_MarshalJSON checks that a header is marshalled as a
// [key, value] tuple, with the value being either a plain string or a
// base64 object.
func TestHTTPHeader_MarshalJSON(t *testing.T) {
	cases := []struct {
		name    string              // subtest name
		header  archival.HTTPHeader // header to marshal
		want    []byte              // expected JSON
		wantErr bool                // whether MarshalJSON must fail
	}{{
		name: "with string value",
		header: archival.HTTPHeader{
			Key:   "Content-Type",
			Value: archival.MaybeBinaryValue{Value: "text/plain"},
		},
		want: []byte(`["Content-Type","text/plain"]`),
	}, {
		name: "with binary value",
		header: archival.HTTPHeader{
			Key:   "Content-Type",
			Value: archival.MaybeBinaryValue{Value: string(binaryInput)},
		},
		want: []byte(`["Content-Type",` + string(encodedBinaryInput) + `]`),
	}}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := tc.header.MarshalJSON()
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("HTTPHeader.MarshalJSON() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Error(cmp.Diff(got, tc.want))
		})
	}
}
func TestHTTPHeader_UnmarshalJSON(t *testing.T) {
type fields struct {
WantKey string
WantValue archival.MaybeBinaryValue
}
type args struct {
d []byte
}
tests := []struct {
name string
fields fields
args args
wantErr bool
}{{
name: "with invalid input",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`{}`),
},
wantErr: true,
}, {
name: "with unexpected number of items",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`[]`),
},
wantErr: true,
}, {
name: "with first item not being a string",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`[0,0]`),
},
wantErr: true,
}, {
name: "with both items being a string",
fields: fields{
WantKey: "x",
WantValue: archival.MaybeBinaryValue{
Value: "y",
},
},
args: args{
d: []byte(`["x","y"]`),
},
wantErr: false,
}, {
name: "with second item not being a map[string]interface{}",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",[]]`),
},
wantErr: true,
}, {
name: "with missing format key in second item",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{}]`),
},
wantErr: true,
}, {
name: "with format value not being base64",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":1}]`),
},
wantErr: true,
}, {
name: "with missing data field",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":"base64"}]`),
},
wantErr: true,
}, {
name: "with data not being a string",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":"base64","data":1}]`),
},
wantErr: true,
}, {
name: "with data not being base64",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":"base64","data":"xx"}]`),
},
wantErr: true,
}, {
name: "with correctly encoded base64 data",
fields: fields{
WantKey: "x",
WantValue: archival.MaybeBinaryValue{
Value: string(binaryInput),
},
},
args: args{
d: []byte(`["x",` + string(encodedBinaryInput) + `]`),
},
wantErr: false,
}}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hh := &archival.HTTPHeader{}
if err := hh.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr {
t.Errorf("HTTPHeader.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
}
expect := &archival.HTTPHeader{
Key: tt.fields.WantKey,
Value: tt.fields.WantValue,
}
if d := cmp.Diff(hh, expect); d != "" {
t.Error(d)
}
})
}
}
func TestNewFailure(t *testing.T) {
type args struct {
err error

View File

@ -26,7 +26,7 @@ func WrapResolver(begin time.Time, db WritableDB, r model.Resolver) model.Resolv
}
// NewResolverSystem creates a system resolver and then wraps
// it using the WrapResolver function. The resolver is built by
// netxlite.NewResolverStdlib with the given logger; db is handed
// to WrapResolver unchanged.
func (mx *Measurer) NewResolverSystem(db WritableDB, logger model.Logger) model.Resolver {
return mx.WrapResolver(db, netxlite.NewResolverStdlib(logger))
}

311
internal/model/archival.go Normal file
View File

@ -0,0 +1,311 @@
package model
import (
"encoding/base64"
"encoding/json"
"errors"
"unicode/utf8"
)
//
// Archival format for individual measurement results
// such as TCP connect, TLS handshake, DNS lookup.
//
// These types end up inside the TestKeys field of an
// OONI measurement (see measurement.go).
//
// See https://github.com/ooni/spec/tree/master/data-formats.
//
//
// Data format extension specification
//
// ArchivalExtSpec identifies a data format extension by name and version.
type ArchivalExtSpec struct {
	// Name is the extension name; AddTo uses it as the map key.
	Name string

	// V is the extension version; AddTo stores it as the map value.
	V int64
}

// AddTo records this extension in m.Extensions, allocating the map
// first when the measurement does not have one yet. An entry already
// stored under the same name is overwritten.
func (spec ArchivalExtSpec) AddTo(m *Measurement) {
	extensions := m.Extensions
	if extensions == nil {
		extensions = make(map[string]int64)
		m.Extensions = extensions
	}
	extensions[spec.Name] = spec.V
}

// Extension specs, one per data format document; the comment on each
// variable names the document it refers to.
var (
	// ArchivalExtDNS is the version of df-002-dnst.md.
	ArchivalExtDNS = ArchivalExtSpec{Name: "dnst", V: 0}

	// ArchivalExtNetevents is the version of df-008-netevents.md.
	ArchivalExtNetevents = ArchivalExtSpec{Name: "netevents", V: 0}

	// ArchivalExtHTTP is the version of df-001-httpt.md.
	ArchivalExtHTTP = ArchivalExtSpec{Name: "httpt", V: 0}

	// ArchivalExtTCPConnect is the version of df-005-tcpconnect.md.
	ArchivalExtTCPConnect = ArchivalExtSpec{Name: "tcpconnect", V: 0}

	// ArchivalExtTLSHandshake is the version of df-006-tlshandshake.md.
	ArchivalExtTLSHandshake = ArchivalExtSpec{Name: "tlshandshake", V: 0}

	// ArchivalExtTunnel is the version of df-009-tunnel.md.
	ArchivalExtTunnel = ArchivalExtSpec{Name: "tunnel", V: 0}
)
//
// Base types
//
// ArchivalMaybeBinaryData wraps a string that may contain arbitrary bytes.
// It exists to carry the custom JSON encoding below, which picks the
// representation according to whether Value is valid UTF-8.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata
type ArchivalMaybeBinaryData struct {
	// Value holds the raw content, which is not required to be valid UTF-8.
	Value string
}

// MarshalJSON follows the OONI spec: UTF-8 content is represented as a
// JSON string and non-UTF-8 content is instead represented using
// `{"format":"base64","data":"..."}`.
func (hb ArchivalMaybeBinaryData) MarshalJSON() ([]byte, error) {
	if !utf8.ValidString(hb.Value) {
		return json.Marshal(map[string]string{
			"format": "base64",
			"data":   base64.StdEncoding.EncodeToString([]byte(hb.Value)),
		})
	}
	return json.Marshal(hb.Value)
}

// UnmarshalJSON is the opposite of MarshalJSON. It first tries to decode
// d as a plain JSON string and, when that fails, as the base64 object
// emitted by MarshalJSON. Value is only replaced on success.
func (hb *ArchivalMaybeBinaryData) UnmarshalJSON(d []byte) error {
	// Decode straight into Value: on failure the decoder leaves it as is.
	if json.Unmarshal(d, &hb.Value) == nil {
		return nil
	}
	var wrapper map[string]string
	if err := json.Unmarshal(d, &wrapper); err != nil {
		return err
	}
	if format, found := wrapper["format"]; !found || format != "base64" {
		return errors.New("missing or invalid format field")
	}
	encoded, found := wrapper["data"]
	if !found {
		return errors.New("missing data field")
	}
	decoded, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return err
	}
	hb.Value = string(decoded)
	return nil
}
//
// DNS lookup
//
// ArchivalDNSLookupResult is the result of a DNS lookup.
//
// No field uses omitempty, so every key is always emitted: a nil Answers
// slice and nil Failure, ResolverHostname and ResolverPort pointers are
// encoded as JSON null.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-002-dnst.md.
type ArchivalDNSLookupResult struct {
Answers []ArchivalDNSAnswer `json:"answers"`
Engine string `json:"engine"`
Failure *string `json:"failure"`
Hostname string `json:"hostname"`
QueryType string `json:"query_type"`
ResolverHostname *string `json:"resolver_hostname"`
ResolverPort *string `json:"resolver_port"`
ResolverAddress string `json:"resolver_address"`
T float64 `json:"t"`
}
// ArchivalDNSAnswer is a DNS answer.
//
// JSON encoding: ASN, ASOrgName, Hostname, IPv4 and IPv6 are omitted when
// they hold their zero value (omitempty); AnswerType is always emitted;
// a nil TTL is emitted as null.
type ArchivalDNSAnswer struct {
ASN int64 `json:"asn,omitempty"`
ASOrgName string `json:"as_org_name,omitempty"`
AnswerType string `json:"answer_type"`
Hostname string `json:"hostname,omitempty"`
IPv4 string `json:"ipv4,omitempty"`
IPv6 string `json:"ipv6,omitempty"`
TTL *uint32 `json:"ttl"`
}
//
// TCP connect
//
// ArchivalTCPConnectResult contains the result of a TCP connect.
//
// No field uses omitempty, so all four keys ("ip", "port", "status"
// and "t") are always present in the JSON output.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-005-tcpconnect.md.
type ArchivalTCPConnectResult struct {
IP string `json:"ip"`
Port int `json:"port"`
Status ArchivalTCPConnectStatus `json:"status"`
T float64 `json:"t"`
}
// ArchivalTCPConnectStatus is the status of ArchivalTCPConnectResult.
//
// The Blocked field is a legacy from the Web Connectivity experiment: it
// mixes analysis into measurement data and is only set by that experiment
// (note carried over from the previous netx/archival definition).
//
// JSON encoding: a nil Blocked is omitted (omitempty), whereas a nil
// Failure is emitted as null because its tag has no omitempty.
type ArchivalTCPConnectStatus struct {
Blocked *bool `json:"blocked,omitempty"`
Failure *string `json:"failure"`
Success bool `json:"success"`
}
//
// TLS or QUIC handshake
//
// ArchivalTLSOrQUICHandshakeResult is the result of a TLS or QUIC handshake.
//
// No field uses omitempty, so every key is always emitted; nil
// PeerCertificates and Tags slices are encoded as JSON null. Each peer
// certificate is an ArchivalMaybeBinaryData and is therefore serialized
// by that type's MarshalJSON.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-006-tlshandshake.md
type ArchivalTLSOrQUICHandshakeResult struct {
CipherSuite string `json:"cipher_suite"`
Failure *string `json:"failure"`
NegotiatedProtocol string `json:"negotiated_protocol"`
NoTLSVerify bool `json:"no_tls_verify"`
PeerCertificates []ArchivalMaybeBinaryData `json:"peer_certificates"`
ServerName string `json:"server_name"`
T float64 `json:"t"`
Tags []string `json:"tags"`
TLSVersion string `json:"tls_version"`
}
//
// HTTP
//
// ArchivalHTTPRequestResult is the result of sending an HTTP request.
//
// Failure has no omitempty, so a nil pointer is emitted as JSON null.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md.
type ArchivalHTTPRequestResult struct {
Failure *string `json:"failure"`
Request ArchivalHTTPRequest `json:"request"`
Response ArchivalHTTPResponse `json:"response"`
T float64 `json:"t"`
}
// ArchivalHTTPRequest contains an HTTP request.
//
// Headers are a map in Web Connectivity data format but
// we have added support for a list since January 2020.
//
// Both representations are part of the struct: HeadersList is emitted
// as "headers_list" and Headers as "headers". No field uses omitempty,
// so nil HeadersList and Headers are emitted as JSON null.
type ArchivalHTTPRequest struct {
Body ArchivalHTTPBody `json:"body"`
BodyIsTruncated bool `json:"body_is_truncated"`
HeadersList []ArchivalHTTPHeader `json:"headers_list"`
Headers map[string]ArchivalMaybeBinaryData `json:"headers"`
Method string `json:"method"`
Tor ArchivalHTTPTor `json:"tor"`
Transport string `json:"x_transport"`
URL string `json:"url"`
}
// ArchivalHTTPResponse contains an HTTP response.
//
// Headers are a map in Web Connectivity data format but
// we have added support for a list since January 2020.
//
// Both representations are part of the struct: HeadersList is emitted
// as "headers_list" and Headers as "headers".
type ArchivalHTTPResponse struct {
Body ArchivalHTTPBody `json:"body"`
BodyIsTruncated bool `json:"body_is_truncated"`
Code int64 `json:"code"`
HeadersList []ArchivalHTTPHeader `json:"headers_list"`
Headers map[string]ArchivalMaybeBinaryData `json:"headers"`
// The following fields are not serialised but are useful to simplify
// analysing the measurements in telegram, whatsapp, etc.
// (the `json:"-"` tag excludes Locations from encoding and decoding).
Locations []string `json:"-"`
}
// ArchivalHTTPBody is an HTTP body. As an implementation note, this type must
// be an alias for the ArchivalMaybeBinaryData type, otherwise the specific
// serialisation mechanism implemented by ArchivalMaybeBinaryData is not
// working: a defined type (`type ArchivalHTTPBody ArchivalMaybeBinaryData`,
// without the `=`) would not inherit the MarshalJSON and UnmarshalJSON methods.
type ArchivalHTTPBody = ArchivalMaybeBinaryData
// ArchivalHTTPHeader is a single HTTP header.
//
// On the wire a header is a two-element JSON array: the key followed by
// the value, where the value uses the maybe-binary representation
// implemented by ArchivalMaybeBinaryData.
type ArchivalHTTPHeader struct {
	// Key is the header name.
	Key string

	// Value is the header value, which is not required to be valid UTF-8.
	Value ArchivalMaybeBinaryData
}

// MarshalJSON marshals a single HTTP header to a tuple where the first
// element is a string and the second element is maybe-binary data.
//
// The value is encoded by ArchivalMaybeBinaryData.MarshalJSON instead of
// a private copy of that logic, so that header values and bodies always
// share one encoding. The output is the same as before: a plain string
// for valid UTF-8 and `{"data":"...","format":"base64"}` otherwise.
func (hh ArchivalHTTPHeader) MarshalJSON() ([]byte, error) {
	return json.Marshal([]interface{}{hh.Key, hh.Value})
}

// UnmarshalJSON is the opposite of MarshalJSON.
//
// It returns an error when d is not a two-element JSON array, when the
// first element is not a string, or when the second element is neither a
// string nor a well-formed `{"format":"base64","data":"..."}` object. The
// receiver is only modified once the whole tuple has been validated.
func (hh *ArchivalHTTPHeader) UnmarshalJSON(d []byte) error {
	var pair []interface{}
	if err := json.Unmarshal(d, &pair); err != nil {
		return err
	}
	if len(pair) != 2 {
		return errors.New("unexpected pair length")
	}
	key, ok := pair[0].(string)
	if !ok {
		return errors.New("the key is not a string")
	}
	var value string
	switch second := pair[1].(type) {
	case string:
		value = second
	case map[string]interface{}:
		// A single lookup per key: presence and type are checked separately
		// because they are reported with different error messages.
		format, found := second["format"]
		if !found {
			return errors.New("missing format")
		}
		if name, isString := format.(string); !isString || name != "base64" {
			return errors.New("invalid format")
		}
		data, found := second["data"]
		if !found {
			return errors.New("missing data field")
		}
		encoded, isString := data.(string)
		if !isString {
			return errors.New("the data field is not a string")
		}
		decoded, err := base64.StdEncoding.DecodeString(encoded)
		if err != nil {
			return err
		}
		value = string(decoded)
	default:
		return errors.New("the value is neither a string nor a map[string]interface{}")
	}
	hh.Key, hh.Value = key, ArchivalMaybeBinaryData{Value: value}
	return nil
}
// ArchivalHTTPTor contains Tor information.
//
// ExitIP and ExitName are pointers whose tags have no omitempty, so nil
// values are emitted as JSON null rather than being left out.
type ArchivalHTTPTor struct {
ExitIP *string `json:"exit_ip"`
ExitName *string `json:"exit_name"`
IsTor bool `json:"is_tor"`
}
//
// NetworkEvent
//
// ArchivalNetworkEvent is a network event. It contains all the possible fields
// and most fields are optional. They are only added when it makes sense
// for them to be there _and_ we have data to show.
//
// In terms of JSON encoding, the optional fields are the ones tagged with
// omitempty (Address, NumBytes, Proto and Tags); Failure, Operation and T
// are always emitted, with a nil Failure encoded as null.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-008-netevents.md.
type ArchivalNetworkEvent struct {
Address string `json:"address,omitempty"`
Failure *string `json:"failure"`
NumBytes int64 `json:"num_bytes,omitempty"`
Operation string `json:"operation"`
Proto string `json:"proto,omitempty"`
T float64 `json:"t"`
Tags []string `json:"tags,omitempty"`
}

View File

@ -0,0 +1,310 @@
package model
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ooni/probe-cli/v3/internal/fakefill"
)
// TestArchivalExtSpec exercises the methods of ArchivalExtSpec.
func TestArchivalExtSpec(t *testing.T) {
	t.Run("AddTo", func(t *testing.T) {
		// start from a measurement whose Extensions map is still nil
		var measurement Measurement
		ArchivalExtDNS.AddTo(&measurement)
		want := map[string]int64{"dnst": 0}
		if diff := cmp.Diff(measurement.Extensions, want); diff != "" {
			t.Fatal(diff)
		}
	})
}
// archivalBinaryInput is a fixed byte sequence that is not valid UTF-8 (its
// second byte, 0xe5, starts a multi-byte sequence that 0x79 does not
// continue). We use this value below to test we can handle binary data.
var archivalBinaryInput = []uint8{
0x57, 0xe5, 0x79, 0xfb, 0xa6, 0xbb, 0x0d, 0xbc, 0xce, 0xbd, 0xa7, 0xa0,
0xba, 0xa4, 0x78, 0x78, 0x12, 0x59, 0xee, 0x68, 0x39, 0xa4, 0x07, 0x98,
0xc5, 0x3e, 0xbc, 0x55, 0xcb, 0xfe, 0x34, 0x3c, 0x7e, 0x1b, 0x5a, 0xb3,
0x22, 0x9d, 0xc1, 0x2d, 0x6e, 0xca, 0x5b, 0xf1, 0x10, 0x25, 0x47, 0x1e,
0x44, 0xe2, 0x2d, 0x60, 0x08, 0xea, 0xb0, 0x0a, 0xcc, 0x05, 0x48, 0xa0,
0xf5, 0x78, 0x38, 0xf0, 0xdb, 0x3f, 0x9d, 0x9f, 0x25, 0x6f, 0x89, 0x00,
0x96, 0x93, 0xaf, 0x43, 0xac, 0x4d, 0xc9, 0xac, 0x13, 0xdb, 0x22, 0xbe,
0x7a, 0x7d, 0xd9, 0x24, 0xa2, 0x52, 0x69, 0xd8, 0x89, 0xc1, 0xd1, 0x57,
0xaa, 0x04, 0x2b, 0xa2, 0xd8, 0xb1, 0x19, 0xf6, 0xd5, 0x11, 0x39, 0xbb,
0x80, 0xcf, 0x86, 0xf9, 0x5f, 0x9d, 0x8c, 0xab, 0xf5, 0xc5, 0x74, 0x24,
0x3a, 0xa2, 0xd4, 0x40, 0x4e, 0xd7, 0x10, 0x1f,
}
// archivalEncodedBinaryInput is the JSON the tests below expect when
// marshalling archivalBinaryInput: a base64 object whose keys appear in
// sorted order ("data" before "format").
var archivalEncodedBinaryInput = []byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`)
// TestMaybeBinaryValue checks the custom JSON encoding and decoding
// implemented by ArchivalMaybeBinaryData.
func TestMaybeBinaryValue(t *testing.T) {
	t.Run("MarshalJSON", func(t *testing.T) {
		type testCase struct {
			name    string // subtest name
			input   string // value to marshal
			want    []byte // expected JSON
			wantErr bool   // whether we expect an error
		}
		cases := []testCase{
			{name: "with string input", input: "antani", want: []byte(`"antani"`)},
			{name: "with binary input", input: string(archivalBinaryInput), want: archivalEncodedBinaryInput},
		}
		for _, tc := range cases {
			t.Run(tc.name, func(t *testing.T) {
				got, err := ArchivalMaybeBinaryData{Value: tc.input}.MarshalJSON()
				if failed := err != nil; failed != tc.wantErr {
					t.Fatalf("ArchivalMaybeBinaryData.MarshalJSON() error = %v, wantErr %v", err, tc.wantErr)
				}
				if diff := cmp.Diff(tc.want, got); diff != "" {
					t.Fatal(diff)
				}
			})
		}
	})

	t.Run("UnmarshalJSON", func(t *testing.T) {
		type testCase struct {
			name    string // subtest name
			input   []byte // JSON to unmarshal
			want    string // expected Value afterwards
			wantErr bool   // whether we expect an error
		}
		cases := []testCase{
			{name: "with string input", input: []byte(`"xo"`), want: "xo"},
			{name: "with nil input", input: nil, wantErr: true},
			{name: "with missing/invalid format", input: []byte(`{"format": "foo"}`), wantErr: true},
			{name: "with missing data", input: []byte(`{"format": "base64"}`), wantErr: true},
			{name: "with invalid base64 data", input: []byte(`{"format": "base64", "data": "x"}`), wantErr: true},
			{name: "with valid base64 data", input: archivalEncodedBinaryInput, want: string(archivalBinaryInput)},
		}
		for _, tc := range cases {
			t.Run(tc.name, func(t *testing.T) {
				var hb ArchivalMaybeBinaryData
				err := hb.UnmarshalJSON(tc.input)
				if failed := err != nil; failed != tc.wantErr {
					t.Fatalf("ArchivalMaybeBinaryData.UnmarshalJSON() error = %v, wantErr %v", err, tc.wantErr)
				}
				if diff := cmp.Diff(tc.want, hb.Value); diff != "" {
					t.Fatal(diff)
				}
			})
		}
	})
}
// TestHTTPHeader exercises the MarshalJSON and UnmarshalJSON methods of
// ArchivalHTTPHeader using table-driven subtests.
func TestHTTPHeader(t *testing.T) {
	// newHeader builds an ArchivalHTTPHeader from a key and a plain value.
	newHeader := func(key, value string) ArchivalHTTPHeader {
		return ArchivalHTTPHeader{
			Key:   key,
			Value: ArchivalMaybeBinaryData{Value: value},
		}
	}

	t.Run("MarshalJSON", func(t *testing.T) {
		// marshalCase describes a single marshalling scenario. Fields
		// left out of a literal keep their zero value.
		type marshalCase struct {
			name    string             // subtest name
			input   ArchivalHTTPHeader // header to marshal
			want    []byte             // bytes we expect MarshalJSON to return
			wantErr bool               // true when MarshalJSON is expected to fail
		}
		encodedBinary := string(archivalEncodedBinaryInput)
		cases := []marshalCase{
			{
				name:  "with string value",
				input: newHeader("Content-Type", "text/plain"),
				want:  []byte(`["Content-Type","text/plain"]`),
			},
			{
				name:  "with binary value",
				input: newHeader("Content-Type", string(archivalBinaryInput)),
				want:  []byte(`["Content-Type",` + encodedBinary + `]`),
			},
		}
		for idx := range cases {
			tc := cases[idx]
			t.Run(tc.name, func(t *testing.T) {
				got, err := tc.input.MarshalJSON()
				gotErr := err != nil
				if gotErr != tc.wantErr {
					t.Fatalf("ArchivalHTTPHeader.MarshalJSON() error = %v, wantErr %v", err, tc.wantErr)
				}
				diff := cmp.Diff(tc.want, got)
				if diff != "" {
					t.Fatal(diff)
				}
			})
		}
	})

	t.Run("UnmarshalJSON", func(t *testing.T) {
		// unmarshalCase describes a single unmarshalling scenario. Fields
		// left out of a literal keep their zero value, so every failing
		// case expects the header to be left at its zero value.
		type unmarshalCase struct {
			name    string             // subtest name
			input   []byte             // raw bytes handed to UnmarshalJSON
			want    ArchivalHTTPHeader // header expected after the call
			wantErr bool               // true when UnmarshalJSON is expected to fail
		}
		encodedBinary := string(archivalEncodedBinaryInput)
		cases := []unmarshalCase{
			{
				name:    "with invalid input",
				input:   []byte(`{}`),
				wantErr: true,
			},
			{
				name:    "with unexpected number of items",
				input:   []byte(`[]`),
				wantErr: true,
			},
			{
				name:    "with first item not being a string",
				input:   []byte(`[0,0]`),
				wantErr: true,
			},
			{
				name:  "with both items being a string",
				input: []byte(`["x","y"]`),
				want:  newHeader("x", "y"),
			},
			{
				name:    "with second item not being a map[string]interface{}",
				input:   []byte(`["x",[]]`),
				wantErr: true,
			},
			{
				name:    "with missing format key in second item",
				input:   []byte(`["x",{}]`),
				wantErr: true,
			},
			{
				name:    "with format value not being base64",
				input:   []byte(`["x",{"format":1}]`),
				wantErr: true,
			},
			{
				name:    "with missing data field",
				input:   []byte(`["x",{"format":"base64"}]`),
				wantErr: true,
			},
			{
				name:    "with data not being a string",
				input:   []byte(`["x",{"format":"base64","data":1}]`),
				wantErr: true,
			},
			{
				name:    "with data not being base64",
				input:   []byte(`["x",{"format":"base64","data":"xx"}]`),
				wantErr: true,
			},
			{
				name:  "with correctly encoded base64 data",
				input: []byte(`["x",` + encodedBinary + `]`),
				want:  newHeader("x", string(archivalBinaryInput)),
			},
		}
		for idx := range cases {
			tc := cases[idx]
			t.Run(tc.name, func(t *testing.T) {
				decoded := new(ArchivalHTTPHeader)
				err := decoded.UnmarshalJSON(tc.input)
				gotErr := err != nil
				if gotErr != tc.wantErr {
					t.Fatalf("ArchivalHTTPHeader.UnmarshalJSON() error = %v, wantErr %v", err, tc.wantErr)
				}
				// Both operands are pointers to ArchivalHTTPHeader; the
				// decoded header is checked for failing cases as well.
				diff := cmp.Diff(&tc.want, decoded)
				if diff != "" {
					t.Error(diff)
				}
			})
		}
	})
}
// TestHTTPBody guards the relationship between ArchivalHTTPBody and
// ArchivalMaybeBinaryData.
func TestHTTPBody(t *testing.T) {
	// Implementation note: the correct declaration is the alias
	//
	//     type ArchivalHTTPBody = ArchivalMaybeBinaryData
	//
	// and not the distinct named type
	//
	//     type ArchivalHTTPBody ArchivalMaybeBinaryData
	//
	// The content compared below is the same with either declaration.
	// However, cmp.Diff also takes the data type into account. Hence, if
	// someone mistakenly switches to the second form (which would in turn
	// break correct JSON serialization), this test will fail.
	var body ArchivalHTTPBody
	filler := new(fakefill.Filler)
	filler.Fill(&body)
	converted := ArchivalMaybeBinaryData(body)
	diff := cmp.Diff(body, converted)
	if diff == "" {
		return
	}
	t.Fatal(diff)
}