refactor: move i/netx/archival structs to i/model (#659)

We recently started moving core data structures inside of the
internal/model package as detailed in https://github.com/ooni/probe/issues/1885.

The chief reason to do that is to have a set of fundamental
shared data types to help us rationalize the codebase.

This specific diff moves internal/netx/archival's core data types
inside the internal/model package. While there, it also refactors the
existing tests to improve their quality. Additionally, we also added
an extra test to ensure `ArchivalHTTPBody` is an alias for
`ArchivalMaybeBinaryData`, which is required to ensure the
custom JSON serialization process works for it.

We're doing that because both internal/netx/archival and
internal/measurex define their own archival data structures.

We developed measurex using its own structures because it
allowed us to iterate more quickly. Now that we have sketched
out measurex, the time has come to consolidate.

My overall aim is to spend a few more hours this week on
engineering measurex. This is preliminary work before
we finish up both measurex and websteps.

We described this cleanup in https://github.com/ooni/probe/issues/1957.
This commit is contained in:
Simone Basso
2022-01-10 11:25:52 +01:00
committed by GitHub
parent 554ae47c5a
commit 730373cc75
5 changed files with 646 additions and 607 deletions
+24 -259
View File
@@ -5,8 +5,6 @@ package archival
import (
"crypto/x509"
"encoding/base64"
"encoding/json"
"errors"
"net"
"net/http"
@@ -14,7 +12,6 @@ import (
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/ooni/probe-cli/v3/internal/engine/geolocate"
"github.com/ooni/probe-cli/v3/internal/engine/netx/trace"
@@ -22,59 +19,32 @@ import (
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
// ExtSpec describes a data format extension as a (name, version) pair.
// Its AddTo method stores V under Name in a measurement's Extensions map.
type ExtSpec struct {
Name string // extension name
V int64 // extension version
}
// AddTo records this extension in m.Extensions, keyed by spec.Name and
// valued by spec.V. The map is allocated first when it is still nil.
func (spec ExtSpec) AddTo(m *model.Measurement) {
	extensions := m.Extensions
	if extensions == nil {
		// Writing to a nil map panics, so create it and attach it to m.
		extensions = make(map[string]int64)
		m.Extensions = extensions
	}
	extensions[spec.Name] = spec.V
}
var (
// ExtDNS is the version of df-002-dnst.md
ExtDNS = ExtSpec{Name: "dnst", V: 0}
// ExtNetevents is the version of df-008-netevents.md
ExtNetevents = ExtSpec{Name: "netevents", V: 0}
// ExtHTTP is the version of df-001-httpt.md
ExtHTTP = ExtSpec{Name: "httpt", V: 0}
// ExtTCPConnect is the version of df-005-tcpconnect.md
ExtTCPConnect = ExtSpec{Name: "tcpconnect", V: 0}
// ExtTLSHandshake is the version of df-006-tlshandshake.md
ExtTLSHandshake = ExtSpec{Name: "tlshandshake", V: 0}
// ExtTunnel is the version of df-009-tunnel.md
ExtTunnel = ExtSpec{Name: "tunnel", V: 0}
// Compatibility types.
//
// Each name below is declared with `=` and is therefore an alias: it denotes
// exactly the same type as the model.Archival* type on its right-hand side,
// including that type's methods.
type (
ExtSpec = model.ArchivalExtSpec
TCPConnectEntry = model.ArchivalTCPConnectResult
TCPConnectStatus = model.ArchivalTCPConnectStatus
MaybeBinaryValue = model.ArchivalMaybeBinaryData
DNSQueryEntry = model.ArchivalDNSLookupResult
DNSAnswerEntry = model.ArchivalDNSAnswer
TLSHandshake = model.ArchivalTLSOrQUICHandshakeResult
HTTPBody = model.ArchivalHTTPBody
HTTPHeader = model.ArchivalHTTPHeader
RequestEntry = model.ArchivalHTTPRequestResult
HTTPRequest = model.ArchivalHTTPRequest
HTTPResponse = model.ArchivalHTTPResponse
NetworkEvent = model.ArchivalNetworkEvent
)
// TCPConnectStatus contains the TCP connect status.
//
// The Blocked field breaks the separation between measurement and analysis
// we have been enforcing for quite some time now. It is a legacy from the
// Web Connectivity experiment and it is kept here for that reason.
//
// Blocked is a pointer tagged with omitempty, so a nil Blocked is left out
// of the JSON; Failure has no omitempty, so a nil Failure encodes as null.
type TCPConnectStatus struct {
Blocked *bool `json:"blocked,omitempty"` // Web Connectivity only
Failure *string `json:"failure"`
Success bool `json:"success"`
}
// TCPConnectEntry contains one of the entries that are part
// of the "tcp_connect" key of a OONI report.
//
// It serializes to the JSON keys "ip", "port", "status" and "t", where
// "status" is a nested TCPConnectStatus.
type TCPConnectEntry struct {
IP string `json:"ip"`
Port int `json:"port"`
Status TCPConnectStatus `json:"status"`
T float64 `json:"t"`
}
// Compatibility variables.
//
// Each variable is initialized from the model.ArchivalExt* value that
// appears on its right-hand side.
var (
ExtDNS = model.ArchivalExtDNS
ExtNetevents = model.ArchivalExtNetevents
ExtHTTP = model.ArchivalExtHTTP
ExtTCPConnect = model.ArchivalExtTCPConnect
ExtTLSHandshake = model.ArchivalExtTLSHandshake
ExtTunnel = model.ArchivalExtTunnel
)
// NewTCPConnectList creates a new TCPConnectList
func NewTCPConnectList(begin time.Time, events []trace.Event) []TCPConnectEntry {
@@ -134,161 +104,6 @@ func NewFailedOperation(err error) *string {
return &s
}
// HTTPTor contains Tor information. It is embedded in HTTPRequest under
// the "tor" JSON key.
//
// ExitIP and ExitName are pointers without omitempty, so nil values are
// encoded as JSON null rather than being dropped.
type HTTPTor struct {
ExitIP *string `json:"exit_ip"`
ExitName *string `json:"exit_name"`
IsTor bool `json:"is_tor"`
}
// MaybeBinaryValue wraps a string that may or may not be valid UTF-8.
// Its custom JSON methods pick the representation based on that property:
// valid UTF-8 becomes a plain JSON string, anything else becomes a
// base64 envelope.
type MaybeBinaryValue struct {
	Value string
}

// MarshalJSON encodes the value following the OONI spec: UTF-8 content is
// represented as a JSON string, while non-UTF-8 content is represented
// as `{"format":"base64","data":"..."}`.
func (hb MaybeBinaryValue) MarshalJSON() ([]byte, error) {
	if !utf8.ValidString(hb.Value) {
		envelope := map[string]string{
			"format": "base64",
			"data":   base64.StdEncoding.EncodeToString([]byte(hb.Value)),
		}
		return json.Marshal(envelope)
	}
	return json.Marshal(hb.Value)
}

// UnmarshalJSON is the inverse of MarshalJSON. It first tries to decode d
// as a plain JSON string and, when that fails, as a base64 envelope. It
// returns an error when the envelope cannot be parsed, when its "format"
// is missing or not "base64", when "data" is missing, or when "data" is
// not valid standard base64.
func (hb *MaybeBinaryValue) UnmarshalJSON(d []byte) error {
	// Fast path: the input already is a JSON string.
	if json.Unmarshal(d, &hb.Value) == nil {
		return nil
	}
	envelope := make(map[string]string)
	if err := json.Unmarshal(d, &envelope); err != nil {
		return err
	}
	// A missing key yields "", which also differs from "base64".
	if envelope["format"] != "base64" {
		return errors.New("missing or invalid format field")
	}
	encoded, found := envelope["data"]
	if !found {
		return errors.New("missing data field")
	}
	decoded, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return err
	}
	hb.Value = string(decoded)
	return nil
}
// HTTPBody is an HTTP body. As an implementation note, this type must be
// an alias for the MaybeBinaryValue type (note the `=`); with a distinct
// defined type the MarshalJSON/UnmarshalJSON methods of MaybeBinaryValue
// would not be part of HTTPBody and the custom serialisation would not apply.
type HTTPBody = MaybeBinaryValue
// HTTPHeader is one HTTP header: a key and a possibly-binary value.
type HTTPHeader struct {
	Key   string
	Value MaybeBinaryValue
}

// MarshalJSON encodes the header as a two-element JSON array. The first
// element is the key; the second is the value as a string when it is valid
// UTF-8, or a `{"format":"base64","data":"..."}` object otherwise.
func (hh HTTPHeader) MarshalJSON() ([]byte, error) {
	raw := hh.Value.Value
	if !utf8.ValidString(raw) {
		envelope := map[string]string{
			"format": "base64",
			"data":   base64.StdEncoding.EncodeToString([]byte(raw)),
		}
		return json.Marshal([]interface{}{hh.Key, envelope})
	}
	return json.Marshal([]string{hh.Key, raw})
}

// UnmarshalJSON is the inverse of MarshalJSON. The input must be a JSON
// array of exactly two elements whose first element is a string and whose
// second element is either a string or a base64 envelope object. The
// receiver is only modified when decoding succeeds.
func (hh *HTTPHeader) UnmarshalJSON(d []byte) error {
	var pair []interface{}
	if err := json.Unmarshal(d, &pair); err != nil {
		return err
	}
	if len(pair) != 2 {
		return errors.New("unexpected pair length")
	}
	key, isString := pair[0].(string)
	if !isString {
		return errors.New("the key is not a string")
	}
	var value string
	switch v := pair[1].(type) {
	case string:
		value = v
	case map[string]interface{}:
		// The checks below run in a fixed order so that each malformed
		// envelope maps onto its own error message.
		format, found := v["format"]
		if !found {
			return errors.New("missing format")
		}
		if name, ok := format.(string); !ok || name != "base64" {
			return errors.New("invalid format")
		}
		data, found := v["data"]
		if !found {
			return errors.New("missing data field")
		}
		encoded, ok := data.(string)
		if !ok {
			return errors.New("the data field is not a string")
		}
		decoded, err := base64.StdEncoding.DecodeString(encoded)
		if err != nil {
			return err
		}
		value = string(decoded)
	default:
		return errors.New("the value is neither a string nor a map[string]interface{}")
	}
	hh.Key, hh.Value = key, MaybeBinaryValue{Value: value}
	return nil
}
// HTTPRequest contains an HTTP request.
//
// Headers are a map in Web Connectivity data format but
// we have added support for a list since January 2020.
// Both forms are present: HeadersList ("headers_list") is the list form
// and Headers ("headers") is the map form.
type HTTPRequest struct {
Body HTTPBody `json:"body"`
BodyIsTruncated bool `json:"body_is_truncated"`
HeadersList []HTTPHeader `json:"headers_list"`
Headers map[string]MaybeBinaryValue `json:"headers"`
Method string `json:"method"`
Tor HTTPTor `json:"tor"`
Transport string `json:"x_transport"`
URL string `json:"url"`
}
// HTTPResponse contains an HTTP response.
//
// Headers are a map in Web Connectivity data format but
// we have added support for a list since January 2020.
// Both forms are present: HeadersList ("headers_list") is the list form
// and Headers ("headers") is the map form.
type HTTPResponse struct {
Body HTTPBody `json:"body"`
BodyIsTruncated bool `json:"body_is_truncated"`
Code int64 `json:"code"`
HeadersList []HTTPHeader `json:"headers_list"`
Headers map[string]MaybeBinaryValue `json:"headers"`
// The following fields are not serialised (their JSON tag is "-") but
// are useful to simplify analysing the measurements in telegram,
// whatsapp, etc.
Locations []string `json:"-"`
}
// RequestEntry is one of the entries that are part of
// the "requests" key of a OONI report.
//
// It pairs an HTTPRequest with its HTTPResponse. Failure is a pointer
// without omitempty, so a nil Failure encodes as JSON null.
type RequestEntry struct {
Failure *string `json:"failure"`
Request HTTPRequest `json:"request"`
Response HTTPResponse `json:"response"`
T float64 `json:"t"`
}
func addheaders(
source http.Header,
destList *[]HTTPHeader,
@@ -361,30 +176,6 @@ func newRequestList(begin time.Time, events []trace.Event) []RequestEntry {
return out
}
// DNSAnswerEntry is the answer to a DNS query.
//
// Fields tagged with omitempty are left out of the JSON when they hold
// their zero value. TTL is a pointer without omitempty, so a nil TTL
// encodes as JSON null.
type DNSAnswerEntry struct {
ASN int64 `json:"asn,omitempty"`
ASOrgName string `json:"as_org_name,omitempty"`
AnswerType string `json:"answer_type"`
Hostname string `json:"hostname,omitempty"`
IPv4 string `json:"ipv4,omitempty"`
IPv6 string `json:"ipv6,omitempty"`
TTL *uint32 `json:"ttl"`
}
// DNSQueryEntry is a DNS query with possibly an answer.
//
// Answers holds zero or more DNSAnswerEntry values. The pointer fields
// (Failure, ResolverHostname, ResolverPort) have no omitempty, so nil
// values encode as JSON null.
type DNSQueryEntry struct {
Answers []DNSAnswerEntry `json:"answers"`
Engine string `json:"engine"`
Failure *string `json:"failure"`
Hostname string `json:"hostname"`
QueryType string `json:"query_type"`
ResolverHostname *string `json:"resolver_hostname"`
ResolverPort *string `json:"resolver_port"`
ResolverAddress string `json:"resolver_address"`
T float64 `json:"t"`
}
// dnsQueryType is a string-based type; the makequeryentry method that
// builds a DNSQueryEntry from a trace event is defined on it.
type dnsQueryType string
// NewDNSQueriesList returns a list of DNS queries.
@@ -454,19 +245,6 @@ func (qtype dnsQueryType) makequeryentry(begin time.Time, ev trace.Event) DNSQue
}
}
// NetworkEvent is a network event. It contains all the possible fields
// and most fields are optional. They are only added when it makes sense
// for them to be there _and_ we have data to show.
//
// The optional fields are the ones tagged with omitempty (Address,
// NumBytes, Proto, Tags); Failure, Operation and T are always emitted.
type NetworkEvent struct {
Address string `json:"address,omitempty"`
Failure *string `json:"failure"`
NumBytes int64 `json:"num_bytes,omitempty"`
Operation string `json:"operation"`
Proto string `json:"proto,omitempty"`
T float64 `json:"t"`
Tags []string `json:"tags,omitempty"`
}
// NewNetworkEventsList returns a list of network events.
func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent {
var out []NetworkEvent
@@ -528,19 +306,6 @@ func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent
return out
}
// TLSHandshake contains TLS handshake data.
//
// PeerCertificates is a list of MaybeBinaryValue, so each entry is
// serialized with that type's JSON encoding. No field uses omitempty:
// every key is always present in the JSON output.
type TLSHandshake struct {
CipherSuite string `json:"cipher_suite"`
Failure *string `json:"failure"`
NegotiatedProtocol string `json:"negotiated_protocol"`
NoTLSVerify bool `json:"no_tls_verify"`
PeerCertificates []MaybeBinaryValue `json:"peer_certificates"`
ServerName string `json:"server_name"`
T float64 `json:"t"`
Tags []string `json:"tags"`
TLSVersion string `json:"tls_version"`
}
// NewTLSHandshakesList creates a new TLSHandshakesList
func NewTLSHandshakesList(begin time.Time, events []trace.Event) []TLSHandshake {
var out []TLSHandshake
@@ -14,7 +14,6 @@ import (
"github.com/gorilla/websocket"
"github.com/ooni/probe-cli/v3/internal/engine/netx/archival"
"github.com/ooni/probe-cli/v3/internal/engine/netx/trace"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
@@ -566,352 +565,6 @@ func TestNewTLSHandshakesList(t *testing.T) {
}
}
// TestExtSpec_AddTo checks that adding ExtDNS to an empty measurement
// yields an Extensions map containing only {"dnst": 0}.
func TestExtSpec_AddTo(t *testing.T) {
	measurement := &model.Measurement{}
	archival.ExtDNS.AddTo(measurement)
	want := map[string]int64{"dnst": 0}
	diff := cmp.Diff(measurement.Extensions, want)
	if diff != "" {
		t.Fatal(diff)
	}
}
// binaryInput is a fixed byte sequence that is not valid UTF-8 (for example
// 0xe5 is followed by 0x79, which is not a continuation byte). The tests use
// it to exercise the base64 code path of the JSON encoders.
var binaryInput = []uint8{
0x57, 0xe5, 0x79, 0xfb, 0xa6, 0xbb, 0x0d, 0xbc, 0xce, 0xbd, 0xa7, 0xa0,
0xba, 0xa4, 0x78, 0x78, 0x12, 0x59, 0xee, 0x68, 0x39, 0xa4, 0x07, 0x98,
0xc5, 0x3e, 0xbc, 0x55, 0xcb, 0xfe, 0x34, 0x3c, 0x7e, 0x1b, 0x5a, 0xb3,
0x22, 0x9d, 0xc1, 0x2d, 0x6e, 0xca, 0x5b, 0xf1, 0x10, 0x25, 0x47, 0x1e,
0x44, 0xe2, 0x2d, 0x60, 0x08, 0xea, 0xb0, 0x0a, 0xcc, 0x05, 0x48, 0xa0,
0xf5, 0x78, 0x38, 0xf0, 0xdb, 0x3f, 0x9d, 0x9f, 0x25, 0x6f, 0x89, 0x00,
0x96, 0x93, 0xaf, 0x43, 0xac, 0x4d, 0xc9, 0xac, 0x13, 0xdb, 0x22, 0xbe,
0x7a, 0x7d, 0xd9, 0x24, 0xa2, 0x52, 0x69, 0xd8, 0x89, 0xc1, 0xd1, 0x57,
0xaa, 0x04, 0x2b, 0xa2, 0xd8, 0xb1, 0x19, 0xf6, 0xd5, 0x11, 0x39, 0xbb,
0x80, 0xcf, 0x86, 0xf9, 0x5f, 0x9d, 0x8c, 0xab, 0xf5, 0xc5, 0x74, 0x24,
0x3a, 0xa2, 0xd4, 0x40, 0x4e, 0xd7, 0x10, 0x1f,
}
// encodedBinaryInput is the JSON the tests expect when binaryInput is
// marshalled: a base64 envelope with keys in sorted order ("data", "format").
var encodedBinaryInput = []byte(`{"data":"V+V5+6a7DbzOvaeguqR4eBJZ7mg5pAeYxT68Vcv+NDx+G1qzIp3BLW7KW/EQJUceROItYAjqsArMBUig9Xg48Ns/nZ8lb4kAlpOvQ6xNyawT2yK+en3ZJKJSadiJwdFXqgQrotixGfbVETm7gM+G+V+djKv1xXQkOqLUQE7XEB8=","format":"base64"}`)
// TestMaybeBinaryValue_MarshalJSON checks that a UTF-8 value is marshalled
// as a JSON string and that a binary value is marshalled as a base64
// envelope.
func TestMaybeBinaryValue_MarshalJSON(t *testing.T) {
	cases := []struct {
		name    string
		value   string
		want    []byte
		wantErr bool
	}{
		{name: "with string input", value: "antani", want: []byte(`"antani"`)},
		{name: "with binary input", value: string(binaryInput), want: encodedBinaryInput},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := archival.MaybeBinaryValue{Value: tc.value}.MarshalJSON()
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("MaybeBinaryValue.MarshalJSON() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if reflect.DeepEqual(got, tc.want) {
				return
			}
			t.Error(cmp.Diff(got, tc.want))
		})
	}
}
func TestMaybeBinaryValue_UnmarshalJSON(t *testing.T) {
type fields struct {
WantValue string
}
type args struct {
d []byte
}
tests := []struct {
name string
fields fields
args args
wantErr bool
}{{
name: "with string input",
fields: fields{
WantValue: "xo",
},
args: args{d: []byte(`"xo"`)},
wantErr: false,
}, {
name: "with nil input",
fields: fields{
WantValue: "",
},
args: args{d: nil},
wantErr: true,
}, {
name: "with missing/invalid format",
fields: fields{
WantValue: "",
},
args: args{d: []byte(`{"format": "foo"}`)},
wantErr: true,
}, {
name: "with missing data",
fields: fields{
WantValue: "",
},
args: args{d: []byte(`{"format": "base64"}`)},
wantErr: true,
}, {
name: "with invalid base64 data",
fields: fields{
WantValue: "",
},
args: args{d: []byte(`{"format": "base64", "data": "x"}`)},
wantErr: true,
}, {
name: "with valid base64 data",
fields: fields{
WantValue: string(binaryInput),
},
args: args{d: encodedBinaryInput},
wantErr: false,
}}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hb := &archival.MaybeBinaryValue{}
if err := hb.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr {
t.Errorf("MaybeBinaryValue.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
}
if d := cmp.Diff(tt.fields.WantValue, hb.Value); d != "" {
t.Error(d)
}
})
}
}
func TestHTTPHeader_MarshalJSON(t *testing.T) {
type fields struct {
Key string
Value archival.MaybeBinaryValue
}
tests := []struct {
name string
fields fields
want []byte
wantErr bool
}{{
name: "with string value",
fields: fields{
Key: "Content-Type",
Value: archival.MaybeBinaryValue{
Value: "text/plain",
},
},
want: []byte(`["Content-Type","text/plain"]`),
wantErr: false,
}, {
name: "with binary value",
fields: fields{
Key: "Content-Type",
Value: archival.MaybeBinaryValue{
Value: string(binaryInput),
},
},
want: []byte(`["Content-Type",` + string(encodedBinaryInput) + `]`),
wantErr: false,
}}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hh := archival.HTTPHeader{
Key: tt.fields.Key,
Value: tt.fields.Value,
}
got, err := hh.MarshalJSON()
if (err != nil) != tt.wantErr {
t.Errorf("HTTPHeader.MarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(got, tt.want) {
t.Error(cmp.Diff(got, tt.want))
}
})
}
}
func TestHTTPHeader_UnmarshalJSON(t *testing.T) {
type fields struct {
WantKey string
WantValue archival.MaybeBinaryValue
}
type args struct {
d []byte
}
tests := []struct {
name string
fields fields
args args
wantErr bool
}{{
name: "with invalid input",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`{}`),
},
wantErr: true,
}, {
name: "with unexpected number of items",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`[]`),
},
wantErr: true,
}, {
name: "with first item not being a string",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`[0,0]`),
},
wantErr: true,
}, {
name: "with both items being a string",
fields: fields{
WantKey: "x",
WantValue: archival.MaybeBinaryValue{
Value: "y",
},
},
args: args{
d: []byte(`["x","y"]`),
},
wantErr: false,
}, {
name: "with second item not being a map[string]interface{}",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",[]]`),
},
wantErr: true,
}, {
name: "with missing format key in second item",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{}]`),
},
wantErr: true,
}, {
name: "with format value not being base64",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":1}]`),
},
wantErr: true,
}, {
name: "with missing data field",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":"base64"}]`),
},
wantErr: true,
}, {
name: "with data not being a string",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":"base64","data":1}]`),
},
wantErr: true,
}, {
name: "with data not being base64",
fields: fields{
WantKey: "",
WantValue: archival.MaybeBinaryValue{
Value: "",
},
},
args: args{
d: []byte(`["x",{"format":"base64","data":"xx"}]`),
},
wantErr: true,
}, {
name: "with correctly encoded base64 data",
fields: fields{
WantKey: "x",
WantValue: archival.MaybeBinaryValue{
Value: string(binaryInput),
},
},
args: args{
d: []byte(`["x",` + string(encodedBinaryInput) + `]`),
},
wantErr: false,
}}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
hh := &archival.HTTPHeader{}
if err := hh.UnmarshalJSON(tt.args.d); (err != nil) != tt.wantErr {
t.Errorf("HTTPHeader.UnmarshalJSON() error = %v, wantErr %v", err, tt.wantErr)
}
expect := &archival.HTTPHeader{
Key: tt.fields.WantKey,
Value: tt.fields.WantValue,
}
if d := cmp.Diff(hh, expect); d != "" {
t.Error(d)
}
})
}
}
func TestNewFailure(t *testing.T) {
type args struct {
err error