83440cf110
The legacy part for now is internal/errorsx. It will stay there until I figure out whether it also needs some extra bug fixing. The good part is now in internal/netxlite/errorsx and contains all the logic for mapping errors. We need to further improve upon this logic by writing more thorough integration tests for QUIC. We also need to copy the various dialer, conn, etc adapters that set errors. We will put them inside netxlite and we will generate errors in a way that is less crazy with respect to the major operation. (The idea is to always wrap, given that now we measure in an incremental way and we don't measure every operation together.) Part of https://github.com/ooni/probe/issues/1591
575 lines
17 KiB
Go
575 lines
17 KiB
Go
// Package archival contains data formats used for archival.
|
|
//
|
|
// See https://github.com/ooni/spec.
|
|
package archival
|
|
|
|
import (
|
|
"crypto/x509"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"errors"
|
|
"net"
|
|
"net/http"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"github.com/ooni/probe-cli/v3/internal/engine/geolocate"
|
|
"github.com/ooni/probe-cli/v3/internal/engine/model"
|
|
"github.com/ooni/probe-cli/v3/internal/engine/netx/trace"
|
|
errorsxlegacy "github.com/ooni/probe-cli/v3/internal/errorsx"
|
|
"github.com/ooni/probe-cli/v3/internal/netxlite/errorsx"
|
|
)
|
|
|
|
// ExtSpec identifies a data format extension by its name and version.
type ExtSpec struct {
	// Name is the extension name.
	Name string

	// V is the extension version.
	V int64
}
|
|
|
|
// AddTo adds the current ExtSpec to the specified measurement
|
|
func (spec ExtSpec) AddTo(m *model.Measurement) {
|
|
if m.Extensions == nil {
|
|
m.Extensions = make(map[string]int64)
|
|
}
|
|
m.Extensions[spec.Name] = spec.V
|
|
}
|
|
|
|
var (
|
|
// ExtDNS is the version of df-002-dnst.md
|
|
ExtDNS = ExtSpec{Name: "dnst", V: 0}
|
|
|
|
// ExtNetevents is the version of df-008-netevents.md
|
|
ExtNetevents = ExtSpec{Name: "netevents", V: 0}
|
|
|
|
// ExtHTTP is the version of df-001-httpt.md
|
|
ExtHTTP = ExtSpec{Name: "httpt", V: 0}
|
|
|
|
// ExtTCPConnect is the version of df-005-tcpconnect.md
|
|
ExtTCPConnect = ExtSpec{Name: "tcpconnect", V: 0}
|
|
|
|
// ExtTLSHandshake is the version of df-006-tlshandshake.md
|
|
ExtTLSHandshake = ExtSpec{Name: "tlshandshake", V: 0}
|
|
|
|
// ExtTunnel is the version of df-009-tunnel.md
|
|
ExtTunnel = ExtSpec{Name: "tunnel", V: 0}
|
|
)
|
|
|
|
// TCPConnectStatus is the outcome of a TCP connect attempt.
//
// The Blocked field mixes analysis into what is otherwise pure measurement
// data, a separation we have otherwise been enforcing for a long time. It
// is a legacy of the Web Connectivity experiment and exists only for it.
type TCPConnectStatus struct {
	// Blocked is used by Web Connectivity only. It is left out of the
	// JSON when nil.
	Blocked *bool `json:"blocked,omitempty"`

	// Failure is the failure string. A nil pointer is emitted as null.
	Failure *string `json:"failure"`

	// Success tells whether the connect attempt succeeded.
	Success bool `json:"success"`
}
|
|
|
|
// TCPConnectEntry contains one of the entries that are part
|
|
// of the "tcp_connect" key of a OONI report.
|
|
type TCPConnectEntry struct {
|
|
IP string `json:"ip"`
|
|
Port int `json:"port"`
|
|
Status TCPConnectStatus `json:"status"`
|
|
T float64 `json:"t"`
|
|
}
|
|
|
|
// NewTCPConnectList creates a new TCPConnectList
|
|
func NewTCPConnectList(begin time.Time, events []trace.Event) []TCPConnectEntry {
|
|
var out []TCPConnectEntry
|
|
for _, event := range events {
|
|
if event.Name != errorsx.ConnectOperation {
|
|
continue
|
|
}
|
|
if event.Proto != "tcp" {
|
|
continue
|
|
}
|
|
// We assume Go is passing us legit data structures
|
|
ip, sport, _ := net.SplitHostPort(event.Address)
|
|
iport, _ := strconv.Atoi(sport)
|
|
out = append(out, TCPConnectEntry{
|
|
IP: ip,
|
|
Port: iport,
|
|
Status: TCPConnectStatus{
|
|
Failure: NewFailure(event.Err),
|
|
Success: event.Err == nil,
|
|
},
|
|
T: event.Time.Sub(begin).Seconds(),
|
|
})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// NewFailure creates a failure nullable string from the given error
|
|
func NewFailure(err error) *string {
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
// The following code guarantees that the error is always wrapped even
|
|
// when we could not actually hit our code that does the wrapping. A case
|
|
// in which this happen is with context deadline for HTTP.
|
|
err = errorsxlegacy.SafeErrWrapperBuilder{
|
|
Error: err,
|
|
Operation: errorsx.TopLevelOperation,
|
|
}.MaybeBuild()
|
|
errWrapper := err.(*errorsx.ErrWrapper)
|
|
s := errWrapper.Failure
|
|
if s == "" {
|
|
s = "unknown_failure: errWrapper.Failure is empty"
|
|
}
|
|
return &s
|
|
}
|
|
|
|
// NewFailedOperation creates a failed operation string from the given error.
|
|
func NewFailedOperation(err error) *string {
|
|
if err == nil {
|
|
return nil
|
|
}
|
|
var (
|
|
errWrapper *errorsx.ErrWrapper
|
|
s = errorsx.UnknownOperation
|
|
)
|
|
if errors.As(err, &errWrapper) && errWrapper.Operation != "" {
|
|
s = errWrapper.Operation
|
|
}
|
|
return &s
|
|
}
|
|
|
|
// HTTPTor holds the Tor information attached to an HTTP request.
type HTTPTor struct {
	// ExitIP is emitted as "exit_ip"; a nil pointer becomes null.
	ExitIP *string `json:"exit_ip"`

	// ExitName is emitted as "exit_name"; a nil pointer becomes null.
	ExitName *string `json:"exit_name"`

	// IsTor is emitted as "is_tor".
	IsTor bool `json:"is_tor"`
}
|
|
|
|
// MaybeBinaryValue is a string that may contain binary data. It exists to
// carry a custom JSON codec that selects the representation depending on
// whether the Value field is valid UTF-8.
type MaybeBinaryValue struct {
	// Value is the raw content, which is not required to be valid UTF-8.
	Value string
}

// MarshalJSON serialises the value following the OONI spec: valid UTF-8
// content is represented as a JSON string, while any other content is
// represented as `{"format":"base64","data":"..."}`.
func (hb MaybeBinaryValue) MarshalJSON() ([]byte, error) {
	if !utf8.ValidString(hb.Value) {
		return json.Marshal(map[string]string{
			"format": "base64",
			"data":   base64.StdEncoding.EncodeToString([]byte(hb.Value)),
		})
	}
	return json.Marshal(hb.Value)
}

// UnmarshalJSON is the inverse of MarshalJSON. It accepts either a JSON
// string or an object of strings whose "format" is "base64" and whose
// "data" is the base64 encoding of the value.
func (hb *MaybeBinaryValue) UnmarshalJSON(d []byte) error {
	// Plain string representation: nothing else to do on success.
	if json.Unmarshal(d, &hb.Value) == nil {
		return nil
	}
	var object map[string]string
	if err := json.Unmarshal(d, &object); err != nil {
		return err
	}
	// A missing key reads as "", which differs from "base64", so a single
	// comparison covers both the missing and the invalid format cases.
	if object["format"] != "base64" {
		return errors.New("missing or invalid format field")
	}
	encoded, found := object["data"]
	if !found {
		return errors.New("missing data field")
	}
	decoded, err := base64.StdEncoding.DecodeString(encoded)
	if err != nil {
		return err
	}
	hb.Value = string(decoded)
	return nil
}
|
|
|
|
// HTTPBody is an HTTP body. As an implementation note, this type must be
|
|
// an alias for the MaybeBinaryValue type, otherwise the specific serialisation
|
|
// mechanism implemented by MaybeBinaryValue is not working.
|
|
type HTTPBody = MaybeBinaryValue
|
|
|
|
// HTTPHeader is a single HTTP header.
|
|
type HTTPHeader struct {
|
|
Key string
|
|
Value MaybeBinaryValue
|
|
}
|
|
|
|
// MarshalJSON marshals a single HTTP header to a tuple where the first
|
|
// element is a string and the second element is maybe-binary data.
|
|
func (hh HTTPHeader) MarshalJSON() ([]byte, error) {
|
|
if utf8.ValidString(hh.Value.Value) {
|
|
return json.Marshal([]string{hh.Key, hh.Value.Value})
|
|
}
|
|
value := make(map[string]string)
|
|
value["format"] = "base64"
|
|
value["data"] = base64.StdEncoding.EncodeToString([]byte(hh.Value.Value))
|
|
return json.Marshal([]interface{}{hh.Key, value})
|
|
}
|
|
|
|
// UnmarshalJSON is the opposite of MarshalJSON.
|
|
func (hh *HTTPHeader) UnmarshalJSON(d []byte) error {
|
|
var pair []interface{}
|
|
if err := json.Unmarshal(d, &pair); err != nil {
|
|
return err
|
|
}
|
|
if len(pair) != 2 {
|
|
return errors.New("unexpected pair length")
|
|
}
|
|
key, ok := pair[0].(string)
|
|
if !ok {
|
|
return errors.New("the key is not a string")
|
|
}
|
|
value, ok := pair[1].(string)
|
|
if !ok {
|
|
mapvalue, ok := pair[1].(map[string]interface{})
|
|
if !ok {
|
|
return errors.New("the value is neither a string nor a map[string]interface{}")
|
|
}
|
|
if _, ok := mapvalue["format"]; !ok {
|
|
return errors.New("missing format")
|
|
}
|
|
if v, ok := mapvalue["format"].(string); !ok || v != "base64" {
|
|
return errors.New("invalid format")
|
|
}
|
|
if _, ok := mapvalue["data"]; !ok {
|
|
return errors.New("missing data field")
|
|
}
|
|
v, ok := mapvalue["data"].(string)
|
|
if !ok {
|
|
return errors.New("the data field is not a string")
|
|
}
|
|
b64, err := base64.StdEncoding.DecodeString(v)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
value = string(b64)
|
|
}
|
|
hh.Key, hh.Value = key, MaybeBinaryValue{Value: value}
|
|
return nil
|
|
}
|
|
|
|
// HTTPRequest contains an HTTP request.
|
|
//
|
|
// Headers are a map in Web Connectivity data format but
|
|
// we have added support for a list since January 2020.
|
|
type HTTPRequest struct {
|
|
Body HTTPBody `json:"body"`
|
|
BodyIsTruncated bool `json:"body_is_truncated"`
|
|
HeadersList []HTTPHeader `json:"headers_list"`
|
|
Headers map[string]MaybeBinaryValue `json:"headers"`
|
|
Method string `json:"method"`
|
|
Tor HTTPTor `json:"tor"`
|
|
Transport string `json:"x_transport"`
|
|
URL string `json:"url"`
|
|
}
|
|
|
|
// HTTPResponse contains an HTTP response.
|
|
//
|
|
// Headers are a map in Web Connectivity data format but
|
|
// we have added support for a list since January 2020.
|
|
type HTTPResponse struct {
|
|
Body HTTPBody `json:"body"`
|
|
BodyIsTruncated bool `json:"body_is_truncated"`
|
|
Code int64 `json:"code"`
|
|
HeadersList []HTTPHeader `json:"headers_list"`
|
|
Headers map[string]MaybeBinaryValue `json:"headers"`
|
|
|
|
// The following fields are not serialised but are useful to simplify
|
|
// analysing the measurements in telegram, whatsapp, etc.
|
|
Locations []string `json:"-"`
|
|
}
|
|
|
|
// RequestEntry is one of the entries that are part of
|
|
// the "requests" key of a OONI report.
|
|
type RequestEntry struct {
|
|
Failure *string `json:"failure"`
|
|
Request HTTPRequest `json:"request"`
|
|
Response HTTPResponse `json:"response"`
|
|
T float64 `json:"t"`
|
|
}
|
|
|
|
func addheaders(
|
|
source http.Header,
|
|
destList *[]HTTPHeader,
|
|
destMap *map[string]MaybeBinaryValue,
|
|
) {
|
|
for key, values := range source {
|
|
for index, value := range values {
|
|
value := MaybeBinaryValue{Value: value}
|
|
// With the map representation we can only represent a single
|
|
// value for every key. Hence the list representation.
|
|
if index == 0 {
|
|
(*destMap)[key] = value
|
|
}
|
|
*destList = append(*destList, HTTPHeader{
|
|
Key: key,
|
|
Value: value,
|
|
})
|
|
}
|
|
}
|
|
sort.Slice(*destList, func(i, j int) bool {
|
|
return (*destList)[i].Key < (*destList)[j].Key
|
|
})
|
|
}
|
|
|
|
// NewRequestList returns the list for "requests"
|
|
func NewRequestList(begin time.Time, events []trace.Event) []RequestEntry {
|
|
// OONI wants the last request to appear first
|
|
var out []RequestEntry
|
|
tmp := newRequestList(begin, events)
|
|
for i := len(tmp) - 1; i >= 0; i-- {
|
|
out = append(out, tmp[i])
|
|
}
|
|
return out
|
|
}
|
|
|
|
func newRequestList(begin time.Time, events []trace.Event) []RequestEntry {
|
|
var (
|
|
out []RequestEntry
|
|
entry RequestEntry
|
|
)
|
|
for _, ev := range events {
|
|
switch ev.Name {
|
|
case "http_transaction_start":
|
|
entry = RequestEntry{}
|
|
entry.T = ev.Time.Sub(begin).Seconds()
|
|
case "http_request_body_snapshot":
|
|
entry.Request.Body.Value = string(ev.Data)
|
|
entry.Request.BodyIsTruncated = ev.DataIsTruncated
|
|
case "http_request_metadata":
|
|
entry.Request.Headers = make(map[string]MaybeBinaryValue)
|
|
addheaders(
|
|
ev.HTTPHeaders, &entry.Request.HeadersList, &entry.Request.Headers)
|
|
entry.Request.Method = ev.HTTPMethod
|
|
entry.Request.URL = ev.HTTPURL
|
|
entry.Request.Transport = ev.Transport
|
|
case "http_response_metadata":
|
|
entry.Response.Headers = make(map[string]MaybeBinaryValue)
|
|
addheaders(
|
|
ev.HTTPHeaders, &entry.Response.HeadersList, &entry.Response.Headers)
|
|
entry.Response.Code = int64(ev.HTTPStatusCode)
|
|
entry.Response.Locations = ev.HTTPHeaders.Values("Location")
|
|
case "http_response_body_snapshot":
|
|
entry.Response.Body.Value = string(ev.Data)
|
|
entry.Response.BodyIsTruncated = ev.DataIsTruncated
|
|
case "http_transaction_done":
|
|
entry.Failure = NewFailure(ev.Err)
|
|
out = append(out, entry)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// DNSAnswerEntry is a single answer to a DNS query.
type DNSAnswerEntry struct {
	// ASN is the autonomous system number; omitted from the JSON when zero.
	ASN int64 `json:"asn,omitempty"`

	// ASOrgName is the autonomous system organisation name; omitted
	// from the JSON when empty.
	ASOrgName string `json:"as_org_name,omitempty"`

	// AnswerType is the type of the answer, e.g. "A" or "AAAA".
	AnswerType string `json:"answer_type"`

	// Hostname is omitted from the JSON when empty.
	Hostname string `json:"hostname,omitempty"`

	// IPv4 is the IPv4 address; omitted from the JSON when empty.
	IPv4 string `json:"ipv4,omitempty"`

	// IPv6 is the IPv6 address; omitted from the JSON when empty.
	IPv6 string `json:"ipv6,omitempty"`

	// TTL is emitted as null when the pointer is nil.
	TTL *uint32 `json:"ttl"`
}
|
|
|
|
// DNSQueryEntry is a DNS query with possibly an answer
|
|
type DNSQueryEntry struct {
|
|
Answers []DNSAnswerEntry `json:"answers"`
|
|
Engine string `json:"engine"`
|
|
Failure *string `json:"failure"`
|
|
Hostname string `json:"hostname"`
|
|
QueryType string `json:"query_type"`
|
|
ResolverHostname *string `json:"resolver_hostname"`
|
|
ResolverPort *string `json:"resolver_port"`
|
|
ResolverAddress string `json:"resolver_address"`
|
|
T float64 `json:"t"`
|
|
}
|
|
|
|
// dnsQueryType is a DNS query type. The code in this file uses the "A"
// and "AAAA" values.
type dnsQueryType string
|
|
|
|
// NewDNSQueriesList returns a list of DNS queries.
|
|
func NewDNSQueriesList(begin time.Time, events []trace.Event) []DNSQueryEntry {
|
|
// TODO(bassosimone): add support for CNAME lookups.
|
|
var out []DNSQueryEntry
|
|
for _, ev := range events {
|
|
if ev.Name != "resolve_done" {
|
|
continue
|
|
}
|
|
for _, qtype := range []dnsQueryType{"A", "AAAA"} {
|
|
entry := qtype.makequeryentry(begin, ev)
|
|
for _, addr := range ev.Addresses {
|
|
if qtype.ipoftype(addr) {
|
|
entry.Answers = append(
|
|
entry.Answers, qtype.makeanswerentry(addr))
|
|
}
|
|
}
|
|
if len(entry.Answers) <= 0 && ev.Err == nil {
|
|
// This allows us to skip cases where the server does not have
|
|
// an IPv6 address but has an IPv4 address. Instead, when we
|
|
// receive an error, we want to track its existence. The main
|
|
// issue here is that we are cheating, because we are creating
|
|
// entries representing queries, but we don't know what the
|
|
// resolver actually did, especially the system resolver. So,
|
|
// this output is just our best guess.
|
|
continue
|
|
}
|
|
out = append(out, entry)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func (qtype dnsQueryType) ipoftype(addr string) bool {
|
|
switch qtype {
|
|
case "A":
|
|
return !strings.Contains(addr, ":")
|
|
case "AAAA":
|
|
return strings.Contains(addr, ":")
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (qtype dnsQueryType) makeanswerentry(addr string) DNSAnswerEntry {
|
|
answer := DNSAnswerEntry{AnswerType: string(qtype)}
|
|
asn, org, _ := geolocate.LookupASN(addr)
|
|
answer.ASN = int64(asn)
|
|
answer.ASOrgName = org
|
|
switch qtype {
|
|
case "A":
|
|
answer.IPv4 = addr
|
|
case "AAAA":
|
|
answer.IPv6 = addr
|
|
}
|
|
return answer
|
|
}
|
|
|
|
func (qtype dnsQueryType) makequeryentry(begin time.Time, ev trace.Event) DNSQueryEntry {
|
|
return DNSQueryEntry{
|
|
Engine: ev.Proto,
|
|
Failure: NewFailure(ev.Err),
|
|
Hostname: ev.Hostname,
|
|
QueryType: string(qtype),
|
|
ResolverAddress: ev.Address,
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
}
|
|
}
|
|
|
|
// NetworkEvent is a network event. It declares all the possible fields,
// most of which are optional: they are only emitted when it makes sense
// for them to be there _and_ we have data to show.
type NetworkEvent struct {
	// Address is omitted from the JSON when empty.
	Address string `json:"address,omitempty"`

	// Failure is the failure string; a nil pointer is emitted as null.
	Failure *string `json:"failure"`

	// NumBytes is omitted from the JSON when zero.
	NumBytes int64 `json:"num_bytes,omitempty"`

	// Operation is the name of the operation.
	Operation string `json:"operation"`

	// Proto is omitted from the JSON when empty.
	Proto string `json:"proto,omitempty"`

	// T is a number of seconds; NewNetworkEventsList fills it with the
	// time elapsed between its begin argument and the event.
	T float64 `json:"t"`

	// Tags is omitted from the JSON when empty.
	Tags []string `json:"tags,omitempty"`
}
|
|
|
|
// NewNetworkEventsList returns a list of DNS queries.
|
|
func NewNetworkEventsList(begin time.Time, events []trace.Event) []NetworkEvent {
|
|
var out []NetworkEvent
|
|
for _, ev := range events {
|
|
if ev.Name == errorsx.ConnectOperation {
|
|
out = append(out, NetworkEvent{
|
|
Address: ev.Address,
|
|
Failure: NewFailure(ev.Err),
|
|
Operation: ev.Name,
|
|
Proto: ev.Proto,
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
})
|
|
continue
|
|
}
|
|
if ev.Name == errorsx.ReadOperation {
|
|
out = append(out, NetworkEvent{
|
|
Failure: NewFailure(ev.Err),
|
|
Operation: ev.Name,
|
|
NumBytes: int64(ev.NumBytes),
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
})
|
|
continue
|
|
}
|
|
if ev.Name == errorsx.WriteOperation {
|
|
out = append(out, NetworkEvent{
|
|
Failure: NewFailure(ev.Err),
|
|
Operation: ev.Name,
|
|
NumBytes: int64(ev.NumBytes),
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
})
|
|
continue
|
|
}
|
|
if ev.Name == errorsx.ReadFromOperation {
|
|
out = append(out, NetworkEvent{
|
|
Address: ev.Address,
|
|
Failure: NewFailure(ev.Err),
|
|
Operation: ev.Name,
|
|
NumBytes: int64(ev.NumBytes),
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
})
|
|
continue
|
|
}
|
|
if ev.Name == errorsx.WriteToOperation {
|
|
out = append(out, NetworkEvent{
|
|
Address: ev.Address,
|
|
Failure: NewFailure(ev.Err),
|
|
Operation: ev.Name,
|
|
NumBytes: int64(ev.NumBytes),
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
})
|
|
continue
|
|
}
|
|
out = append(out, NetworkEvent{
|
|
Failure: NewFailure(ev.Err),
|
|
Operation: ev.Name,
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
})
|
|
}
|
|
return out
|
|
}
|
|
|
|
// TLSHandshake contains TLS handshake data
|
|
type TLSHandshake struct {
|
|
CipherSuite string `json:"cipher_suite"`
|
|
Failure *string `json:"failure"`
|
|
NegotiatedProtocol string `json:"negotiated_protocol"`
|
|
NoTLSVerify bool `json:"no_tls_verify"`
|
|
PeerCertificates []MaybeBinaryValue `json:"peer_certificates"`
|
|
ServerName string `json:"server_name"`
|
|
T float64 `json:"t"`
|
|
Tags []string `json:"tags"`
|
|
TLSVersion string `json:"tls_version"`
|
|
}
|
|
|
|
// NewTLSHandshakesList creates a new TLSHandshakesList
|
|
func NewTLSHandshakesList(begin time.Time, events []trace.Event) []TLSHandshake {
|
|
var out []TLSHandshake
|
|
for _, ev := range events {
|
|
if !strings.Contains(ev.Name, "_handshake_done") {
|
|
continue
|
|
}
|
|
out = append(out, TLSHandshake{
|
|
CipherSuite: ev.TLSCipherSuite,
|
|
Failure: NewFailure(ev.Err),
|
|
NegotiatedProtocol: ev.TLSNegotiatedProto,
|
|
NoTLSVerify: ev.NoTLSVerify,
|
|
PeerCertificates: makePeerCerts(ev.TLSPeerCerts),
|
|
ServerName: ev.TLSServerName,
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
TLSVersion: ev.TLSVersion,
|
|
})
|
|
}
|
|
return out
|
|
}
|
|
|
|
func makePeerCerts(in []*x509.Certificate) (out []MaybeBinaryValue) {
|
|
for _, e := range in {
|
|
out = append(out, MaybeBinaryValue{Value: string(e.Raw)})
|
|
}
|
|
return
|
|
}
|