2022-05-31 21:53:01 +02:00
|
|
|
package tracex
|
2021-02-02 12:05:47 +01:00
|
|
|
|
2022-06-01 23:15:47 +02:00
|
|
|
//
|
|
|
|
// Code to generate the OONI archival data format from events
|
|
|
|
//
|
|
|
|
|
2021-02-02 12:05:47 +01:00
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"net"
|
|
|
|
"net/http"
|
|
|
|
"sort"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/ooni/probe-cli/v3/internal/engine/geolocate"
|
2022-01-03 13:53:23 +01:00
|
|
|
"github.com/ooni/probe-cli/v3/internal/model"
|
2021-09-28 12:42:01 +02:00
|
|
|
"github.com/ooni/probe-cli/v3/internal/netxlite"
|
2021-02-02 12:05:47 +01:00
|
|
|
)
|
|
|
|
|
2022-06-01 07:44:54 +02:00
|
|
|
// Compatibility types. Most experiments still use these names.
|
2022-01-10 11:25:52 +01:00
|
|
|
type (
|
|
|
|
ExtSpec = model.ArchivalExtSpec
|
|
|
|
TCPConnectEntry = model.ArchivalTCPConnectResult
|
|
|
|
TCPConnectStatus = model.ArchivalTCPConnectStatus
|
|
|
|
MaybeBinaryValue = model.ArchivalMaybeBinaryData
|
|
|
|
DNSQueryEntry = model.ArchivalDNSLookupResult
|
|
|
|
DNSAnswerEntry = model.ArchivalDNSAnswer
|
|
|
|
TLSHandshake = model.ArchivalTLSOrQUICHandshakeResult
|
|
|
|
HTTPBody = model.ArchivalHTTPBody
|
|
|
|
HTTPHeader = model.ArchivalHTTPHeader
|
|
|
|
RequestEntry = model.ArchivalHTTPRequestResult
|
|
|
|
HTTPRequest = model.ArchivalHTTPRequest
|
|
|
|
HTTPResponse = model.ArchivalHTTPResponse
|
|
|
|
NetworkEvent = model.ArchivalNetworkEvent
|
|
|
|
)
|
2021-02-02 12:05:47 +01:00
|
|
|
|
2022-06-01 07:44:54 +02:00
|
|
|
// Compatibility variables. Most experiments still use these names.
|
2021-02-02 12:05:47 +01:00
|
|
|
var (
|
2022-01-10 11:25:52 +01:00
|
|
|
ExtDNS = model.ArchivalExtDNS
|
|
|
|
ExtNetevents = model.ArchivalExtNetevents
|
|
|
|
ExtHTTP = model.ArchivalExtHTTP
|
|
|
|
ExtTCPConnect = model.ArchivalExtTCPConnect
|
|
|
|
ExtTLSHandshake = model.ArchivalExtTLSHandshake
|
|
|
|
ExtTunnel = model.ArchivalExtTunnel
|
2021-02-02 12:05:47 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
// NewTCPConnectList creates a new TCPConnectList
|
2022-06-01 23:15:47 +02:00
|
|
|
func NewTCPConnectList(begin time.Time, events []Event) (out []TCPConnectEntry) {
|
2022-06-01 14:32:16 +02:00
|
|
|
for _, wrapper := range events {
|
|
|
|
if _, ok := wrapper.(*EventConnectOperation); !ok {
|
2021-02-02 12:05:47 +01:00
|
|
|
continue
|
|
|
|
}
|
2022-06-01 14:32:16 +02:00
|
|
|
event := wrapper.Value()
|
2021-02-02 12:05:47 +01:00
|
|
|
if event.Proto != "tcp" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// We assume Go is passing us legit data structures
|
|
|
|
ip, sport, _ := net.SplitHostPort(event.Address)
|
|
|
|
iport, _ := strconv.Atoi(sport)
|
|
|
|
out = append(out, TCPConnectEntry{
|
|
|
|
IP: ip,
|
|
|
|
Port: iport,
|
|
|
|
Status: TCPConnectStatus{
|
2022-06-01 23:15:47 +02:00
|
|
|
Blocked: nil, // only used by Web Connectivity
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: event.Err.ToFailure(),
|
|
|
|
Success: event.Err.IsNil(),
|
2021-02-02 12:05:47 +01:00
|
|
|
},
|
|
|
|
T: event.Time.Sub(begin).Seconds(),
|
|
|
|
})
|
|
|
|
}
|
2022-06-01 23:15:47 +02:00
|
|
|
return
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
// NewFailure creates a failure nullable string from the given error. This function
|
|
|
|
// is equivalent to NewFailureStr(err).ToFailure().
|
2021-02-02 12:05:47 +01:00
|
|
|
func NewFailure(err error) *string {
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
return NewFailureStr(err).ToFailure()
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewFailedOperation creates a failed operation string from the given error.
|
|
|
|
func NewFailedOperation(err error) *string {
|
|
|
|
if err == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
var (
|
2021-09-28 12:42:01 +02:00
|
|
|
errWrapper *netxlite.ErrWrapper
|
|
|
|
s = netxlite.UnknownOperation
|
2021-02-02 12:05:47 +01:00
|
|
|
)
|
|
|
|
if errors.As(err, &errWrapper) && errWrapper.Operation != "" {
|
|
|
|
s = errWrapper.Operation
|
|
|
|
}
|
|
|
|
return &s
|
|
|
|
}
|
|
|
|
|
2022-06-01 23:15:47 +02:00
|
|
|
// httpAddHeaders adds the headers inside source into destList and destMap.
|
|
|
|
func httpAddHeaders(source http.Header, destList *[]HTTPHeader,
|
|
|
|
destMap *map[string]MaybeBinaryValue) {
|
2022-06-01 19:27:47 +02:00
|
|
|
*destList = []HTTPHeader{}
|
|
|
|
*destMap = make(map[string]model.ArchivalMaybeBinaryData)
|
2021-02-02 12:05:47 +01:00
|
|
|
for key, values := range source {
|
|
|
|
for index, value := range values {
|
|
|
|
value := MaybeBinaryValue{Value: value}
|
|
|
|
// With the map representation we can only represent a single
|
|
|
|
// value for every key. Hence the list representation.
|
|
|
|
if index == 0 {
|
|
|
|
(*destMap)[key] = value
|
|
|
|
}
|
|
|
|
*destList = append(*destList, HTTPHeader{
|
|
|
|
Key: key,
|
|
|
|
Value: value,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2022-06-01 23:15:47 +02:00
|
|
|
// Sorting helps with unit testing (map keys are unordered)
|
2021-02-02 12:05:47 +01:00
|
|
|
sort.Slice(*destList, func(i, j int) bool {
|
|
|
|
return (*destList)[i].Key < (*destList)[j].Key
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewRequestList returns the list for "requests"
|
2022-06-01 23:15:47 +02:00
|
|
|
func NewRequestList(begin time.Time, events []Event) (out []RequestEntry) {
|
2021-02-02 12:05:47 +01:00
|
|
|
// OONI wants the last request to appear first
|
|
|
|
tmp := newRequestList(begin, events)
|
|
|
|
for i := len(tmp) - 1; i >= 0; i-- {
|
|
|
|
out = append(out, tmp[i])
|
|
|
|
}
|
2022-06-01 23:15:47 +02:00
|
|
|
return
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
|
2022-06-01 23:15:47 +02:00
|
|
|
func newRequestList(begin time.Time, events []Event) (out []RequestEntry) {
|
2022-06-01 14:32:16 +02:00
|
|
|
for _, wrapper := range events {
|
|
|
|
ev := wrapper.Value()
|
|
|
|
switch wrapper.(type) {
|
2022-06-01 19:27:47 +02:00
|
|
|
case *EventHTTPTransactionDone:
|
2022-06-01 23:15:47 +02:00
|
|
|
entry := RequestEntry{}
|
2021-02-02 12:05:47 +01:00
|
|
|
entry.T = ev.Time.Sub(begin).Seconds()
|
2022-06-01 07:44:54 +02:00
|
|
|
httpAddHeaders(
|
2022-06-01 15:20:28 +02:00
|
|
|
ev.HTTPRequestHeaders, &entry.Request.HeadersList, &entry.Request.Headers)
|
2021-02-02 12:05:47 +01:00
|
|
|
entry.Request.Method = ev.HTTPMethod
|
|
|
|
entry.Request.URL = ev.HTTPURL
|
|
|
|
entry.Request.Transport = ev.Transport
|
2022-06-01 07:44:54 +02:00
|
|
|
httpAddHeaders(
|
2022-06-01 15:20:28 +02:00
|
|
|
ev.HTTPResponseHeaders, &entry.Response.HeadersList, &entry.Response.Headers)
|
2021-02-02 12:05:47 +01:00
|
|
|
entry.Response.Code = int64(ev.HTTPStatusCode)
|
2022-06-01 15:20:28 +02:00
|
|
|
entry.Response.Locations = ev.HTTPResponseHeaders.Values("Location")
|
2022-06-01 19:27:47 +02:00
|
|
|
entry.Response.Body.Value = string(ev.HTTPResponseBody)
|
|
|
|
entry.Response.BodyIsTruncated = ev.HTTPResponseBodyIsTruncated
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
entry.Failure = ev.Err.ToFailure()
|
2021-02-02 12:05:47 +01:00
|
|
|
out = append(out, entry)
|
|
|
|
}
|
|
|
|
}
|
2022-06-01 23:15:47 +02:00
|
|
|
return
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// dnsQueryType is the type of a DNS query. The code in this
// file only uses the "A" and "AAAA" values.
type dnsQueryType string
|
|
|
|
|
|
|
|
// NewDNSQueriesList returns a list of DNS queries.
|
2022-06-01 23:15:47 +02:00
|
|
|
func NewDNSQueriesList(begin time.Time, events []Event) (out []DNSQueryEntry) {
|
2021-02-02 12:05:47 +01:00
|
|
|
// TODO(bassosimone): add support for CNAME lookups.
|
2022-06-01 14:32:16 +02:00
|
|
|
for _, wrapper := range events {
|
|
|
|
if _, ok := wrapper.(*EventResolveDone); !ok {
|
2021-02-02 12:05:47 +01:00
|
|
|
continue
|
|
|
|
}
|
2022-06-01 14:32:16 +02:00
|
|
|
ev := wrapper.Value()
|
2021-02-02 12:05:47 +01:00
|
|
|
for _, qtype := range []dnsQueryType{"A", "AAAA"} {
|
2022-06-01 07:44:54 +02:00
|
|
|
entry := qtype.makeQueryEntry(begin, ev)
|
2021-02-02 12:05:47 +01:00
|
|
|
for _, addr := range ev.Addresses {
|
2022-06-01 07:44:54 +02:00
|
|
|
if qtype.ipOfType(addr) {
|
2021-02-02 12:05:47 +01:00
|
|
|
entry.Answers = append(
|
2022-06-01 07:44:54 +02:00
|
|
|
entry.Answers, qtype.makeAnswerEntry(addr))
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
}
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
if len(entry.Answers) <= 0 && ev.Err.IsNil() {
|
2021-02-02 12:05:47 +01:00
|
|
|
// This allows us to skip cases where the server does not have
|
|
|
|
// an IPv6 address but has an IPv4 address. Instead, when we
|
|
|
|
// receive an error, we want to track its existence. The main
|
|
|
|
// issue here is that we are cheating, because we are creating
|
|
|
|
// entries representing queries, but we don't know what the
|
|
|
|
// resolver actually did, especially the system resolver. So,
|
|
|
|
// this output is just our best guess.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
out = append(out, entry)
|
|
|
|
}
|
|
|
|
}
|
2022-06-01 23:15:47 +02:00
|
|
|
return
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
|
2022-06-01 07:44:54 +02:00
|
|
|
func (qtype dnsQueryType) ipOfType(addr string) bool {
|
2021-02-02 12:05:47 +01:00
|
|
|
switch qtype {
|
|
|
|
case "A":
|
2021-04-01 16:57:31 +02:00
|
|
|
return !strings.Contains(addr, ":")
|
2021-02-02 12:05:47 +01:00
|
|
|
case "AAAA":
|
2021-04-01 16:57:31 +02:00
|
|
|
return strings.Contains(addr, ":")
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2022-06-01 07:44:54 +02:00
|
|
|
func (qtype dnsQueryType) makeAnswerEntry(addr string) DNSAnswerEntry {
|
2021-02-02 12:05:47 +01:00
|
|
|
answer := DNSAnswerEntry{AnswerType: string(qtype)}
|
2022-06-01 23:15:47 +02:00
|
|
|
// Figuring out the ASN and the org here is not just a service to whoever
|
|
|
|
// is reading a JSON: Web Connectivity also depends on it!
|
2021-04-01 16:57:31 +02:00
|
|
|
asn, org, _ := geolocate.LookupASN(addr)
|
2021-02-02 12:05:47 +01:00
|
|
|
answer.ASN = int64(asn)
|
|
|
|
answer.ASOrgName = org
|
|
|
|
switch qtype {
|
|
|
|
case "A":
|
|
|
|
answer.IPv4 = addr
|
|
|
|
case "AAAA":
|
|
|
|
answer.IPv6 = addr
|
|
|
|
}
|
|
|
|
return answer
|
|
|
|
}
|
|
|
|
|
2022-06-01 14:32:16 +02:00
|
|
|
func (qtype dnsQueryType) makeQueryEntry(begin time.Time, ev *EventValue) DNSQueryEntry {
|
2021-02-02 12:05:47 +01:00
|
|
|
return DNSQueryEntry{
|
|
|
|
Engine: ev.Proto,
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2021-02-02 12:05:47 +01:00
|
|
|
Hostname: ev.Hostname,
|
|
|
|
QueryType: string(qtype),
|
|
|
|
ResolverAddress: ev.Address,
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-01 23:15:47 +02:00
|
|
|
// NewNetworkEventsList returns a list of network events.
|
|
|
|
func NewNetworkEventsList(begin time.Time, events []Event) (out []NetworkEvent) {
|
2022-06-01 14:32:16 +02:00
|
|
|
for _, wrapper := range events {
|
|
|
|
ev := wrapper.Value()
|
|
|
|
switch wrapper.(type) {
|
|
|
|
case *EventConnectOperation:
|
2021-02-02 12:05:47 +01:00
|
|
|
out = append(out, NetworkEvent{
|
|
|
|
Address: ev.Address,
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2022-06-01 14:32:16 +02:00
|
|
|
Operation: wrapper.Name(),
|
2021-02-02 12:05:47 +01:00
|
|
|
Proto: ev.Proto,
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
})
|
2022-06-01 14:32:16 +02:00
|
|
|
case *EventReadOperation:
|
2021-02-02 12:05:47 +01:00
|
|
|
out = append(out, NetworkEvent{
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2022-06-01 14:32:16 +02:00
|
|
|
Operation: wrapper.Name(),
|
2021-02-02 12:05:47 +01:00
|
|
|
NumBytes: int64(ev.NumBytes),
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
})
|
2022-06-01 14:32:16 +02:00
|
|
|
case *EventWriteOperation:
|
2021-02-02 12:05:47 +01:00
|
|
|
out = append(out, NetworkEvent{
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2022-06-01 14:32:16 +02:00
|
|
|
Operation: wrapper.Name(),
|
2021-02-02 12:05:47 +01:00
|
|
|
NumBytes: int64(ev.NumBytes),
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
})
|
2022-06-01 14:32:16 +02:00
|
|
|
case *EventReadFromOperation:
|
2021-02-02 12:05:47 +01:00
|
|
|
out = append(out, NetworkEvent{
|
|
|
|
Address: ev.Address,
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2022-06-01 14:32:16 +02:00
|
|
|
Operation: wrapper.Name(),
|
2021-02-02 12:05:47 +01:00
|
|
|
NumBytes: int64(ev.NumBytes),
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
})
|
2022-06-01 14:32:16 +02:00
|
|
|
case *EventWriteToOperation:
|
2021-02-02 12:05:47 +01:00
|
|
|
out = append(out, NetworkEvent{
|
|
|
|
Address: ev.Address,
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2022-06-01 14:32:16 +02:00
|
|
|
Operation: wrapper.Name(),
|
2021-02-02 12:05:47 +01:00
|
|
|
NumBytes: int64(ev.NumBytes),
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
})
|
2022-06-01 23:15:47 +02:00
|
|
|
default: // For example, "tls_handshake_done" (used in data analysis!)
|
2022-06-01 14:32:16 +02:00
|
|
|
out = append(out, NetworkEvent{
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2022-06-01 14:32:16 +02:00
|
|
|
Operation: wrapper.Name(),
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
})
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
}
|
2022-06-01 23:15:47 +02:00
|
|
|
return
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewTLSHandshakesList creates a new TLSHandshakesList
|
2022-06-01 23:15:47 +02:00
|
|
|
func NewTLSHandshakesList(begin time.Time, events []Event) (out []TLSHandshake) {
|
2022-06-01 14:32:16 +02:00
|
|
|
for _, wrapper := range events {
|
|
|
|
switch wrapper.(type) {
|
2022-06-01 23:15:47 +02:00
|
|
|
case *EventQUICHandshakeDone, *EventTLSHandshakeDone: // interested
|
2022-06-01 14:32:16 +02:00
|
|
|
default:
|
|
|
|
continue // not interested
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
2022-06-01 14:32:16 +02:00
|
|
|
ev := wrapper.Value()
|
2021-02-02 12:05:47 +01:00
|
|
|
out = append(out, TLSHandshake{
|
2022-05-06 11:09:54 +02:00
|
|
|
Address: ev.Address,
|
2021-02-02 12:05:47 +01:00
|
|
|
CipherSuite: ev.TLSCipherSuite,
|
refactor(tracex): internally represent errors as strings (#786)
There are two reasons why this is beneficial:
1. github.com/google/go-cmp is more annoying to use for comparing
data structures when there are interfaces to compare. Sure, there's
a recipe for teaching it to compare errors, but how about making
the errors trivially comparable instead?
2. if we want to send errors over the network, JSON serialization
works but we cannot unmarshal the resulting string back to an error,
so how about making this representation trivial to serialize (we
are not going this now, but we need this property for websteps and
it may be sensible to try to avoid to have duplicate code because
of that -- measurex currently duplicates many tracex functionality
and this is quite unfortunate because it slows development down)
Additionally, if an error is a string:
3. we can very easily use a switch for comparing its possible
values with "" representing the absence of errors, while it is
more complex to do the same when using a nullable string or even
an error (i.e., an interface)
4. if a type is not nullable, it's easier to write safe code for
it and we may want to refactor experiments to use the internal
representation of measurements for more robust processing code
For all these reasons, let's internally use strings in tracex.
The overall aim here is to reduce the duplicated code between pre
and post-measurex measurements (see https://github.com/ooni/probe/issues/2035).
2022-06-02 10:37:07 +02:00
|
|
|
Failure: ev.Err.ToFailure(),
|
2021-02-02 12:05:47 +01:00
|
|
|
NegotiatedProtocol: ev.TLSNegotiatedProto,
|
|
|
|
NoTLSVerify: ev.NoTLSVerify,
|
2022-06-01 07:44:54 +02:00
|
|
|
PeerCertificates: tlsMakePeerCerts(ev.TLSPeerCerts),
|
2021-02-02 12:05:47 +01:00
|
|
|
ServerName: ev.TLSServerName,
|
|
|
|
T: ev.Time.Sub(begin).Seconds(),
|
|
|
|
TLSVersion: ev.TLSVersion,
|
|
|
|
})
|
|
|
|
}
|
2022-06-01 23:15:47 +02:00
|
|
|
return
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
|
2022-06-02 11:07:02 +02:00
|
|
|
func tlsMakePeerCerts(in [][]byte) (out []MaybeBinaryValue) {
|
2022-06-01 23:15:47 +02:00
|
|
|
for _, entry := range in {
|
2022-06-02 11:07:02 +02:00
|
|
|
out = append(out, MaybeBinaryValue{Value: string(entry)})
|
2021-02-02 12:05:47 +01:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|