feat(measurexlite): generate HTTP traces (#881)

In a pure step-by-step model, we don't need to trace HTTP round trips like we did before. We _may_ want to add some form of HTTP tracing in the future (see https://github.com/ooni/probe-cli/pull/868 for a prototype), but doing so is currently out of scope for moving the step-by-step design forward. For this reason, I only added a public convenience function for formatting an OONI-spec-compatible request. I also added new fields, which should be documented inside the ooni/spec repository (see https://github.com/ooni/probe/issues/2238).

Required by https://github.com/ooni/probe/issues/2237
This commit is contained in:
Simone Basso 2022-08-26 13:11:43 +02:00 committed by GitHub
parent 0ef1f24617
commit 9ba6f8dcbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 573 additions and 0 deletions

View File

@ -0,0 +1,205 @@
package measurexlite
//
// Support for generating HTTP traces
//
import (
"net/http"
"sort"
"time"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/tracex"
)
// NewArchivalHTTPRequestResult assembles the model.ArchivalHTTPRequestResult
// describing a single HTTP transaction.
//
// Arguments:
//
// - index is the index of the trace (saved as the TransactionID);
//
// - started is when we started sending the request (saved, in seconds, as T0);
//
// - network is the underlying network in use ("tcp" or "udp");
//
// - address is the remote endpoint's address;
//
// - alpn is the negotiated ALPN, or an empty string when not applicable;
//
// - transport is the protocol used by the HTTP transport ("quic" or "tcp"): this
// field predates network, address, and alpn, which are more informative, and we
// only keep it for backwards compatibility;
//
// - req is the HTTP request, which callers are expected to provide;
//
// - resp is the HTTP response, which may be nil;
//
// - maxRespBodySize is the maximum body snapshot size;
//
// - body is the HTTP response body, which may be nil;
//
// - err is the error that occurred during the transaction, or nil;
//
// - finished is when we finished reading the response's body (saved, in seconds, as T).
//
// The request body is never filled in: only the response body is archived.
func NewArchivalHTTPRequestResult(index int64, started time.Duration, network, address, alpn string,
	transport string, req *http.Request, resp *http.Response, maxRespBodySize int64, body []byte, err error,
	finished time.Duration) *model.ArchivalHTTPRequestResult {
	failure := tracex.NewFailure(err)

	// what we sent (the body is intentionally left empty)
	archivalRequest := model.ArchivalHTTPRequest{
		Body:            model.ArchivalMaybeBinaryData{},
		BodyIsTruncated: false,
		HeadersList:     newHTTPRequestHeaderList(req),
		Headers:         newHTTPRequestHeaderMap(req),
		Method:          httpRequestMethod(req),
		Tor:             model.ArchivalHTTPTor{},
		Transport:       transport, // kept for backward compat
		URL:             httpRequestURL(req),
	}

	// what we received, if anything
	archivalResponse := model.ArchivalHTTPResponse{
		Body:            httpResponseBody(body),
		BodyIsTruncated: httpResponseBodyIsTruncated(body, maxRespBodySize),
		Code:            httpResponseStatusCode(resp),
		HeadersList:     newHTTPResponseHeaderList(resp),
		Headers:         newHTTPResponseHeaderMap(resp),
		Locations:       httpResponseLocations(resp),
	}

	result := &model.ArchivalHTTPRequestResult{
		Network:       network,
		Address:       address,
		ALPN:          alpn,
		Failure:       failure,
		Request:       archivalRequest,
		Response:      archivalResponse,
		T0:            started.Seconds(),
		T:             finished.Seconds(),
		TransactionID: index,
	}
	return result
}
// httpRequestMethod extracts the method from req. When req is nil
// the result is an empty string.
func httpRequestMethod(req *http.Request) (out string) {
	if req == nil {
		return ""
	}
	return req.Method
}
// newHTTPRequestHeaderList converts the request headers into their list
// representation by means of newHTTPHeaderList. A nil request is handled
// as if it carried an empty set of headers.
func newHTTPRequestHeaderList(req *http.Request) []model.ArchivalHTTPHeader {
	if req == nil {
		return newHTTPHeaderList(http.Header{})
	}
	return newHTTPHeaderList(req.Header)
}
// newHTTPRequestHeaderMap converts the request headers into their map
// representation by means of newHTTPHeaderMap. A nil request is handled
// as if it carried an empty set of headers.
func newHTTPRequestHeaderMap(req *http.Request) map[string]model.ArchivalMaybeBinaryData {
	if req == nil {
		return newHTTPHeaderMap(http.Header{})
	}
	return newHTTPHeaderMap(req.Header)
}
// httpRequestURL serializes req.URL using its String method. The result
// is an empty string when either req or req.URL is nil.
func httpRequestURL(req *http.Request) (out string) {
	if req == nil || req.URL == nil {
		return ""
	}
	return req.URL.String()
}
// httpResponseBody wraps the response body into the archival data
// format. A nil body produces the zero value of the result type.
func httpResponseBody(body []byte) (out model.ArchivalMaybeBinaryData) {
	if body == nil {
		return
	}
	out.Value = string(body)
	return
}
// httpResponseBodyIsTruncated tells whether the body should be flagged as
// truncated. This happens when the body is not empty, maxSnapSize is positive,
// and the body length is equal to or larger than maxSnapSize. In any other
// case the function returns false.
func httpResponseBodyIsTruncated(body []byte, maxSnapSize int64) (out bool) {
	size := int64(len(body))
	return size > 0 && maxSnapSize > 0 && size >= maxSnapSize
}
// httpResponseStatusCode extracts the status code from resp and widens
// it to int64. A nil resp yields zero.
func httpResponseStatusCode(resp *http.Response) (code int64) {
	if resp == nil {
		return 0
	}
	return int64(resp.StatusCode)
}
// newHTTPResponseHeaderList converts the response headers into their list
// representation by means of newHTTPHeaderList. A nil response is handled
// as if it carried an empty set of headers.
func newHTTPResponseHeaderList(resp *http.Response) (out []model.ArchivalHTTPHeader) {
	if resp == nil {
		return newHTTPHeaderList(http.Header{})
	}
	return newHTTPHeaderList(resp.Header)
}
// newHTTPResponseHeaderMap converts the response headers into their map
// representation by means of newHTTPHeaderMap. A nil response is handled
// as if it carried an empty set of headers.
func newHTTPResponseHeaderMap(resp *http.Response) (out map[string]model.ArchivalMaybeBinaryData) {
	if resp == nil {
		return newHTTPHeaderMap(http.Header{})
	}
	return newHTTPHeaderMap(resp.Header)
}
// httpResponseLocations returns a list containing the URL obtained by
// calling resp.Location. The list is empty, but never nil, when resp is
// nil or when resp.Location fails.
func httpResponseLocations(resp *http.Response) []string {
	locations := []string{}
	if resp != nil {
		if target, err := resp.Location(); err == nil {
			locations = append(locations, target.String())
		}
	}
	return locations
}
// newHTTPHeaderList flattens the given headers into a list containing one
// entry per header value. Entries are sorted by key and the values of a
// given key keep their original order. The returned list is never nil.
func newHTTPHeaderList(header http.Header) (out []model.ArchivalHTTPHeader) {
	names := make([]string, 0, len(header))
	for name := range header {
		names = append(names, name)
	}
	// Map iteration order is not stable: sorting the keys makes the output
	// consistent, which helps with testing. For an example of why we need to
	// sort headers, see
	// https://github.com/ooni/probe-engine/pull/751/checks?check_run_id=853562310
	sort.Strings(names)
	out = []model.ArchivalHTTPHeader{}
	for _, name := range names {
		values := header[name]
		for idx := range values {
			entry := model.ArchivalHTTPHeader{
				Key:   name,
				Value: model.ArchivalMaybeBinaryData{Value: values[idx]},
			}
			out = append(out, entry)
		}
	}
	return out
}
// newHTTPHeaderMap builds a map from each header key to the first value
// associated with such a key. Keys without any value are omitted. The
// returned map is never nil.
func newHTTPHeaderMap(header http.Header) (out map[string]model.ArchivalMaybeBinaryData) {
	out = map[string]model.ArchivalMaybeBinaryData{}
	for name, values := range header {
		if len(values) < 1 {
			continue // there is nothing to record for this key
		}
		// only the first value fits into the map representation
		out[name] = model.ArchivalMaybeBinaryData{Value: values[0]}
	}
	return out
}

View File

@ -0,0 +1,365 @@
package measurexlite
import (
"net/http"
"net/url"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
"github.com/ooni/probe-cli/v3/internal/netxlite/filtering"
)
// TestNewArchivalHTTPRequestResult is a table-driven test. Each entry of the
// table contains the arguments to pass to NewArchivalHTTPRequestResult along
// with the expected return value, which we compare to the actual return
// value using cmp.Diff.
func TestNewArchivalHTTPRequestResult(t *testing.T) {
// args mirrors, field by field, the parameters accepted
// by NewArchivalHTTPRequestResult.
type args struct {
index int64
started time.Duration
network string
address string
alpn string
transport string
req *http.Request
resp *http.Response
maxRespBodySize int64
body []byte
err error
finished time.Duration
}
// config describes a single test case.
type config struct {
// name is the name of the subtest
name string
// args contains the arguments for NewArchivalHTTPRequestResult
args args
// expect is the result we expect to see
expect *model.ArchivalHTTPRequestResult
}
configs := []config{{
// With nil req, resp, and body, we expect header lists, header maps, and
// locations to be empty containers and every other field to be zero.
name: "the code is defensive with all zero-value inputs",
args: args{
index: 0,
started: 0,
network: "",
address: "",
alpn: "",
transport: "",
req: nil,
resp: nil,
maxRespBodySize: 0,
body: nil,
err: nil,
finished: 0,
},
expect: &model.ArchivalHTTPRequestResult{
Network: "",
Address: "",
ALPN: "",
Failure: nil,
Request: model.ArchivalHTTPRequest{
Body: model.ArchivalMaybeBinaryData{},
BodyIsTruncated: false,
HeadersList: []model.ArchivalHTTPHeader{},
Headers: map[string]model.ArchivalMaybeBinaryData{},
Method: "",
Tor: model.ArchivalHTTPTor{},
Transport: "",
URL: "",
},
Response: model.ArchivalHTTPResponse{
Body: model.ArchivalMaybeBinaryData{},
BodyIsTruncated: false,
Code: 0,
HeadersList: []model.ArchivalHTTPHeader{},
Headers: map[string]model.ArchivalMaybeBinaryData{},
Locations: []string{},
},
T0: 0,
T: 0,
TransactionID: 0,
},
}, {
// There is a request and an error but no response and no body: we expect
// to see the failure string, the request fields, and an empty response.
name: "case of request that failed with I/O issues",
args: args{
index: 1,
started: 250 * time.Millisecond,
network: "tcp",
address: "8.8.8.8:80",
alpn: "",
transport: "tcp",
req: &http.Request{
Method: "GET",
URL: &url.URL{
Scheme: "http",
Host: "dns.google",
Path: "/",
},
Header: http.Header{
"Accept": {"*/*"},
"User-Agent": {"miniooni/0.1.0-dev"},
},
},
resp: nil,
maxRespBodySize: 1 << 19,
body: nil,
err: netxlite.NewTopLevelGenericErrWrapper(netxlite.ECONNRESET),
finished: 750 * time.Millisecond,
},
expect: &model.ArchivalHTTPRequestResult{
Network: "tcp",
Address: "8.8.8.8:80",
ALPN: "",
// Failure is a pointer to string, hence we need a helper
// function to take the address of the expected value
Failure: func() *string {
s := netxlite.FailureConnectionReset
return &s
}(),
Request: model.ArchivalHTTPRequest{
Body: model.ArchivalMaybeBinaryData{},
BodyIsTruncated: false,
// the header list is expected to be sorted by key
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Accept",
Value: model.ArchivalMaybeBinaryData{
Value: "*/*",
},
}, {
Key: "User-Agent",
Value: model.ArchivalMaybeBinaryData{
Value: "miniooni/0.1.0-dev",
},
}},
Headers: map[string]model.ArchivalMaybeBinaryData{
"Accept": {Value: "*/*"},
"User-Agent": {Value: "miniooni/0.1.0-dev"},
},
Method: "GET",
Tor: model.ArchivalHTTPTor{},
Transport: "tcp",
URL: "http://dns.google/",
},
Response: model.ArchivalHTTPResponse{
Body: model.ArchivalMaybeBinaryData{},
BodyIsTruncated: false,
Code: 0,
HeadersList: []model.ArchivalHTTPHeader{},
Headers: map[string]model.ArchivalMaybeBinaryData{},
Locations: []string{},
},
// durations are expected to be converted to seconds
T0: 0.25,
T: 0.75,
TransactionID: 1,
},
}, {
// There is a request, a 200 response, and a body: we expect to see
// both the request and the response fields, and no failure.
name: "case of request that succeded",
args: args{
index: 44,
started: 1400 * time.Millisecond,
network: "udp",
address: "8.8.8.8:443",
alpn: "h3",
transport: "quic",
req: &http.Request{
Method: "GET",
URL: &url.URL{
Scheme: "https",
Host: "dns.google",
Path: "/",
},
Header: http.Header{
"Accept": {"*/*"},
"User-Agent": {"miniooni/0.1.0-dev"},
},
},
resp: &http.Response{
StatusCode: 200,
Header: http.Header{
"Content-Type": {"text/html; charset=iso-8859-1"},
"Server": {"Apache"},
},
},
maxRespBodySize: 1 << 19,
body: filtering.HTTPBlockpage451,
err: nil,
finished: 1500 * time.Millisecond,
},
expect: &model.ArchivalHTTPRequestResult{
Network: "udp",
Address: "8.8.8.8:443",
ALPN: "h3",
Failure: nil,
Request: model.ArchivalHTTPRequest{
Body: model.ArchivalMaybeBinaryData{},
BodyIsTruncated: false,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Accept",
Value: model.ArchivalMaybeBinaryData{
Value: "*/*",
},
}, {
Key: "User-Agent",
Value: model.ArchivalMaybeBinaryData{
Value: "miniooni/0.1.0-dev",
},
}},
Headers: map[string]model.ArchivalMaybeBinaryData{
"Accept": {Value: "*/*"},
"User-Agent": {Value: "miniooni/0.1.0-dev"},
},
Method: "GET",
Tor: model.ArchivalHTTPTor{},
Transport: "quic",
URL: "https://dns.google/",
},
Response: model.ArchivalHTTPResponse{
Body: model.ArchivalMaybeBinaryData{
Value: string(filtering.HTTPBlockpage451),
},
// NOTE(review): presumably the blockpage is smaller than the 1<<19
// maxRespBodySize passed above, hence not truncated -- confirm
BodyIsTruncated: false,
Code: 200,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Content-Type",
Value: model.ArchivalMaybeBinaryData{
Value: "text/html; charset=iso-8859-1",
},
}, {
Key: "Server",
Value: model.ArchivalMaybeBinaryData{
Value: "Apache",
},
}},
Headers: map[string]model.ArchivalMaybeBinaryData{
"Content-Type": {Value: "text/html; charset=iso-8859-1"},
"Server": {Value: "Apache"},
},
// there is no Location header in the response
Locations: []string{},
},
T0: 1.4,
T: 1.5,
TransactionID: 44,
},
}, {
// There is a 302 response with a relative Location header and no body: we
// expect Locations to contain the absolute URL of the redirect target.
name: "case of redirect",
args: args{
index: 47,
started: 1400 * time.Millisecond,
network: "udp",
address: "8.8.8.8:443",
alpn: "h3",
transport: "quic",
req: &http.Request{
Method: "GET",
URL: &url.URL{
Scheme: "https",
Host: "dns.google",
Path: "/",
},
Header: http.Header{
"Accept": {"*/*"},
"User-Agent": {"miniooni/0.1.0-dev"},
},
},
resp: &http.Response{
StatusCode: 302,
Header: http.Header{
"Content-Type": {"text/html; charset=iso-8859-1"},
"Location": {"/v2/index.html"},
"Server": {"Apache"},
},
Request: &http.Request{ // necessary for Location to WAI
URL: &url.URL{
Scheme: "https",
Host: "dns.google",
Path: "/",
},
},
},
maxRespBodySize: 1 << 19,
body: nil,
err: nil,
finished: 1500 * time.Millisecond,
},
expect: &model.ArchivalHTTPRequestResult{
Network: "udp",
Address: "8.8.8.8:443",
ALPN: "h3",
Failure: nil,
Request: model.ArchivalHTTPRequest{
Body: model.ArchivalMaybeBinaryData{},
BodyIsTruncated: false,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Accept",
Value: model.ArchivalMaybeBinaryData{
Value: "*/*",
},
}, {
Key: "User-Agent",
Value: model.ArchivalMaybeBinaryData{
Value: "miniooni/0.1.0-dev",
},
}},
Headers: map[string]model.ArchivalMaybeBinaryData{
"Accept": {Value: "*/*"},
"User-Agent": {Value: "miniooni/0.1.0-dev"},
},
Method: "GET",
Tor: model.ArchivalHTTPTor{},
Transport: "quic",
URL: "https://dns.google/",
},
Response: model.ArchivalHTTPResponse{
Body: model.ArchivalMaybeBinaryData{
Value: "",
},
BodyIsTruncated: false,
Code: 302,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Content-Type",
Value: model.ArchivalMaybeBinaryData{
Value: "text/html; charset=iso-8859-1",
},
}, {
Key: "Location",
Value: model.ArchivalMaybeBinaryData{
Value: "/v2/index.html",
},
}, {
Key: "Server",
Value: model.ArchivalMaybeBinaryData{
Value: "Apache",
},
}},
Headers: map[string]model.ArchivalMaybeBinaryData{
"Content-Type": {Value: "text/html; charset=iso-8859-1"},
"Location": {Value: "/v2/index.html"},
"Server": {Value: "Apache"},
},
// the header contains a relative URL while here
// we expect to see the absolute URL
Locations: []string{
"https://dns.google/v2/index.html",
},
},
T0: 1.4,
T: 1.5,
TransactionID: 47,
},
}}
// Run each test case as a distinct subtest named after the case.
for _, cnf := range configs {
t.Run(cnf.name, func(t *testing.T) {
out := NewArchivalHTTPRequestResult(
cnf.args.index,
cnf.args.started,
cnf.args.network,
cnf.args.address,
cnf.args.alpn,
cnf.args.transport,
cnf.args.req,
cnf.args.resp,
cnf.args.maxRespBodySize,
cnf.args.body,
cnf.args.err,
cnf.args.finished,
)
// any difference between expectation and result fails the subtest
if diff := cmp.Diff(cnf.expect, out); diff != "" {
t.Fatal(diff)
}
})
}
}

View File

@ -193,6 +193,9 @@ type ArchivalTLSOrQUICHandshakeResult struct {
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md.
type ArchivalHTTPRequestResult struct {
Network string `json:"network,omitempty"`
Address string `json:"address,omitempty"`
ALPN string `json:"alpn,omitempty"`
Failure *string `json:"failure"`
Request ArchivalHTTPRequest `json:"request"`
Response ArchivalHTTPResponse `json:"response"`