From 6212daa54aea1001737b57f0391a4dc5ea062929 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 1 Jun 2022 19:27:47 +0200 Subject: [PATCH] fix(tracex): generate archival from single transaction-done event (#780) Tracex contained some fragile code that assembled HTTP measurements from scattered events, which worked because we were sure we were performing a single measurement at any given time. This diff restructures the code to emit a transaction-start and a transaction-done events only. We have basically removed all the other events (which we were not using). We kept the transaction-start though, because it may be useful to see it when reading events. In any case, what matters here is that we're now using the transaction-done event aline to generate the archival HTTP measurement. Hence, the original issue has been addressed. We will possibly do more refactoring in the future, but for now this seems sufficient. Part of https://github.com/ooni/probe/issues/2121 --- .../urlgetter/getter_integration_test.go | 80 +-- internal/engine/netx/netx.go | 4 - internal/engine/netx/netx_test.go | 15 +- internal/engine/netx/tracex/archival.go | 17 +- internal/engine/netx/tracex/archival_test.go | 40 +- internal/engine/netx/tracex/event.go | 103 +-- internal/engine/netx/tracex/http.go | 154 ++--- internal/engine/netx/tracex/http_test.go | 639 ++++++------------ internal/netxlite/filtering/http.go | 5 +- 9 files changed, 357 insertions(+), 700 deletions(-) diff --git a/internal/engine/experiment/urlgetter/getter_integration_test.go b/internal/engine/experiment/urlgetter/getter_integration_test.go index ef68465..2998635 100644 --- a/internal/engine/experiment/urlgetter/getter_integration_test.go +++ b/internal/engine/experiment/urlgetter/getter_integration_test.go @@ -37,18 +37,15 @@ func TestGetterWithVeryShortTimeout(t *testing.T) { if tk.Failure == nil || *tk.Failure != "generic_timeout_error" { t.Fatal("not the Failure we expected") } - if len(tk.NetworkEvents) != 3 { + if len(tk.NetworkEvents) != 2 { t.Fatal("not the NetworkEvents we expected") } if tk.NetworkEvents[0].Operation != "http_transaction_start" { t.Fatal("not the NetworkEvents[0].Operation we expected") } - if tk.NetworkEvents[1].Operation != "http_request_metadata" { + if tk.NetworkEvents[1].Operation != "http_transaction_done" { t.Fatal("not the NetworkEvents[1].Operation we expected") } - if tk.NetworkEvents[2].Operation != "http_transaction_done" { - t.Fatal("not the NetworkEvents[2].Operation we expected") - } if len(tk.Queries) != 0 { t.Fatal("not the Queries we expected") } @@ -104,18 +101,15 @@ func TestGetterWithCancelledContextVanilla(t *testing.T) { if tk.Failure == nil || !strings.HasSuffix(*tk.Failure, "interrupted") { t.Fatal("not the Failure we expected") } - if len(tk.NetworkEvents) != 3 { + if len(tk.NetworkEvents) != 2 { t.Fatal("not the NetworkEvents we expected") } if tk.NetworkEvents[0].Operation != "http_transaction_start" { t.Fatal("not the NetworkEvents[0].Operation we expected") } - if tk.NetworkEvents[1].Operation != "http_request_metadata" { + if tk.NetworkEvents[1].Operation != "http_transaction_done" { t.Fatal("not the NetworkEvents[1].Operation we expected") } - if tk.NetworkEvents[2].Operation != "http_transaction_done" { - t.Fatal("not the NetworkEvents[2].Operation we expected") - } if len(tk.Queries) != 0 { t.Fatal("not the Queries we expected") } @@ -172,18 +166,15 @@ func TestGetterWithCancelledContextAndMethod(t *testing.T) { if tk.Failure == nil || !strings.HasSuffix(*tk.Failure, "interrupted") { t.Fatal("not the Failure we expected") } - if len(tk.NetworkEvents) != 3 { + if len(tk.NetworkEvents) != 2 { t.Fatal("not the NetworkEvents we expected") } if tk.NetworkEvents[0].Operation != "http_transaction_start" { t.Fatal("not the NetworkEvents[0].Operation we expected") } - if tk.NetworkEvents[1].Operation != "http_request_metadata" { + if tk.NetworkEvents[1].Operation != "http_transaction_done" { t.Fatal("not the NetworkEvents[1].Operation we expected") } - if tk.NetworkEvents[2].Operation != "http_transaction_done" { - t.Fatal("not the NetworkEvents[2].Operation we expected") - } if len(tk.Queries) != 0 { t.Fatal("not the Queries we expected") } @@ -242,16 +233,13 @@ func TestGetterWithCancelledContextNoFollowRedirects(t *testing.T) { if tk.Failure == nil || !strings.HasSuffix(*tk.Failure, "interrupted") { t.Fatal("not the Failure we expected") } - if len(tk.NetworkEvents) != 3 { + if len(tk.NetworkEvents) != 2 { t.Fatal("not the NetworkEvents we expected") } if tk.NetworkEvents[0].Operation != "http_transaction_start" { t.Fatal("not the NetworkEvents[0].Operation we expected") } - if tk.NetworkEvents[1].Operation != "http_request_metadata" { - t.Fatal("not the NetworkEvents[1].Operation we expected") - } - if tk.NetworkEvents[2].Operation != "http_transaction_done" { + if tk.NetworkEvents[1].Operation != "http_transaction_done" { t.Fatal("not the NetworkEvents[2].Operation we expected") } if len(tk.Queries) != 0 { @@ -422,23 +410,18 @@ func TestGetterIntegrationHTTPS(t *testing.T) { t.Fatal("not the Failure we expected") } var ( - httpTransactionStart bool - httpRequestMetadata bool - resolveStart bool - resolveDone bool - connect bool - tlsHandshakeStart bool - tlsHandshakeDone bool - httpResponseMetadata bool - httpResponseBodySnapshot bool - httpTransactionDone bool + httpTransactionStart bool + resolveStart bool + resolveDone bool + connect bool + tlsHandshakeStart bool + tlsHandshakeDone bool + httpTransactionDone bool ) for _, ev := range tk.NetworkEvents { switch ev.Operation { case "http_transaction_start": httpTransactionStart = true - case "http_request_metadata": - httpRequestMetadata = true case "resolve_start": resolveStart = true case "resolve_done": @@ -449,24 +432,17 @@ func TestGetterIntegrationHTTPS(t *testing.T) { tlsHandshakeStart = true case "tls_handshake_done": tlsHandshakeDone = true - case "http_response_metadata": - httpResponseMetadata = true - case "http_response_body_snapshot": - httpResponseBodySnapshot = true case "http_transaction_done": httpTransactionDone = true } } ok := true ok = ok && httpTransactionStart - ok = ok && httpRequestMetadata ok = ok && resolveStart ok = ok && resolveDone ok = ok && connect ok = ok && tlsHandshakeStart ok = ok && tlsHandshakeDone - ok = ok && httpResponseMetadata - ok = ok && httpResponseBodySnapshot ok = ok && httpTransactionDone if !ok { t.Fatal("not the NetworkEvents we expected") @@ -657,23 +633,18 @@ func TestGetterHTTPSWithTunnel(t *testing.T) { t.Fatal("not the Failure we expected") } var ( - httpTransactionStart bool - httpRequestMetadata bool - resolveStart bool - resolveDone bool - connect bool - tlsHandshakeStart bool - tlsHandshakeDone bool - httpResponseMetadata bool - httpResponseBodySnapshot bool - httpTransactionDone bool + httpTransactionStart bool + resolveStart bool + resolveDone bool + connect bool + tlsHandshakeStart bool + tlsHandshakeDone bool + httpTransactionDone bool ) for _, ev := range tk.NetworkEvents { switch ev.Operation { case "http_transaction_start": httpTransactionStart = true - case "http_request_metadata": - httpRequestMetadata = true case "resolve_start": resolveStart = true case "resolve_done": @@ -684,24 +655,17 @@ func TestGetterHTTPSWithTunnel(t *testing.T) { tlsHandshakeStart = true case "tls_handshake_done": tlsHandshakeDone = true - case "http_response_metadata": - httpResponseMetadata = true - case "http_response_body_snapshot": - httpResponseBodySnapshot = true case "http_transaction_done": httpTransactionDone = true } } ok := true ok = ok && httpTransactionStart - ok = ok && httpRequestMetadata ok = ok && resolveStart == false ok = ok && resolveDone == false ok = ok && connect ok = ok && tlsHandshakeStart ok = ok && tlsHandshakeDone - ok = ok && httpResponseMetadata - ok = ok && httpResponseBodySnapshot ok = ok && httpTransactionDone if !ok { t.Fatalf("not the NetworkEvents we expected: %+v", tk.NetworkEvents) diff --git a/internal/engine/netx/netx.go b/internal/engine/netx/netx.go index 3c3d68f..3b07e56 100644 --- a/internal/engine/netx/netx.go +++ b/internal/engine/netx/netx.go @@ -187,10 +187,6 @@ func NewHTTPTransport(config Config) model.HTTPTransport { txp = &netxlite.HTTPTransportLogger{Logger: config.Logger, HTTPTransport: txp} } if config.HTTPSaver != nil { - txp = &tracex.SaverMetadataHTTPTransport{ - HTTPTransport: txp, Saver: config.HTTPSaver} - txp = &tracex.SaverBodyHTTPTransport{ - HTTPTransport: txp, Saver: config.HTTPSaver} txp = &tracex.SaverTransactionHTTPTransport{ HTTPTransport: txp, Saver: config.HTTPSaver} } diff --git a/internal/engine/netx/netx_test.go b/internal/engine/netx/netx_test.go index c0e99ce..d51bdf8 100644 --- a/internal/engine/netx/netx_test.go +++ b/internal/engine/netx/netx_test.go @@ -511,21 +511,10 @@ func TestNewWithSaver(t *testing.T) { if stxptxp.Saver != saver { t.Fatal("not the logger we expected") } - sbtxp, ok := stxptxp.HTTPTransport.(*tracex.SaverBodyHTTPTransport) - if !ok { - t.Fatal("not the transport we expected") - } - if sbtxp.Saver != saver { + if stxptxp.Saver != saver { t.Fatal("not the logger we expected") } - smtxp, ok := sbtxp.HTTPTransport.(*tracex.SaverMetadataHTTPTransport) - if !ok { - t.Fatal("not the transport we expected") - } - if smtxp.Saver != saver { - t.Fatal("not the logger we expected") - } - if _, ok := smtxp.HTTPTransport.(*netxlite.HTTPTransportWrapper); !ok { + if _, ok := stxptxp.HTTPTransport.(*netxlite.HTTPTransportWrapper); !ok { t.Fatal("not the transport we expected") } } diff --git a/internal/engine/netx/tracex/archival.go b/internal/engine/netx/tracex/archival.go index 8d4a4c8..15dd5e2 100644 --- a/internal/engine/netx/tracex/archival.go +++ b/internal/engine/netx/tracex/archival.go @@ -106,6 +106,8 @@ func httpAddHeaders( destList *[]HTTPHeader, destMap *map[string]MaybeBinaryValue, ) { + *destList = []HTTPHeader{} + *destMap = make(map[string]model.ArchivalMaybeBinaryData) for key, values := range source { for index, value := range values { value := MaybeBinaryValue{Value: value} @@ -144,29 +146,20 @@ func newRequestList(begin time.Time, events []Event) []RequestEntry { for _, wrapper := range events { ev := wrapper.Value() switch wrapper.(type) { - case *EventHTTPTransactionStart: + case *EventHTTPTransactionDone: entry = RequestEntry{} entry.T = ev.Time.Sub(begin).Seconds() - case *EventHTTPRequestBodySnapshot: - entry.Request.Body.Value = string(ev.Data) - entry.Request.BodyIsTruncated = ev.DataIsTruncated - case *EventHTTPRequestMetadata: - entry.Request.Headers = make(map[string]MaybeBinaryValue) httpAddHeaders( ev.HTTPRequestHeaders, &entry.Request.HeadersList, &entry.Request.Headers) entry.Request.Method = ev.HTTPMethod entry.Request.URL = ev.HTTPURL entry.Request.Transport = ev.Transport - case *EventHTTPResponseMetadata: - entry.Response.Headers = make(map[string]MaybeBinaryValue) httpAddHeaders( ev.HTTPResponseHeaders, &entry.Response.HeadersList, &entry.Response.Headers) entry.Response.Code = int64(ev.HTTPStatusCode) entry.Response.Locations = ev.HTTPResponseHeaders.Values("Location") - case *EventHTTPResponseBodySnapshot: - entry.Response.Body.Value = string(ev.Data) - entry.Response.BodyIsTruncated = ev.DataIsTruncated - case *EventHTTPTransactionDone: + entry.Response.Body.Value = string(ev.HTTPResponseBody) + entry.Response.BodyIsTruncated = ev.HTTPResponseBodyIsTruncated entry.Failure = NewFailure(ev.Err) out = append(out, entry) } diff --git a/internal/engine/netx/tracex/archival_test.go b/internal/engine/netx/tracex/archival_test.go index 213a7e6..5b9d8ed 100644 --- a/internal/engine/netx/tracex/archival_test.go +++ b/internal/engine/netx/tracex/archival_test.go @@ -143,35 +143,27 @@ func TestNewRequestList(t *testing.T) { name: "realistic run", args: args{ begin: begin, - events: []Event{&EventHTTPTransactionStart{&EventValue{ - Time: begin.Add(10 * time.Millisecond), - }}, &EventHTTPRequestBodySnapshot{&EventValue{ - Data: []byte("deadbeef"), - DataIsTruncated: false, - }}, &EventHTTPRequestMetadata{&EventValue{ + events: []Event{&EventHTTPTransactionDone{&EventValue{ HTTPRequestHeaders: http.Header{ "User-Agent": []string{"miniooni/0.1.0-dev"}, }, HTTPMethod: "POST", HTTPURL: "https://www.example.com/submit", - }}, &EventHTTPResponseMetadata{&EventValue{ HTTPResponseHeaders: http.Header{ "Server": []string{"miniooni/0.1.0-dev"}, }, - HTTPStatusCode: 200, - }}, &EventHTTPResponseBodySnapshot{&EventValue{ - Data: []byte("{}"), - DataIsTruncated: false, - }}, &EventHTTPTransactionDone{&EventValue{}}, &EventHTTPTransactionStart{&EventValue{ - Time: begin.Add(20 * time.Millisecond), - }}, &EventHTTPRequestMetadata{&EventValue{ + HTTPStatusCode: 200, + HTTPResponseBody: []byte("{}"), + HTTPResponseBodyIsTruncated: false, + Time: begin.Add(10 * time.Millisecond), + }}, &EventHTTPTransactionDone{&EventValue{ HTTPRequestHeaders: http.Header{ "User-Agent": []string{"miniooni/0.1.0-dev"}, }, HTTPMethod: "GET", HTTPURL: "https://www.example.com/result", - }}, &EventHTTPTransactionDone{&EventValue{ - Err: io.EOF, + Err: io.EOF, + Time: begin.Add(20 * time.Millisecond), }}}, }, want: []RequestEntry{{ @@ -189,11 +181,15 @@ func TestNewRequestList(t *testing.T) { Method: "GET", URL: "https://www.example.com/result", }, + Response: HTTPResponse{ + HeadersList: []HTTPHeader{}, + Headers: make(map[string]MaybeBinaryValue), + }, T: 0.02, }, { Request: HTTPRequest{ Body: MaybeBinaryValue{ - Value: "deadbeef", + Value: "", }, HeadersList: []HTTPHeader{{ Key: "User-Agent", @@ -231,21 +227,19 @@ func TestNewRequestList(t *testing.T) { name: "run with redirect and headers to sort", args: args{ begin: begin, - events: []Event{&EventHTTPTransactionStart{&EventValue{ - Time: begin.Add(10 * time.Millisecond), - }}, &EventHTTPRequestMetadata{&EventValue{ + events: []Event{&EventHTTPTransactionDone{&EventValue{ HTTPRequestHeaders: http.Header{ "User-Agent": []string{"miniooni/0.1.0-dev"}, }, HTTPMethod: "GET", HTTPURL: "https://www.example.com/", - }}, &EventHTTPResponseMetadata{&EventValue{ HTTPResponseHeaders: http.Header{ "Server": []string{"miniooni/0.1.0-dev"}, "Location": []string{"https://x.example.com", "https://y.example.com"}, }, HTTPStatusCode: 302, - }}, &EventHTTPTransactionDone{&EventValue{}}}, + Time: begin.Add(10 * time.Millisecond), + }}}, }, want: []RequestEntry{{ Request: HTTPRequest{ @@ -293,7 +287,7 @@ func TestNewRequestList(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { if got := NewRequestList(tt.args.begin, tt.args.events); !reflect.DeepEqual(got, tt.want) { - t.Error(cmp.Diff(got, tt.want)) + t.Error(cmp.Diff(tt.want, got)) } }) } diff --git a/internal/engine/netx/tracex/event.go b/internal/engine/netx/tracex/event.go index cb5214b..41a23af 100644 --- a/internal/engine/netx/tracex/event.go +++ b/internal/engine/netx/tracex/event.go @@ -147,32 +147,6 @@ func (ev *EventReadFromOperation) Name() string { return netxlite.ReadFromOperation } -// EventHTTPRequestMetadata contains HTTP request metadata. -type EventHTTPRequestMetadata struct { - V *EventValue -} - -func (ev *EventHTTPRequestMetadata) Value() *EventValue { - return ev.V -} - -func (ev *EventHTTPRequestMetadata) Name() string { - return "http_request_metadata" -} - -// EventHTTPResponseMetadata contains HTTP response metadata. -type EventHTTPResponseMetadata struct { - V *EventValue -} - -func (ev *EventHTTPResponseMetadata) Value() *EventValue { - return ev.V -} - -func (ev *EventHTTPResponseMetadata) Name() string { - return "http_response_metadata" -} - // EventHTTPTransactionStart is the beginning of an HTTP transaction. type EventHTTPTransactionStart struct { V *EventValue @@ -199,32 +173,6 @@ func (ev *EventHTTPTransactionDone) Name() string { return "http_transaction_done" } -// EventHTTPRequestBodySnapshot contains a snapshot of the request body. -type EventHTTPRequestBodySnapshot struct { - V *EventValue -} - -func (ev *EventHTTPRequestBodySnapshot) Value() *EventValue { - return ev.V -} - -func (ev *EventHTTPRequestBodySnapshot) Name() string { - return "http_request_body_snapshot" -} - -// EventHTTPResponseBodySnapshot contains a snapshot of the response body. -type EventHTTPResponseBodySnapshot struct { - V *EventValue -} - -func (ev *EventHTTPResponseBodySnapshot) Value() *EventValue { - return ev.V -} - -func (ev *EventHTTPResponseBodySnapshot) Name() string { - return "http_response_body_snapshot" -} - // EventConnectOperation contains information about the connect operation. type EventConnectOperation struct { V *EventValue @@ -266,29 +214,30 @@ func (ev *EventWriteOperation) Name() string { // Event is one of the events within a trace type EventValue struct { - Addresses []string `json:",omitempty"` - Address string `json:",omitempty"` - DNSQuery []byte `json:",omitempty"` - DNSResponse []byte `json:",omitempty"` - DataIsTruncated bool `json:",omitempty"` - Data []byte `json:",omitempty"` - Duration time.Duration `json:",omitempty"` - Err error `json:",omitempty"` - HTTPMethod string `json:",omitempty"` - HTTPRequestHeaders http.Header `json:",omitempty"` - HTTPResponseHeaders http.Header `json:",omitempty"` - HTTPStatusCode int `json:",omitempty"` - HTTPURL string `json:",omitempty"` - Hostname string `json:",omitempty"` - NoTLSVerify bool `json:",omitempty"` - NumBytes int `json:",omitempty"` - Proto string `json:",omitempty"` - TLSServerName string `json:",omitempty"` - TLSCipherSuite string `json:",omitempty"` - TLSNegotiatedProto string `json:",omitempty"` - TLSNextProtos []string `json:",omitempty"` - TLSPeerCerts []*x509.Certificate `json:",omitempty"` - TLSVersion string `json:",omitempty"` - Time time.Time `json:",omitempty"` - Transport string `json:",omitempty"` + Addresses []string `json:",omitempty"` + Address string `json:",omitempty"` + DNSQuery []byte `json:",omitempty"` + DNSResponse []byte `json:",omitempty"` + Data []byte `json:",omitempty"` + Duration time.Duration `json:",omitempty"` + Err error `json:",omitempty"` + HTTPMethod string `json:",omitempty"` + HTTPRequestHeaders http.Header `json:",omitempty"` + HTTPResponseHeaders http.Header `json:",omitempty"` + HTTPResponseBody []byte `json:",omitempty"` + HTTPResponseBodyIsTruncated bool `json:",omitempty"` + HTTPStatusCode int `json:",omitempty"` + HTTPURL string `json:",omitempty"` + Hostname string `json:",omitempty"` + NoTLSVerify bool `json:",omitempty"` + NumBytes int `json:",omitempty"` + Proto string `json:",omitempty"` + TLSServerName string `json:",omitempty"` + TLSCipherSuite string `json:",omitempty"` + TLSNegotiatedProto string `json:",omitempty"` + TLSNextProtos []string `json:",omitempty"` + TLSPeerCerts []*x509.Certificate `json:",omitempty"` + TLSVersion string `json:",omitempty"` + Time time.Time `json:",omitempty"` + Transport string `json:",omitempty"` } diff --git a/internal/engine/netx/tracex/http.go b/internal/engine/netx/tracex/http.go index e4cce39..744a305 100644 --- a/internal/engine/netx/tracex/http.go +++ b/internal/engine/netx/tracex/http.go @@ -6,7 +6,6 @@ package tracex import ( "bytes" - "context" "io" "net/http" "time" @@ -15,34 +14,6 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" ) -// SaverMetadataHTTPTransport is a RoundTripper that saves -// events related to HTTP request and response metadata -type SaverMetadataHTTPTransport struct { - model.HTTPTransport - Saver *Saver -} - -// RoundTrip implements RoundTripper.RoundTrip -func (txp *SaverMetadataHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) { - txp.Saver.Write(&EventHTTPRequestMetadata{&EventValue{ - HTTPRequestHeaders: httpCloneRequestHeaders(req), - HTTPMethod: req.Method, - HTTPURL: req.URL.String(), - Transport: txp.HTTPTransport.Network(), - Time: time.Now(), - }}) - resp, err := txp.HTTPTransport.RoundTrip(req) - if err != nil { - return nil, err - } - txp.Saver.Write(&EventHTTPResponseMetadata{&EventValue{ - HTTPResponseHeaders: resp.Header, - HTTPStatusCode: resp.StatusCode, - Time: time.Now(), - }}) - return resp, err -} - // httpCloneRequestHeaders returns a clone of the headers where we have // also set the host header, which normally is not set by // golang until it serializes the request itself. @@ -59,81 +30,78 @@ func httpCloneRequestHeaders(req *http.Request) http.Header { // SaverTransactionHTTPTransport is a RoundTripper that saves // events related to the HTTP transaction type SaverTransactionHTTPTransport struct { - model.HTTPTransport - Saver *Saver -} - -// RoundTrip implements RoundTripper.RoundTrip -func (txp *SaverTransactionHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) { - txp.Saver.Write(&EventHTTPTransactionStart{&EventValue{ - Time: time.Now(), - }}) - resp, err := txp.HTTPTransport.RoundTrip(req) - txp.Saver.Write(&EventHTTPTransactionDone{&EventValue{ - Err: err, - Time: time.Now(), - }}) - return resp, err -} - -// SaverBodyHTTPTransport is a RoundTripper that saves -// body events occurring during the round trip -type SaverBodyHTTPTransport struct { model.HTTPTransport Saver *Saver - SnapshotSize int + SnapshotSize int64 } -// RoundTrip implements RoundTripper.RoundTrip -func (txp *SaverBodyHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) { - const defaultSnapSize = 1 << 17 - snapsize := defaultSnapSize - if txp.SnapshotSize != 0 { - snapsize = txp.SnapshotSize - } - if req.Body != nil { - data, err := httpSaverSnapRead(req.Context(), req.Body, snapsize) - if err != nil { - return nil, err - } - req.Body = httpSaverCompose(data, req.Body) - txp.Saver.Write(&EventHTTPRequestBodySnapshot{&EventValue{ - DataIsTruncated: len(data) >= snapsize, - Data: data, - Time: time.Now(), - }}) - } - resp, err := txp.HTTPTransport.RoundTrip(req) - if err != nil { - return nil, err - } - data, err := httpSaverSnapRead(req.Context(), resp.Body, snapsize) - if err != nil { - resp.Body.Close() - return nil, err - } - resp.Body = httpSaverCompose(data, resp.Body) - txp.Saver.Write(&EventHTTPResponseBodySnapshot{&EventValue{ - DataIsTruncated: len(data) >= snapsize, - Data: data, - Time: time.Now(), +// HTTPRoundTrip performs the round trip with the given transport and +// the given arguments and saves the results into the saver. +// +// The maxBodySnapshotSize argument controls the maximum size of the +// body snapshot that we collect along with the HTTP round trip. +func (txp *SaverTransactionHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) { + + started := time.Now() + txp.Saver.Write(&EventHTTPTransactionStart{&EventValue{ + HTTPRequestHeaders: httpCloneRequestHeaders(req), + HTTPMethod: req.Method, + HTTPURL: req.URL.String(), + Transport: txp.HTTPTransport.Network(), + Time: started, }}) + ev := &EventValue{ + HTTPRequestHeaders: httpCloneRequestHeaders(req), + HTTPMethod: req.Method, + HTTPURL: req.URL.String(), + Transport: txp.HTTPTransport.Network(), + Time: started, + } + defer txp.Saver.Write(&EventHTTPTransactionDone{ev}) + + resp, err := txp.HTTPTransport.RoundTrip(req) + + if err != nil { + ev.Duration = time.Since(started) + ev.Err = err + return nil, err + } + + ev.HTTPStatusCode = resp.StatusCode + ev.HTTPResponseHeaders = resp.Header.Clone() + + maxBodySnapshotSize := txp.snapshotSize() + r := io.LimitReader(resp.Body, maxBodySnapshotSize) + body, err := netxlite.ReadAllContext(req.Context(), r) + + if err != nil { + ev.Duration = time.Since(started) + ev.Err = err + return nil, err + } + + resp.Body = &httpReadableAgainBody{ // allow for reading again the whole body + Reader: io.MultiReader(bytes.NewReader(body), resp.Body), + Closer: resp.Body, + } + + ev.Duration = time.Since(started) + ev.HTTPResponseBody = body + ev.HTTPResponseBodyIsTruncated = int64(len(body)) >= maxBodySnapshotSize + return resp, nil } -func httpSaverSnapRead(ctx context.Context, r io.ReadCloser, snapsize int) ([]byte, error) { - return netxlite.ReadAllContext(ctx, io.LimitReader(r, int64(snapsize))) +func (txp *SaverTransactionHTTPTransport) snapshotSize() int64 { + if txp.SnapshotSize > 0 { + return txp.SnapshotSize + } + return 1 << 17 } -func httpSaverCompose(data []byte, r io.ReadCloser) io.ReadCloser { - return httpSaverReadCloser{Closer: r, Reader: io.MultiReader(bytes.NewReader(data), r)} -} - -type httpSaverReadCloser struct { - io.Closer +type httpReadableAgainBody struct { io.Reader + io.Closer } -var _ model.HTTPTransport = &SaverMetadataHTTPTransport{} -var _ model.HTTPTransport = &SaverBodyHTTPTransport{} var _ model.HTTPTransport = &SaverTransactionHTTPTransport{} diff --git a/internal/engine/netx/tracex/http_test.go b/internal/engine/netx/tracex/http_test.go index d64cacc..1ed764f 100644 --- a/internal/engine/netx/tracex/http_test.go +++ b/internal/engine/netx/tracex/http_test.go @@ -1,391 +1,246 @@ package tracex import ( + "bytes" "context" "errors" "io" "net" "net/http" "net/url" - "strings" "testing" - "time" "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/model/mocks" "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/filtering" ) -func TestSaverMetadataSuccess(t *testing.T) { - if testing.Short() { - t.Skip("skip test in short mode") - } - saver := &Saver{} - txp := SaverMetadataHTTPTransport{ - HTTPTransport: netxlite.NewHTTPTransportStdlib(model.DiscardLogger), - Saver: saver, - } - req, err := http.NewRequest("GET", "https://www.google.com", nil) - if err != nil { - t.Fatal(err) - } - req.Header.Add("User-Agent", "miniooni/0.1.0-dev") - resp, err := txp.RoundTrip(req) - if err != nil { - t.Fatal("not the error we expected") - } - if resp == nil { - t.Fatal("expected non nil response here") - } - ev := saver.Read() - if len(ev) != 2 { - t.Fatal("expected two events") - } - // - if ev[0].Value().HTTPMethod != "GET" { - t.Fatal("unexpected Method") - } - if len(ev[0].Value().HTTPRequestHeaders) <= 0 { - t.Fatal("unexpected Headers") - } - if ev[0].Value().HTTPURL != "https://www.google.com" { - t.Fatal("unexpected URL") - } - if ev[0].Name() != "http_request_metadata" { - t.Fatal("unexpected Name") - } - if !ev[0].Value().Time.Before(time.Now()) { - t.Fatal("unexpected Time") - } - // - if ev[1].Value().HTTPStatusCode != 200 { - t.Fatal("unexpected StatusCode") - } - if len(ev[1].Value().HTTPResponseHeaders) <= 0 { - t.Fatal("unexpected Headers") - } - if ev[1].Name() != "http_response_metadata" { - t.Fatal("unexpected Name") - } - if !ev[1].Value().Time.After(ev[0].Value().Time) { - t.Fatal("unexpected Time") - } -} +func TestSaverTransactionHTTPTransport(t *testing.T) { -func TestSaverMetadataFailure(t *testing.T) { - expected := errors.New("mocked error") - saver := &Saver{} - txp := SaverMetadataHTTPTransport{ - HTTPTransport: FakeTransport{ - Err: expected, - }, - Saver: saver, - } - req, err := http.NewRequest("GET", "http://www.google.com", nil) - if err != nil { - t.Fatal(err) - } - req.Header.Add("User-Agent", "miniooni/0.1.0-dev") - resp, err := txp.RoundTrip(req) - if !errors.Is(err, expected) { - t.Fatal("not the error we expected") - } - if resp != nil { - t.Fatal("expected nil response here") - } - ev := saver.Read() - if len(ev) != 1 { - t.Fatal("expected one event") - } - if ev[0].Value().HTTPMethod != "GET" { - t.Fatal("unexpected Method") - } - if len(ev[0].Value().HTTPRequestHeaders) <= 0 { - t.Fatal("unexpected Headers") - } - if ev[0].Value().HTTPURL != "http://www.google.com" { - t.Fatal("unexpected URL") - } - if ev[0].Name() != "http_request_metadata" { - t.Fatal("unexpected Name") - } - if !ev[0].Value().Time.Before(time.Now()) { - t.Fatal("unexpected Time") - } -} - -func TestSaverTransactionSuccess(t *testing.T) { - if testing.Short() { - t.Skip("skip test in short mode") - } - saver := &Saver{} - txp := SaverTransactionHTTPTransport{ - HTTPTransport: netxlite.NewHTTPTransportStdlib(model.DiscardLogger), - Saver: saver, - } - req, err := http.NewRequest("GET", "https://www.google.com", nil) - if err != nil { - t.Fatal(err) - } - resp, err := txp.RoundTrip(req) - if err != nil { - t.Fatal("not the error we expected") - } - if resp == nil { - t.Fatal("expected non nil response here") - } - ev := saver.Read() - if len(ev) != 2 { - t.Fatal("expected two events") - } - // - if ev[0].Name() != "http_transaction_start" { - t.Fatal("unexpected Name") - } - if !ev[0].Value().Time.Before(time.Now()) { - t.Fatal("unexpected Time") - } - // - if ev[1].Value().Err != nil { - t.Fatal("unexpected Err") - } - if ev[1].Name() != "http_transaction_done" { - t.Fatal("unexpected Name") - } - if !ev[1].Value().Time.After(ev[0].Value().Time) { - t.Fatal("unexpected Time") - } -} - -func TestSaverTransactionFailure(t *testing.T) { - expected := errors.New("mocked error") - saver := &Saver{} - txp := SaverTransactionHTTPTransport{ - HTTPTransport: FakeTransport{ - Err: expected, - }, - Saver: saver, - } - req, err := http.NewRequest("GET", "http://www.google.com", nil) - if err != nil { - t.Fatal(err) - } - resp, err := txp.RoundTrip(req) - if !errors.Is(err, expected) { - t.Fatal("not the error we expected") - } - if resp != nil { - t.Fatal("expected nil response here") - } - ev := saver.Read() - if len(ev) != 2 { - t.Fatal("expected two events") - } - if ev[0].Name() != "http_transaction_start" { - t.Fatal("unexpected Name") - } - if !ev[0].Value().Time.Before(time.Now()) { - t.Fatal("unexpected Time") - } - if ev[1].Name() != "http_transaction_done" { - t.Fatal("unexpected Name") - } - if !errors.Is(ev[1].Value().Err, expected) { - t.Fatal("unexpected Err") - } - if !ev[1].Value().Time.After(ev[0].Value().Time) { - t.Fatal("unexpected Time") - } -} - -func TestSaverBodySuccess(t *testing.T) { - saver := new(Saver) - txp := SaverBodyHTTPTransport{ - HTTPTransport: FakeTransport{ - Func: func(req *http.Request) (*http.Response, error) { - data, err := netxlite.ReadAllContext(context.Background(), req.Body) - if err != nil { - t.Fatal(err) - } - if string(data) != "deadbeef" { - t.Fatal("invalid data") - } - return &http.Response{ - StatusCode: 501, - Body: io.NopCloser(strings.NewReader("abad1dea")), - }, nil + startServer := func(t *testing.T, action filtering.HTTPAction) (net.Listener, *url.URL) { + server := &filtering.HTTPProxy{ + OnIncomingHost: func(host string) filtering.HTTPAction { + return action }, - }, - SnapshotSize: 4, - Saver: saver, + } + listener, err := server.Start("127.0.0.1:0") + if err != nil { + t.Fatal(err) + } + URL := &url.URL{ + Scheme: "http", + Host: listener.Addr().String(), + Path: "/", + } + return listener, URL } - body := strings.NewReader("deadbeef") - req, err := http.NewRequest("POST", "http://x.org/y", body) - if err != nil { - t.Fatal(err) - } - resp, err := txp.RoundTrip(req) - if err != nil { - t.Fatal(err) - } - if resp.StatusCode != 501 { - t.Fatal("unexpected status code") - } - defer resp.Body.Close() - data, err := netxlite.ReadAllContext(context.Background(), resp.Body) - if err != nil { - t.Fatal(err) - } - if string(data) != "abad1dea" { - t.Fatal("unexpected body") - } - ev := saver.Read() - if len(ev) != 2 { - t.Fatal("unexpected number of events") - } - if string(ev[0].Value().Data) != "dead" { - t.Fatal("invalid Data") - } - if ev[0].Value().DataIsTruncated != true { - t.Fatal("invalid DataIsTruncated") - } - if ev[0].Name() != "http_request_body_snapshot" { - t.Fatal("invalid Name") - } - if ev[0].Value().Time.After(time.Now()) { - t.Fatal("invalid Time") - } - if string(ev[1].Value().Data) != "abad" { - t.Fatal("invalid Data") - } - if ev[1].Value().DataIsTruncated != true { - t.Fatal("invalid DataIsTruncated") - } - if ev[1].Name() != "http_response_body_snapshot" { - t.Fatal("invalid Name") - } - if ev[1].Value().Time.Before(ev[0].Value().Time) { - t.Fatal("invalid Time") - } -} -func TestSaverBodyRequestReadError(t *testing.T) { - saver := new(Saver) - txp := SaverBodyHTTPTransport{ - HTTPTransport: FakeTransport{ - Func: func(req *http.Request) (*http.Response, error) { - panic("should not be called") + measureHTTP := func(t *testing.T, URL *url.URL) (*http.Response, *Saver, error) { + saver := &Saver{} + txp := &SaverTransactionHTTPTransport{ + HTTPTransport: netxlite.NewHTTPTransportStdlib(model.DiscardLogger), + Saver: saver, + } + req, err := http.NewRequest("GET", URL.String(), nil) + if err != nil { + t.Fatal(err) + } + req.Header.Add("User-Agent", "miniooni") + resp, err := txp.RoundTrip(req) + return resp, saver, err + } + + validateRequestFields := func(t *testing.T, value *EventValue, URL *url.URL) { + if value.HTTPMethod != "GET" { + t.Fatal("invalid method") + } + if value.HTTPRequestHeaders.Get("Host") != URL.Host { + t.Fatal("invalid Host header") + } + if value.HTTPRequestHeaders.Get("User-Agent") != "miniooni" { + t.Fatal("invalid User-Agent header") + } + if value.HTTPURL != URL.String() { + t.Fatal("invalid URL") + } + if value.Time.IsZero() { + t.Fatal("expected nonzero Time") + } + if value.Transport != "tcp" { + t.Fatal("expected Transport to be tcp") + } + } + + validateRequest := func(t *testing.T, ev Event, URL *url.URL) { + if _, good := ev.(*EventHTTPTransactionStart); !good { + t.Fatal("invalid event type") + } + if ev.Name() != "http_transaction_start" { + t.Fatal("invalid event name") + } + value := ev.Value() + validateRequestFields(t, value, URL) + } + + validateResponseSuccess := func(t *testing.T, ev Event, URL *url.URL) { + if _, good := ev.(*EventHTTPTransactionDone); !good { + t.Fatal("invalid event type") + } + if ev.Name() != "http_transaction_done" { + t.Fatal("invalid event name") + } + value := ev.Value() + validateRequestFields(t, value, URL) + if value.Duration <= 0 { + t.Fatal("expected nonzero duration") + } + if len(value.HTTPResponseHeaders) <= 0 { + t.Fatal("expected at least one response header") + } + if !bytes.Equal(value.HTTPResponseBody, filtering.HTTPBlockpage451) { + t.Fatal("unexpected value for response body") + } + if value.HTTPStatusCode != 451 { + t.Fatal("unexpected status code") + } + } + + t.Run("on success", func(t *testing.T) { + listener, URL := startServer(t, filtering.HTTPAction451) + defer listener.Close() + resp, saver, err := measureHTTP(t, URL) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + if resp.StatusCode != 451 { + t.Fatal("unexpected status code", resp.StatusCode) + } + events := saver.Read() + if len(events) != 2 { + t.Fatal("unexpected number of events") + } + validateRequest(t, events[0], URL) + validateResponseSuccess(t, events[1], URL) + data, err := netxlite.ReadAllContext(context.Background(), resp.Body) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(data, filtering.HTTPBlockpage451) { + t.Fatal("we cannot re-read the same body") + } + }) + + validateResponseFailure := func(t *testing.T, ev Event, URL *url.URL) { + if _, good := ev.(*EventHTTPTransactionDone); !good { + t.Fatal("invalid event type") + } + if ev.Name() != "http_transaction_done" { + t.Fatal("invalid event name") + } + value := ev.Value() + validateRequestFields(t, value, URL) + if value.Duration <= 0 { + t.Fatal("expected nonzero duration") + } + if value.Err.Error() != "connection_reset" { + t.Fatal("unexpected Err value") + } + if len(value.HTTPResponseHeaders) > 0 { + t.Fatal("expected zero response headers") + } + if !bytes.Equal(value.HTTPResponseBody, nil) { + t.Fatal("unexpected value for response body") + } + if value.HTTPStatusCode != 0 { + t.Fatal("unexpected status code") + } + } + + t.Run("on round trip failure", func(t *testing.T) { + listener, URL := startServer(t, filtering.HTTPActionReset) + defer listener.Close() + resp, saver, err := measureHTTP(t, URL) + if err == nil || err.Error() != "connection_reset" { + t.Fatal("unexpected err", err) + } + if resp != nil { + t.Fatal("expected nil response") + } + events := saver.Read() + if len(events) != 2 { + t.Fatal("unexpected number of events") + } + validateRequest(t, events[0], URL) + validateResponseFailure(t, events[1], URL) + }) + + // Sometimes useful for testing + /* + dumplog := func(t *testing.T, ev Event) { + data, _ := json.MarshalIndent(ev.Value(), " ", " ") + t.Log(string(data)) + t.FailNow() + } + */ + + t.Run("on error reading the response body", func(t *testing.T) { + saver := &Saver{} + expected := errors.New("mocked error") + txp := SaverTransactionHTTPTransport{ + HTTPTransport: &mocks.HTTPTransport{ + MockRoundTrip: func(req *http.Request) (*http.Response, error) { + return &http.Response{ + Header: http.Header{ + "Server": {"antani"}, + }, + StatusCode: 200, + Body: io.NopCloser(&mocks.Reader{ + MockRead: func(b []byte) (int, error) { + return 0, expected + }, + }), + }, nil + }, + MockNetwork: func() string { + return "tcp" + }, }, - }, - SnapshotSize: 4, - Saver: saver, - } - expected := errors.New("mocked error") - body := FakeBody{Err: expected} - req, err := http.NewRequest("POST", "http://x.org/y", body) - if err != nil { - t.Fatal(err) - } - resp, err := txp.RoundTrip(req) - if !errors.Is(err, expected) { - t.Fatal("not the error we expected") - } - if resp != nil { - t.Fatal("expected nil response") - } - ev := saver.Read() - if len(ev) != 0 { - t.Fatal("unexpected number of events") - } + SnapshotSize: 4, + Saver: saver, + } + URL := &url.URL{ + Scheme: "http", + Host: "127.0.0.1:9050", + } + req, err := http.NewRequest("GET", URL.String(), nil) + if err != nil { + t.Fatal(err) + } + req.Header.Add("User-Agent", "miniooni") + resp, err := txp.RoundTrip(req) + if !errors.Is(err, expected) { + t.Fatal("not the error we expected") + } + if resp != nil { + t.Fatal("expected nil response") + } + ev := saver.Read() + validateRequest(t, ev[0], URL) + if ev[1].Value().HTTPStatusCode != 200 { + t.Fatal("invalid status code") + } + if ev[1].Value().HTTPResponseHeaders.Get("Server") != "antani" { + t.Fatal("invalid Server header") + } + if ev[1].Value().Err.Error() != "unknown_failure: mocked error" { + t.Fatal("invalid error") + } + }) } -func TestSaverBodyRoundTripError(t *testing.T) { - saver := new(Saver) - expected := errors.New("mocked error") - txp := SaverBodyHTTPTransport{ - HTTPTransport: FakeTransport{ - Err: expected, - }, - SnapshotSize: 4, - Saver: saver, - } - body := strings.NewReader("deadbeef") - req, err := http.NewRequest("POST", "http://x.org/y", body) - if err != nil { - t.Fatal(err) - } - resp, err := txp.RoundTrip(req) - if !errors.Is(err, expected) { - t.Fatal("not the error we expected") - } - if resp != nil { - t.Fatal("expected nil response") - } - ev := saver.Read() - if len(ev) != 1 { - t.Fatal("unexpected number of events") - } - if string(ev[0].Value().Data) != "dead" { - t.Fatal("invalid Data") - } - if ev[0].Value().DataIsTruncated != true { - t.Fatal("invalid DataIsTruncated") - } - if ev[0].Name() != "http_request_body_snapshot" { - t.Fatal("invalid Name") - } - if ev[0].Value().Time.After(time.Now()) { - t.Fatal("invalid Time") - } -} - -func TestSaverBodyResponseReadError(t *testing.T) { - saver := new(Saver) - expected := errors.New("mocked error") - txp := SaverBodyHTTPTransport{ - HTTPTransport: FakeTransport{ - Func: func(req *http.Request) (*http.Response, error) { - return &http.Response{ - StatusCode: 200, - Body: FakeBody{ - Err: expected, - }, - }, nil - }, - }, - SnapshotSize: 4, - Saver: saver, - } - body := strings.NewReader("deadbeef") - req, err := http.NewRequest("POST", "http://x.org/y", body) - if err != nil { - t.Fatal(err) - } - resp, err := txp.RoundTrip(req) - if !errors.Is(err, expected) { - t.Fatal("not the error we expected") - } - if resp != nil { - t.Fatal("expected nil response") - } - ev := saver.Read() - if len(ev) != 1 { - t.Fatal("unexpected number of events") - } - if string(ev[0].Value().Data) != "dead" { - t.Fatal("invalid Data") - } - if ev[0].Value().DataIsTruncated != true { - t.Fatal("invalid DataIsTruncated") - } - if ev[0].Name() != "http_request_body_snapshot" { - t.Fatal("invalid Name") - } - if ev[0].Value().Time.After(time.Now()) { - t.Fatal("invalid Time") - } -} - -func TestCloneHeaders(t *testing.T) { +func TestHTTPCloneRequestHeaders(t *testing.T) { t.Run("with req.Host set", func(t *testing.T) { req := &http.Request{ Host: "www.example.com", @@ -414,55 +269,3 @@ func TestCloneHeaders(t *testing.T) { } }) } - -type FakeDialer struct { - Conn net.Conn - Err error -} - -func (d FakeDialer) DialContext(ctx context.Context, network, address string) (net.Conn, error) { - time.Sleep(10 * time.Microsecond) - return d.Conn, d.Err -} - -type FakeTransport struct { - Name string - Err error - Func func(*http.Request) (*http.Response, error) - Resp *http.Response -} - -func (txp FakeTransport) Network() string { - return txp.Name -} - -func (txp FakeTransport) RoundTrip(req *http.Request) (*http.Response, error) { - time.Sleep(10 * time.Microsecond) - if txp.Func != nil { - return txp.Func(req) - } - if req.Body != nil { - netxlite.ReadAllContext(req.Context(), req.Body) - req.Body.Close() - } - if txp.Err != nil { - return nil, txp.Err - } - txp.Resp.Request = req // non thread safe but it doesn't matter - return txp.Resp, nil -} - -func (txp FakeTransport) CloseIdleConnections() {} - -type FakeBody struct { - Err error -} - -func (fb FakeBody) Read(p []byte) (int, error) { - time.Sleep(10 * time.Microsecond) - return 0, fb.Err -} - -func (fb FakeBody) Close() error { - return nil -} diff --git a/internal/netxlite/filtering/http.go b/internal/netxlite/filtering/http.go index 6097fca..96b71ff 100644 --- a/internal/netxlite/filtering/http.go +++ b/internal/netxlite/filtering/http.go @@ -51,7 +51,8 @@ func (p *HTTPProxy) Start(address string) (net.Listener, error) { return listener, nil } -var httpBlockpage451 = []byte(` +// HTTPBlockPage451 is the block page returned along with status 451 +var HTTPBlockpage451 = []byte(` 451 Unavailable For Legal Reasons

451 Unavailable For Legal Reasons

@@ -80,7 +81,7 @@ func (p *HTTPProxy) handle(w http.ResponseWriter, r *http.Request) { p.hijack(w, r, policy) case HTTPAction451: w.WriteHeader(http.StatusUnavailableForLegalReasons) - w.Write(httpBlockpage451) + w.Write(HTTPBlockpage451) default: w.WriteHeader(http.StatusInternalServerError) }