Simone Basso 6212daa54a
fix(tracex): generate archival from single transaction-done event (#780)
Tracex contained some fragile code that assembled HTTP measurements
from scattered events, which worked because we were sure we were
performing a single measurement at any given time.

This diff restructures the code to emit only transaction-start and
transaction-done events. We have basically removed all the other
events (which we were not using). We kept the transaction-start
event, though, because it may be useful to see when reading events. In
any case, what matters here is that we're now using the transaction-done
event alone to generate the archival HTTP measurement.
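
To illustrate the new flow with a hedged sketch (not code from this
diff): because the transaction-done event carries the whole EventValue,
a single conversion function is enough to build the archival entry. The
archivalHTTPRequestResult type and the newArchivalHTTPRequestResult
helper below are simplified, hypothetical stand-ins for the real model
types, and the sketch assumes it lives in the tracex package next to
EventValue.

// archivalHTTPRequestResult is a simplified, hypothetical stand-in
// for the real archival data format.
type archivalHTTPRequestResult struct {
	Method          string
	URL             string
	StatusCode      int
	BodyIsTruncated bool
	Failure         *string
}

// newArchivalHTTPRequestResult (hypothetical) shows that the
// transaction-done event alone contains every field we need.
func newArchivalHTTPRequestResult(ev *EventValue) *archivalHTTPRequestResult {
	var failure *string
	if ev.Err != nil { // assuming Err holds an error, as RoundTrip suggests
		s := ev.Err.Error()
		failure = &s
	}
	return &archivalHTTPRequestResult{
		Method:          ev.HTTPMethod,
		URL:             ev.HTTPURL,
		StatusCode:      ev.HTTPStatusCode,
		BodyIsTruncated: ev.HTTPResponseBodyIsTruncated,
		Failure:         failure,
	}
}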

Hence, the original issue has been addressed. We will possibly
do more refactoring in the future, but for now this seems sufficient.

Part of https://github.com/ooni/probe/issues/2121
2022-06-01 19:27:47 +02:00


package tracex

//
// HTTP
//

import (
	"bytes"
	"io"
	"net/http"
	"time"

	"github.com/ooni/probe-cli/v3/internal/model"
	"github.com/ooni/probe-cli/v3/internal/netxlite"
)

// httpCloneRequestHeaders returns a clone of the request headers where
// we have also set the Host header, which Go normally does not set
// until it serializes the request itself.
func httpCloneRequestHeaders(req *http.Request) http.Header {
	header := req.Header.Clone()
	if req.Host != "" {
		header.Set("Host", req.Host)
	} else {
		header.Set("Host", req.URL.Host)
	}
	return header
}

// SaverTransactionHTTPTransport is a RoundTripper that saves
// events related to the HTTP transaction.
type SaverTransactionHTTPTransport struct {
	// HTTPTransport is the underlying HTTP transport.
	model.HTTPTransport

	// Saver is the saver to which we write events.
	Saver *Saver

	// SnapshotSize is the maximum body snapshot size (we use a
	// reasonable default when it is zero or negative).
	SnapshotSize int64
}

// RoundTrip performs the round trip with the underlying transport and
// the given request and saves the results into the saver.
//
// The SnapshotSize field controls the maximum size of the
// body snapshot that we collect along with the HTTP round trip.
func (txp *SaverTransactionHTTPTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	started := time.Now()
	txp.Saver.Write(&EventHTTPTransactionStart{&EventValue{
		HTTPRequestHeaders: httpCloneRequestHeaders(req),
		HTTPMethod:         req.Method,
		HTTPURL:            req.URL.String(),
		Transport:          txp.HTTPTransport.Network(),
		Time:               started,
	}})
	ev := &EventValue{
		HTTPRequestHeaders: httpCloneRequestHeaders(req),
		HTTPMethod:         req.Method,
		HTTPURL:            req.URL.String(),
		Transport:          txp.HTTPTransport.Network(),
		Time:               started,
	}
	// make sure we emit the transaction-done event in every case
	defer txp.Saver.Write(&EventHTTPTransactionDone{ev})
	resp, err := txp.HTTPTransport.RoundTrip(req)
	if err != nil {
		ev.Duration = time.Since(started)
		ev.Err = err
		return nil, err
	}
	ev.HTTPStatusCode = resp.StatusCode
	ev.HTTPResponseHeaders = resp.Header.Clone()
	maxBodySnapshotSize := txp.snapshotSize()
	r := io.LimitReader(resp.Body, maxBodySnapshotSize)
	body, err := netxlite.ReadAllContext(req.Context(), r)
	if err != nil {
		ev.Duration = time.Since(started)
		ev.Err = err
		return nil, err
	}
	resp.Body = &httpReadableAgainBody{ // allow for reading again the whole body
		Reader: io.MultiReader(bytes.NewReader(body), resp.Body),
		Closer: resp.Body,
	}
	ev.Duration = time.Since(started)
	ev.HTTPResponseBody = body
	ev.HTTPResponseBodyIsTruncated = int64(len(body)) >= maxBodySnapshotSize
	return resp, nil
}

// snapshotSize returns the configured body snapshot size or a
// reasonable default (128 KiB) when it is not positive.
func (txp *SaverTransactionHTTPTransport) snapshotSize() int64 {
	if txp.SnapshotSize > 0 {
		return txp.SnapshotSize
	}
	return 1 << 17
}

// httpReadableAgainBody is a response body that allows the caller to
// read again the bytes we already consumed to take the body snapshot.
type httpReadableAgainBody struct {
	io.Reader
	io.Closer
}

// SaverTransactionHTTPTransport implements model.HTTPTransport.
var _ model.HTTPTransport = &SaverTransactionHTTPTransport{}
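
A minimal usage sketch follows, assuming it lives in the tracex
package; how the underlying model.HTTPTransport and the Saver are
constructed is not shown in this file, so both are taken as given here.

// wrapTransport is a hypothetical helper that wraps a transport so
// that each round trip emits transaction-start and transaction-done
// events into the given saver, with a 2048-byte body snapshot limit.
func wrapTransport(base model.HTTPTransport, saver *Saver) model.HTTPTransport {
	return &SaverTransactionHTTPTransport{
		HTTPTransport: base,
		Saver:         saver,
		SnapshotSize:  1 << 11,
	}
}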