ooni-probe-cli/internal/experiment/webconnectivity/cleartextflow.go
Simone Basso 5ade2d9568
fix(webconnectivity@v0.5): include http transaction start/done (#943)
Code based on urlgetter had this event and we would like to have this
event with step-by-step code as well.

Because there's no tracing for HTTP when using step-by-step, we will
need to include emitting these events inside the boilerplate.

By doing that, we emit events out of order, so make sure we sort
them by T, which is "the moment when the event was collected".

Part of https://github.com/ooni/probe/issues/2238
2022-09-08 10:37:08 +02:00

294 lines
7.6 KiB
Go

package webconnectivity
//
// CleartextFlow
//
// Generated by `boilerplate' using the http template.
//
import (
"context"
"io"
"net"
"net/http"
"net/url"
"sync"
"time"
"github.com/ooni/probe-cli/v3/internal/atomicx"
"github.com/ooni/probe-cli/v3/internal/measurexlite"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
// CleartextFlow measures HTTP endpoints: it connects to a TCP endpoint,
// performs a cleartext GET over the established connection and, if
// configured, starts the resolution of the redirect target.
//
// The zero value of this structure IS NOT valid and you MUST initialize
// all the fields marked as MANDATORY before using this structure.
type CleartextFlow struct {
// Address is the MANDATORY address to connect to. Run dials it using
// "tcp" and urlHost splits it using net.SplitHostPort, so it needs
// to be in the "host:port" form.
Address string
// DNSCache is the MANDATORY DNS cache. This flow only hands it over
// to the DNSResolvers task started when following a redirect.
DNSCache *DNSCache
// IDGenerator is the MANDATORY atomic int64 to generate task IDs. Start
// increments it to obtain the index passed to Run.
IDGenerator *atomicx.Int64
// Logger is the MANDATORY logger to use.
Logger model.Logger
// Sema is the MANDATORY semaphore to allow just a single
// connection to perform the HTTP transaction. Run performs a
// non-blocking receive on it after the TCP connect; when nothing
// can be received, the flow stops without issuing the HTTP request.
Sema <-chan any
// TestKeys is MANDATORY and contains the TestKeys. This is where
// TCP connect results, network events and HTTP requests are appended.
TestKeys *TestKeys
// ZeroTime is the MANDATORY measurement's zero time.
ZeroTime time.Time
// WaitGroup is the MANDATORY wait group this task belongs to. Start
// adds one to it and marks it as done when Run returns.
WaitGroup *sync.WaitGroup
// CookieJar contains the OPTIONAL cookie jar, used for redirects. When
// set, its cookies are added to the request and the cookies found
// in the response are stored into it.
CookieJar http.CookieJar
// FollowRedirects is OPTIONAL and instructs this flow
// to follow HTTP redirects (if any).
FollowRedirects bool
// HostHeader is the OPTIONAL host header to use. It is also used as the
// host of the request URL; when empty, the URL host falls back to the
// host part of Address.
HostHeader string
// Referer contains the OPTIONAL referer, used for redirects. When empty,
// a failure to build the HTTP request is flagged as fundamental.
Referer string
// UDPAddress is the OPTIONAL address of the UDP resolver to use. If this
// field is not set we use a default one (e.g., `8.8.8.8:53`). This flow
// only forwards the value to the DNSResolvers task used for redirects.
UDPAddress string
// URLPath is the OPTIONAL URL path.
URLPath string
// URLRawQuery is the OPTIONAL URL raw query.
URLRawQuery string
}
// Start registers this task with the wait group, reserves a fresh task
// index, and executes Run in a background goroutine.
func (t *CleartextFlow) Start(ctx context.Context) {
	t.WaitGroup.Add(1)
	// The goroutine argument is evaluated here, in the caller's goroutine,
	// so the index is reserved before the background task begins.
	go func(taskIndex int64) {
		defer t.WaitGroup.Done() // let the parent know we have finished
		t.Run(ctx, taskIndex)
	}(t.IDGenerator.Add(1))
}
// Run executes this task synchronously, in the calling goroutine.
//
// The steps are: TCP connect to t.Address; non-blocking attempt to take
// a token from t.Sema; GET over the established connection; optionally,
// redirect handling. All the observations are appended to t.TestKeys.
func (t *CleartextFlow) Run(parentCtx context.Context, index int64) {
	const (
		tcpTimeout  = 10 * time.Second
		httpTimeout = 10 * time.Second
		alpn        = "" // cleartext HTTP means no TLS and hence no ALPN
	)

	// the trace collects the events and the operation logger reports progress
	trace := measurexlite.NewTrace(index, t.ZeroTime)
	ol := measurexlite.NewOperationLogger(
		t.Logger, "[#%d] GET http://%s using %s", index, t.HostHeader, t.Address,
	)

	// step 1: establish the TCP connection
	dialCtx, dialCancel := context.WithTimeout(parentCtx, tcpTimeout)
	defer dialCancel()
	conn, err := trace.NewDialerWithoutResolver(t.Logger).DialContext(dialCtx, "tcp", t.Address)
	// the connect result is saved regardless of the dial outcome
	t.TestKeys.AppendTCPConnectResults(trace.TCPConnects()...)
	if err != nil {
		ol.Stop(err)
		return
	}
	defer func() {
		// save the network events first and only then close the connection
		t.TestKeys.AppendNetworkEvents(trace.NetworkEvents()...)
		conn.Close()
	}()

	// step 2: only the flows that get a token may _use_ the connection
	select {
	case <-t.Sema:
		// we hold the token, so let's continue with HTTP
	default:
		ol.Stop(nil)
		return
	}

	// step 3: build a transport bound to the connection we have just created
	txp := netxlite.NewHTTPTransport(
		t.Logger,
		netxlite.NewSingleUseDialer(conn),
		netxlite.NewNullTLSDialer(),
	)

	// step 4: build the request
	reqCtx, reqCancel := context.WithTimeout(parentCtx, httpTimeout)
	defer reqCancel()
	req, err := t.newHTTPRequest(reqCtx)
	if err != nil {
		// With an empty referer, the URL that failed comes from our backend
		// or from the user, hence this is a fundamental failure. Otherwise,
		// the URL was provided by a website while redirecting and we do not
		// flag the failure as fundamental, because we still want to see
		// the measurement submitted.
		if t.Referer == "" {
			t.TestKeys.SetFundamentalFailure(err)
		}
		ol.Stop(err)
		return
	}

	// step 5: run the HTTP transaction (the body is currently not used here)
	resp, _, err := t.httpTransaction(reqCtx, "tcp", t.Address, alpn, txp, req, trace)
	if err != nil {
		ol.Stop(err)
		return
	}

	// step 6: if enabled, follow possible redirects
	t.maybeFollowRedirects(parentCtx, resp)

	// TODO: insert here additional code if needed

	ol.Stop(nil) // completed successfully
}
// urlHost computes the host to include into the URL.
//
// The scheme argument is the URL scheme (e.g., "http"). The returned
// host is t.HostHeader or, when that is empty, the host part of t.Address.
// The port of t.Address is appended unless it is the default port for the
// scheme or the chosen host already includes an explicit port.
//
// An error is returned (and a warning is logged) when t.Address cannot
// be parsed by net.SplitHostPort.
func (t *CleartextFlow) urlHost(scheme string) (string, error) {
	addr, port, err := net.SplitHostPort(t.Address)
	if err != nil {
		t.Logger.Warnf("BUG: net.SplitHostPort failed for %s: %s", t.Address, err.Error())
		return "", err
	}
	urlHost := t.HostHeader
	if urlHost == "" {
		urlHost = addr
	}
	if port == "80" && scheme == "http" {
		return urlHost, nil
	}
	// A host header for a non-default port may already include the port
	// (e.g., "example.com:8080"). Passing such a value to net.JoinHostPort
	// would treat the colon as belonging to an IPv6 literal and produce
	// "[example.com:8080]:8080", so we use the value as-is in this case.
	if _, hostPort, splitErr := net.SplitHostPort(urlHost); splitErr == nil && hostPort != "" {
		return urlHost, nil
	}
	return net.JoinHostPort(urlHost, port), nil
}
// newHTTPRequest builds the GET request for this flow.
//
// The URL uses the "http" scheme, the host computed by urlHost, and the
// configured path and raw query. The request carries the configured host
// header, the referer, the standard headers taken from the model package
// and, when a cookie jar is set, the cookies the jar holds for the URL.
func (t *CleartextFlow) newHTTPRequest(ctx context.Context) (*http.Request, error) {
	const scheme = "http"
	host, err := t.urlHost(scheme)
	if err != nil {
		return nil, err
	}
	target := &url.URL{
		Scheme:   scheme,
		Host:     host,
		Path:     t.URLPath,
		RawQuery: t.URLRawQuery,
	}
	req, err := http.NewRequestWithContext(ctx, "GET", target.String(), nil)
	if err != nil {
		return nil, err
	}
	headers := []struct{ name, value string }{
		{"Host", t.HostHeader},
		{"Accept", model.HTTPHeaderAccept},
		{"Accept-Language", model.HTTPHeaderAcceptLanguage},
		{"Referer", t.Referer},
		{"User-Agent", model.HTTPHeaderUserAgent},
	}
	for _, hdr := range headers {
		req.Header.Set(hdr.name, hdr.value)
	}
	req.Host = t.HostHeader
	if jar := t.CookieJar; jar != nil {
		for _, c := range jar.Cookies(target) {
			req.AddCookie(c)
		}
	}
	return req, nil
}
// httpTransaction performs the round trip and records what happened.
//
// It appends the "http_transaction_start" and "http_transaction_done"
// annotations to the network events, stores the response cookies into
// the cookie jar (if any), reads up to 1<<19 bytes of the response body,
// and appends the archival request result to the test keys.
//
// It returns the response, the body bytes that were read, and the error
// of the round trip or of reading the body.
func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, alpn string,
	txp model.HTTPTransport, req *http.Request, trace *measurexlite.Trace) (*http.Response, []byte, error) {
	const bodyLimit = 1 << 19

	// record when the transaction begins
	begin := trace.TimeSince(trace.ZeroTime)
	t.TestKeys.AppendNetworkEvents(
		measurexlite.NewAnnotationArchivalNetworkEvent(trace.Index, begin, "http_transaction_start"),
	)

	resp, err := txp.RoundTrip(req)
	var payload []byte
	if err == nil {
		// the body stays open until this function returns
		defer resp.Body.Close()
		received := resp.Cookies()
		if len(received) > 0 && t.CookieJar != nil {
			t.CookieJar.SetCookies(req.URL, received)
		}
		payload, err = netxlite.ReadAllContext(ctx, io.LimitReader(resp.Body, bodyLimit))
	}

	// record when the transaction ends
	end := trace.TimeSince(trace.ZeroTime)
	t.TestKeys.AppendNetworkEvents(
		measurexlite.NewAnnotationArchivalNetworkEvent(trace.Index, end, "http_transaction_done"),
	)

	// save the archival representation of the whole transaction
	t.TestKeys.AppendRequests(measurexlite.NewArchivalHTTPRequestResult(
		trace.Index,
		begin,
		network,
		address,
		alpn,
		txp.Network(),
		req,
		resp,
		bodyLimit,
		payload,
		err,
		end,
	))
	return resp, payload, err
}
// maybeFollowRedirects starts resolving the redirect target when the flow
// is configured to follow redirects and the response status code is one
// of 301, 302, 307 or 308 with a usable Location.
func (t *CleartextFlow) maybeFollowRedirects(ctx context.Context, resp *http.Response) {
	if !t.FollowRedirects {
		return // following redirects is disabled
	}
	switch resp.StatusCode {
	case http.StatusMovedPermanently, // 301
		http.StatusFound,             // 302
		http.StatusTemporaryRedirect, // 307
		http.StatusPermanentRedirect: // 308
		// this is a redirect, so continue below
	default:
		return // nothing to follow
	}
	target, err := resp.Location()
	if err != nil {
		return // the server sent a broken response
	}
	t.Logger.Infof("redirect to: %s", target.String())
	next := &DNSResolvers{
		CookieJar:   t.CookieJar,
		DNSCache:    t.DNSCache,
		Domain:      target.Hostname(),
		IDGenerator: t.IDGenerator,
		Logger:      t.Logger,
		TestKeys:    t.TestKeys,
		URL:         target,
		ZeroTime:    t.ZeroTime,
		WaitGroup:   t.WaitGroup,
		Referer:     resp.Request.URL.String(),
		Session:     nil, // there is no need to issue another control request
		THAddr:      "",  // ditto
		UDPAddress:  t.UDPAddress,
	}
	next.Start(ctx)
}