ooni-probe-cli/internal/experiment/webconnectivity/cleartextflow.go

296 lines
7.9 KiB
Go
Raw Normal View History

package webconnectivity
//
// CleartextFlow
//
// Generated by `boilerplate' using the http template.
//
import (
"context"
"io"
"net"
"net/http"
"net/url"
"sync"
"time"
"github.com/ooni/probe-cli/v3/internal/atomicx"
"github.com/ooni/probe-cli/v3/internal/measurexlite"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/netxlite"
)
// CleartextFlow measures HTTP endpoints.
//
// A flow TCP-connects to Address and, when PrioSelector grants permission,
// issues a cleartext HTTP GET over that connection, optionally following
// redirects. Use Start to run the flow in a background goroutine or Run
// to run it in the current goroutine.
//
// The zero value of this structure IS NOT valid and you MUST initialize
// all the fields marked as MANDATORY before using this structure.
type CleartextFlow struct {
// Address is the MANDATORY address to connect to. It is dialed using the
// "tcp" network and parsed with net.SplitHostPort, so it must be in
// host:port form.
Address string
// DNSCache is the MANDATORY DNS cache. This flow only forwards it to the
// DNSResolvers it creates when following a redirect.
DNSCache *DNSCache
// IDGenerator is the MANDATORY atomic int64 to generate task IDs.
IDGenerator *atomicx.Int64
// Logger is the MANDATORY logger to use.
Logger model.Logger
// NumRedirects is the MANDATORY counter of the number of redirects. It is
// consulted before following a redirect.
NumRedirects *NumRedirects
// TestKeys is MANDATORY and contains the TestKeys. This flow appends TCP
// connect results, network events, and HTTP requests to it.
TestKeys *TestKeys
// ZeroTime is the MANDATORY measurement's zero time.
ZeroTime time.Time
// WaitGroup is the MANDATORY wait group this task belongs to. Start
// increments it and the background goroutine decrements it when done.
WaitGroup *sync.WaitGroup
// CookieJar contains the OPTIONAL cookie jar, used for redirects. When
// set, matching cookies are added to the request and cookies found in
// the response are stored into the jar.
CookieJar http.CookieJar
// FollowRedirects is OPTIONAL and instructs this flow
// to follow HTTP redirects (if any).
FollowRedirects bool
// HostHeader is the OPTIONAL host header to use. When empty, the host
// part of Address is used to build the request URL.
HostHeader string
// PrioSelector is the OPTIONAL priority selector to use to determine
// whether this flow is allowed to fetch the webpage. When this field
// is nil, the flow stops right after the TCP connect.
PrioSelector *prioritySelector
// Referer contains the OPTIONAL referer, used for redirects. When empty,
// a failure to build the HTTP request is flagged as a fundamental failure.
Referer string
// UDPAddress is the OPTIONAL address of the UDP resolver to use. If this
// field is not set we use a default one (e.g., `8.8.8.8:53`).
UDPAddress string
// URLPath is the OPTIONAL URL path.
URLPath string
// URLRawQuery is the OPTIONAL URL raw query.
URLRawQuery string
}
// Start launches this task in a background goroutine and returns
// immediately. The task registers itself with t.WaitGroup before the
// goroutine is spawned and signals completion when Run returns. The task
// index passed to Run is obtained from t.IDGenerator.
func (t *CleartextFlow) Start(ctx context.Context) {
	t.WaitGroup.Add(1)
	taskID := t.IDGenerator.Add(1)
	go func(id int64) {
		// let the parent know we are done once Run returns
		defer t.WaitGroup.Done()
		t.Run(ctx, id)
	}(taskID)
}
// Run executes this task synchronously in the calling goroutine.
//
// The steps are:
//
// 1. TCP connect to t.Address (10 seconds timeout), saving the connect
// results into t.TestKeys;
//
// 2. stop unless t.PrioSelector is set and grants permission to fetch;
//
// 3. GET the webpage over the established connection (10 seconds timeout
// for the whole HTTP transaction);
//
// 4. possibly follow redirects.
//
// The index argument identifies this task in the trace and in the logs.
func (t *CleartextFlow) Run(parentCtx context.Context, index int64) {
	const (
		connectTimeout = 10 * time.Second
		fetchTimeout   = 10 * time.Second
		noALPN         = "" // we're not using TLS, hence there is no ALPN
	)

	// the trace collects the events generated by this task
	trace := measurexlite.NewTrace(index, t.ZeroTime)

	// the operation logger tells the user what we are doing
	opLogger := measurexlite.NewOperationLogger(
		t.Logger, "[#%d] GET http://%s using %s", index, t.HostHeader, t.Address,
	)

	// step 1: TCP connect
	dialCtx, dialCancel := context.WithTimeout(parentCtx, connectTimeout)
	defer dialCancel()
	dialer := trace.NewDialerWithoutResolver(t.Logger)
	conn, err := dialer.DialContext(dialCtx, "tcp", t.Address)
	t.TestKeys.AppendTCPConnectResults(trace.TCPConnects()...)
	if err != nil {
		opLogger.Stop(err)
		return
	}
	// from now on, when we leave this function, we save the network
	// events collected so far and then close the connection
	defer func() {
		t.TestKeys.AppendNetworkEvents(trace.NetworkEvents()...)
		conn.Close()
	}()

	// step 2: only continue when we have explicit permission to fetch
	mayFetch := t.PrioSelector != nil && t.PrioSelector.permissionToFetch(t.Address)
	if !mayFetch {
		opLogger.Stop("stop after TCP connect")
		return
	}

	// step 3: build a transport bound to the connection we have just
	// established and use it to perform the HTTP transaction
	txp := netxlite.NewHTTPTransport(
		t.Logger,
		netxlite.NewSingleUseDialer(conn),
		netxlite.NewNullTLSDialer(),
	)
	fetchCtx, fetchCancel := context.WithTimeout(parentCtx, fetchTimeout)
	defer fetchCancel()
	req, err := t.newHTTPRequest(fetchCtx)
	if err != nil {
		// An empty referer means the failing URL comes from our backend
		// or from the user, so this is a fundamental failure. Otherwise
		// the URL was provided by a website and we do not flag a
		// fundamental failure, because we want the measurement submitted.
		if t.Referer == "" {
			t.TestKeys.SetFundamentalFailure(err)
		}
		opLogger.Stop(err)
		return
	}
	resp, _, err := t.httpTransaction(
		fetchCtx, "tcp", t.Address, noALPN, txp, req, trace,
	)
	if err != nil {
		opLogger.Stop(err)
		return
	}

	// step 4: if enabled, follow possible redirects
	t.maybeFollowRedirects(parentCtx, resp)

	// TODO: insert here additional code if needed (the response body
	// returned by httpTransaction is currently unused)

	// completed successfully
	opLogger.Stop(nil)
}
// urlHost computes the host (including the port, when needed) to include
// into the request URL for the given scheme.
//
// The host name is t.HostHeader or, when that is empty, the host part of
// t.Address. The port is the one in t.Address and is omitted when it is
// the default port for the scheme (80 for "http").
//
// This function takes care of the following corner cases:
//
// 1. a t.HostHeader that already is in host:port form is returned as
// is rather than getting a second port appended;
//
// 2. a t.HostHeader that is a bracketed IPv6 literal (e.g., "[::1]")
// does not get a second pair of brackets;
//
// 3. an IPv6 literal is always wrapped in square brackets, also when we
// omit the port, because a bare IPv6 literal is not a valid URL host.
//
// It returns an error when t.Address is not in host:port form.
func (t *CleartextFlow) urlHost(scheme string) (string, error) {
	addr, port, err := net.SplitHostPort(t.Address)
	if err != nil {
		t.Logger.Warnf("BUG: net.SplitHostPort failed for %s: %s", t.Address, err.Error())
		return "", err
	}
	host := addr
	if t.HostHeader != "" {
		host = t.HostHeader
		// A host header already containing a port can be used as is.
		if _, _, err := net.SplitHostPort(host); err == nil {
			return host, nil
		}
		// Strip the brackets around an IPv6 literal: net.JoinHostPort
		// below adds them back and would otherwise double them.
		if n := len(host); n > 2 && host[0] == '[' && host[n-1] == ']' {
			host = host[1 : n-1]
		}
	}
	// net.JoinHostPort wraps into brackets any host containing a colon.
	hostport := net.JoinHostPort(host, port)
	if port == "80" && scheme == "http" {
		// Default port: drop the trailing ":80" while keeping any
		// bracket that net.JoinHostPort may have added.
		return hostport[:len(hostport)-len(":80")], nil
	}
	return hostport, nil
}
// newHTTPRequest builds the GET request for this flow.
//
// The request URL uses the "http" scheme, the host computed by urlHost,
// t.URLPath, and t.URLRawQuery. The request carries the headers we always
// send, uses t.HostHeader as the Host, and includes the cookies that
// t.CookieJar (if any) holds for the URL. The given ctx is bound to the
// returned request.
func (t *CleartextFlow) newHTTPRequest(ctx context.Context) (*http.Request, error) {
	const scheme = "http"
	host, err := t.urlHost(scheme)
	if err != nil {
		return nil, err
	}
	target := &url.URL{
		Scheme:   scheme,
		Host:     host,
		Path:     t.URLPath,
		RawQuery: t.URLRawQuery,
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, target.String(), nil)
	if err != nil {
		return nil, err
	}
	req.Host = t.HostHeader
	headers := []struct {
		name  string
		value string
	}{
		{"Host", t.HostHeader},
		{"Accept", model.HTTPHeaderAccept},
		{"Accept-Language", model.HTTPHeaderAcceptLanguage},
		{"Referer", t.Referer},
		{"User-Agent", model.HTTPHeaderUserAgent},
	}
	for _, header := range headers {
		req.Header.Set(header.name, header.value)
	}
	if t.CookieJar == nil {
		return req, nil
	}
	for _, cookie := range t.CookieJar.Cookies(target) {
		req.AddCookie(cookie)
	}
	return req, nil
}
// httpTransaction performs the round trip for req using txp, reads at
// most maxbody (1<<19) bytes of the response body, and saves the results
// into t.TestKeys.
//
// More specifically, this function appends to t.TestKeys:
//
// 1. an "http_transaction_start" annotation before the round trip;
//
// 2. an "http_transaction_done" annotation once the round trip and the
// body read have completed (or failed);
//
// 3. the archival HTTP request result describing the transaction.
//
// When t.CookieJar is set, cookies found in the response are stored into
// it. The response body is closed before returning. The returned error
// is either the round trip error or the error reading the body.
func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, alpn string,
	txp model.HTTPTransport, req *http.Request, trace *measurexlite.Trace) (*http.Response, []byte, error) {
	const maxbody = 1 << 19

	var body []byte

	t0 := trace.TimeSince(trace.ZeroTime)
	startEvent := measurexlite.NewAnnotationArchivalNetworkEvent(
		trace.Index, t0, "http_transaction_start",
	)
	t.TestKeys.AppendNetworkEvents(startEvent)

	resp, err := txp.RoundTrip(req)
	if err == nil {
		defer resp.Body.Close()
		if t.CookieJar != nil {
			if cookies := resp.Cookies(); len(cookies) > 0 {
				t.CookieJar.SetCookies(req.URL, cookies)
			}
		}
		// never read more than maxbody bytes of the response body
		body, err = StreamAllContext(ctx, io.LimitReader(resp.Body, maxbody))
	}

	t1 := trace.TimeSince(trace.ZeroTime)
	doneEvent := measurexlite.NewAnnotationArchivalNetworkEvent(
		trace.Index, t1, "http_transaction_done",
	)
	t.TestKeys.AppendNetworkEvents(doneEvent)

	result := measurexlite.NewArchivalHTTPRequestResult(
		trace.Index, t0, network, address, alpn, txp.Network(),
		req, resp, maxbody, body, err, t1,
	)
	t.TestKeys.AppendRequests(result)

	return resp, body, err
}
// maybeFollowRedirects follows the redirect in resp, if any, provided
// that t.FollowRedirects is set and t.NumRedirects allows one more
// redirect. To follow a redirect, it starts a new DNSResolvers task for
// the redirect location, which shares this flow's cookie jar, DNS cache,
// ID generator, logger, redirects counter, test keys, zero time, and wait
// group, and which uses the URL of the request that generated resp as
// the referer.
func (t *CleartextFlow) maybeFollowRedirects(ctx context.Context, resp *http.Response) {
	if !t.FollowRedirects {
		return // not configured
	}
	if !t.NumRedirects.CanFollowOneMoreRedirect() {
		return // too many redirects
	}

	// NOTE(review): 303 (See Other) is not among the status codes we
	// follow -- confirm this is intentional.
	switch resp.StatusCode {
	case http.StatusMovedPermanently,
		http.StatusFound,
		http.StatusTemporaryRedirect,
		http.StatusPermanentRedirect:
		// this is a redirect we may follow
	default:
		return // no redirect to follow
	}

	target, err := resp.Location()
	if err != nil {
		return // broken response from server
	}
	t.Logger.Infof("redirect to: %s", target.String())

	next := &DNSResolvers{
		// state shared with this flow
		CookieJar:    t.CookieJar,
		DNSCache:     t.DNSCache,
		IDGenerator:  t.IDGenerator,
		Logger:       t.Logger,
		NumRedirects: t.NumRedirects,
		TestKeys:     t.TestKeys,
		UDPAddress:   t.UDPAddress,
		WaitGroup:    t.WaitGroup,
		ZeroTime:     t.ZeroTime,

		// settings specific to the redirect
		Domain:  target.Hostname(),
		Referer: resp.Request.URL.String(),
		URL:     target,

		// no need to issue another control request
		Session: nil,
		THAddr:  "",
	}
	next.Start(ctx)
}