package webstepsx // // TH (Test Helper) // // This file contains an implementation of the // (proposed) websteps test helper spec. // import ( "bytes" "context" "encoding/json" "errors" "fmt" "io" "net/http" "net/url" "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/measurex" "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/runtimex" "github.com/ooni/probe-cli/v3/internal/version" ) // // Messages exchanged by the TH client and server // // THClientRequest is the request received by the test helper. type THClientRequest struct { // Endpoints is a list of endpoints to measure. Endpoints []*measurex.Endpoint // URL is the URL we want to measure. URL string // HTTPRequestHeaders contains the request headers. HTTPRequestHeaders http.Header } // THServerResponse is the response from the test helper. type THServerResponse = measurex.THMeasurement // thMaxAcceptableBodySize is the maximum acceptable body size by TH code. const thMaxAcceptableBodySize = 1 << 20 // // TH client implementation // // THClient is the high-level API to invoke the TH. This API // should be used by command line clients. type THClient struct { // DNSServers is the MANDATORY list of DNS-over-UDP // servers to use to discover endpoints locally. DNServers []*measurex.ResolverInfo // HTTPClient is the MANDATORY HTTP client to // use for contacting the TH. HTTPClient model.HTTPClient // ServerURL is the MANDATORY URL of the TH HTTP endpoint. ServerURL string } // Run calls the TH and returns the response or an error. // // Arguments: // // - ctx is the context with timeout/deadline/cancellation // // - URL is the URL the TH server should measure for us // // Algorithm: // // - use DNSServers to discover extra endpoints for the target URL // // - call the TH using the HTTPClient and the ServerURL // // - return response or error. 
func (c *THClient) Run(ctx context.Context, URL string) (*THServerResponse, error) { parsed, err := url.Parse(URL) if err != nil { return nil, err } mx := measurex.NewMeasurerWithDefaultSettings() var dns []*measurex.DNSMeasurement const parallelism = 3 for m := range mx.LookupURLHostParallel(ctx, parallelism, parsed, c.DNServers...) { dns = append(dns, m) } endpoints, err := measurex.AllEndpointsForURL(parsed, dns...) if err != nil { return nil, err } return (&THClientCall{ Endpoints: endpoints, HTTPClient: c.HTTPClient, Header: measurex.NewHTTPRequestHeaderForMeasuring(), THURL: c.ServerURL, TargetURL: URL, }).Call(ctx) } // THClientCall allows to perform a single TH client call. Make sure // you fill all the fields marked as MANDATORY before use. type THClientCall struct { // Endpoints contains the MANDATORY endpoints we discovered. Endpoints []*measurex.Endpoint // HTTPClient is the MANDATORY HTTP client to // use for contacting the TH. HTTPClient model.HTTPClient // Header contains the MANDATORY request headers. Header http.Header // THURL is the MANDATORY test helper URL. THURL string // TargetURL is the MANDATORY URL to measure. TargetURL string // UserAgent is the OPTIONAL user-agent to use. UserAgent string } // Call performs the specified TH call and returns either a response or an error. func (c *THClientCall) Call(ctx context.Context) (*THServerResponse, error) { creq := &THClientRequest{ Endpoints: c.Endpoints, URL: c.TargetURL, HTTPRequestHeaders: c.Header, } reqBody, err := json.Marshal(creq) if err != nil { return nil, err } req, err := http.NewRequestWithContext( ctx, "POST", c.THURL, bytes.NewReader(reqBody)) if err != nil { return nil, err } req.Header.Set("User-Agent", c.UserAgent) return c.httpClientDo(req) } // errTHRequestFailed is the error returned if the TH response is not 200 Ok. 
var errTHRequestFailed = errors.New("th: request failed") func (c *THClientCall) httpClientDo(req *http.Request) (*THServerResponse, error) { resp, err := c.HTTPClient.Do(req) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != 200 { // THHandler returns either 400 or 200 return nil, errTHRequestFailed } r := io.LimitReader(resp.Body, thMaxAcceptableBodySize) respBody, err := netxlite.ReadAllContext(req.Context(), r) if err != nil { return nil, err } var sresp THServerResponse if err := json.Unmarshal(respBody, &sresp); err != nil { return nil, err } return &sresp, nil } // // TH server implementation // // THHandler implements the test helper API. // // This handler exposes a unique HTTP endpoint that you need to // mount to the desired path when creating the server. // // The canonical mount point for the HTTP endpoint is /api/v1/websteps. // // Accepted methods and request body: // // - we only accept POST; // // - we expect a THClientRequest as the body. // // Status code and response body: // // - on success, status is 200 and THServerResponse is the body; // // - on failure, status is 400 and there is no body. // type THHandler struct{} // ServerHTTP implements http.Handler.ServeHTTP. func (h *THHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { w.Header().Add("Server", fmt.Sprintf("oohelperd/%s", version.Version)) if req.Method != "POST" { w.WriteHeader(400) return } reader := io.LimitReader(req.Body, thMaxAcceptableBodySize) data, err := netxlite.ReadAllContext(req.Context(), reader) if err != nil { w.WriteHeader(400) return } var creq THClientRequest if err := json.Unmarshal(data, &creq); err != nil { w.WriteHeader(400) return } cresp, err := h.singleStep(req.Context(), &creq) if err != nil { w.WriteHeader(400) return } // We assume that the following call cannot fail because it's a // clearly serializable data structure. 
data, err = json.Marshal(cresp) runtimex.PanicOnError(err, "json.Marshal failed") w.Header().Add("Content-Type", "application/json") w.Write(data) } // singleStep performs a singleStep measurement. // // The function name derives from the definition (we invented) // of "web steps". Each redirection is a step. For each step you // need to figure out the endpoints to use with the DNS. After // that, you need to check all endpoints. Because here we do not // perform redirection, this is just a single "step". // // The algorithm is the following: // // 1. parse the URL and return error if it does not parse or // the scheme is neither HTTP nor HTTPS; // // 2. discover additional endpoints using a suitable DoH // resolver and the URL's hostname as the domain; // // 3. measure each discovered endpoint. // // The return value is either a THServerResponse or an error. func (h *THHandler) singleStep( ctx context.Context, req *THClientRequest) (*THServerResponse, error) { mx := measurex.NewMeasurerWithDefaultSettings() mx.MeasureURLHelper = &thMeasureURLHelper{req.Endpoints} mx.Resolvers = []*measurex.ResolverInfo{{ Network: measurex.ResolverForeign, ForeignResolver: thResolver, }} jar := measurex.NewCookieJar() const parallelism = 3 meas, err := mx.MeasureURL(ctx, parallelism, req.URL, req.HTTPRequestHeaders, jar) if err != nil { return nil, err } return &THServerResponse{ DNS: meas.DNS, Endpoints: h.simplifyEndpoints(meas.Endpoints), }, nil } func (h *THHandler) simplifyEndpoints( in []*measurex.HTTPEndpointMeasurement) (out []*measurex.HTTPEndpointMeasurement) { for _, epnt := range in { out = append(out, &measurex.HTTPEndpointMeasurement{ URL: epnt.URL, Network: epnt.Network, Address: epnt.Address, Measurement: h.simplifyMeasurement(epnt.Measurement), }) } return } func (h *THHandler) simplifyMeasurement(in *measurex.Measurement) (out *measurex.Measurement) { out = &measurex.Measurement{ Connect: in.Connect, TLSHandshake: h.simplifyHandshake(in.TLSHandshake), 
QUICHandshake: h.simplifyHandshake(in.QUICHandshake), LookupHost: in.LookupHost, LookupHTTPSSvc: in.LookupHTTPSSvc, HTTPRoundTrip: h.simplifyHTTPRoundTrip(in.HTTPRoundTrip), } return } func (h *THHandler) simplifyHandshake( in []*measurex.QUICTLSHandshakeEvent) (out []*measurex.QUICTLSHandshakeEvent) { for _, ev := range in { out = append(out, &measurex.QUICTLSHandshakeEvent{ CipherSuite: ev.CipherSuite, Failure: ev.Failure, NegotiatedProto: ev.NegotiatedProto, TLSVersion: ev.TLSVersion, PeerCerts: nil, Finished: 0, RemoteAddr: ev.RemoteAddr, SNI: ev.SNI, ALPN: ev.ALPN, SkipVerify: ev.SkipVerify, Oddity: ev.Oddity, Network: ev.Network, Started: 0, }) } return } func (h *THHandler) simplifyHTTPRoundTrip( in []*measurex.HTTPRoundTripEvent) (out []*measurex.HTTPRoundTripEvent) { for _, ev := range in { out = append(out, &measurex.HTTPRoundTripEvent{ Failure: ev.Failure, Method: ev.Method, URL: ev.URL, RequestHeaders: ev.RequestHeaders, StatusCode: ev.StatusCode, ResponseHeaders: ev.ResponseHeaders, ResponseBody: nil, // we don't transfer the body ResponseBodyLength: ev.ResponseBodyLength, ResponseBodyIsTruncated: ev.ResponseBodyIsTruncated, ResponseBodyIsUTF8: ev.ResponseBodyIsUTF8, Finished: ev.Finished, Started: ev.Started, Oddity: ev.Oddity, }) } return } type thMeasureURLHelper struct { epnts []*measurex.Endpoint } func (thh *thMeasureURLHelper) LookupExtraHTTPEndpoints( ctx context.Context, URL *url.URL, headers http.Header, serverEpnts ...*measurex.HTTPEndpoint) ( epnts []*measurex.HTTPEndpoint, thMeaurement *measurex.THMeasurement, err error) { for _, epnt := range thh.epnts { epnts = append(epnts, &measurex.HTTPEndpoint{ Domain: URL.Hostname(), Network: epnt.Network, Address: epnt.Address, SNI: URL.Hostname(), ALPN: measurex.ALPNForHTTPEndpoint(epnt.Network), URL: URL, Header: headers, // but overriden later anyway }) } return } // thResolverURL is the DNS resolver URL used by the TH. 
// We use an encrypted resolver to reduce the risk that there is
// DNS-over-UDP censorship in the place where we deploy the TH.
const thResolverURL = "https://dns.google/dns-query"

// thResolver is the DNS resolver used by the TH.
//
// It is a DNS-over-HTTPS transport pointing at thResolverURL, wrapped
// into a serial resolver and then by netxlite.WrapResolver. Since this
// is a package-level variable, a single instance is shared by all the
// measurements performed by THHandler, which installs it as the
// foreign resolver of the measurer.
//
// Here we're using github.com/apex/log as the logger, which
// is fine because this is backend only code.
//
// NOTE(review): the transport uses http.DefaultClient, which sets no
// timeout of its own; presumably lookups are bounded by the context
// that callers pass in -- confirm.
var thResolver = netxlite.WrapResolver(log.Log, netxlite.NewSerialResolver(
	netxlite.NewDNSOverHTTPSTransport(http.DefaultClient, thResolverURL),
))