ooni-probe-cli/internal/httpapi/descriptor.go

156 lines
4.8 KiB
Go
Raw Normal View History

feat(webconnectivity): try all the available THs (#980) We introduce a fork of internal/httpx, named internal/httpapi, where there is a clear split between the concept of an API endpoint (such as https://0.th.ooni.org/) and of an API descriptor (such as using `GET` to access /api/v1/test-list/url). Additionally, httpapi allows to create a SequenceCaller that tries to call a given API descriptor using multiple API endpoints. The SequenceCaller will stop once an endpoint works or when all the available endpoints have been tried unsuccessfully. The definition of "success" is the following: we consider "failure" any error that occurs during the HTTP round trip or when reading the response body. We DO NOT consider "failure" errors (1) when parsing the input URL; (2) when the server returns >= 400; (3) when the server returns a string that does not parse as valid JSON. The idea of this classification of failures is that we ONLY want to retry when we see what looks like a network error that may be caused by (collateral or targeted) censorship. We take advantage of the availability of this new package and we refactor web_connectivity@v0.4 and web_connectivity@v0.5 to use a SequenceCaller for calling the web connectivity TH API. This means that we will now try all the available THs advertised by the backend rather than just selecting and using the first one provided by the backend. Because this diff is designed to be backported to the `release/3.16` branch, we have omitted additional changes to always use httpapi where we are currently using httpx. Yet, to remind ourselves about the need to do that, we have deprecated the httpx package. We will rewrite all the code currently using httpx to use httpapi as part of future work. It is also worth noting that httpapi will allow us to refactor the backend code such that (1) we remove code to select a backend URL endpoint at the beginning and (2) we try several endpoints. 
The design of the code is such that we can add to the mix some endpoints that use, as their `http.Client`, a special client that goes through a tunnel. This will allow us to automatically fall back when performing backend queries. Closes https://github.com/ooni/probe/issues/2353. Related to https://github.com/ooni/probe/issues/1519.
2022-11-21 16:28:53 +01:00
package httpapi
//
// HTTP API descriptor (e.g., GET /api/v1/test-list/urls)
//
import (
"encoding/json"
"net/http"
"net/url"
"time"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/runtimex"
)
// Descriptor contains the parameters for calling a given HTTP
// API (e.g., GET /api/v1/test-list/urls). It describes the request
// itself (method, path, query, headers, body) and per-call limits,
// but not the endpoint (base URL) the request is sent to.
//
// The zero value of this struct is invalid. Please, fill all the
// fields marked as MANDATORY for correct initialization, or use
// one of the New*Descriptor factories defined in this file.
type Descriptor struct {
	// Accept contains the OPTIONAL accept header. The JSON factories
	// in this file set it to "application/json".
	Accept string
	// Authorization is the OPTIONAL authorization. The factories in
	// this file leave it empty.
	Authorization string
	// ContentType is the OPTIONAL content-type header. It describes the
	// RequestBody; the POST JSON factory sets it to "application/json".
	ContentType string
	// LogBody OPTIONALLY enables logging bodies. See WithBodyLogging for
	// a way to obtain a copy of a descriptor with this flag changed.
	LogBody bool
	// Logger is the MANDATORY logger to use.
	//
	// For example, model.DiscardLogger.
	Logger model.Logger
	// MaxBodySize is the OPTIONAL maximum response body size. If
	// not set, we use the |DefaultMaxBodySize| constant.
	MaxBodySize int64
	// Method is the MANDATORY request method (e.g., http.MethodGet).
	Method string
	// RequestBody is the OPTIONAL request body. The factories in this
	// file leave it nil for GET requests.
	RequestBody []byte
	// Timeout is the OPTIONAL timeout for this call. If no timeout
	// is specified we will use the |DefaultCallTimeout| const.
	Timeout time.Duration
	// URLPath is the MANDATORY URL path (e.g., /api/v1/test-list/urls).
	URLPath string
	// URLQuery is the OPTIONAL query. It may be nil or empty when the
	// call does not need query arguments.
	URLQuery url.Values
}
// WithBodyLogging returns a SHALLOW COPY of |Descriptor| with LogBody set to |value|.
// The receiver is left untouched. Because the copy is shallow, reference-like
// fields (RequestBody, URLQuery, Logger) are shared between the receiver and
// the returned descriptor.
//
// You SHOULD only use this method when initializing the descriptor you want to use.
func (desc *Descriptor) WithBodyLogging(value bool) *Descriptor {
	// Copy by value first, then flip the flag on the copy only.
	clone := *desc
	clone.LogBody = value
	return &clone
}
// DefaultMaxBodySize is the default value for the maximum
// body size you can fetch using the httpapi package. The
// value is expressed in bytes and amounts to 4 MiB.
const DefaultMaxBodySize = 4 << 20
// DefaultCallTimeout is the default timeout for an httpapi call (one minute).
const DefaultCallTimeout = time.Minute
// NewGETJSONDescriptor is a convenience factory for creating a new descriptor
// that uses the GET method and expects a JSON response. It delegates to
// NewGETJSONWithQueryDescriptor passing an empty, non-nil query.
func NewGETJSONDescriptor(logger model.Logger, urlPath string) *Descriptor {
	emptyQuery := make(url.Values)
	return NewGETJSONWithQueryDescriptor(logger, urlPath, emptyQuery)
}
// applicationJSON is the content-type for JSON. The factories in this
// file use it for the Accept and ContentType descriptor fields.
const applicationJSON = "application/json"
// NewGETJSONWithQueryDescriptor is like NewGETJSONDescriptor but it also
// allows you to provide |query| arguments. Leaving |query| nil or empty
// is equivalent to calling NewGETJSONDescriptor directly.
//
// The |query| is stored as-is (it is not copied) into the descriptor.
func NewGETJSONWithQueryDescriptor(logger model.Logger, urlPath string, query url.Values) *Descriptor {
	// Fields not listed here (Authorization, ContentType, LogBody and
	// RequestBody) intentionally keep their zero value.
	desc := &Descriptor{
		Accept:      applicationJSON,
		Logger:      logger,
		MaxBodySize: DefaultMaxBodySize,
		Method:      http.MethodGet,
		Timeout:     DefaultCallTimeout,
		URLPath:     urlPath,
		URLQuery:    query,
	}
	return desc
}
// NewPOSTJSONWithJSONResponseDescriptor creates a descriptor that POSTs a JSON document
// and expects to receive back a JSON document from the API. The |request| is serialized
// once, when this function is called, and the resulting bytes become the request body.
//
// This function ONLY fails if we cannot serialize the |request| to JSON. So, if you know
// that |request| is JSON-serializable, you can safely call
// MustNewPOSTJSONWithJSONResponseDescriptor instead.
func NewPOSTJSONWithJSONResponseDescriptor(logger model.Logger, urlPath string, request any) (*Descriptor, error) {
	body, err := json.Marshal(request)
	if err != nil {
		return nil, err
	}
	// Fields not listed here (Authorization, LogBody and URLQuery)
	// intentionally keep their zero value.
	return &Descriptor{
		Accept:      applicationJSON,
		ContentType: applicationJSON,
		Logger:      logger,
		MaxBodySize: DefaultMaxBodySize,
		Method:      http.MethodPost,
		RequestBody: body,
		Timeout:     DefaultCallTimeout,
		URLPath:     urlPath,
	}, nil
}
// MustNewPOSTJSONWithJSONResponseDescriptor is like NewPOSTJSONWithJSONResponseDescriptor except that
// it panics in case it's not possible to JSON serialize the |request|.
//
// Use this variant only when you know |request| is JSON-serializable; otherwise
// prefer NewPOSTJSONWithJSONResponseDescriptor and handle the returned error.
func MustNewPOSTJSONWithJSONResponseDescriptor(logger model.Logger, urlPath string, request any) *Descriptor {
	desc, err := NewPOSTJSONWithJSONResponseDescriptor(logger, urlPath, request)
	// NOTE(review): runtimex.PanicOnError is defined outside this file; it
	// presumably panics when err is non-nil and is a no-op otherwise — confirm.
	runtimex.PanicOnError(err, "NewPOSTJSONWithJSONResponseDescriptor failed")
	return desc
}
// NewGETResourceDescriptor creates a generic descriptor for GETting a
// resource of unspecified type using the given |urlPath|. Unlike the
// JSON factories, it does not set the Accept header.
func NewGETResourceDescriptor(logger model.Logger, urlPath string) *Descriptor {
	// Fields not listed here (Accept, Authorization, ContentType, LogBody
	// and RequestBody) intentionally keep their zero value.
	desc := &Descriptor{
		Logger:      logger,
		MaxBodySize: DefaultMaxBodySize,
		Method:      http.MethodGet,
		Timeout:     DefaultCallTimeout,
		URLPath:     urlPath,
		URLQuery:    make(url.Values),
	}
	return desc
}