ooni-probe-cli/internal/engine/inputloader.go

387 lines
13 KiB
Go
Raw Permalink Normal View History

package engine
import (
"bufio"
"context"
"errors"
"fmt"
"io/fs"
"net/url"
"github.com/apex/log"
refactor: flatten and separate (#353) * refactor(atomicx): move outside the engine package After merging probe-engine into probe-cli, my impression is that we have too much unnecessary nesting of packages in this repository. The idea of this commit and of a bunch of following commits will instead be to reduce the nesting and simplify the structure. While there, improve the documentation. * fix: always use the atomicx package For consistency, never use sync/atomic and always use ./internal/atomicx so we can just grep and make sure we're not risking to crash if we make a subtle mistake on a 32 bit platform. While there, mention in the contributing guidelines that we want to always prefer the ./internal/atomicx package over sync/atomic. * fix(atomicx): remove unnecessary constructor We don't need a constructor here. The default constructed `&Int64{}` instance is already usable and the constructor does not add anything to what we are doing, rather it just creates extra confusion. * cleanup(atomicx): we are not using Float64 Because atomicx.Float64 is unused, we can safely zap it. * cleanup(atomicx): simplify impl and improve tests We can simplify the implementation by using defer and by letting the Load() method call Add(0). We can improve tests by making many goroutines updated the atomic int64 value concurrently. * refactor(fsx): can live in the ./internal pkg Let us reduce the amount of nesting. While there, ensure that the package only exports the bare minimum, and improve the documentation of the tests, to ease reading the code. * refactor: move runtimex to ./internal * refactor: move shellx into the ./internal package While there, remove unnecessary dependency between packages. While there, specify in the contributing guidelines that one should use x/sys/execabs instead of os/exec. * refactor: move ooapi into the ./internal pkg * refactor(humanize): move to ./internal and better docs * refactor: move platform to ./internal * refactor(randx): move to ./internal * refactor(multierror): move into the ./internal pkg * refactor(kvstore): all kvstores in ./internal Rather than having part of the kvstore inside ./internal/engine/kvstore and part in ./internal/engine/kvstore.go, let us put every piece of code that is kvstore related into the ./internal/kvstore package. * fix(kvstore): always return ErrNoSuchKey on Get() error It should help to use the kvstore everywhere removing all the copies that are lingering around the tree. * sessionresolver: make KVStore mandatory Simplifies implementation. While there, use the ./internal/kvstore package rather than having our private implementation. * fix(ooapi): use the ./internal/kvstore package * fix(platform): better documentation
2021-06-04 10:34:18 +02:00
"github.com/ooni/probe-cli/v3/internal/fsx"
"github.com/ooni/probe-cli/v3/internal/model"
"github.com/ooni/probe-cli/v3/internal/registry"
"github.com/ooni/probe-cli/v3/internal/stuninput"
)
// These errors are returned by the InputLoader.
var (
ErrNoURLsReturned = errors.New("no URLs returned")
ErrDetectedEmptyFile = errors.New("file did not contain any input")
ErrInputRequired = errors.New("no input provided")
ErrNoInputExpected = errors.New("we did not expect any input")
ErrNoStaticInput = errors.New("no static input for this experiment")
)
// InputLoaderSession is the session according to an InputLoader. We
// introduce this abstraction because it helps us with testing.
type InputLoaderSession interface {
CheckIn(ctx context.Context,
config *model.OOAPICheckInConfig) (*model.OOAPICheckInInfo, error)
}
// InputLoaderLogger is the logger according to an InputLoader.
type InputLoaderLogger interface {
// Warnf formats and emits a warning message.
Warnf(format string, v ...interface{})
}
// InputLoader loads input according to the specified policy
// either from command line and input files or from OONI services. The
// behaviour depends on the input policy as described below.
//
// You MUST NOT change any public field of this structure when
// in use, because that MAY lead to data races.
//
// InputNone
//
// We fail if there is any StaticInput or any SourceFiles. If
// there's no input, we return a single, empty entry that causes
// experiments that don't require input to run once.
//
// InputOptional
//
// We gather input from StaticInput and SourceFiles. If there is
// input, we return it. Otherwise we return a single, empty entry
// that causes experiments that don't require input to run once.
//
// InputOrQueryBackend
//
// We gather input from StaticInput and SourceFiles. If there is
// input, we return it. Otherwise, we use OONI's probe services
// to gather input using the best API for the task.
//
// InputOrStaticDefault
//
// We gather input from StaticInput and SourceFiles. If there is
// input, we return it. Otherwise, we return an internal static
// list of inputs to be used with this experiment.
//
// InputStrictlyRequired
//
// We gather input from StaticInput and SourceFiles. If there is
// input, we return it. Otherwise, we return an error.
type InputLoader struct {
// CheckInConfig contains options for the CheckIn API. If
// not set, then we'll create a default config. If set but
// there are fields inside it that are not set, then we
// will set them to a default value.
CheckInConfig *model.OOAPICheckInConfig
// ExperimentName is the name of the experiment. This field
// is only used together with the InputOrStaticDefault policy.
ExperimentName string
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not. You MUST fill in
// this field.
InputPolicy model.InputPolicy
// Logger is the optional logger that the InputLoader
// should be using. If not set, we will use the default
// logger of github.com/apex/log.
Logger InputLoaderLogger
// Session is the current measurement session. You
// MUST fill in this field.
Session InputLoaderSession
// StaticInputs contains optional input to be added
// to the resulting input list if possible.
StaticInputs []string
// SourceFiles contains optional files to read input
// from. Each file should contain a single input string
// per line. We will fail if any file is unreadable
// as well as if any file is empty.
SourceFiles []string
}
// Load attempts to load input using the specified input loader. We will
// return a list of URLs because this is the only input we support.
func (il *InputLoader) Load(ctx context.Context) ([]model.OOAPIURLInfo, error) {
switch il.InputPolicy {
case model.InputOptional:
return il.loadOptional()
case model.InputOrQueryBackend:
return il.loadOrQueryBackend(ctx)
case model.InputStrictlyRequired:
return il.loadStrictlyRequired(ctx)
case model.InputOrStaticDefault:
return il.loadOrStaticDefault(ctx)
default:
return il.loadNone()
}
}
// loadNone implements the InputNone policy.
func (il *InputLoader) loadNone() ([]model.OOAPIURLInfo, error) {
if len(il.StaticInputs) > 0 || len(il.SourceFiles) > 0 {
return nil, ErrNoInputExpected
}
// Note that we need to return a single empty entry.
return []model.OOAPIURLInfo{{}}, nil
}
// loadOptional implements the InputOptional policy.
func (il *InputLoader) loadOptional() ([]model.OOAPIURLInfo, error) {
inputs, err := il.loadLocal()
if err == nil && len(inputs) <= 0 {
// Note that we need to return a single empty entry.
inputs = []model.OOAPIURLInfo{{}}
}
return inputs, err
}
// loadStrictlyRequired implements the InputStrictlyRequired policy.
func (il *InputLoader) loadStrictlyRequired(ctx context.Context) ([]model.OOAPIURLInfo, error) {
inputs, err := il.loadLocal()
if err != nil || len(inputs) > 0 {
return inputs, err
}
return nil, ErrInputRequired
}
// loadOrQueryBackend implements the InputOrQueryBackend policy.
func (il *InputLoader) loadOrQueryBackend(ctx context.Context) ([]model.OOAPIURLInfo, error) {
inputs, err := il.loadLocal()
if err != nil || len(inputs) > 0 {
return inputs, err
}
return il.loadRemote(ctx)
}
// TODO(https://github.com/ooni/probe/issues/1390): we need to
// implement serving DNSCheck targets from the API
var dnsCheckDefaultInput = []string{
"https://dns.google/dns-query",
"https://8.8.8.8/dns-query",
"dot://8.8.8.8:853/",
"dot://8.8.4.4:853/",
"https://8.8.4.4/dns-query",
"https://cloudflare-dns.com/dns-query",
"https://1.1.1.1/dns-query",
"https://1.0.0.1/dns-query",
"dot://1.1.1.1:853/",
"dot://1.0.0.1:853/",
"https://dns.quad9.net/dns-query",
"https://9.9.9.9/dns-query",
"dot://9.9.9.9:853/",
"dot://dns.quad9.net/",
"https://family.cloudflare-dns.com/dns-query",
"dot://family.cloudflare-dns.com/dns-query",
"https://dns11.quad9.net/dns-query",
"dot://dns11.quad9.net/dns-query",
"https://dns9.quad9.net/dns-query",
"dot://dns9.quad9.net/dns-query",
"https://dns12.quad9.net/dns-query",
"dot://dns12.quad9.net/dns-query",
"https://1dot1dot1dot1.cloudflare-dns.com/dns-query",
"dot://1dot1dot1dot1.cloudflare-dns.com/dns-query",
"https://dns.adguard.com/dns-query",
"dot://dns.adguard.com/dns-query",
"https://dns-family.adguard.com/dns-query",
"dot://dns-family.adguard.com/dns-query",
"https://dns.cloudflare.com/dns-query",
"https://adblock.doh.mullvad.net/dns-query",
"dot://adblock.doh.mullvad.net/dns-query",
"https://dns.alidns.com/dns-query",
"dot://dns.alidns.com/dns-query",
"https://doh.opendns.com/dns-query",
"https://dns.nextdns.io/dns-query",
"dot://dns.nextdns.io/dns-query",
"https://dns10.quad9.net/dns-query",
"dot://dns10.quad9.net/dns-query",
"https://security.cloudflare-dns.com/dns-query",
"dot://security.cloudflare-dns.com/dns-query",
"https://dns.switch.ch/dns-query",
"dot://dns.switch.ch/dns-query",
}
var stunReachabilityDefaultInput = stuninput.AsnStunReachabilityInput()
// StaticBareInputForExperiment returns the list of strings an
// experiment should use as static input. In case there is no
// static input for this experiment, we return an error.
func StaticBareInputForExperiment(name string) ([]string, error) {
// Implementation note: we may be called from pkg/oonimkall
// with a non-canonical experiment name, so we need to convert
// the experiment name to be canonical before proceeding.
switch registry.CanonicalizeExperimentName(name) {
case "dnscheck":
return dnsCheckDefaultInput, nil
case "stunreachability":
return stunReachabilityDefaultInput, nil
default:
return nil, ErrNoStaticInput
}
}
// staticInputForExperiment returns the static input for the given experiment
// or an error if there's no static input for the experiment.
func staticInputForExperiment(name string) ([]model.OOAPIURLInfo, error) {
return stringListToModelURLInfo(StaticBareInputForExperiment(name))
}
// loadOrStaticDefault implements the InputOrStaticDefault policy.
func (il *InputLoader) loadOrStaticDefault(ctx context.Context) ([]model.OOAPIURLInfo, error) {
inputs, err := il.loadLocal()
if err != nil || len(inputs) > 0 {
return inputs, err
}
return staticInputForExperiment(il.ExperimentName)
}
// loadLocal loads inputs from StaticInputs and SourceFiles.
func (il *InputLoader) loadLocal() ([]model.OOAPIURLInfo, error) {
inputs := []model.OOAPIURLInfo{}
for _, input := range il.StaticInputs {
inputs = append(inputs, model.OOAPIURLInfo{URL: input})
}
for _, filepath := range il.SourceFiles {
refactor: flatten and separate (#353) * refactor(atomicx): move outside the engine package After merging probe-engine into probe-cli, my impression is that we have too much unnecessary nesting of packages in this repository. The idea of this commit and of a bunch of following commits will instead be to reduce the nesting and simplify the structure. While there, improve the documentation. * fix: always use the atomicx package For consistency, never use sync/atomic and always use ./internal/atomicx so we can just grep and make sure we're not risking to crash if we make a subtle mistake on a 32 bit platform. While there, mention in the contributing guidelines that we want to always prefer the ./internal/atomicx package over sync/atomic. * fix(atomicx): remove unnecessary constructor We don't need a constructor here. The default constructed `&Int64{}` instance is already usable and the constructor does not add anything to what we are doing, rather it just creates extra confusion. * cleanup(atomicx): we are not using Float64 Because atomicx.Float64 is unused, we can safely zap it. * cleanup(atomicx): simplify impl and improve tests We can simplify the implementation by using defer and by letting the Load() method call Add(0). We can improve tests by making many goroutines updated the atomic int64 value concurrently. * refactor(fsx): can live in the ./internal pkg Let us reduce the amount of nesting. While there, ensure that the package only exports the bare minimum, and improve the documentation of the tests, to ease reading the code. * refactor: move runtimex to ./internal * refactor: move shellx into the ./internal package While there, remove unnecessary dependency between packages. While there, specify in the contributing guidelines that one should use x/sys/execabs instead of os/exec. * refactor: move ooapi into the ./internal pkg * refactor(humanize): move to ./internal and better docs * refactor: move platform to ./internal * refactor(randx): move to ./internal * refactor(multierror): move into the ./internal pkg * refactor(kvstore): all kvstores in ./internal Rather than having part of the kvstore inside ./internal/engine/kvstore and part in ./internal/engine/kvstore.go, let us put every piece of code that is kvstore related into the ./internal/kvstore package. * fix(kvstore): always return ErrNoSuchKey on Get() error It should help to use the kvstore everywhere removing all the copies that are lingering around the tree. * sessionresolver: make KVStore mandatory Simplifies implementation. While there, use the ./internal/kvstore package rather than having our private implementation. * fix(ooapi): use the ./internal/kvstore package * fix(platform): better documentation
2021-06-04 10:34:18 +02:00
extra, err := il.readfile(filepath, fsx.OpenFile)
if err != nil {
return nil, err
}
// See https://github.com/ooni/probe-engine/issues/1123.
if len(extra) <= 0 {
return nil, fmt.Errorf("%w: %s", ErrDetectedEmptyFile, filepath)
}
inputs = append(inputs, extra...)
}
return inputs, nil
}
// inputLoaderOpenFn is the type of the function to open a file.
type inputLoaderOpenFn func(filepath string) (fs.File, error)
// readfile reads inputs from the specified file. The open argument should be
// compatible with stdlib's fs.Open and helps us with unit testing.
func (il *InputLoader) readfile(filepath string, open inputLoaderOpenFn) ([]model.OOAPIURLInfo, error) {
inputs := []model.OOAPIURLInfo{}
filep, err := open(filepath)
if err != nil {
return nil, err
}
defer filep.Close()
// Implementation note: when you save file with vim, you have newline at
// end of file and you don't want to consider that an input line. While there
// ignore any other empty line that may occur inside the file.
scanner := bufio.NewScanner(filep)
for scanner.Scan() {
line := scanner.Text()
if line != "" {
inputs = append(inputs, model.OOAPIURLInfo{URL: line})
}
}
if scanner.Err() != nil {
return nil, scanner.Err()
}
return inputs, nil
}
// loadRemote loads inputs from a remote source.
func (il *InputLoader) loadRemote(ctx context.Context) ([]model.OOAPIURLInfo, error) {
config := il.CheckInConfig
if config == nil {
// Note: Session.CheckIn documentation says it will fill in
// any field with a required value with a reasonable default
// if such value is missing. So, here we just need to be
// concerned about NOT passing it a NULL pointer.
config = &model.OOAPICheckInConfig{}
}
reply, err := il.checkIn(ctx, config)
if err != nil {
return nil, err
}
if reply.WebConnectivity == nil || len(reply.WebConnectivity.URLs) <= 0 {
return nil, ErrNoURLsReturned
}
return reply.WebConnectivity.URLs, nil
}
// checkIn executes the check-in and filters the returned URLs to exclude
// the URLs that are not part of the requested categories. This is done for
// robustness, just in case we or the API do something wrong.
func (il *InputLoader) checkIn(
ctx context.Context, config *model.OOAPICheckInConfig) (*model.OOAPICheckInInfo, error) {
reply, err := il.Session.CheckIn(ctx, config)
if err != nil {
return nil, err
}
// Note: safe to assume that reply is not nil if err is nil
if reply.WebConnectivity != nil && len(reply.WebConnectivity.URLs) > 0 {
reply.WebConnectivity.URLs = il.preventMistakes(
reply.WebConnectivity.URLs, config.WebConnectivity.CategoryCodes,
)
}
return reply, nil
}
// preventMistakes makes the code more robust with respect to any possible
// integration issue where the backend returns to us URLs that don't
// belong to the category codes we requested.
func (il *InputLoader) preventMistakes(input []model.OOAPIURLInfo, categories []string) (output []model.OOAPIURLInfo) {
if len(categories) <= 0 {
return input
}
for _, entry := range input {
var found bool
for _, cat := range categories {
if entry.CategoryCode == cat {
found = true
break
}
}
if !found {
il.logger().Warnf("URL %+v not in %+v; skipping", entry, categories)
continue
}
output = append(output, entry)
}
return
}
// logger returns the configured logger or apex/log's default.
func (il *InputLoader) logger() InputLoaderLogger {
if il.Logger != nil {
return il.Logger
}
return log.Log
}
// stringListToModelURLInfo is an utility function to convert
// a list of strings containing URLs into a list of model.URLInfo
// which would have been returned by an hypothetical backend
// API serving input for a test for which we don't have an API
// yet (e.g., stunreachability and dnscheck).
func stringListToModelURLInfo(input []string, err error) ([]model.OOAPIURLInfo, error) {
if err != nil {
return nil, err
}
var output []model.OOAPIURLInfo
for _, URL := range input {
if _, err := url.Parse(URL); err != nil {
return nil, err
}
output = append(output, model.OOAPIURLInfo{
CategoryCode: "MISC", // hard to find a category
CountryCode: "XX", // representing no country
URL: URL,
})
}
return output, nil
}