feat(inputloader): use check-in to fetch URLs (#267)

* ongoing work

* reduce diff with master

* feat(inputloader): use the check-in API

Part of https://github.com/ooni/probe/issues/1299

* fix: better naming for a variable

* chore: add more tests

* fix: add one more TODO
This commit is contained in:
Simone Basso 2021-03-29 18:46:26 +02:00 committed by GitHub
parent e0b0dfedc1
commit 5973c88a05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 156 additions and 107 deletions

View File

@ -1,8 +1,6 @@
package run package run
import ( import (
"runtime"
"github.com/alecthomas/kingpin" "github.com/alecthomas/kingpin"
"github.com/apex/log" "github.com/apex/log"
"github.com/fatih/color" "github.com/fatih/color"
@ -77,14 +75,6 @@ func init() {
unattendedCmd := cmd.Command("unattended", "") unattendedCmd := cmd.Command("unattended", "")
unattendedCmd.Action(func(_ *kingpin.ParseContext) error { unattendedCmd.Action(func(_ *kingpin.ParseContext) error {
// Until we have enabled the check-in API we're called every
// hour on darwin and we need to self throttle.
// TODO(bassosimone): switch to check-in and remove this hack.
switch runtime.GOOS {
case "darwin", "windows":
const veryFew = 10
probe.Config().Nettests.WebsitesURLLimit = veryFew
}
return functionalRun(func(name string, gr nettests.Group) bool { return functionalRun(func(name string, gr nettests.Group) bool {
return gr.UnattendedOK == true return gr.UnattendedOK == true
}) })

View File

@ -6,16 +6,29 @@ import (
"github.com/apex/log" "github.com/apex/log"
"github.com/ooni/probe-cli/v3/cmd/ooniprobe/internal/database" "github.com/ooni/probe-cli/v3/cmd/ooniprobe/internal/database"
engine "github.com/ooni/probe-cli/v3/internal/engine" engine "github.com/ooni/probe-cli/v3/internal/engine"
"github.com/ooni/probe-cli/v3/internal/engine/model"
) )
// TODO(bassosimone): we should remove the limit argument and
// we should also remove it from the config.
// TODO(bassosimone): we should propagate the kind of run
// to here such that we get the right runType.
// TODO(bassosimone): we are breaking the use case in which
// someone choose the number of URLs explicitly via the config.
func lookupURLs(ctl *Controller, limit int64, categories []string) ([]string, map[int64]int64, error) { func lookupURLs(ctl *Controller, limit int64, categories []string) ([]string, map[int64]int64, error) {
inputloader := engine.NewInputLoader(engine.InputLoaderConfig{ inputloader := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend, CheckInConfig: &model.CheckInConfig{
Session: ctl.Session, WebConnectivity: model.CheckInConfigWebConnectivity{
SourceFiles: ctl.InputFiles, CategoryCodes: categories,
StaticInputs: ctl.Inputs, },
URLCategories: categories, },
URLLimit: limit, InputPolicy: engine.InputOrQueryBackend,
Session: ctl.Session,
SourceFiles: ctl.InputFiles,
StaticInputs: ctl.Inputs,
}) })
testlist, err := inputloader.Load(context.Background()) testlist, err := inputloader.Load(context.Background())
var urls []string var urls []string

View File

@ -12,6 +12,7 @@ import (
// These errors are returned by the InputLoader. // These errors are returned by the InputLoader.
var ( var (
ErrNoURLsReturned = errors.New("no URLs returned")
ErrDetectedEmptyFile = errors.New("file did not contain any input") ErrDetectedEmptyFile = errors.New("file did not contain any input")
ErrInputRequired = errors.New("no input provided") ErrInputRequired = errors.New("no input provided")
ErrNoInputExpected = errors.New("we did not expect any input") ErrNoInputExpected = errors.New("we did not expect any input")
@ -20,9 +21,8 @@ var (
// InputLoaderSession is the session according to an InputLoader. We // InputLoaderSession is the session according to an InputLoader. We
// introduce this abstraction because it helps us with testing. // introduce this abstraction because it helps us with testing.
type InputLoaderSession interface { type InputLoaderSession interface {
MaybeLookupLocationContext(ctx context.Context) error CheckIn(ctx context.Context,
NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error) config *model.CheckInConfig) (*model.CheckInInfo, error)
ProbeCC() string
} }
// InputLoader loads input according to the specified policy // InputLoader loads input according to the specified policy
@ -59,6 +59,22 @@ type InputLoader interface {
// InputLoaderConfig contains config for InputLoader. // InputLoaderConfig contains config for InputLoader.
type InputLoaderConfig struct { type InputLoaderConfig struct {
// CheckInConfig contains options for the CheckIn API. If
// not set, then we'll create a default config. If set but
// there are fields inside it that are not set, then we
// will set them to a default value.
CheckInConfig *model.CheckInConfig
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not. You MUST fill in
// this field.
InputPolicy InputPolicy
// Session is the current measurement session. You
// MUST fill in this field.
Session InputLoaderSession
// StaticInputs contains optional input to be added // StaticInputs contains optional input to be added
// to the resulting input list if possible. // to the resulting input list if possible.
StaticInputs []string StaticInputs []string
@ -68,22 +84,6 @@ type InputLoaderConfig struct {
// per line. We will fail if any file is unreadable // per line. We will fail if any file is unreadable
// as well as if any file is empty. // as well as if any file is empty.
SourceFiles []string SourceFiles []string
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not.
InputPolicy InputPolicy
// Session is the current measurement session.
Session InputLoaderSession
// URLLimit is the optional limit on the number of URLs
// that probe services should return to us.
URLLimit int64
// URLCategories limits the categories of URLs that
// probe services should return to us.
URLCategories []string
} }
// NewInputLoader creates a new InputLoader. // NewInputLoader creates a new InputLoader.
@ -218,16 +218,20 @@ type inputLoaderLoadRemoteConfig struct {
// loadRemote loads inputs from a remote source. // loadRemote loads inputs from a remote source.
func (il inputLoader) loadRemote(conf inputLoaderLoadRemoteConfig) ([]model.URLInfo, error) { func (il inputLoader) loadRemote(conf inputLoaderLoadRemoteConfig) ([]model.URLInfo, error) {
if err := conf.session.MaybeLookupLocationContext(conf.ctx); err != nil { config := il.CheckInConfig
return nil, err if config == nil {
// Note: Session.CheckIn documentation says it will fill in
// any field with a required value with a reasonable default
// if such value is missing. So, here we just need to be
// concerned about NOT passing it a NULL pointer.
config = &model.CheckInConfig{}
} }
client, err := conf.session.NewOrchestraClient(conf.ctx) reply, err := conf.session.CheckIn(conf.ctx, config)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return client.FetchURLList(conf.ctx, model.URLListConfig{ if reply.WebConnectivity == nil || len(reply.WebConnectivity.URLs) <= 0 {
CountryCode: conf.session.ProbeCC(), return nil, ErrNoURLsReturned
Limit: il.URLLimit, }
Categories: il.URLCategories, return reply.WebConnectivity.URLs, nil
})
} }

View File

@ -204,7 +204,7 @@ func TestInputLoaderInputStrictlyRequiredWithEmptyFile(t *testing.T) {
} }
} }
func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) { func TestInputLoaderInputOrQueryBackendWithInput(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{ il := engine.NewInputLoader(engine.InputLoaderConfig{
StaticInputs: []string{"https://www.google.com/"}, StaticInputs: []string{"https://www.google.com/"},
SourceFiles: []string{ SourceFiles: []string{
@ -233,7 +233,7 @@ func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) {
} }
} }
func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testing.T) { func TestInputLoaderInputOrQueryBackendWithNoInputAndCancelledContext(t *testing.T) {
sess, err := engine.NewSession(engine.SessionConfig{ sess, err := engine.NewSession(engine.SessionConfig{
AssetsDir: "testdata", AssetsDir: "testdata",
KVStore: kvstore.NewMemoryKeyValueStore(), KVStore: kvstore.NewMemoryKeyValueStore(),
@ -261,7 +261,7 @@ func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testi
} }
} }
func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) { func TestInputLoaderInputOrQueryBackendWithNoInput(t *testing.T) {
if testing.Short() { if testing.Short() {
t.Skip("skip test in short mode") t.Skip("skip test in short mode")
} }
@ -284,7 +284,6 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{ il := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend, InputPolicy: engine.InputOrQueryBackend,
Session: sess, Session: sess,
URLLimit: 30,
}) })
ctx := context.Background() ctx := context.Background()
out, err := il.Load(ctx) out, err := il.Load(ctx)
@ -292,11 +291,12 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if len(out) < 10 { if len(out) < 10 {
// check-in SHOULD return AT LEAST 20 URLs at a time.
t.Fatal("not the output length we expected") t.Fatal("not the output length we expected")
} }
} }
func TestInputLoaderInputOrQueryTestListsWithEmptyFile(t *testing.T) { func TestInputLoaderInputOrQueryBackendWithEmptyFile(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{ il := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend, InputPolicy: engine.InputOrQueryBackend,
SourceFiles: []string{ SourceFiles: []string{

View File

@ -8,6 +8,7 @@ import (
"syscall" "syscall"
"testing" "testing"
"github.com/google/go-cmp/cmp"
"github.com/ooni/probe-cli/v3/internal/engine/internal/fsx" "github.com/ooni/probe-cli/v3/internal/engine/internal/fsx"
"github.com/ooni/probe-cli/v3/internal/engine/model" "github.com/ooni/probe-cli/v3/internal/engine/model"
) )
@ -43,65 +44,33 @@ func TestInputLoaderReadfileScannerFailure(t *testing.T) {
} }
} }
type InputLoaderBrokenSession struct { // InputLoaderMockableSession is a mockable session
OrchestraClient model.ExperimentOrchestraClient // used by InputLoader tests.
Error error type InputLoaderMockableSession struct {
// Output contains the output of CheckIn. It should
// be nil when Error is not-nil.
Output *model.CheckInInfo
// Error is the error to be returned by CheckIn. It
// should be nil when Output is not-nil.
Error error
} }
func (InputLoaderBrokenSession) MaybeLookupLocationContext(ctx context.Context) error { // CheckIn implements InputLoaderSession.CheckIn.
return nil func (sess *InputLoaderMockableSession) CheckIn(
} ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) {
if sess.Output == nil && sess.Error == nil {
func (ilbs InputLoaderBrokenSession) NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error) { return nil, errors.New("both Output and Error are nil")
if ilbs.OrchestraClient != nil {
return ilbs.OrchestraClient, nil
} }
return nil, io.EOF return sess.Output, sess.Error
} }
func (InputLoaderBrokenSession) ProbeCC() string { func TestInputLoaderCheckInFailure(t *testing.T) {
return "IT"
}
func TestInputLoaderNewOrchestraClientFailure(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: InputLoaderBrokenSession{},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, io.EOF) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
type InputLoaderBrokenOrchestraClient struct{}
func (InputLoaderBrokenOrchestraClient) CheckIn(ctx context.Context, config model.CheckInConfig) (*model.CheckInInfo, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchPsiphonConfig(ctx context.Context) ([]byte, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchTorTargets(ctx context.Context, cc string) (map[string]model.TorTarget, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchURLList(ctx context.Context, config model.URLListConfig) ([]model.URLInfo, error) {
return nil, io.EOF
}
func TestInputLoaderFetchURLListFailure(t *testing.T) {
il := inputLoader{} il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{ lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(), ctx: context.Background(),
session: InputLoaderBrokenSession{ session: &InputLoaderMockableSession{
OrchestraClient: InputLoaderBrokenOrchestraClient{}, Error: io.EOF,
}, },
} }
out, err := il.loadRemote(lrc) out, err := il.loadRemote(lrc)
@ -112,3 +81,69 @@ func TestInputLoaderFetchURLListFailure(t *testing.T) {
t.Fatal("expected nil output here") t.Fatal("expected nil output here")
} }
} }
func TestInputLoaderCheckInSuccessWithNilWebConnectivity(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{},
},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, ErrNoURLsReturned) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
func TestInputLoaderCheckInSuccessWithNoURLs(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{
WebConnectivity: &model.CheckInInfoWebConnectivity{},
},
},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, ErrNoURLsReturned) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
func TestInputLoaderCheckInSuccessWithSomeURLs(t *testing.T) {
expect := []model.URLInfo{{
CategoryCode: "NEWS",
CountryCode: "IT",
URL: "https://repubblica.it",
}, {
CategoryCode: "NEWS",
CountryCode: "IT",
URL: "https://corriere.it",
}}
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{
WebConnectivity: &model.CheckInInfoWebConnectivity{
URLs: expect,
},
},
},
}
out, err := il.loadRemote(lrc)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(expect, out); diff != "" {
t.Fatal(diff)
}
}

View File

@ -12,6 +12,13 @@
// code at github.com/bassosimone/aladdin). // code at github.com/bassosimone/aladdin).
package libminiooni package libminiooni
// TODO(bassosimone): now that this code cannot be imported from
// external sources anymore, we should move it back into the
// internal/cmd/miniooni folder and reduce the number of packages
// that are unnecessarily exposed inside ./internal/engine.
// TODO(bassosimone): we need to deprecate or remove --limit.
import ( import (
"context" "context"
"errors" "errors"
@ -140,10 +147,6 @@ func init() {
) )
} }
func fatalWithString(msg string) {
panic(msg)
}
func fatalIfFalse(cond bool, msg string) { func fatalIfFalse(cond bool, msg string) {
if !cond { if !cond {
panic(msg) panic(msg)
@ -371,11 +374,15 @@ func MainWithConfiguration(experimentName string, currentOptions Options) {
fatalOnError(err, "cannot create experiment builder") fatalOnError(err, "cannot create experiment builder")
inputLoader := engine.NewInputLoader(engine.InputLoaderConfig{ inputLoader := engine.NewInputLoader(engine.InputLoaderConfig{
CheckInConfig: &model.CheckInConfig{
RunType: "manual",
OnWiFi: true, // meaning: not on 4G
Charging: true,
},
InputPolicy: builder.InputPolicy(),
StaticInputs: currentOptions.Inputs, StaticInputs: currentOptions.Inputs,
SourceFiles: currentOptions.InputFilePaths, SourceFiles: currentOptions.InputFilePaths,
InputPolicy: builder.InputPolicy(),
Session: sess, Session: sess,
URLLimit: currentOptions.Limit,
}) })
inputs, err := inputLoader.Load(context.Background()) inputs, err := inputLoader.Load(context.Background())
fatalOnError(err, "cannot load inputs") fatalOnError(err, "cannot load inputs")
@ -399,14 +406,14 @@ func MainWithConfiguration(experimentName string, currentOptions Options) {
}() }()
submitter, err := engine.NewSubmitter(ctx, engine.SubmitterConfig{ submitter, err := engine.NewSubmitter(ctx, engine.SubmitterConfig{
Enabled: currentOptions.NoCollector == false, Enabled: !currentOptions.NoCollector,
Session: sess, Session: sess,
Logger: log.Log, Logger: log.Log,
}) })
fatalOnError(err, "cannot create submitter") fatalOnError(err, "cannot create submitter")
saver, err := engine.NewSaver(engine.SaverConfig{ saver, err := engine.NewSaver(engine.SaverConfig{
Enabled: currentOptions.NoJSON == false, Enabled: !currentOptions.NoJSON,
Experiment: experiment, Experiment: experiment,
FilePath: currentOptions.ReportFile, FilePath: currentOptions.ReportFile,
Logger: log.Log, Logger: log.Log,