From 5973c88a0548ebed2f0e9cd8c3291cbfed6b25a9 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 29 Mar 2021 18:46:26 +0200 Subject: [PATCH] feat(inputloader): use check-in to fetch URLs (#267) * ongoing work * reduce diff with master * feat(inputloader): use the check-in API Part of https://github.com/ooni/probe/issues/1299 * fix: better naming for a variable * chore: add more tests * fix: add one more TODO --- cmd/ooniprobe/internal/cli/run/run.go | 10 -- .../internal/nettests/web_connectivity.go | 25 +++- internal/engine/inputloader.go | 58 ++++---- .../engine/inputloader_integration_test.go | 10 +- internal/engine/inputloader_test.go | 137 +++++++++++------- internal/libminiooni/libminiooni.go | 23 ++- 6 files changed, 156 insertions(+), 107 deletions(-) diff --git a/cmd/ooniprobe/internal/cli/run/run.go b/cmd/ooniprobe/internal/cli/run/run.go index 32d27cd..be23822 100644 --- a/cmd/ooniprobe/internal/cli/run/run.go +++ b/cmd/ooniprobe/internal/cli/run/run.go @@ -1,8 +1,6 @@ package run import ( - "runtime" - "github.com/alecthomas/kingpin" "github.com/apex/log" "github.com/fatih/color" @@ -77,14 +75,6 @@ func init() { unattendedCmd := cmd.Command("unattended", "") unattendedCmd.Action(func(_ *kingpin.ParseContext) error { - // Until we have enabled the check-in API we're called every - // hour on darwin and we need to self throttle. - // TODO(bassosimone): switch to check-in and remove this hack. - switch runtime.GOOS { - case "darwin", "windows": - const veryFew = 10 - probe.Config().Nettests.WebsitesURLLimit = veryFew - } return functionalRun(func(name string, gr nettests.Group) bool { return gr.UnattendedOK == true }) diff --git a/cmd/ooniprobe/internal/nettests/web_connectivity.go b/cmd/ooniprobe/internal/nettests/web_connectivity.go index 1286845..c4752ad 100644 --- a/cmd/ooniprobe/internal/nettests/web_connectivity.go +++ b/cmd/ooniprobe/internal/nettests/web_connectivity.go @@ -6,16 +6,29 @@ import ( "github.com/apex/log" "github.com/ooni/probe-cli/v3/cmd/ooniprobe/internal/database" engine "github.com/ooni/probe-cli/v3/internal/engine" + "github.com/ooni/probe-cli/v3/internal/engine/model" ) +// TODO(bassosimone): we should remove the limit argument and +// we should also remove it from the config. + +// TODO(bassosimone): we should propagate the kind of run +// to here such that we get the right runType. + +// TODO(bassosimone): we are breaking the use case in which +// someone choose the number of URLs explicitly via the config. + func lookupURLs(ctl *Controller, limit int64, categories []string) ([]string, map[int64]int64, error) { inputloader := engine.NewInputLoader(engine.InputLoaderConfig{ - InputPolicy: engine.InputOrQueryBackend, - Session: ctl.Session, - SourceFiles: ctl.InputFiles, - StaticInputs: ctl.Inputs, - URLCategories: categories, - URLLimit: limit, + CheckInConfig: &model.CheckInConfig{ + WebConnectivity: model.CheckInConfigWebConnectivity{ + CategoryCodes: categories, + }, + }, + InputPolicy: engine.InputOrQueryBackend, + Session: ctl.Session, + SourceFiles: ctl.InputFiles, + StaticInputs: ctl.Inputs, }) testlist, err := inputloader.Load(context.Background()) var urls []string diff --git a/internal/engine/inputloader.go b/internal/engine/inputloader.go index ccd1583..d96e06d 100644 --- a/internal/engine/inputloader.go +++ b/internal/engine/inputloader.go @@ -12,6 +12,7 @@ import ( // These errors are returned by the InputLoader. var ( + ErrNoURLsReturned = errors.New("no URLs returned") ErrDetectedEmptyFile = errors.New("file did not contain any input") ErrInputRequired = errors.New("no input provided") ErrNoInputExpected = errors.New("we did not expect any input") @@ -20,9 +21,8 @@ var ( // InputLoaderSession is the session according to an InputLoader. We // introduce this abstraction because it helps us with testing. type InputLoaderSession interface { - MaybeLookupLocationContext(ctx context.Context) error - NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error) - ProbeCC() string + CheckIn(ctx context.Context, + config *model.CheckInConfig) (*model.CheckInInfo, error) } // InputLoader loads input according to the specified policy @@ -59,6 +59,22 @@ type InputLoader interface { // InputLoaderConfig contains config for InputLoader. type InputLoaderConfig struct { + // CheckInConfig contains options for the CheckIn API. If + // not set, then we'll create a default config. If set but + // there are fields inside it that are not set, then we + // will set them to a default value. + CheckInConfig *model.CheckInConfig + + // InputPolicy specifies the input policy for the + // current experiment. We will not load any input if + // the policy says we should not. You MUST fill in + // this field. + InputPolicy InputPolicy + + // Session is the current measurement session. You + // MUST fill in this field. + Session InputLoaderSession + // StaticInputs contains optional input to be added // to the resulting input list if possible. StaticInputs []string @@ -68,22 +84,6 @@ type InputLoaderConfig struct { // per line. We will fail if any file is unreadable // as well as if any file is empty. SourceFiles []string - - // InputPolicy specifies the input policy for the - // current experiment. We will not load any input if - // the policy says we should not. - InputPolicy InputPolicy - - // Session is the current measurement session. - Session InputLoaderSession - - // URLLimit is the optional limit on the number of URLs - // that probe services should return to us. - URLLimit int64 - - // URLCategories limits the categories of URLs that - // probe services should return to us. - URLCategories []string } // NewInputLoader creates a new InputLoader. @@ -218,16 +218,20 @@ type inputLoaderLoadRemoteConfig struct { // loadRemote loads inputs from a remote source. func (il inputLoader) loadRemote(conf inputLoaderLoadRemoteConfig) ([]model.URLInfo, error) { - if err := conf.session.MaybeLookupLocationContext(conf.ctx); err != nil { - return nil, err + config := il.CheckInConfig + if config == nil { + // Note: Session.CheckIn documentation says it will fill in + // any field with a required value with a reasonable default + // if such value is missing. So, here we just need to be + // concerned about NOT passing it a NULL pointer. + config = &model.CheckInConfig{} } - client, err := conf.session.NewOrchestraClient(conf.ctx) + reply, err := conf.session.CheckIn(conf.ctx, config) if err != nil { return nil, err } - return client.FetchURLList(conf.ctx, model.URLListConfig{ - CountryCode: conf.session.ProbeCC(), - Limit: il.URLLimit, - Categories: il.URLCategories, - }) + if reply.WebConnectivity == nil || len(reply.WebConnectivity.URLs) <= 0 { + return nil, ErrNoURLsReturned + } + return reply.WebConnectivity.URLs, nil } diff --git a/internal/engine/inputloader_integration_test.go b/internal/engine/inputloader_integration_test.go index 744157a..1662746 100644 --- a/internal/engine/inputloader_integration_test.go +++ b/internal/engine/inputloader_integration_test.go @@ -204,7 +204,7 @@ func TestInputLoaderInputStrictlyRequiredWithEmptyFile(t *testing.T) { } } -func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) { +func TestInputLoaderInputOrQueryBackendWithInput(t *testing.T) { il := engine.NewInputLoader(engine.InputLoaderConfig{ StaticInputs: []string{"https://www.google.com/"}, SourceFiles: []string{ @@ -233,7 +233,7 @@ func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) { } } -func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testing.T) { +func TestInputLoaderInputOrQueryBackendWithNoInputAndCancelledContext(t *testing.T) { sess, err := engine.NewSession(engine.SessionConfig{ AssetsDir: "testdata", KVStore: kvstore.NewMemoryKeyValueStore(), @@ -261,7 +261,7 @@ func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testi } } -func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) { +func TestInputLoaderInputOrQueryBackendWithNoInput(t *testing.T) { if testing.Short() { t.Skip("skip test in short mode") } @@ -284,7 +284,6 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) { il := engine.NewInputLoader(engine.InputLoaderConfig{ InputPolicy: engine.InputOrQueryBackend, Session: sess, - URLLimit: 30, }) ctx := context.Background() out, err := il.Load(ctx) @@ -292,11 +291,12 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) { t.Fatal(err) } if len(out) < 10 { + // check-in SHOULD return AT LEAST 20 URLs at a time. t.Fatal("not the output length we expected") } } -func TestInputLoaderInputOrQueryTestListsWithEmptyFile(t *testing.T) { +func TestInputLoaderInputOrQueryBackendWithEmptyFile(t *testing.T) { il := engine.NewInputLoader(engine.InputLoaderConfig{ InputPolicy: engine.InputOrQueryBackend, SourceFiles: []string{ diff --git a/internal/engine/inputloader_test.go b/internal/engine/inputloader_test.go index d40483e..43bfd47 100644 --- a/internal/engine/inputloader_test.go +++ b/internal/engine/inputloader_test.go @@ -8,6 +8,7 @@ import ( "syscall" "testing" + "github.com/google/go-cmp/cmp" "github.com/ooni/probe-cli/v3/internal/engine/internal/fsx" "github.com/ooni/probe-cli/v3/internal/engine/model" ) @@ -43,65 +44,33 @@ func TestInputLoaderReadfileScannerFailure(t *testing.T) { } } -type InputLoaderBrokenSession struct { - OrchestraClient model.ExperimentOrchestraClient - Error error +// InputLoaderMockableSession is a mockable session +// used by InputLoader tests. +type InputLoaderMockableSession struct { + // Output contains the output of CheckIn. It should + // be nil when Error is not-nil. + Output *model.CheckInInfo + + // Error is the error to be returned by CheckIn. It + // should be nil when Output is not-nil. + Error error } -func (InputLoaderBrokenSession) MaybeLookupLocationContext(ctx context.Context) error { - return nil -} - -func (ilbs InputLoaderBrokenSession) NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error) { - if ilbs.OrchestraClient != nil { - return ilbs.OrchestraClient, nil +// CheckIn implements InputLoaderSession.CheckIn. +func (sess *InputLoaderMockableSession) CheckIn( + ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) { + if sess.Output == nil && sess.Error == nil { + return nil, errors.New("both Output and Error are nil") } - return nil, io.EOF + return sess.Output, sess.Error } -func (InputLoaderBrokenSession) ProbeCC() string { - return "IT" -} - -func TestInputLoaderNewOrchestraClientFailure(t *testing.T) { - il := inputLoader{} - lrc := inputLoaderLoadRemoteConfig{ - ctx: context.Background(), - session: InputLoaderBrokenSession{}, - } - out, err := il.loadRemote(lrc) - if !errors.Is(err, io.EOF) { - t.Fatalf("not the error we expected: %+v", err) - } - if out != nil { - t.Fatal("expected nil output here") - } -} - -type InputLoaderBrokenOrchestraClient struct{} - -func (InputLoaderBrokenOrchestraClient) CheckIn(ctx context.Context, config model.CheckInConfig) (*model.CheckInInfo, error) { - return nil, io.EOF -} - -func (InputLoaderBrokenOrchestraClient) FetchPsiphonConfig(ctx context.Context) ([]byte, error) { - return nil, io.EOF -} - -func (InputLoaderBrokenOrchestraClient) FetchTorTargets(ctx context.Context, cc string) (map[string]model.TorTarget, error) { - return nil, io.EOF -} - -func (InputLoaderBrokenOrchestraClient) FetchURLList(ctx context.Context, config model.URLListConfig) ([]model.URLInfo, error) { - return nil, io.EOF -} - -func TestInputLoaderFetchURLListFailure(t *testing.T) { +func TestInputLoaderCheckInFailure(t *testing.T) { il := inputLoader{} lrc := inputLoaderLoadRemoteConfig{ ctx: context.Background(), - session: InputLoaderBrokenSession{ - OrchestraClient: InputLoaderBrokenOrchestraClient{}, + session: &InputLoaderMockableSession{ + Error: io.EOF, }, } out, err := il.loadRemote(lrc) @@ -112,3 +81,69 @@ func TestInputLoaderFetchURLListFailure(t *testing.T) { t.Fatal("expected nil output here") } } + +func TestInputLoaderCheckInSuccessWithNilWebConnectivity(t *testing.T) { + il := inputLoader{} + lrc := inputLoaderLoadRemoteConfig{ + ctx: context.Background(), + session: &InputLoaderMockableSession{ + Output: &model.CheckInInfo{}, + }, + } + out, err := il.loadRemote(lrc) + if !errors.Is(err, ErrNoURLsReturned) { + t.Fatalf("not the error we expected: %+v", err) + } + if out != nil { + t.Fatal("expected nil output here") + } +} + +func TestInputLoaderCheckInSuccessWithNoURLs(t *testing.T) { + il := inputLoader{} + lrc := inputLoaderLoadRemoteConfig{ + ctx: context.Background(), + session: &InputLoaderMockableSession{ + Output: &model.CheckInInfo{ + WebConnectivity: &model.CheckInInfoWebConnectivity{}, + }, + }, + } + out, err := il.loadRemote(lrc) + if !errors.Is(err, ErrNoURLsReturned) { + t.Fatalf("not the error we expected: %+v", err) + } + if out != nil { + t.Fatal("expected nil output here") + } +} + +func TestInputLoaderCheckInSuccessWithSomeURLs(t *testing.T) { + expect := []model.URLInfo{{ + CategoryCode: "NEWS", + CountryCode: "IT", + URL: "https://repubblica.it", + }, { + CategoryCode: "NEWS", + CountryCode: "IT", + URL: "https://corriere.it", + }} + il := inputLoader{} + lrc := inputLoaderLoadRemoteConfig{ + ctx: context.Background(), + session: &InputLoaderMockableSession{ + Output: &model.CheckInInfo{ + WebConnectivity: &model.CheckInInfoWebConnectivity{ + URLs: expect, + }, + }, + }, + } + out, err := il.loadRemote(lrc) + if err != nil { + t.Fatal(err) + } + if diff := cmp.Diff(expect, out); diff != "" { + t.Fatal(diff) + } +} diff --git a/internal/libminiooni/libminiooni.go b/internal/libminiooni/libminiooni.go index df6c540..d0daa6e 100644 --- a/internal/libminiooni/libminiooni.go +++ b/internal/libminiooni/libminiooni.go @@ -12,6 +12,13 @@ // code at github.com/bassosimone/aladdin). package libminiooni +// TODO(bassosimone): now that this code cannot be imported from +// external sources anymore, we should move it back into the +// internal/cmd/miniooni folder and reduce the number of packages +// that are unnecessarily exposed inside ./internal/engine. + +// TODO(bassosimone): we need to deprecate or remove --limit. + import ( "context" "errors" @@ -140,10 +147,6 @@ func init() { ) } -func fatalWithString(msg string) { - panic(msg) -} - func fatalIfFalse(cond bool, msg string) { if !cond { panic(msg) @@ -371,11 +374,15 @@ func MainWithConfiguration(experimentName string, currentOptions Options) { fatalOnError(err, "cannot create experiment builder") inputLoader := engine.NewInputLoader(engine.InputLoaderConfig{ + CheckInConfig: &model.CheckInConfig{ + RunType: "manual", + OnWiFi: true, // meaning: not on 4G + Charging: true, + }, + InputPolicy: builder.InputPolicy(), StaticInputs: currentOptions.Inputs, SourceFiles: currentOptions.InputFilePaths, - InputPolicy: builder.InputPolicy(), Session: sess, - URLLimit: currentOptions.Limit, }) inputs, err := inputLoader.Load(context.Background()) fatalOnError(err, "cannot load inputs") @@ -399,14 +406,14 @@ func MainWithConfiguration(experimentName string, currentOptions Options) { }() submitter, err := engine.NewSubmitter(ctx, engine.SubmitterConfig{ - Enabled: currentOptions.NoCollector == false, + Enabled: !currentOptions.NoCollector, Session: sess, Logger: log.Log, }) fatalOnError(err, "cannot create submitter") saver, err := engine.NewSaver(engine.SaverConfig{ - Enabled: currentOptions.NoJSON == false, + Enabled: !currentOptions.NoJSON, Experiment: experiment, FilePath: currentOptions.ReportFile, Logger: log.Log,