From 654441f5cd5268a39fa7e1c2f86b8bdecd50540e Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 7 Apr 2021 14:14:25 +0200 Subject: [PATCH] fix: move preventMistakes in InputLoader (#304) This fixes an issue where URLs provided with --input are not accepted by the preventMistakes filter. The filter itself needs to execute _only_ on URLs returned by the checkIn API, rather than on URLs returned by the InputLoader, which may instead be user provided. Reference issue: https://github.com/ooni/probe/issues/1435 --- .../internal/nettests/web_connectivity.go | 25 ------ .../nettests/web_connectivity_test.go | 77 ---------------- internal/engine/inputloader.go | 64 +++++++++++++- internal/engine/inputloader_test.go | 87 +++++++++++++++++++ 4 files changed, 150 insertions(+), 103 deletions(-) delete mode 100644 cmd/ooniprobe/internal/nettests/web_connectivity_test.go diff --git a/cmd/ooniprobe/internal/nettests/web_connectivity.go b/cmd/ooniprobe/internal/nettests/web_connectivity.go index c304701..746e040 100644 --- a/cmd/ooniprobe/internal/nettests/web_connectivity.go +++ b/cmd/ooniprobe/internal/nettests/web_connectivity.go @@ -9,30 +9,6 @@ import ( "github.com/ooni/probe-cli/v3/internal/engine/model" ) -// preventMistakes makes the code more robust with respect to any possible -// integration issue where the backend returns to us URLs that don't -// belong to the category codes we requested. -func preventMistakes(input []model.URLInfo, categories []string) (output []model.URLInfo) { - if len(categories) <= 0 { - return input - } - for _, entry := range input { - var found bool - for _, cat := range categories { - if entry.CategoryCode == cat { - found = true - break - } - } - if !found { - log.Warnf("URL %+v not in %+v; skipping", entry, categories) - continue - } - output = append(output, entry) - } - return -} - func lookupURLs(ctl *Controller, categories []string) ([]string, map[int64]int64, error) { inputloader := &engine.InputLoader{ CheckInConfig: &model.CheckInConfig{ @@ -56,7 +32,6 @@ func lookupURLs(ctl *Controller, categories []string) ([]string, map[int64]int64 if err != nil { return nil, nil, err } - testlist = preventMistakes(testlist, categories) var urls []string urlIDMap := make(map[int64]int64) for idx, url := range testlist { diff --git a/cmd/ooniprobe/internal/nettests/web_connectivity_test.go b/cmd/ooniprobe/internal/nettests/web_connectivity_test.go deleted file mode 100644 index 5c17c64..0000000 --- a/cmd/ooniprobe/internal/nettests/web_connectivity_test.go +++ /dev/null @@ -1,77 +0,0 @@ -package nettests - -import ( - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/ooni/probe-cli/v3/internal/engine/model" -) - -func TestPreventMistakesWithCategories(t *testing.T) { - input := []model.URLInfo{{ - CategoryCode: "NEWS", - URL: "https://repubblica.it/", - CountryCode: "IT", - }, { - CategoryCode: "HACK", - URL: "https://2600.com", - CountryCode: "XX", - }, { - CategoryCode: "FILE", - URL: "https://addons.mozilla.org/", - CountryCode: "XX", - }} - desired := []model.URLInfo{{ - CategoryCode: "NEWS", - URL: "https://repubblica.it/", - CountryCode: "IT", - }, { - CategoryCode: "FILE", - URL: "https://addons.mozilla.org/", - CountryCode: "XX", - }} - output := preventMistakes(input, []string{"NEWS", "FILE"}) - if diff := cmp.Diff(desired, output); diff != "" { - t.Fatal(diff) - } -} - -func TestPreventMistakesWithoutCategoriesAndNil(t *testing.T) { - input := []model.URLInfo{{ - CategoryCode: "NEWS", - URL: "https://repubblica.it/", - CountryCode: "IT", - }, { - CategoryCode: "HACK", - URL: "https://2600.com", - CountryCode: "XX", - }, { - CategoryCode: "FILE", - URL: "https://addons.mozilla.org/", - CountryCode: "XX", - }} - output := preventMistakes(input, nil) - if diff := cmp.Diff(input, output); diff != "" { - t.Fatal(diff) - } -} - -func TestPreventMistakesWithoutCategoriesAndEmpty(t *testing.T) { - input := []model.URLInfo{{ - CategoryCode: "NEWS", - URL: "https://repubblica.it/", - CountryCode: "IT", - }, { - CategoryCode: "HACK", - URL: "https://2600.com", - CountryCode: "XX", - }, { - CategoryCode: "FILE", - URL: "https://addons.mozilla.org/", - CountryCode: "XX", - }} - output := preventMistakes(input, []string{}) - if diff := cmp.Diff(input, output); diff != "" { - t.Fatal(diff) - } -} diff --git a/internal/engine/inputloader.go b/internal/engine/inputloader.go index 4f28d3e..d3d3d6e 100644 --- a/internal/engine/inputloader.go +++ b/internal/engine/inputloader.go @@ -7,6 +7,7 @@ import ( "fmt" "io/fs" + "github.com/apex/log" "github.com/ooni/probe-cli/v3/internal/engine/internal/fsx" "github.com/ooni/probe-cli/v3/internal/engine/model" ) @@ -26,6 +27,12 @@ type InputLoaderSession interface { config *model.CheckInConfig) (*model.CheckInInfo, error) } +// InputLoaderLogger is the logger according to an InputLoader. +type InputLoaderLogger interface { + // Warnf formats and emits a warning message. + Warnf(format string, v ...interface{}) +} + // InputLoader loads input according to the specified policy // either from command line and input files or from OONI services. The // behaviour depends on the input policy as described below. @@ -68,6 +75,11 @@ type InputLoader struct { // this field. InputPolicy InputPolicy + // Logger is the optional logger that the InputLoader + // should be using. If not set, we will use the default + // logger of github.com/apex/log. + Logger InputLoaderLogger + // Session is the current measurement session. You // MUST fill in this field. Session InputLoaderSession @@ -193,7 +205,7 @@ func (il *InputLoader) loadRemote(ctx context.Context) ([]model.URLInfo, error) // concerned about NOT passing it a NULL pointer. config = &model.CheckInConfig{} } - reply, err := il.Session.CheckIn(ctx, config) + reply, err := il.checkIn(ctx, config) if err != nil { return nil, err } @@ -202,3 +214,53 @@ func (il *InputLoader) loadRemote(ctx context.Context) ([]model.URLInfo, error) } return reply.WebConnectivity.URLs, nil } + +// checkIn executes the check-in and filters the returned URLs to exclude +// the URLs that are not part of the requested categories. This is done for +// robustness, just in case we or the API do something wrong. +func (il *InputLoader) checkIn( + ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) { + reply, err := il.Session.CheckIn(ctx, config) + if err != nil { + return nil, err + } + // Note: safe to assume that reply is not nil if err is nil + if reply.WebConnectivity != nil && len(reply.WebConnectivity.URLs) > 0 { + reply.WebConnectivity.URLs = il.preventMistakes( + reply.WebConnectivity.URLs, config.WebConnectivity.CategoryCodes, + ) + } + return reply, nil +} + +// preventMistakes makes the code more robust with respect to any possible +// integration issue where the backend returns to us URLs that don't +// belong to the category codes we requested. +func (il *InputLoader) preventMistakes(input []model.URLInfo, categories []string) (output []model.URLInfo) { + if len(categories) <= 0 { + return input + } + for _, entry := range input { + var found bool + for _, cat := range categories { + if entry.CategoryCode == cat { + found = true + break + } + } + if !found { + il.logger().Warnf("URL %+v not in %+v; skipping", entry, categories) + continue + } + output = append(output, entry) + } + return +} + +// logger returns the configured logger or apex/log's default. +func (il *InputLoader) logger() InputLoaderLogger { + if il.Logger != nil { + return il.Logger + } + return log.Log +} diff --git a/internal/engine/inputloader_test.go b/internal/engine/inputloader_test.go index 251b5dc..9b2ee35 100644 --- a/internal/engine/inputloader_test.go +++ b/internal/engine/inputloader_test.go @@ -407,3 +407,90 @@ func TestInputLoaderCheckInSuccessWithSomeURLs(t *testing.T) { t.Fatal(diff) } } + +func TestPreventMistakesWithCategories(t *testing.T) { + input := []model.URLInfo{{ + CategoryCode: "NEWS", + URL: "https://repubblica.it/", + CountryCode: "IT", + }, { + CategoryCode: "HACK", + URL: "https://2600.com", + CountryCode: "XX", + }, { + CategoryCode: "FILE", + URL: "https://addons.mozilla.org/", + CountryCode: "XX", + }} + desired := []model.URLInfo{{ + CategoryCode: "NEWS", + URL: "https://repubblica.it/", + CountryCode: "IT", + }, { + CategoryCode: "FILE", + URL: "https://addons.mozilla.org/", + CountryCode: "XX", + }} + il := &InputLoader{} + output := il.preventMistakes(input, []string{"NEWS", "FILE"}) + if diff := cmp.Diff(desired, output); diff != "" { + t.Fatal(diff) + } +} + +func TestPreventMistakesWithoutCategoriesAndNil(t *testing.T) { + input := []model.URLInfo{{ + CategoryCode: "NEWS", + URL: "https://repubblica.it/", + CountryCode: "IT", + }, { + CategoryCode: "HACK", + URL: "https://2600.com", + CountryCode: "XX", + }, { + CategoryCode: "FILE", + URL: "https://addons.mozilla.org/", + CountryCode: "XX", + }} + il := &InputLoader{} + output := il.preventMistakes(input, nil) + if diff := cmp.Diff(input, output); diff != "" { + t.Fatal(diff) + } +} + +func TestPreventMistakesWithoutCategoriesAndEmpty(t *testing.T) { + input := []model.URLInfo{{ + CategoryCode: "NEWS", + URL: "https://repubblica.it/", + CountryCode: "IT", + }, { + CategoryCode: "HACK", + URL: "https://2600.com", + CountryCode: "XX", + }, { + CategoryCode: "FILE", + URL: "https://addons.mozilla.org/", + CountryCode: "XX", + }} + il := &InputLoader{} + output := il.preventMistakes(input, []string{}) + if diff := cmp.Diff(input, output); diff != "" { + t.Fatal(diff) + } +} + +// InputLoaderFakeLogger is a fake InputLoaderLogger. +type InputLoaderFakeLogger struct{} + +// Warnf implements InputLoaderLogger.Warnf +func (ilfl *InputLoaderFakeLogger) Warnf(format string, v ...interface{}) {} + +func TestInputLoaderLoggerWorksAsIntended(t *testing.T) { + logger := &InputLoaderFakeLogger{} + inputLoader := &InputLoader{Logger: logger} + out := inputLoader.logger() + if out != logger { + t.Fatal("logger not working as intended") + } +}