fix: move preventMistakes in InputLoader (#304)

This fixes an issue where URLs provided with --input are not
accepted by the preventMistakes filter.

The filter itself needs to execute _only_ on URLs returned
by the checkIn API, rather than on URLs returned by the
InputLoader, which may instead be user provided.

Reference issue: https://github.com/ooni/probe/issues/1435
This commit is contained in:
Simone Basso 2021-04-07 14:14:25 +02:00 committed by GitHub
parent 46d19f47ec
commit 654441f5cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 150 additions and 103 deletions

View File

@ -9,30 +9,6 @@ import (
"github.com/ooni/probe-cli/v3/internal/engine/model" "github.com/ooni/probe-cli/v3/internal/engine/model"
) )
// preventMistakes makes the code more robust with respect to any possible
// integration issue where the backend returns to us URLs that don't
// belong to the category codes we requested.
func preventMistakes(input []model.URLInfo, categories []string) (output []model.URLInfo) {
if len(categories) <= 0 {
return input
}
for _, entry := range input {
var found bool
for _, cat := range categories {
if entry.CategoryCode == cat {
found = true
break
}
}
if !found {
log.Warnf("URL %+v not in %+v; skipping", entry, categories)
continue
}
output = append(output, entry)
}
return
}
func lookupURLs(ctl *Controller, categories []string) ([]string, map[int64]int64, error) { func lookupURLs(ctl *Controller, categories []string) ([]string, map[int64]int64, error) {
inputloader := &engine.InputLoader{ inputloader := &engine.InputLoader{
CheckInConfig: &model.CheckInConfig{ CheckInConfig: &model.CheckInConfig{
@ -56,7 +32,6 @@ func lookupURLs(ctl *Controller, categories []string) ([]string, map[int64]int64
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
testlist = preventMistakes(testlist, categories)
var urls []string var urls []string
urlIDMap := make(map[int64]int64) urlIDMap := make(map[int64]int64)
for idx, url := range testlist { for idx, url := range testlist {

View File

@ -1,77 +0,0 @@
package nettests
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ooni/probe-cli/v3/internal/engine/model"
)
func TestPreventMistakesWithCategories(t *testing.T) {
input := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "HACK",
URL: "https://2600.com",
CountryCode: "XX",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
desired := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
output := preventMistakes(input, []string{"NEWS", "FILE"})
if diff := cmp.Diff(desired, output); diff != "" {
t.Fatal(diff)
}
}
func TestPreventMistakesWithoutCategoriesAndNil(t *testing.T) {
input := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "HACK",
URL: "https://2600.com",
CountryCode: "XX",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
output := preventMistakes(input, nil)
if diff := cmp.Diff(input, output); diff != "" {
t.Fatal(diff)
}
}
func TestPreventMistakesWithoutCategoriesAndEmpty(t *testing.T) {
input := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "HACK",
URL: "https://2600.com",
CountryCode: "XX",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
output := preventMistakes(input, []string{})
if diff := cmp.Diff(input, output); diff != "" {
t.Fatal(diff)
}
}

View File

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io/fs" "io/fs"
"github.com/apex/log"
"github.com/ooni/probe-cli/v3/internal/engine/internal/fsx" "github.com/ooni/probe-cli/v3/internal/engine/internal/fsx"
"github.com/ooni/probe-cli/v3/internal/engine/model" "github.com/ooni/probe-cli/v3/internal/engine/model"
) )
@ -26,6 +27,12 @@ type InputLoaderSession interface {
config *model.CheckInConfig) (*model.CheckInInfo, error) config *model.CheckInConfig) (*model.CheckInInfo, error)
} }
// InputLoaderLogger is the logger according to an InputLoader.
type InputLoaderLogger interface {
// Warnf formats and emits a warning message.
Warnf(format string, v ...interface{})
}
// InputLoader loads input according to the specified policy // InputLoader loads input according to the specified policy
// either from command line and input files or from OONI services. The // either from command line and input files or from OONI services. The
// behaviour depends on the input policy as described below. // behaviour depends on the input policy as described below.
@ -68,6 +75,11 @@ type InputLoader struct {
// this field. // this field.
InputPolicy InputPolicy InputPolicy InputPolicy
// Logger is the optional logger that the InputLoader
// should be using. If not set, we will use the default
// logger of github.com/apex/log.
Logger InputLoaderLogger
// Session is the current measurement session. You // Session is the current measurement session. You
// MUST fill in this field. // MUST fill in this field.
Session InputLoaderSession Session InputLoaderSession
@ -193,7 +205,7 @@ func (il *InputLoader) loadRemote(ctx context.Context) ([]model.URLInfo, error)
// concerned about NOT passing it a NULL pointer. // concerned about NOT passing it a NULL pointer.
config = &model.CheckInConfig{} config = &model.CheckInConfig{}
} }
reply, err := il.Session.CheckIn(ctx, config) reply, err := il.checkIn(ctx, config)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -202,3 +214,53 @@ func (il *InputLoader) loadRemote(ctx context.Context) ([]model.URLInfo, error)
} }
return reply.WebConnectivity.URLs, nil return reply.WebConnectivity.URLs, nil
} }
// checkIn executes the check-in and filters the returned URLs to exclude
// the URLs that are not part of the requested categories. This is done for
// robustness, just in case we or the API do something wrong.
func (il *InputLoader) checkIn(
ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) {
reply, err := il.Session.CheckIn(ctx, config)
if err != nil {
return nil, err
}
// Note: safe to assume that reply is not nil if err is nil
if reply.WebConnectivity != nil && len(reply.WebConnectivity.URLs) > 0 {
reply.WebConnectivity.URLs = il.preventMistakes(
reply.WebConnectivity.URLs, config.WebConnectivity.CategoryCodes,
)
}
return reply, nil
}
// preventMistakes makes the code more robust with respect to any possible
// integration issue where the backend returns to us URLs that don't
// belong to the category codes we requested.
func (il *InputLoader) preventMistakes(input []model.URLInfo, categories []string) (output []model.URLInfo) {
if len(categories) <= 0 {
return input
}
for _, entry := range input {
var found bool
for _, cat := range categories {
if entry.CategoryCode == cat {
found = true
break
}
}
if !found {
il.logger().Warnf("URL %+v not in %+v; skipping", entry, categories)
continue
}
output = append(output, entry)
}
return
}
// logger returns the configured logger or apex/log's default.
func (il *InputLoader) logger() InputLoaderLogger {
if il.Logger != nil {
return il.Logger
}
return log.Log
}

View File

@ -407,3 +407,90 @@ func TestInputLoaderCheckInSuccessWithSomeURLs(t *testing.T) {
t.Fatal(diff) t.Fatal(diff)
} }
} }
func TestPreventMistakesWithCategories(t *testing.T) {
input := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "HACK",
URL: "https://2600.com",
CountryCode: "XX",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
desired := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
il := &InputLoader{}
output := il.preventMistakes(input, []string{"NEWS", "FILE"})
if diff := cmp.Diff(desired, output); diff != "" {
t.Fatal(diff)
}
}
func TestPreventMistakesWithoutCategoriesAndNil(t *testing.T) {
input := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "HACK",
URL: "https://2600.com",
CountryCode: "XX",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
il := &InputLoader{}
output := il.preventMistakes(input, nil)
if diff := cmp.Diff(input, output); diff != "" {
t.Fatal(diff)
}
}
func TestPreventMistakesWithoutCategoriesAndEmpty(t *testing.T) {
input := []model.URLInfo{{
CategoryCode: "NEWS",
URL: "https://repubblica.it/",
CountryCode: "IT",
}, {
CategoryCode: "HACK",
URL: "https://2600.com",
CountryCode: "XX",
}, {
CategoryCode: "FILE",
URL: "https://addons.mozilla.org/",
CountryCode: "XX",
}}
il := &InputLoader{}
output := il.preventMistakes(input, []string{})
if diff := cmp.Diff(input, output); diff != "" {
t.Fatal(diff)
}
}
// InputLoaderFakeLogger is a fake InputLoaderLogger.
type InputLoaderFakeLogger struct{}
// Warnf implements InputLoaderLogger.Warnf
func (ilfl *InputLoaderFakeLogger) Warnf(format string, v ...interface{}) {}
func TestInputLoaderLoggerWorksAsIntended(t *testing.T) {
logger := &InputLoaderFakeLogger{}
inputLoader := &InputLoader{Logger: logger}
out := inputLoader.logger()
if out != logger {
t.Fatal("logger not working as intended")
}
}