feat(inputloader): use check-in to fetch URLs (#267)

* ongoing work

* reduce diff with master

* feat(inputloader): use the check-in API

Part of https://github.com/ooni/probe/issues/1299

* fix: better naming for a variable

* chore: add more tests

* fix: add one more TODO
This commit is contained in:
Simone Basso 2021-03-29 18:46:26 +02:00 committed by GitHub
parent e0b0dfedc1
commit 5973c88a05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 156 additions and 107 deletions

View File

@ -1,8 +1,6 @@
package run
import (
"runtime"
"github.com/alecthomas/kingpin"
"github.com/apex/log"
"github.com/fatih/color"
@ -77,14 +75,6 @@ func init() {
unattendedCmd := cmd.Command("unattended", "")
unattendedCmd.Action(func(_ *kingpin.ParseContext) error {
// Until we have enabled the check-in API we're called every
// hour on darwin and we need to self throttle.
// TODO(bassosimone): switch to check-in and remove this hack.
switch runtime.GOOS {
case "darwin", "windows":
const veryFew = 10
probe.Config().Nettests.WebsitesURLLimit = veryFew
}
return functionalRun(func(name string, gr nettests.Group) bool {
return gr.UnattendedOK == true
})

View File

@ -6,16 +6,29 @@ import (
"github.com/apex/log"
"github.com/ooni/probe-cli/v3/cmd/ooniprobe/internal/database"
engine "github.com/ooni/probe-cli/v3/internal/engine"
"github.com/ooni/probe-cli/v3/internal/engine/model"
)
// TODO(bassosimone): we should remove the limit argument and
// we should also remove it from the config.
// TODO(bassosimone): we should propagate the kind of run
// to here such that we get the right runType.
// TODO(bassosimone): we are breaking the use case in which
// someone choose the number of URLs explicitly via the config.
func lookupURLs(ctl *Controller, limit int64, categories []string) ([]string, map[int64]int64, error) {
inputloader := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend,
Session: ctl.Session,
SourceFiles: ctl.InputFiles,
StaticInputs: ctl.Inputs,
URLCategories: categories,
URLLimit: limit,
CheckInConfig: &model.CheckInConfig{
WebConnectivity: model.CheckInConfigWebConnectivity{
CategoryCodes: categories,
},
},
InputPolicy: engine.InputOrQueryBackend,
Session: ctl.Session,
SourceFiles: ctl.InputFiles,
StaticInputs: ctl.Inputs,
})
testlist, err := inputloader.Load(context.Background())
var urls []string

View File

@ -12,6 +12,7 @@ import (
// These errors are returned by the InputLoader.
var (
ErrNoURLsReturned = errors.New("no URLs returned")
ErrDetectedEmptyFile = errors.New("file did not contain any input")
ErrInputRequired = errors.New("no input provided")
ErrNoInputExpected = errors.New("we did not expect any input")
@ -20,9 +21,8 @@ var (
// InputLoaderSession is the session according to an InputLoader. We
// introduce this abstraction because it helps us with testing.
type InputLoaderSession interface {
MaybeLookupLocationContext(ctx context.Context) error
NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error)
ProbeCC() string
CheckIn(ctx context.Context,
config *model.CheckInConfig) (*model.CheckInInfo, error)
}
// InputLoader loads input according to the specified policy
@ -59,6 +59,22 @@ type InputLoader interface {
// InputLoaderConfig contains config for InputLoader.
type InputLoaderConfig struct {
// CheckInConfig contains options for the CheckIn API. If
// not set, then we'll create a default config. If set but
// there are fields inside it that are not set, then we
// will set them to a default value.
CheckInConfig *model.CheckInConfig
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not. You MUST fill in
// this field.
InputPolicy InputPolicy
// Session is the current measurement session. You
// MUST fill in this field.
Session InputLoaderSession
// StaticInputs contains optional input to be added
// to the resulting input list if possible.
StaticInputs []string
@ -68,22 +84,6 @@ type InputLoaderConfig struct {
// per line. We will fail if any file is unreadable
// as well as if any file is empty.
SourceFiles []string
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not.
InputPolicy InputPolicy
// Session is the current measurement session.
Session InputLoaderSession
// URLLimit is the optional limit on the number of URLs
// that probe services should return to us.
URLLimit int64
// URLCategories limits the categories of URLs that
// probe services should return to us.
URLCategories []string
}
// NewInputLoader creates a new InputLoader.
@ -218,16 +218,20 @@ type inputLoaderLoadRemoteConfig struct {
// loadRemote loads inputs from a remote source.
func (il inputLoader) loadRemote(conf inputLoaderLoadRemoteConfig) ([]model.URLInfo, error) {
if err := conf.session.MaybeLookupLocationContext(conf.ctx); err != nil {
return nil, err
config := il.CheckInConfig
if config == nil {
// Note: Session.CheckIn documentation says it will fill in
// any field with a required value with a reasonable default
// if such value is missing. So, here we just need to be
// concerned about NOT passing it a NULL pointer.
config = &model.CheckInConfig{}
}
client, err := conf.session.NewOrchestraClient(conf.ctx)
reply, err := conf.session.CheckIn(conf.ctx, config)
if err != nil {
return nil, err
}
return client.FetchURLList(conf.ctx, model.URLListConfig{
CountryCode: conf.session.ProbeCC(),
Limit: il.URLLimit,
Categories: il.URLCategories,
})
if reply.WebConnectivity == nil || len(reply.WebConnectivity.URLs) <= 0 {
return nil, ErrNoURLsReturned
}
return reply.WebConnectivity.URLs, nil
}

View File

@ -204,7 +204,7 @@ func TestInputLoaderInputStrictlyRequiredWithEmptyFile(t *testing.T) {
}
}
func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithInput(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{
StaticInputs: []string{"https://www.google.com/"},
SourceFiles: []string{
@ -233,7 +233,7 @@ func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) {
}
}
func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithNoInputAndCancelledContext(t *testing.T) {
sess, err := engine.NewSession(engine.SessionConfig{
AssetsDir: "testdata",
KVStore: kvstore.NewMemoryKeyValueStore(),
@ -261,7 +261,7 @@ func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testi
}
}
func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithNoInput(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
@ -284,7 +284,6 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend,
Session: sess,
URLLimit: 30,
})
ctx := context.Background()
out, err := il.Load(ctx)
@ -292,11 +291,12 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
t.Fatal(err)
}
if len(out) < 10 {
// check-in SHOULD return AT LEAST 20 URLs at a time.
t.Fatal("not the output length we expected")
}
}
func TestInputLoaderInputOrQueryTestListsWithEmptyFile(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithEmptyFile(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend,
SourceFiles: []string{

View File

@ -8,6 +8,7 @@ import (
"syscall"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ooni/probe-cli/v3/internal/engine/internal/fsx"
"github.com/ooni/probe-cli/v3/internal/engine/model"
)
@ -43,65 +44,33 @@ func TestInputLoaderReadfileScannerFailure(t *testing.T) {
}
}
type InputLoaderBrokenSession struct {
OrchestraClient model.ExperimentOrchestraClient
Error error
// InputLoaderMockableSession is a mockable session
// used by InputLoader tests.
type InputLoaderMockableSession struct {
// Output contains the output of CheckIn. It should
// be nil when Error is not-nil.
Output *model.CheckInInfo
// Error is the error to be returned by CheckIn. It
// should be nil when Output is not-nil.
Error error
}
func (InputLoaderBrokenSession) MaybeLookupLocationContext(ctx context.Context) error {
return nil
}
func (ilbs InputLoaderBrokenSession) NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error) {
if ilbs.OrchestraClient != nil {
return ilbs.OrchestraClient, nil
// CheckIn implements InputLoaderSession.CheckIn.
func (sess *InputLoaderMockableSession) CheckIn(
ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) {
if sess.Output == nil && sess.Error == nil {
return nil, errors.New("both Output and Error are nil")
}
return nil, io.EOF
return sess.Output, sess.Error
}
func (InputLoaderBrokenSession) ProbeCC() string {
return "IT"
}
func TestInputLoaderNewOrchestraClientFailure(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: InputLoaderBrokenSession{},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, io.EOF) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
type InputLoaderBrokenOrchestraClient struct{}
func (InputLoaderBrokenOrchestraClient) CheckIn(ctx context.Context, config model.CheckInConfig) (*model.CheckInInfo, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchPsiphonConfig(ctx context.Context) ([]byte, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchTorTargets(ctx context.Context, cc string) (map[string]model.TorTarget, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchURLList(ctx context.Context, config model.URLListConfig) ([]model.URLInfo, error) {
return nil, io.EOF
}
func TestInputLoaderFetchURLListFailure(t *testing.T) {
func TestInputLoaderCheckInFailure(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: InputLoaderBrokenSession{
OrchestraClient: InputLoaderBrokenOrchestraClient{},
session: &InputLoaderMockableSession{
Error: io.EOF,
},
}
out, err := il.loadRemote(lrc)
@ -112,3 +81,69 @@ func TestInputLoaderFetchURLListFailure(t *testing.T) {
t.Fatal("expected nil output here")
}
}
func TestInputLoaderCheckInSuccessWithNilWebConnectivity(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{},
},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, ErrNoURLsReturned) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
func TestInputLoaderCheckInSuccessWithNoURLs(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{
WebConnectivity: &model.CheckInInfoWebConnectivity{},
},
},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, ErrNoURLsReturned) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
func TestInputLoaderCheckInSuccessWithSomeURLs(t *testing.T) {
expect := []model.URLInfo{{
CategoryCode: "NEWS",
CountryCode: "IT",
URL: "https://repubblica.it",
}, {
CategoryCode: "NEWS",
CountryCode: "IT",
URL: "https://corriere.it",
}}
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{
WebConnectivity: &model.CheckInInfoWebConnectivity{
URLs: expect,
},
},
},
}
out, err := il.loadRemote(lrc)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(expect, out); diff != "" {
t.Fatal(diff)
}
}

View File

@ -12,6 +12,13 @@
// code at github.com/bassosimone/aladdin).
package libminiooni
// TODO(bassosimone): now that this code cannot be imported from
// external sources anymore, we should move it back into the
// internal/cmd/miniooni folder and reduce the number of packages
// that are unnecessarily exposed inside ./internal/engine.
// TODO(bassosimone): we need to deprecate or remove --limit.
import (
"context"
"errors"
@ -140,10 +147,6 @@ func init() {
)
}
func fatalWithString(msg string) {
panic(msg)
}
func fatalIfFalse(cond bool, msg string) {
if !cond {
panic(msg)
@ -371,11 +374,15 @@ func MainWithConfiguration(experimentName string, currentOptions Options) {
fatalOnError(err, "cannot create experiment builder")
inputLoader := engine.NewInputLoader(engine.InputLoaderConfig{
CheckInConfig: &model.CheckInConfig{
RunType: "manual",
OnWiFi: true, // meaning: not on 4G
Charging: true,
},
InputPolicy: builder.InputPolicy(),
StaticInputs: currentOptions.Inputs,
SourceFiles: currentOptions.InputFilePaths,
InputPolicy: builder.InputPolicy(),
Session: sess,
URLLimit: currentOptions.Limit,
})
inputs, err := inputLoader.Load(context.Background())
fatalOnError(err, "cannot load inputs")
@ -399,14 +406,14 @@ func MainWithConfiguration(experimentName string, currentOptions Options) {
}()
submitter, err := engine.NewSubmitter(ctx, engine.SubmitterConfig{
Enabled: currentOptions.NoCollector == false,
Enabled: !currentOptions.NoCollector,
Session: sess,
Logger: log.Log,
})
fatalOnError(err, "cannot create submitter")
saver, err := engine.NewSaver(engine.SaverConfig{
Enabled: currentOptions.NoJSON == false,
Enabled: !currentOptions.NoJSON,
Experiment: experiment,
FilePath: currentOptions.ReportFile,
Logger: log.Log,