feat(inputloader): use check-in to fetch URLs (#267)
* ongoing work * reduce diff with master * feat(inputloader): use the check-in API Part of https://github.com/ooni/probe/issues/1299 * fix: better naming for a variable * chore: add more tests * fix: add one more TODO
This commit is contained in:
parent
e0b0dfedc1
commit
5973c88a05
|
@ -1,8 +1,6 @@
|
|||
package run
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"github.com/alecthomas/kingpin"
|
||||
"github.com/apex/log"
|
||||
"github.com/fatih/color"
|
||||
|
@ -77,14 +75,6 @@ func init() {
|
|||
|
||||
unattendedCmd := cmd.Command("unattended", "")
|
||||
unattendedCmd.Action(func(_ *kingpin.ParseContext) error {
|
||||
// Until we have enabled the check-in API we're called every
|
||||
// hour on darwin and we need to self throttle.
|
||||
// TODO(bassosimone): switch to check-in and remove this hack.
|
||||
switch runtime.GOOS {
|
||||
case "darwin", "windows":
|
||||
const veryFew = 10
|
||||
probe.Config().Nettests.WebsitesURLLimit = veryFew
|
||||
}
|
||||
return functionalRun(func(name string, gr nettests.Group) bool {
|
||||
return gr.UnattendedOK == true
|
||||
})
|
||||
|
|
|
@ -6,16 +6,29 @@ import (
|
|||
"github.com/apex/log"
|
||||
"github.com/ooni/probe-cli/v3/cmd/ooniprobe/internal/database"
|
||||
engine "github.com/ooni/probe-cli/v3/internal/engine"
|
||||
"github.com/ooni/probe-cli/v3/internal/engine/model"
|
||||
)
|
||||
|
||||
// TODO(bassosimone): we should remove the limit argument and
|
||||
// we should also remove it from the config.
|
||||
|
||||
// TODO(bassosimone): we should propagate the kind of run
|
||||
// to here such that we get the right runType.
|
||||
|
||||
// TODO(bassosimone): we are breaking the use case in which
|
||||
// someone choose the number of URLs explicitly via the config.
|
||||
|
||||
func lookupURLs(ctl *Controller, limit int64, categories []string) ([]string, map[int64]int64, error) {
|
||||
inputloader := engine.NewInputLoader(engine.InputLoaderConfig{
|
||||
CheckInConfig: &model.CheckInConfig{
|
||||
WebConnectivity: model.CheckInConfigWebConnectivity{
|
||||
CategoryCodes: categories,
|
||||
},
|
||||
},
|
||||
InputPolicy: engine.InputOrQueryBackend,
|
||||
Session: ctl.Session,
|
||||
SourceFiles: ctl.InputFiles,
|
||||
StaticInputs: ctl.Inputs,
|
||||
URLCategories: categories,
|
||||
URLLimit: limit,
|
||||
})
|
||||
testlist, err := inputloader.Load(context.Background())
|
||||
var urls []string
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
|
||||
// These errors are returned by the InputLoader.
|
||||
var (
|
||||
ErrNoURLsReturned = errors.New("no URLs returned")
|
||||
ErrDetectedEmptyFile = errors.New("file did not contain any input")
|
||||
ErrInputRequired = errors.New("no input provided")
|
||||
ErrNoInputExpected = errors.New("we did not expect any input")
|
||||
|
@ -20,9 +21,8 @@ var (
|
|||
// InputLoaderSession is the session according to an InputLoader. We
|
||||
// introduce this abstraction because it helps us with testing.
|
||||
type InputLoaderSession interface {
|
||||
MaybeLookupLocationContext(ctx context.Context) error
|
||||
NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error)
|
||||
ProbeCC() string
|
||||
CheckIn(ctx context.Context,
|
||||
config *model.CheckInConfig) (*model.CheckInInfo, error)
|
||||
}
|
||||
|
||||
// InputLoader loads input according to the specified policy
|
||||
|
@ -59,6 +59,22 @@ type InputLoader interface {
|
|||
|
||||
// InputLoaderConfig contains config for InputLoader.
|
||||
type InputLoaderConfig struct {
|
||||
// CheckInConfig contains options for the CheckIn API. If
|
||||
// not set, then we'll create a default config. If set but
|
||||
// there are fields inside it that are not set, then we
|
||||
// will set them to a default value.
|
||||
CheckInConfig *model.CheckInConfig
|
||||
|
||||
// InputPolicy specifies the input policy for the
|
||||
// current experiment. We will not load any input if
|
||||
// the policy says we should not. You MUST fill in
|
||||
// this field.
|
||||
InputPolicy InputPolicy
|
||||
|
||||
// Session is the current measurement session. You
|
||||
// MUST fill in this field.
|
||||
Session InputLoaderSession
|
||||
|
||||
// StaticInputs contains optional input to be added
|
||||
// to the resulting input list if possible.
|
||||
StaticInputs []string
|
||||
|
@ -68,22 +84,6 @@ type InputLoaderConfig struct {
|
|||
// per line. We will fail if any file is unreadable
|
||||
// as well as if any file is empty.
|
||||
SourceFiles []string
|
||||
|
||||
// InputPolicy specifies the input policy for the
|
||||
// current experiment. We will not load any input if
|
||||
// the policy says we should not.
|
||||
InputPolicy InputPolicy
|
||||
|
||||
// Session is the current measurement session.
|
||||
Session InputLoaderSession
|
||||
|
||||
// URLLimit is the optional limit on the number of URLs
|
||||
// that probe services should return to us.
|
||||
URLLimit int64
|
||||
|
||||
// URLCategories limits the categories of URLs that
|
||||
// probe services should return to us.
|
||||
URLCategories []string
|
||||
}
|
||||
|
||||
// NewInputLoader creates a new InputLoader.
|
||||
|
@ -218,16 +218,20 @@ type inputLoaderLoadRemoteConfig struct {
|
|||
|
||||
// loadRemote loads inputs from a remote source.
|
||||
func (il inputLoader) loadRemote(conf inputLoaderLoadRemoteConfig) ([]model.URLInfo, error) {
|
||||
if err := conf.session.MaybeLookupLocationContext(conf.ctx); err != nil {
|
||||
return nil, err
|
||||
config := il.CheckInConfig
|
||||
if config == nil {
|
||||
// Note: Session.CheckIn documentation says it will fill in
|
||||
// any field with a required value with a reasonable default
|
||||
// if such value is missing. So, here we just need to be
|
||||
// concerned about NOT passing it a NULL pointer.
|
||||
config = &model.CheckInConfig{}
|
||||
}
|
||||
client, err := conf.session.NewOrchestraClient(conf.ctx)
|
||||
reply, err := conf.session.CheckIn(conf.ctx, config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return client.FetchURLList(conf.ctx, model.URLListConfig{
|
||||
CountryCode: conf.session.ProbeCC(),
|
||||
Limit: il.URLLimit,
|
||||
Categories: il.URLCategories,
|
||||
})
|
||||
if reply.WebConnectivity == nil || len(reply.WebConnectivity.URLs) <= 0 {
|
||||
return nil, ErrNoURLsReturned
|
||||
}
|
||||
return reply.WebConnectivity.URLs, nil
|
||||
}
|
||||
|
|
|
@ -204,7 +204,7 @@ func TestInputLoaderInputStrictlyRequiredWithEmptyFile(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) {
|
||||
func TestInputLoaderInputOrQueryBackendWithInput(t *testing.T) {
|
||||
il := engine.NewInputLoader(engine.InputLoaderConfig{
|
||||
StaticInputs: []string{"https://www.google.com/"},
|
||||
SourceFiles: []string{
|
||||
|
@ -233,7 +233,7 @@ func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testing.T) {
|
||||
func TestInputLoaderInputOrQueryBackendWithNoInputAndCancelledContext(t *testing.T) {
|
||||
sess, err := engine.NewSession(engine.SessionConfig{
|
||||
AssetsDir: "testdata",
|
||||
KVStore: kvstore.NewMemoryKeyValueStore(),
|
||||
|
@ -261,7 +261,7 @@ func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testi
|
|||
}
|
||||
}
|
||||
|
||||
func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
|
||||
func TestInputLoaderInputOrQueryBackendWithNoInput(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skip test in short mode")
|
||||
}
|
||||
|
@ -284,7 +284,6 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
|
|||
il := engine.NewInputLoader(engine.InputLoaderConfig{
|
||||
InputPolicy: engine.InputOrQueryBackend,
|
||||
Session: sess,
|
||||
URLLimit: 30,
|
||||
})
|
||||
ctx := context.Background()
|
||||
out, err := il.Load(ctx)
|
||||
|
@ -292,11 +291,12 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
if len(out) < 10 {
|
||||
// check-in SHOULD return AT LEAST 20 URLs at a time.
|
||||
t.Fatal("not the output length we expected")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInputLoaderInputOrQueryTestListsWithEmptyFile(t *testing.T) {
|
||||
func TestInputLoaderInputOrQueryBackendWithEmptyFile(t *testing.T) {
|
||||
il := engine.NewInputLoader(engine.InputLoaderConfig{
|
||||
InputPolicy: engine.InputOrQueryBackend,
|
||||
SourceFiles: []string{
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/ooni/probe-cli/v3/internal/engine/internal/fsx"
|
||||
"github.com/ooni/probe-cli/v3/internal/engine/model"
|
||||
)
|
||||
|
@ -43,65 +44,33 @@ func TestInputLoaderReadfileScannerFailure(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
type InputLoaderBrokenSession struct {
|
||||
OrchestraClient model.ExperimentOrchestraClient
|
||||
// InputLoaderMockableSession is a mockable session
|
||||
// used by InputLoader tests.
|
||||
type InputLoaderMockableSession struct {
|
||||
// Output contains the output of CheckIn. It should
|
||||
// be nil when Error is not-nil.
|
||||
Output *model.CheckInInfo
|
||||
|
||||
// Error is the error to be returned by CheckIn. It
|
||||
// should be nil when Output is not-nil.
|
||||
Error error
|
||||
}
|
||||
|
||||
func (InputLoaderBrokenSession) MaybeLookupLocationContext(ctx context.Context) error {
|
||||
return nil
|
||||
// CheckIn implements InputLoaderSession.CheckIn.
|
||||
func (sess *InputLoaderMockableSession) CheckIn(
|
||||
ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) {
|
||||
if sess.Output == nil && sess.Error == nil {
|
||||
return nil, errors.New("both Output and Error are nil")
|
||||
}
|
||||
return sess.Output, sess.Error
|
||||
}
|
||||
|
||||
func (ilbs InputLoaderBrokenSession) NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error) {
|
||||
if ilbs.OrchestraClient != nil {
|
||||
return ilbs.OrchestraClient, nil
|
||||
}
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
func (InputLoaderBrokenSession) ProbeCC() string {
|
||||
return "IT"
|
||||
}
|
||||
|
||||
func TestInputLoaderNewOrchestraClientFailure(t *testing.T) {
|
||||
func TestInputLoaderCheckInFailure(t *testing.T) {
|
||||
il := inputLoader{}
|
||||
lrc := inputLoaderLoadRemoteConfig{
|
||||
ctx: context.Background(),
|
||||
session: InputLoaderBrokenSession{},
|
||||
}
|
||||
out, err := il.loadRemote(lrc)
|
||||
if !errors.Is(err, io.EOF) {
|
||||
t.Fatalf("not the error we expected: %+v", err)
|
||||
}
|
||||
if out != nil {
|
||||
t.Fatal("expected nil output here")
|
||||
}
|
||||
}
|
||||
|
||||
type InputLoaderBrokenOrchestraClient struct{}
|
||||
|
||||
func (InputLoaderBrokenOrchestraClient) CheckIn(ctx context.Context, config model.CheckInConfig) (*model.CheckInInfo, error) {
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
func (InputLoaderBrokenOrchestraClient) FetchPsiphonConfig(ctx context.Context) ([]byte, error) {
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
func (InputLoaderBrokenOrchestraClient) FetchTorTargets(ctx context.Context, cc string) (map[string]model.TorTarget, error) {
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
func (InputLoaderBrokenOrchestraClient) FetchURLList(ctx context.Context, config model.URLListConfig) ([]model.URLInfo, error) {
|
||||
return nil, io.EOF
|
||||
}
|
||||
|
||||
func TestInputLoaderFetchURLListFailure(t *testing.T) {
|
||||
il := inputLoader{}
|
||||
lrc := inputLoaderLoadRemoteConfig{
|
||||
ctx: context.Background(),
|
||||
session: InputLoaderBrokenSession{
|
||||
OrchestraClient: InputLoaderBrokenOrchestraClient{},
|
||||
session: &InputLoaderMockableSession{
|
||||
Error: io.EOF,
|
||||
},
|
||||
}
|
||||
out, err := il.loadRemote(lrc)
|
||||
|
@ -112,3 +81,69 @@ func TestInputLoaderFetchURLListFailure(t *testing.T) {
|
|||
t.Fatal("expected nil output here")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInputLoaderCheckInSuccessWithNilWebConnectivity(t *testing.T) {
|
||||
il := inputLoader{}
|
||||
lrc := inputLoaderLoadRemoteConfig{
|
||||
ctx: context.Background(),
|
||||
session: &InputLoaderMockableSession{
|
||||
Output: &model.CheckInInfo{},
|
||||
},
|
||||
}
|
||||
out, err := il.loadRemote(lrc)
|
||||
if !errors.Is(err, ErrNoURLsReturned) {
|
||||
t.Fatalf("not the error we expected: %+v", err)
|
||||
}
|
||||
if out != nil {
|
||||
t.Fatal("expected nil output here")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInputLoaderCheckInSuccessWithNoURLs(t *testing.T) {
|
||||
il := inputLoader{}
|
||||
lrc := inputLoaderLoadRemoteConfig{
|
||||
ctx: context.Background(),
|
||||
session: &InputLoaderMockableSession{
|
||||
Output: &model.CheckInInfo{
|
||||
WebConnectivity: &model.CheckInInfoWebConnectivity{},
|
||||
},
|
||||
},
|
||||
}
|
||||
out, err := il.loadRemote(lrc)
|
||||
if !errors.Is(err, ErrNoURLsReturned) {
|
||||
t.Fatalf("not the error we expected: %+v", err)
|
||||
}
|
||||
if out != nil {
|
||||
t.Fatal("expected nil output here")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInputLoaderCheckInSuccessWithSomeURLs(t *testing.T) {
|
||||
expect := []model.URLInfo{{
|
||||
CategoryCode: "NEWS",
|
||||
CountryCode: "IT",
|
||||
URL: "https://repubblica.it",
|
||||
}, {
|
||||
CategoryCode: "NEWS",
|
||||
CountryCode: "IT",
|
||||
URL: "https://corriere.it",
|
||||
}}
|
||||
il := inputLoader{}
|
||||
lrc := inputLoaderLoadRemoteConfig{
|
||||
ctx: context.Background(),
|
||||
session: &InputLoaderMockableSession{
|
||||
Output: &model.CheckInInfo{
|
||||
WebConnectivity: &model.CheckInInfoWebConnectivity{
|
||||
URLs: expect,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
out, err := il.loadRemote(lrc)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if diff := cmp.Diff(expect, out); diff != "" {
|
||||
t.Fatal(diff)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,6 +12,13 @@
|
|||
// code at github.com/bassosimone/aladdin).
|
||||
package libminiooni
|
||||
|
||||
// TODO(bassosimone): now that this code cannot be imported from
|
||||
// external sources anymore, we should move it back into the
|
||||
// internal/cmd/miniooni folder and reduce the number of packages
|
||||
// that are unnecessarily exposed inside ./internal/engine.
|
||||
|
||||
// TODO(bassosimone): we need to deprecate or remove --limit.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
|
@ -140,10 +147,6 @@ func init() {
|
|||
)
|
||||
}
|
||||
|
||||
func fatalWithString(msg string) {
|
||||
panic(msg)
|
||||
}
|
||||
|
||||
func fatalIfFalse(cond bool, msg string) {
|
||||
if !cond {
|
||||
panic(msg)
|
||||
|
@ -371,11 +374,15 @@ func MainWithConfiguration(experimentName string, currentOptions Options) {
|
|||
fatalOnError(err, "cannot create experiment builder")
|
||||
|
||||
inputLoader := engine.NewInputLoader(engine.InputLoaderConfig{
|
||||
CheckInConfig: &model.CheckInConfig{
|
||||
RunType: "manual",
|
||||
OnWiFi: true, // meaning: not on 4G
|
||||
Charging: true,
|
||||
},
|
||||
InputPolicy: builder.InputPolicy(),
|
||||
StaticInputs: currentOptions.Inputs,
|
||||
SourceFiles: currentOptions.InputFilePaths,
|
||||
InputPolicy: builder.InputPolicy(),
|
||||
Session: sess,
|
||||
URLLimit: currentOptions.Limit,
|
||||
})
|
||||
inputs, err := inputLoader.Load(context.Background())
|
||||
fatalOnError(err, "cannot load inputs")
|
||||
|
@ -399,14 +406,14 @@ func MainWithConfiguration(experimentName string, currentOptions Options) {
|
|||
}()
|
||||
|
||||
submitter, err := engine.NewSubmitter(ctx, engine.SubmitterConfig{
|
||||
Enabled: currentOptions.NoCollector == false,
|
||||
Enabled: !currentOptions.NoCollector,
|
||||
Session: sess,
|
||||
Logger: log.Log,
|
||||
})
|
||||
fatalOnError(err, "cannot create submitter")
|
||||
|
||||
saver, err := engine.NewSaver(engine.SaverConfig{
|
||||
Enabled: currentOptions.NoJSON == false,
|
||||
Enabled: !currentOptions.NoJSON,
|
||||
Experiment: experiment,
|
||||
FilePath: currentOptions.ReportFile,
|
||||
Logger: log.Log,
|
||||
|
|
Loading…
Reference in New Issue
Block a user