feat(inputloader): use check-in to fetch URLs (#267)

* ongoing work

* reduce diff with master

* feat(inputloader): use the check-in API

Part of https://github.com/ooni/probe/issues/1299

* fix: better naming for a variable

* chore: add more tests

* fix: add one more TODO
This commit is contained in:
Simone Basso
2021-03-29 18:46:26 +02:00
committed by GitHub
parent e0b0dfedc1
commit 5973c88a05
6 changed files with 156 additions and 107 deletions
+31 -27
View File
@@ -12,6 +12,7 @@ import (
// These errors are returned by the InputLoader.
var (
ErrNoURLsReturned = errors.New("no URLs returned")
ErrDetectedEmptyFile = errors.New("file did not contain any input")
ErrInputRequired = errors.New("no input provided")
ErrNoInputExpected = errors.New("we did not expect any input")
@@ -20,9 +21,8 @@ var (
// InputLoaderSession is the session according to an InputLoader. We
// introduce this abstraction because it helps us with testing.
type InputLoaderSession interface {
MaybeLookupLocationContext(ctx context.Context) error
NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error)
ProbeCC() string
CheckIn(ctx context.Context,
config *model.CheckInConfig) (*model.CheckInInfo, error)
}
// InputLoader loads input according to the specified policy
@@ -59,6 +59,22 @@ type InputLoader interface {
// InputLoaderConfig contains config for InputLoader.
type InputLoaderConfig struct {
// CheckInConfig contains options for the CheckIn API. If
// not set, then we'll create a default config. If set but
// there are fields inside it that are not set, then we
// will set them to a default value.
CheckInConfig *model.CheckInConfig
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not. You MUST fill in
// this field.
InputPolicy InputPolicy
// Session is the current measurement session. You
// MUST fill in this field.
Session InputLoaderSession
// StaticInputs contains optional input to be added
// to the resulting input list if possible.
StaticInputs []string
@@ -68,22 +84,6 @@ type InputLoaderConfig struct {
// per line. We will fail if any file is unreadable
// as well as if any file is empty.
SourceFiles []string
// InputPolicy specifies the input policy for the
// current experiment. We will not load any input if
// the policy says we should not.
InputPolicy InputPolicy
// Session is the current measurement session.
Session InputLoaderSession
// URLLimit is the optional limit on the number of URLs
// that probe services should return to us.
URLLimit int64
// URLCategories limits the categories of URLs that
// probe services should return to us.
URLCategories []string
}
// NewInputLoader creates a new InputLoader.
@@ -218,16 +218,20 @@ type inputLoaderLoadRemoteConfig struct {
// loadRemote loads inputs from a remote source.
func (il inputLoader) loadRemote(conf inputLoaderLoadRemoteConfig) ([]model.URLInfo, error) {
if err := conf.session.MaybeLookupLocationContext(conf.ctx); err != nil {
return nil, err
config := il.CheckInConfig
if config == nil {
// Note: the Session.CheckIn documentation says that it fills in
// any required field that is missing with a reasonable default.
// So, here we only need to make sure that we are NOT passing
// it a nil pointer.
config = &model.CheckInConfig{}
}
client, err := conf.session.NewOrchestraClient(conf.ctx)
reply, err := conf.session.CheckIn(conf.ctx, config)
if err != nil {
return nil, err
}
return client.FetchURLList(conf.ctx, model.URLListConfig{
CountryCode: conf.session.ProbeCC(),
Limit: il.URLLimit,
Categories: il.URLCategories,
})
if reply.WebConnectivity == nil || len(reply.WebConnectivity.URLs) <= 0 {
return nil, ErrNoURLsReturned
}
return reply.WebConnectivity.URLs, nil
}
@@ -204,7 +204,7 @@ func TestInputLoaderInputStrictlyRequiredWithEmptyFile(t *testing.T) {
}
}
func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithInput(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{
StaticInputs: []string{"https://www.google.com/"},
SourceFiles: []string{
@@ -233,7 +233,7 @@ func TestInputLoaderInputOrQueryTestListsWithInput(t *testing.T) {
}
}
func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithNoInputAndCancelledContext(t *testing.T) {
sess, err := engine.NewSession(engine.SessionConfig{
AssetsDir: "testdata",
KVStore: kvstore.NewMemoryKeyValueStore(),
@@ -261,7 +261,7 @@ func TestInputLoaderInputOrQueryTestListsWithNoInputAndCancelledContext(t *testi
}
}
func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithNoInput(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
@@ -284,7 +284,6 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend,
Session: sess,
URLLimit: 30,
})
ctx := context.Background()
out, err := il.Load(ctx)
@@ -292,11 +291,12 @@ func TestInputLoaderInputOrQueryTestListsWithNoInput(t *testing.T) {
t.Fatal(err)
}
if len(out) < 10 {
// check-in SHOULD return AT LEAST 20 URLs at a time.
t.Fatal("not the output length we expected")
}
}
func TestInputLoaderInputOrQueryTestListsWithEmptyFile(t *testing.T) {
func TestInputLoaderInputOrQueryBackendWithEmptyFile(t *testing.T) {
il := engine.NewInputLoader(engine.InputLoaderConfig{
InputPolicy: engine.InputOrQueryBackend,
SourceFiles: []string{
+86 -51
View File
@@ -8,6 +8,7 @@ import (
"syscall"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ooni/probe-cli/v3/internal/engine/internal/fsx"
"github.com/ooni/probe-cli/v3/internal/engine/model"
)
@@ -43,65 +44,33 @@ func TestInputLoaderReadfileScannerFailure(t *testing.T) {
}
}
type InputLoaderBrokenSession struct {
OrchestraClient model.ExperimentOrchestraClient
Error error
// InputLoaderMockableSession is a mockable session used by
// InputLoader tests. Its CheckIn method returns the Output and
// Error fields configured here, so a test can script the result
// of the check-in call. At least one of the two fields must be
// set: when both are nil, CheckIn returns an error saying so.
type InputLoaderMockableSession struct {
// Output contains the output of CheckIn. It should
// be nil when Error is non-nil.
Output *model.CheckInInfo
// Error is the error to be returned by CheckIn. It
// should be nil when Output is non-nil.
Error error
}
func (InputLoaderBrokenSession) MaybeLookupLocationContext(ctx context.Context) error {
return nil
}
func (ilbs InputLoaderBrokenSession) NewOrchestraClient(ctx context.Context) (model.ExperimentOrchestraClient, error) {
if ilbs.OrchestraClient != nil {
return ilbs.OrchestraClient, nil
// CheckIn implements InputLoaderSession.CheckIn.
func (sess *InputLoaderMockableSession) CheckIn(
ctx context.Context, config *model.CheckInConfig) (*model.CheckInInfo, error) {
if sess.Output == nil && sess.Error == nil {
return nil, errors.New("both Output and Error are nil")
}
return nil, io.EOF
return sess.Output, sess.Error
}
func (InputLoaderBrokenSession) ProbeCC() string {
return "IT"
}
func TestInputLoaderNewOrchestraClientFailure(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: InputLoaderBrokenSession{},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, io.EOF) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
type InputLoaderBrokenOrchestraClient struct{}
func (InputLoaderBrokenOrchestraClient) CheckIn(ctx context.Context, config model.CheckInConfig) (*model.CheckInInfo, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchPsiphonConfig(ctx context.Context) ([]byte, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchTorTargets(ctx context.Context, cc string) (map[string]model.TorTarget, error) {
return nil, io.EOF
}
func (InputLoaderBrokenOrchestraClient) FetchURLList(ctx context.Context, config model.URLListConfig) ([]model.URLInfo, error) {
return nil, io.EOF
}
func TestInputLoaderFetchURLListFailure(t *testing.T) {
func TestInputLoaderCheckInFailure(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: InputLoaderBrokenSession{
OrchestraClient: InputLoaderBrokenOrchestraClient{},
session: &InputLoaderMockableSession{
Error: io.EOF,
},
}
out, err := il.loadRemote(lrc)
@@ -112,3 +81,69 @@ func TestInputLoaderFetchURLListFailure(t *testing.T) {
t.Fatal("expected nil output here")
}
}
func TestInputLoaderCheckInSuccessWithNilWebConnectivity(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{},
},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, ErrNoURLsReturned) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
func TestInputLoaderCheckInSuccessWithNoURLs(t *testing.T) {
il := inputLoader{}
lrc := inputLoaderLoadRemoteConfig{
ctx: context.Background(),
session: &InputLoaderMockableSession{
Output: &model.CheckInInfo{
WebConnectivity: &model.CheckInInfoWebConnectivity{},
},
},
}
out, err := il.loadRemote(lrc)
if !errors.Is(err, ErrNoURLsReturned) {
t.Fatalf("not the error we expected: %+v", err)
}
if out != nil {
t.Fatal("expected nil output here")
}
}
// TestInputLoaderCheckInSuccessWithSomeURLs checks that loadRemote
// returns, unchanged, the URLs contained in the WebConnectivity
// section of a successful CheckIn reply.
func TestInputLoaderCheckInSuccessWithSomeURLs(t *testing.T) {
	want := []model.URLInfo{
		{CategoryCode: "NEWS", CountryCode: "IT", URL: "https://repubblica.it"},
		{CategoryCode: "NEWS", CountryCode: "IT", URL: "https://corriere.it"},
	}
	// The mocked CheckIn succeeds and hands back exactly the URLs above.
	session := &InputLoaderMockableSession{
		Output: &model.CheckInInfo{
			WebConnectivity: &model.CheckInInfoWebConnectivity{URLs: want},
		},
	}
	var loader inputLoader
	got, err := loader.loadRemote(inputLoaderLoadRemoteConfig{
		ctx:     context.Background(),
		session: session,
	})
	if err != nil {
		t.Fatal(err)
	}
	if diff := cmp.Diff(want, got); diff != "" {
		t.Fatal(diff)
	}
}