ooni-probe-cli/pkg/oonimkall/task_integration_test.go
Simone Basso c4eb682606
[forwardport] fix(oonimkall): don't close channel to signal end of task (#619) (#620)
This diff forward ports 90bf0629b957c912a5a6f3bb6c98ad3abb5a2ff6 to `master`.

If we close the channel to signal the end of a task we may panic
when some background goroutine tries to post on the channel.

This bug is rare but may happen.

See for example https://github.com/ooni/probe/issues/1438.

How can we improve?

First, let us add a timeout when sending to the channel. Given that
the channel is buffered and we have a generous timeout (1/4 of a
second), it's unlikely we will really block. But, in the event in
which a late message appears, we'll eventually _unblock_ when
sending with a timeout. So, now we don't have to worry anymore
about leaking forever a goroutine.

Then, let us change the protocol with which we signal that a task
is done. We used to close the channel. Now, instead we just
synchronously post a nil on the channel when done.

In turn, we interpret this nil to mean that the task is done when
we receive messages.

The _main_ different with respect to before is that now we are
asking the consumer of our API to drain the channel. Because
before we had a blocking channel, it seems to me we were already
requiring the consumer of the API to do that. Which means, I think
in practical terms it did not change much.

Finally, acknowledge that we don't need a specific state variable
to tell us we're done and simplify a little bit the API by
just making isRunning private and using the "we're done" signal
to determine whether we've stopped running the task.

All these changes should be enough to close https://github.com/ooni/probe/issues/1438.
2021-11-26 22:18:45 +01:00

527 lines
12 KiB
Go

package oonimkall
import (
"encoding/json"
"errors"
"strings"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/ooni/probe-cli/v3/internal/engine/model"
)
type eventlike struct {
Key string `json:"key"`
Value map[string]interface{} `json:"value"`
}
func TestGood(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"log_level": "DEBUG",
"name": "Example",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
// interrupt the task so we also exercise this functionality
go func() {
<-time.After(time.Second)
task.Interrupt()
}()
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
t.Fatal("unexpected failure.startup event")
}
}
// make sure we only see task_terminated at this point
for {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "task_terminated" {
break
}
t.Fatalf("unexpected event.Key: %s", event.Key)
}
}
func TestWithMeasurementFailure(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"log_level": "DEBUG",
"name": "ExampleWithFailure",
"options": {
"no_geoip": true,
"no_resolver_lookup": true,
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
t.Fatal("unexpected failure.startup event")
}
}
}
func TestInvalidJSON(t *testing.T) {
task, err := StartTask(`{`)
var syntaxerr *json.SyntaxError
if !errors.As(err, &syntaxerr) {
t.Fatal("not the expected error")
}
if task != nil {
t.Fatal("task is not nil")
}
}
func TestUnsupportedSetting(t *testing.T) {
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"log_level": "DEBUG",
"name": "Example",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state"
}`)
if err != nil {
t.Fatal(err)
}
var seen bool
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
if strings.Contains(eventstr, failureInvalidVersion) {
seen = true
}
}
}
if !seen {
t.Fatal("did not see failure.startup with invalid version info")
}
}
func TestEmptyStateDir(t *testing.T) {
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"log_level": "DEBUG",
"name": "Example",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
var seen bool
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
if strings.Contains(eventstr, "mkdir : no such file or directory") {
seen = true
}
}
}
if !seen {
t.Fatal("did not see failure.startup with info that state dir is empty")
}
}
func TestUnknownExperiment(t *testing.T) {
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"log_level": "DEBUG",
"name": "Antani",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
var seen bool
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
if strings.Contains(eventstr, "no such experiment: ") {
seen = true
}
}
}
if !seen {
t.Fatal("did not see failure.startup")
}
}
func TestInputIsRequired(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"log_level": "DEBUG",
"name": "ExampleWithInput",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
var seen bool
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
if strings.Contains(eventstr, "no input provided") {
seen = true
}
}
}
if !seen {
t.Fatal("did not see failure.startup")
}
}
func TestMaxRuntime(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
begin := time.Now()
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"inputs": ["a", "b", "c"],
"name": "ExampleWithInput",
"options": {
"max_runtime": 1,
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
t.Fatal(eventstr)
}
}
// The runtime is long because of ancillary operations and is even more
// longer because of self shaping we may be performing (especially in
// CI builds) using `-tags shaping`). We have experimentally determined
// that ~10 seconds is the typical CI test run time. See:
//
// 1. https://github.com/ooni/probe-cli/v3/internal/engine/pull/588/checks?check_run_id=667263788
//
// 2. https://github.com/ooni/probe-cli/v3/internal/engine/pull/588/checks?check_run_id=667263855
//
// In case there are further timeouts, e.g. in the sessionresolver, the
// time used by the experiment will be much more. This is for example the
// case in https://github.com/ooni/probe-engine/issues/1005.
if time.Since(begin) > 10*time.Second {
t.Fatal("expected shorter runtime")
}
}
func TestInterruptExampleWithInput(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
t.Skip("Skipping broken test; see https://github.com/ooni/probe-cli/v3/internal/engine/issues/992")
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"inputs": [
"http://www.kernel.org/",
"http://www.x.org/",
"http://www.microsoft.com/",
"http://www.slashdot.org/",
"http://www.repubblica.it/",
"http://www.google.it/",
"http://ooni.org/"
],
"name": "ExampleWithInputNonInterruptible",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
var keys []string
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
switch event.Key {
case "failure.startup":
t.Fatal(eventstr)
case "status.measurement_start":
go task.Interrupt()
}
// We compress the keys. What matters is basically that we
// see just one of the many possible measurements here.
if keys == nil || keys[len(keys)-1] != event.Key {
keys = append(keys, event.Key)
}
}
expect := []string{
"status.queued",
"status.started",
"status.progress",
"status.geoip_lookup",
"status.resolver_lookup",
"status.progress",
"status.report_create",
"status.measurement_start",
"log",
"status.progress",
"measurement",
"status.measurement_submission",
"status.measurement_done",
"status.end",
"task_terminated",
}
if diff := cmp.Diff(expect, keys); diff != "" {
t.Fatal(diff)
}
}
func TestInterruptNdt7(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"name": "Ndt7",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
go func() {
<-time.After(11 * time.Second)
task.Interrupt()
}()
var keys []string
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
if event.Key == "failure.startup" {
t.Fatal(eventstr)
}
// We compress the keys because we don't know how many
// status.progress we will see. What matters is that we
// don't see a measurement submission, since it means
// that we have interrupted the measurement.
if keys == nil || keys[len(keys)-1] != event.Key {
keys = append(keys, event.Key)
}
}
expect := []string{
"status.queued",
"status.started",
"status.progress",
"status.geoip_lookup",
"status.resolver_lookup",
"status.progress",
"status.report_create",
"status.measurement_start",
"status.progress",
"status.end",
"task_terminated",
}
if diff := cmp.Diff(expect, keys); diff != "" {
t.Fatal(diff)
}
}
func TestCountBytesForExample(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"name": "Example",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
var downloadKB, uploadKB float64
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
switch event.Key {
case "failure.startup":
t.Fatal(eventstr)
case "status.end":
downloadKB = event.Value["downloaded_kb"].(float64)
uploadKB = event.Value["uploaded_kb"].(float64)
}
}
if downloadKB == 0 {
t.Fatal("downloadKB is zero")
}
if uploadKB == 0 {
t.Fatal("uploadKB is zero")
}
}
func TestPrivacyAndScrubbing(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"name": "Example",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
var m *model.Measurement
for !task.IsDone() {
eventstr := task.WaitForNextEvent()
var event eventlike
if err := json.Unmarshal([]byte(eventstr), &event); err != nil {
t.Fatal(err)
}
switch event.Key {
case "failure.startup":
t.Fatal(eventstr)
case "measurement":
v := []byte(event.Value["json_str"].(string))
m = new(model.Measurement)
if err := json.Unmarshal(v, &m); err != nil {
t.Fatal(err)
}
}
}
if m == nil {
t.Fatal("measurement is nil")
}
if m.ProbeASN == "AS0" || m.ProbeCC == "ZZ" || m.ProbeIP != "127.0.0.1" {
t.Fatal("unexpected result")
}
}
func TestNonblock(t *testing.T) {
if testing.Short() {
t.Skip("skip test in short mode")
}
task, err := StartTask(`{
"assets_dir": "../testdata/oonimkall/assets",
"name": "Example",
"options": {
"software_name": "oonimkall-test",
"software_version": "0.1.0"
},
"state_dir": "../testdata/oonimkall/state",
"version": 1
}`)
if err != nil {
t.Fatal(err)
}
if !task.isRunning() {
t.Fatal("The runner should be running at this point")
}
// If the task blocks because it emits too much events, this test
// will run forever and will be killed. Because we have room for up
// to 128 events in the buffer, we should hopefully be fine.
for task.isRunning() {
time.Sleep(time.Second)
}
for !task.IsDone() {
task.WaitForNextEvent()
}
}