From 1638c450f0751a2fac7ed8b8bbdc94dd8e4e9677 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 12 Sep 2022 22:22:25 +0200 Subject: [PATCH] refactor(engine): scrub the whole measurement (#956) Part of https://github.com/ooni/probe/issues/2297 --- internal/engine/experiment.go | 2 +- internal/model/measurement.go | 64 +++++++++------- internal/model/measurement_test.go | 119 +++++++++++++---------------- 3 files changed, 92 insertions(+), 93 deletions(-) diff --git a/internal/engine/experiment.go b/internal/engine/experiment.go index d4d1266..f40f4c3 100644 --- a/internal/engine/experiment.go +++ b/internal/engine/experiment.go @@ -139,7 +139,7 @@ func (e *experiment) MeasureAsync( measurement.MeasurementRuntime = tk.MeasurementRuntime measurement.TestHelpers = tk.TestHelpers measurement.TestKeys = tk.TestKeys - if err := measurement.Scrub(e.session.ProbeIP()); err != nil { + if err := model.ScrubMeasurement(measurement, e.session.ProbeIP()); err != nil { // If we fail to scrub the measurement then we are not going to // submit it. Most likely causes of error here are unlikely, // e.g., the TestKeys being not serializable. diff --git a/internal/model/measurement.go b/internal/model/measurement.go index 0af0be3..86700d7 100644 --- a/internal/model/measurement.go +++ b/internal/model/measurement.go @@ -11,6 +11,8 @@ import ( "fmt" "net" "time" + + "github.com/ooni/probe-cli/v3/internal/runtimex" ) const ( @@ -168,39 +170,49 @@ func (m *Measurement) AddAnnotation(key, value string) { // is not the valid serialization of an IP address. var ErrInvalidProbeIP = errors.New("model: invalid probe IP") -// Scrub scrubs the probeIP out of the measurement. -func (m *Measurement) Scrub(probeIP string) (err error) { - // We now behave like we can share everything except the - // probe IP, which we instead cannot ever share - m.ProbeIP = DefaultProbeIP - return m.MaybeRewriteTestKeys(probeIP, json.Marshal) -} - // Scrubbed is the string that replaces IP addresses. const Scrubbed = `[scrubbed]` -// MaybeRewriteTestKeys is the function called by Scrub that -// ensures that m's serialization doesn't include the IP -func (m *Measurement) MaybeRewriteTestKeys( - currentIP string, marshal func(interface{}) ([]byte, error)) error { +// ScrubMeasurement removes [currentIP] from [m] by rewriting +// it in place while preserving the underlying types +func ScrubMeasurement(m *Measurement, currentIP string) error { if net.ParseIP(currentIP) == nil { return ErrInvalidProbeIP } - data, err := marshal(m.TestKeys) - if err != nil { + m.ProbeIP = DefaultProbeIP + m.AddAnnotation("_probe_engine_sanitize_test_keys", "true") + if err := scrubTestKeys(m, currentIP); err != nil { return err } - // The check using Count is to save an unnecessary copy performed by - // ReplaceAll when there are no matches into the body. This is what - // we would like the common case to be, meaning that the code has done - // its job correctly and has not leaked the IP. - bpip := []byte(currentIP) - if bytes.Count(data, bpip) <= 0 { - return nil + testKeys := m.TestKeys + m.TestKeys = nil + if err := scrubTopLevelKeys(m, currentIP); err != nil { + return err } - data = bytes.ReplaceAll(data, bpip, []byte(Scrubbed)) - // We add an annotation such that hopefully later we can measure the - // number of cases where we failed to sanitize properly. - m.AddAnnotation("_probe_engine_sanitize_test_keys", "true") - return json.Unmarshal(data, &m.TestKeys) + m.TestKeys = testKeys + return nil +} + +// scrubJSONUnmarshalTopLevelKeys allows to mock json.Unmarshal +var scrubJSONUnmarshalTopLevelKeys = json.Unmarshal + +// scrubTopLevelKeys removes [currentIP] from the top-level keys +// of [m] by rewriting these keys in place. +func scrubTopLevelKeys(m *Measurement, currentIP string) error { + data, err := json.Marshal(m) + runtimex.PanicOnError(err, "json.Marshal(m) failed") // m must serialize + data = bytes.ReplaceAll(data, []byte(currentIP), []byte(Scrubbed)) + return scrubJSONUnmarshalTopLevelKeys(data, &m) +} + +// scrubJSONUnmarshalTestKeys allows to mock json.Unmarshal +var scrubJSONUnmarshalTestKeys = json.Unmarshal + +// scrubTestKeys removes [currentIP] from the TestKeys by rewriting +// them in place while preserving their original type +func scrubTestKeys(m *Measurement, currentIP string) error { + data, err := json.Marshal(m.TestKeys) + runtimex.PanicOnError(err, "json.Marshal(m.TestKeys) failed") // m.TestKeys must serialize + data = bytes.ReplaceAll(data, []byte(currentIP), []byte(Scrubbed)) + return scrubJSONUnmarshalTestKeys(data, &m.TestKeys) } diff --git a/internal/model/measurement_test.go b/internal/model/measurement_test.go index efe7872..be868c9 100644 --- a/internal/model/measurement_test.go +++ b/internal/model/measurement_test.go @@ -57,6 +57,7 @@ func TestAddAnnotations(t *testing.T) { } type makeMeasurementConfig struct { + Input string ProbeIP string ProbeASN string ProbeNetworkName string @@ -66,10 +67,11 @@ type makeMeasurementConfig struct { ResolverASN string } -func makeMeasurement(config makeMeasurementConfig) Measurement { - return Measurement{ +func makeMeasurement(config makeMeasurementConfig) *Measurement { + return &Measurement{ DataFormatVersion: "0.3.0", ID: "bdd20d7a-bba5-40dd-a111-9863d7908572", + Input: MeasurementTarget(config.Input), MeasurementStartTime: "2018-11-01 15:33:20", ProbeIP: config.ProbeIP, ProbeASN: config.ProbeASN, @@ -82,7 +84,7 @@ func makeMeasurement(config makeMeasurementConfig) Measurement { SoftwareName: "probe-engine", SoftwareVersion: "0.1.0", TestKeys: &fakeTestKeys{ - ClientResolver: "91.80.37.104", + ClientResolver: config.ResolverIP, Body: fmt.Sprintf(` Your IP is %s

Hey you, I see your IP and it's %s!

@@ -95,8 +97,9 @@ func makeMeasurement(config makeMeasurementConfig) Measurement { } } -func TestScrubWeAreScrubbing(t *testing.T) { +func TestScrubMeasurementWeAreScrubbing(t *testing.T) { config := makeMeasurementConfig{ + Input: "130.192.91.211", ProbeIP: "130.192.91.211", ProbeASN: "AS137", ProbeCC: "IT", @@ -106,16 +109,19 @@ func TestScrubWeAreScrubbing(t *testing.T) { ResolverASN: "AS12345", } m := makeMeasurement(config) - if err := m.Scrub(config.ProbeIP); err != nil { + if err := ScrubMeasurement(m, config.ProbeIP); err != nil { t.Fatal(err) } + if m.Input != Scrubbed { + t.Fatal("Input HAS NOT been scrubbed") + } if m.ProbeASN != config.ProbeASN { t.Fatal("ProbeASN has been scrubbed") } if m.ProbeCC != config.ProbeCC { t.Fatal("ProbeCC has been scrubbed") } - if m.ProbeIP == config.ProbeIP { + if m.ProbeIP != DefaultProbeIP { t.Fatal("ProbeIP HAS NOT been scrubbed") } if m.ProbeNetworkName != config.ProbeNetworkName { @@ -137,75 +143,56 @@ func TestScrubWeAreScrubbing(t *testing.T) { if bytes.Count(data, []byte(config.ProbeIP)) != 0 { t.Fatal("ProbeIP not fully redacted") } -} - -func TestScrubNoScrubbingRequired(t *testing.T) { - config := makeMeasurementConfig{ - ProbeIP: "130.192.91.211", - ProbeASN: "AS137", - ProbeCC: "IT", - ProbeNetworkName: "Vodafone Italia S.p.A.", - ResolverIP: "8.8.8.8", - ResolverNetworkName: "Google LLC", - ResolverASN: "AS12345", + // Note: ooniprobe requires the test keys to keep their original + // type otherwise the summary extraction process fails + testkeys, good := m.TestKeys.(*fakeTestKeys) + if !good { + t.Fatal("the underlying type of the test keys changed") } - m := makeMeasurement(config) - m.TestKeys.(*fakeTestKeys).Body = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." - if err := m.Scrub(config.ProbeIP); err != nil { - t.Fatal(err) - } - if m.ProbeASN != config.ProbeASN { - t.Fatal("ProbeASN has been scrubbed") - } - if m.ProbeCC != config.ProbeCC { - t.Fatal("ProbeCC has been scrubbed") - } - if m.ProbeIP == config.ProbeIP { - t.Fatal("ProbeIP HAS NOT been scrubbed") - } - if m.ProbeNetworkName != config.ProbeNetworkName { - t.Fatal("ProbeNetworkName has been scrubbed") - } - if m.ResolverIP != config.ResolverIP { - t.Fatal("ResolverIP has been scrubbed") - } - if m.ResolverNetworkName != config.ResolverNetworkName { - t.Fatal("ResolverNetworkName has been scrubbed") - } - if m.ResolverASN != config.ResolverASN { - t.Fatal("ResolverASN has been scrubbed") - } - data, err := json.Marshal(m) - if err != nil { - t.Fatal(err) - } - if bytes.Count(data, []byte(Scrubbed)) > 0 { - t.Fatal("We should not see any scrubbing") + if testkeys.ClientResolver != config.ResolverIP { + t.Fatal("it seems the test keys did not round trip") } } -func TestScrubInvalidIP(t *testing.T) { +func TestScrubMeasurementCannotUnmarshalTopLevelKeys(t *testing.T) { + saved := scrubJSONUnmarshalTopLevelKeys + expected := errors.New("mocked err") + scrubJSONUnmarshalTopLevelKeys = func(data []byte, v any) error { + return expected + } + defer func() { + scrubJSONUnmarshalTopLevelKeys = saved + }() + m := &Measurement{} + err := ScrubMeasurement(m, "10.0.0.1") + if !errors.Is(err, expected) { + t.Fatal("unexpected error", err) + } +} + +func TestScrubMeasurementCannotUnmarshalTestKeys(t *testing.T) { + saved := scrubJSONUnmarshalTestKeys + expected := errors.New("mocked err") + scrubJSONUnmarshalTestKeys = func(data []byte, v any) error { + return expected + } + defer func() { + scrubJSONUnmarshalTestKeys = saved + }() + m := &Measurement{} + err := ScrubMeasurement(m, "10.0.0.1") + if !errors.Is(err, expected) { + t.Fatal("unexpected error", err) + } +} + +func TestScrubMeasurementInvalidIP(t *testing.T) { m := &Measurement{ ProbeASN: "AS1234", ProbeCC: "IT", } - err := m.Scrub("") // invalid IP + err := ScrubMeasurement(m, "") // invalid IP if !errors.Is(err, ErrInvalidProbeIP) { t.Fatal("not the error we expected") } } - -func TestScrubMarshalError(t *testing.T) { - expected := errors.New("mocked error") - m := &Measurement{ - ProbeASN: "AS1234", - ProbeCC: "IT", - } - err := m.MaybeRewriteTestKeys( - "8.8.8.8", func(v interface{}) ([]byte, error) { - return nil, expected - }) - if !errors.Is(err, expected) { - t.Fatal("not the error we expected") - } -}