refactor(engine): scrub the whole measurement (#956)

Part of https://github.com/ooni/probe/issues/2297
This commit is contained in:
Simone Basso 2022-09-12 22:22:25 +02:00 committed by GitHub
parent f77474a91a
commit 1638c450f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 92 additions and 93 deletions

View File

@ -139,7 +139,7 @@ func (e *experiment) MeasureAsync(
measurement.MeasurementRuntime = tk.MeasurementRuntime measurement.MeasurementRuntime = tk.MeasurementRuntime
measurement.TestHelpers = tk.TestHelpers measurement.TestHelpers = tk.TestHelpers
measurement.TestKeys = tk.TestKeys measurement.TestKeys = tk.TestKeys
if err := measurement.Scrub(e.session.ProbeIP()); err != nil { if err := model.ScrubMeasurement(measurement, e.session.ProbeIP()); err != nil {
// If we fail to scrub the measurement then we are not going to // If we fail to scrub the measurement then we are not going to
// submit it. Most likely causes of error here are unlikely, // submit it. Most likely causes of error here are unlikely,
// e.g., the TestKeys being not serializable. // e.g., the TestKeys being not serializable.

View File

@ -11,6 +11,8 @@ import (
"fmt" "fmt"
"net" "net"
"time" "time"
"github.com/ooni/probe-cli/v3/internal/runtimex"
) )
const ( const (
@ -168,39 +170,49 @@ func (m *Measurement) AddAnnotation(key, value string) {
// is not the valid serialization of an IP address. // is not the valid serialization of an IP address.
var ErrInvalidProbeIP = errors.New("model: invalid probe IP") var ErrInvalidProbeIP = errors.New("model: invalid probe IP")
// Scrub scrubs the probeIP out of the measurement.
func (m *Measurement) Scrub(probeIP string) (err error) {
// We now behave like we can share everything except the
// probe IP, which we instead cannot ever share
m.ProbeIP = DefaultProbeIP
return m.MaybeRewriteTestKeys(probeIP, json.Marshal)
}
// Scrubbed is the string that replaces IP addresses. // Scrubbed is the string that replaces IP addresses.
const Scrubbed = `[scrubbed]` const Scrubbed = `[scrubbed]`
// MaybeRewriteTestKeys is the function called by Scrub that // ScrubMeasurement removes [currentIP] from [m] by rewriting
// ensures that m's serialization doesn't include the IP // it in place while preserving the underlying types
func (m *Measurement) MaybeRewriteTestKeys( func ScrubMeasurement(m *Measurement, currentIP string) error {
currentIP string, marshal func(interface{}) ([]byte, error)) error {
if net.ParseIP(currentIP) == nil { if net.ParseIP(currentIP) == nil {
return ErrInvalidProbeIP return ErrInvalidProbeIP
} }
data, err := marshal(m.TestKeys) m.ProbeIP = DefaultProbeIP
if err != nil { m.AddAnnotation("_probe_engine_sanitize_test_keys", "true")
if err := scrubTestKeys(m, currentIP); err != nil {
return err return err
} }
// The check using Count is to save an unnecessary copy performed by testKeys := m.TestKeys
// ReplaceAll when there are no matches into the body. This is what m.TestKeys = nil
// we would like the common case to be, meaning that the code has done if err := scrubTopLevelKeys(m, currentIP); err != nil {
// its job correctly and has not leaked the IP. return err
bpip := []byte(currentIP)
if bytes.Count(data, bpip) <= 0 {
return nil
} }
data = bytes.ReplaceAll(data, bpip, []byte(Scrubbed)) m.TestKeys = testKeys
// We add an annotation such that hopefully later we can measure the return nil
// number of cases where we failed to sanitize properly. }
m.AddAnnotation("_probe_engine_sanitize_test_keys", "true")
return json.Unmarshal(data, &m.TestKeys) // scrubJSONUnmarshalTopLevelKeys allows to mock json.Unmarshal
var scrubJSONUnmarshalTopLevelKeys = json.Unmarshal
// scrubTopLevelKeys removes [currentIP] from the top-level keys
// of [m] by rewriting these keys in place.
func scrubTopLevelKeys(m *Measurement, currentIP string) error {
data, err := json.Marshal(m)
runtimex.PanicOnError(err, "json.Marshal(m) failed") // m must serialize
data = bytes.ReplaceAll(data, []byte(currentIP), []byte(Scrubbed))
return scrubJSONUnmarshalTopLevelKeys(data, &m)
}
// scrubJSONUnmarshalTestKeys allows to mock json.Unmarshal
var scrubJSONUnmarshalTestKeys = json.Unmarshal
// scrubTestKeys removes [currentIP] from the TestKeys by rewriting
// them in place while preserving their original type
func scrubTestKeys(m *Measurement, currentIP string) error {
data, err := json.Marshal(m.TestKeys)
runtimex.PanicOnError(err, "json.Marshal(m.TestKeys) failed") // m.TestKeys must serialize
data = bytes.ReplaceAll(data, []byte(currentIP), []byte(Scrubbed))
return scrubJSONUnmarshalTestKeys(data, &m.TestKeys)
} }

View File

@ -57,6 +57,7 @@ func TestAddAnnotations(t *testing.T) {
} }
type makeMeasurementConfig struct { type makeMeasurementConfig struct {
Input string
ProbeIP string ProbeIP string
ProbeASN string ProbeASN string
ProbeNetworkName string ProbeNetworkName string
@ -66,10 +67,11 @@ type makeMeasurementConfig struct {
ResolverASN string ResolverASN string
} }
func makeMeasurement(config makeMeasurementConfig) Measurement { func makeMeasurement(config makeMeasurementConfig) *Measurement {
return Measurement{ return &Measurement{
DataFormatVersion: "0.3.0", DataFormatVersion: "0.3.0",
ID: "bdd20d7a-bba5-40dd-a111-9863d7908572", ID: "bdd20d7a-bba5-40dd-a111-9863d7908572",
Input: MeasurementTarget(config.Input),
MeasurementStartTime: "2018-11-01 15:33:20", MeasurementStartTime: "2018-11-01 15:33:20",
ProbeIP: config.ProbeIP, ProbeIP: config.ProbeIP,
ProbeASN: config.ProbeASN, ProbeASN: config.ProbeASN,
@ -82,7 +84,7 @@ func makeMeasurement(config makeMeasurementConfig) Measurement {
SoftwareName: "probe-engine", SoftwareName: "probe-engine",
SoftwareVersion: "0.1.0", SoftwareVersion: "0.1.0",
TestKeys: &fakeTestKeys{ TestKeys: &fakeTestKeys{
ClientResolver: "91.80.37.104", ClientResolver: config.ResolverIP,
Body: fmt.Sprintf(` Body: fmt.Sprintf(`
<HTML><HEAD><TITLE>Your IP is %s</TITLE></HEAD> <HTML><HEAD><TITLE>Your IP is %s</TITLE></HEAD>
<BODY><P>Hey you, I see your IP and it's %s!</P></BODY> <BODY><P>Hey you, I see your IP and it's %s!</P></BODY>
@ -95,8 +97,9 @@ func makeMeasurement(config makeMeasurementConfig) Measurement {
} }
} }
func TestScrubWeAreScrubbing(t *testing.T) { func TestScrubMeasurementWeAreScrubbing(t *testing.T) {
config := makeMeasurementConfig{ config := makeMeasurementConfig{
Input: "130.192.91.211",
ProbeIP: "130.192.91.211", ProbeIP: "130.192.91.211",
ProbeASN: "AS137", ProbeASN: "AS137",
ProbeCC: "IT", ProbeCC: "IT",
@ -106,16 +109,19 @@ func TestScrubWeAreScrubbing(t *testing.T) {
ResolverASN: "AS12345", ResolverASN: "AS12345",
} }
m := makeMeasurement(config) m := makeMeasurement(config)
if err := m.Scrub(config.ProbeIP); err != nil { if err := ScrubMeasurement(m, config.ProbeIP); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if m.Input != Scrubbed {
t.Fatal("Input HAS NOT been scrubbed")
}
if m.ProbeASN != config.ProbeASN { if m.ProbeASN != config.ProbeASN {
t.Fatal("ProbeASN has been scrubbed") t.Fatal("ProbeASN has been scrubbed")
} }
if m.ProbeCC != config.ProbeCC { if m.ProbeCC != config.ProbeCC {
t.Fatal("ProbeCC has been scrubbed") t.Fatal("ProbeCC has been scrubbed")
} }
if m.ProbeIP == config.ProbeIP { if m.ProbeIP != DefaultProbeIP {
t.Fatal("ProbeIP HAS NOT been scrubbed") t.Fatal("ProbeIP HAS NOT been scrubbed")
} }
if m.ProbeNetworkName != config.ProbeNetworkName { if m.ProbeNetworkName != config.ProbeNetworkName {
@ -137,75 +143,56 @@ func TestScrubWeAreScrubbing(t *testing.T) {
if bytes.Count(data, []byte(config.ProbeIP)) != 0 { if bytes.Count(data, []byte(config.ProbeIP)) != 0 {
t.Fatal("ProbeIP not fully redacted") t.Fatal("ProbeIP not fully redacted")
} }
} // Note: ooniprobe requires the test keys to keep their original
// type otherwise the summary extraction process fails
func TestScrubNoScrubbingRequired(t *testing.T) { testkeys, good := m.TestKeys.(*fakeTestKeys)
config := makeMeasurementConfig{ if !good {
ProbeIP: "130.192.91.211", t.Fatal("the underlying type of the test keys changed")
ProbeASN: "AS137",
ProbeCC: "IT",
ProbeNetworkName: "Vodafone Italia S.p.A.",
ResolverIP: "8.8.8.8",
ResolverNetworkName: "Google LLC",
ResolverASN: "AS12345",
} }
m := makeMeasurement(config) if testkeys.ClientResolver != config.ResolverIP {
m.TestKeys.(*fakeTestKeys).Body = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." t.Fatal("it seems the test keys did not round trip")
if err := m.Scrub(config.ProbeIP); err != nil {
t.Fatal(err)
}
if m.ProbeASN != config.ProbeASN {
t.Fatal("ProbeASN has been scrubbed")
}
if m.ProbeCC != config.ProbeCC {
t.Fatal("ProbeCC has been scrubbed")
}
if m.ProbeIP == config.ProbeIP {
t.Fatal("ProbeIP HAS NOT been scrubbed")
}
if m.ProbeNetworkName != config.ProbeNetworkName {
t.Fatal("ProbeNetworkName has been scrubbed")
}
if m.ResolverIP != config.ResolverIP {
t.Fatal("ResolverIP has been scrubbed")
}
if m.ResolverNetworkName != config.ResolverNetworkName {
t.Fatal("ResolverNetworkName has been scrubbed")
}
if m.ResolverASN != config.ResolverASN {
t.Fatal("ResolverASN has been scrubbed")
}
data, err := json.Marshal(m)
if err != nil {
t.Fatal(err)
}
if bytes.Count(data, []byte(Scrubbed)) > 0 {
t.Fatal("We should not see any scrubbing")
} }
} }
func TestScrubInvalidIP(t *testing.T) { func TestScrubMeasurementCannotUnmarshalTopLevelKeys(t *testing.T) {
saved := scrubJSONUnmarshalTopLevelKeys
expected := errors.New("mocked err")
scrubJSONUnmarshalTopLevelKeys = func(data []byte, v any) error {
return expected
}
defer func() {
scrubJSONUnmarshalTopLevelKeys = saved
}()
m := &Measurement{}
err := ScrubMeasurement(m, "10.0.0.1")
if !errors.Is(err, expected) {
t.Fatal("unexpected error", err)
}
}
func TestScrubMeasurementCannotUnmarshalTestKeys(t *testing.T) {
saved := scrubJSONUnmarshalTestKeys
expected := errors.New("mocked err")
scrubJSONUnmarshalTestKeys = func(data []byte, v any) error {
return expected
}
defer func() {
scrubJSONUnmarshalTestKeys = saved
}()
m := &Measurement{}
err := ScrubMeasurement(m, "10.0.0.1")
if !errors.Is(err, expected) {
t.Fatal("unexpected error", err)
}
}
func TestScrubMeasurementInvalidIP(t *testing.T) {
m := &Measurement{ m := &Measurement{
ProbeASN: "AS1234", ProbeASN: "AS1234",
ProbeCC: "IT", ProbeCC: "IT",
} }
err := m.Scrub("") // invalid IP err := ScrubMeasurement(m, "") // invalid IP
if !errors.Is(err, ErrInvalidProbeIP) { if !errors.Is(err, ErrInvalidProbeIP) {
t.Fatal("not the error we expected") t.Fatal("not the error we expected")
} }
} }
func TestScrubMarshalError(t *testing.T) {
expected := errors.New("mocked error")
m := &Measurement{
ProbeASN: "AS1234",
ProbeCC: "IT",
}
err := m.MaybeRewriteTestKeys(
"8.8.8.8", func(v interface{}) ([]byte, error) {
return nil, expected
})
if !errors.Is(err, expected) {
t.Fatal("not the error we expected")
}
}