refactor(engine): scrub the whole measurement (#956)

Part of https://github.com/ooni/probe/issues/2297
This commit is contained in:
Simone Basso 2022-09-12 22:22:25 +02:00 committed by GitHub
parent f77474a91a
commit 1638c450f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 92 additions and 93 deletions

View File

@ -139,7 +139,7 @@ func (e *experiment) MeasureAsync(
measurement.MeasurementRuntime = tk.MeasurementRuntime
measurement.TestHelpers = tk.TestHelpers
measurement.TestKeys = tk.TestKeys
if err := measurement.Scrub(e.session.ProbeIP()); err != nil {
if err := model.ScrubMeasurement(measurement, e.session.ProbeIP()); err != nil {
// If we fail to scrub the measurement then we are not going to
// submit it. Most likely causes of error here are unlikely,
// e.g., the TestKeys being not serializable.

View File

@ -11,6 +11,8 @@ import (
"fmt"
"net"
"time"
"github.com/ooni/probe-cli/v3/internal/runtimex"
)
const (
@ -168,39 +170,49 @@ func (m *Measurement) AddAnnotation(key, value string) {
// is not the valid serialization of an IP address.
var ErrInvalidProbeIP = errors.New("model: invalid probe IP")
// Scrub scrubs the probeIP out of the measurement.
func (m *Measurement) Scrub(probeIP string) (err error) {
// We now behave like we can share everything except the
// probe IP, which we instead cannot ever share
m.ProbeIP = DefaultProbeIP
return m.MaybeRewriteTestKeys(probeIP, json.Marshal)
}
// Scrubbed is the string that replaces IP addresses.
const Scrubbed = `[scrubbed]`
// MaybeRewriteTestKeys is the function called by Scrub that
// ensures that m's serialization doesn't include the IP
func (m *Measurement) MaybeRewriteTestKeys(
currentIP string, marshal func(interface{}) ([]byte, error)) error {
// ScrubMeasurement removes [currentIP] from [m] by rewriting
// it in place while preserving the underlying types
func ScrubMeasurement(m *Measurement, currentIP string) error {
if net.ParseIP(currentIP) == nil {
return ErrInvalidProbeIP
}
data, err := marshal(m.TestKeys)
if err != nil {
m.ProbeIP = DefaultProbeIP
m.AddAnnotation("_probe_engine_sanitize_test_keys", "true")
if err := scrubTestKeys(m, currentIP); err != nil {
return err
}
// The check using Count is to save an unnecessary copy performed by
// ReplaceAll when there are no matches into the body. This is what
// we would like the common case to be, meaning that the code has done
// its job correctly and has not leaked the IP.
bpip := []byte(currentIP)
if bytes.Count(data, bpip) <= 0 {
return nil
testKeys := m.TestKeys
m.TestKeys = nil
if err := scrubTopLevelKeys(m, currentIP); err != nil {
return err
}
data = bytes.ReplaceAll(data, bpip, []byte(Scrubbed))
// We add an annotation such that hopefully later we can measure the
// number of cases where we failed to sanitize properly.
m.AddAnnotation("_probe_engine_sanitize_test_keys", "true")
return json.Unmarshal(data, &m.TestKeys)
m.TestKeys = testKeys
return nil
}
// scrubJSONUnmarshalTopLevelKeys allows to mock json.Unmarshal
var scrubJSONUnmarshalTopLevelKeys = json.Unmarshal
// scrubTopLevelKeys removes [currentIP] from the top-level keys
// of [m] by rewriting these keys in place.
func scrubTopLevelKeys(m *Measurement, currentIP string) error {
data, err := json.Marshal(m)
runtimex.PanicOnError(err, "json.Marshal(m) failed") // m must serialize
data = bytes.ReplaceAll(data, []byte(currentIP), []byte(Scrubbed))
return scrubJSONUnmarshalTopLevelKeys(data, &m)
}
// scrubJSONUnmarshalTestKeys allows to mock json.Unmarshal
var scrubJSONUnmarshalTestKeys = json.Unmarshal
// scrubTestKeys removes [currentIP] from the TestKeys by rewriting
// them in place while preserving their original type
func scrubTestKeys(m *Measurement, currentIP string) error {
data, err := json.Marshal(m.TestKeys)
runtimex.PanicOnError(err, "json.Marshal(m.TestKeys) failed") // m.TestKeys must serialize
data = bytes.ReplaceAll(data, []byte(currentIP), []byte(Scrubbed))
return scrubJSONUnmarshalTestKeys(data, &m.TestKeys)
}

View File

@ -57,6 +57,7 @@ func TestAddAnnotations(t *testing.T) {
}
type makeMeasurementConfig struct {
Input string
ProbeIP string
ProbeASN string
ProbeNetworkName string
@ -66,10 +67,11 @@ type makeMeasurementConfig struct {
ResolverASN string
}
func makeMeasurement(config makeMeasurementConfig) Measurement {
return Measurement{
func makeMeasurement(config makeMeasurementConfig) *Measurement {
return &Measurement{
DataFormatVersion: "0.3.0",
ID: "bdd20d7a-bba5-40dd-a111-9863d7908572",
Input: MeasurementTarget(config.Input),
MeasurementStartTime: "2018-11-01 15:33:20",
ProbeIP: config.ProbeIP,
ProbeASN: config.ProbeASN,
@ -82,7 +84,7 @@ func makeMeasurement(config makeMeasurementConfig) Measurement {
SoftwareName: "probe-engine",
SoftwareVersion: "0.1.0",
TestKeys: &fakeTestKeys{
ClientResolver: "91.80.37.104",
ClientResolver: config.ResolverIP,
Body: fmt.Sprintf(`
<HTML><HEAD><TITLE>Your IP is %s</TITLE></HEAD>
<BODY><P>Hey you, I see your IP and it's %s!</P></BODY>
@ -95,8 +97,9 @@ func makeMeasurement(config makeMeasurementConfig) Measurement {
}
}
func TestScrubWeAreScrubbing(t *testing.T) {
func TestScrubMeasurementWeAreScrubbing(t *testing.T) {
config := makeMeasurementConfig{
Input: "130.192.91.211",
ProbeIP: "130.192.91.211",
ProbeASN: "AS137",
ProbeCC: "IT",
@ -106,16 +109,19 @@ func TestScrubWeAreScrubbing(t *testing.T) {
ResolverASN: "AS12345",
}
m := makeMeasurement(config)
if err := m.Scrub(config.ProbeIP); err != nil {
if err := ScrubMeasurement(m, config.ProbeIP); err != nil {
t.Fatal(err)
}
if m.Input != Scrubbed {
t.Fatal("Input HAS NOT been scrubbed")
}
if m.ProbeASN != config.ProbeASN {
t.Fatal("ProbeASN has been scrubbed")
}
if m.ProbeCC != config.ProbeCC {
t.Fatal("ProbeCC has been scrubbed")
}
if m.ProbeIP == config.ProbeIP {
if m.ProbeIP != DefaultProbeIP {
t.Fatal("ProbeIP HAS NOT been scrubbed")
}
if m.ProbeNetworkName != config.ProbeNetworkName {
@ -137,75 +143,56 @@ func TestScrubWeAreScrubbing(t *testing.T) {
if bytes.Count(data, []byte(config.ProbeIP)) != 0 {
t.Fatal("ProbeIP not fully redacted")
}
}
func TestScrubNoScrubbingRequired(t *testing.T) {
config := makeMeasurementConfig{
ProbeIP: "130.192.91.211",
ProbeASN: "AS137",
ProbeCC: "IT",
ProbeNetworkName: "Vodafone Italia S.p.A.",
ResolverIP: "8.8.8.8",
ResolverNetworkName: "Google LLC",
ResolverASN: "AS12345",
// Note: ooniprobe requires the test keys to keep their original
// type otherwise the summary extraction process fails
testkeys, good := m.TestKeys.(*fakeTestKeys)
if !good {
t.Fatal("the underlying type of the test keys changed")
}
m := makeMeasurement(config)
m.TestKeys.(*fakeTestKeys).Body = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
if err := m.Scrub(config.ProbeIP); err != nil {
t.Fatal(err)
}
if m.ProbeASN != config.ProbeASN {
t.Fatal("ProbeASN has been scrubbed")
}
if m.ProbeCC != config.ProbeCC {
t.Fatal("ProbeCC has been scrubbed")
}
if m.ProbeIP == config.ProbeIP {
t.Fatal("ProbeIP HAS NOT been scrubbed")
}
if m.ProbeNetworkName != config.ProbeNetworkName {
t.Fatal("ProbeNetworkName has been scrubbed")
}
if m.ResolverIP != config.ResolverIP {
t.Fatal("ResolverIP has been scrubbed")
}
if m.ResolverNetworkName != config.ResolverNetworkName {
t.Fatal("ResolverNetworkName has been scrubbed")
}
if m.ResolverASN != config.ResolverASN {
t.Fatal("ResolverASN has been scrubbed")
}
data, err := json.Marshal(m)
if err != nil {
t.Fatal(err)
}
if bytes.Count(data, []byte(Scrubbed)) > 0 {
t.Fatal("We should not see any scrubbing")
if testkeys.ClientResolver != config.ResolverIP {
t.Fatal("it seems the test keys did not round trip")
}
}
func TestScrubInvalidIP(t *testing.T) {
func TestScrubMeasurementCannotUnmarshalTopLevelKeys(t *testing.T) {
saved := scrubJSONUnmarshalTopLevelKeys
expected := errors.New("mocked err")
scrubJSONUnmarshalTopLevelKeys = func(data []byte, v any) error {
return expected
}
defer func() {
scrubJSONUnmarshalTopLevelKeys = saved
}()
m := &Measurement{}
err := ScrubMeasurement(m, "10.0.0.1")
if !errors.Is(err, expected) {
t.Fatal("unexpected error", err)
}
}
func TestScrubMeasurementCannotUnmarshalTestKeys(t *testing.T) {
saved := scrubJSONUnmarshalTestKeys
expected := errors.New("mocked err")
scrubJSONUnmarshalTestKeys = func(data []byte, v any) error {
return expected
}
defer func() {
scrubJSONUnmarshalTestKeys = saved
}()
m := &Measurement{}
err := ScrubMeasurement(m, "10.0.0.1")
if !errors.Is(err, expected) {
t.Fatal("unexpected error", err)
}
}
func TestScrubMeasurementInvalidIP(t *testing.T) {
m := &Measurement{
ProbeASN: "AS1234",
ProbeCC: "IT",
}
err := m.Scrub("") // invalid IP
err := ScrubMeasurement(m, "") // invalid IP
if !errors.Is(err, ErrInvalidProbeIP) {
t.Fatal("not the error we expected")
}
}
func TestScrubMarshalError(t *testing.T) {
expected := errors.New("mocked error")
m := &Measurement{
ProbeASN: "AS1234",
ProbeCC: "IT",
}
err := m.MaybeRewriteTestKeys(
"8.8.8.8", func(v interface{}) ([]byte, error) {
return nil, expected
})
if !errors.Is(err, expected) {
t.Fatal("not the error we expected")
}
}