From 7c1b2bbcb0f619c0815333b868efeb24c70b1ff7 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Sun, 28 Aug 2022 20:26:40 +0200 Subject: [PATCH] refactor: move WebGetTitle inside measurexlite (#895) Part of https://github.com/ooni/probe/issues/2240 --- internal/cmd/oohelperd/http.go | 4 +- .../webconnectivity/httpanalysis.go | 16 +---- .../webconnectivity/analysishttpdiff.go | 3 +- internal/measurexlite/web.go | 19 ++++++ internal/measurexlite/web_test.go | 63 +++++++++++++++++++ 5 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 internal/measurexlite/web.go create mode 100644 internal/measurexlite/web_test.go diff --git a/internal/cmd/oohelperd/http.go b/internal/cmd/oohelperd/http.go index 9b987eb..d0444d9 100644 --- a/internal/cmd/oohelperd/http.go +++ b/internal/cmd/oohelperd/http.go @@ -12,7 +12,7 @@ import ( "sync" "time" - "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/measurexlite" "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/tracex" @@ -100,7 +100,7 @@ func httpDo(ctx context.Context, config *httpConfig) { Failure: httpMapFailure(err), StatusCode: int64(resp.StatusCode), Headers: headers, - Title: webconnectivity.GetTitle(string(data)), + Title: measurexlite.WebGetTitle(string(data)), } } diff --git a/internal/engine/experiment/webconnectivity/httpanalysis.go b/internal/engine/experiment/webconnectivity/httpanalysis.go index 346707f..5ed2ec9 100644 --- a/internal/engine/experiment/webconnectivity/httpanalysis.go +++ b/internal/engine/experiment/webconnectivity/httpanalysis.go @@ -2,11 +2,11 @@ package webconnectivity import ( "reflect" - "regexp" "strings" "github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter" "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity/internal" + "github.com/ooni/probe-cli/v3/internal/measurexlite" 
"github.com/ooni/probe-cli/v3/internal/model" ) @@ -184,18 +184,6 @@ func HTTPHeadersMatch(tk urlgetter.TestKeys, ctrl ControlResponse) *bool { return &good } -// GetTitle returns the title or an empty string. -func GetTitle(measurementBody string) string { - // MK used {1,128} but we're making it larger here to get longer titles - // e.g. 's one - re := regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`) - v := re.FindStringSubmatch(measurementBody) - if len(v) < 2 { - return "" - } - return v[1] -} - // HTTPTitleMatch returns whether the measurement and the control titles // reasonably match, or nil if not applicable. func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) { @@ -214,7 +202,7 @@ func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) { } control := ctrl.HTTPRequest.Title measurementBody := response.Body.Value - measurement := GetTitle(measurementBody) + measurement := measurexlite.WebGetTitle(measurementBody) if measurement == "" { return } diff --git a/internal/experiment/webconnectivity/analysishttpdiff.go b/internal/experiment/webconnectivity/analysishttpdiff.go index 1773cac..2eb3916 100644 --- a/internal/experiment/webconnectivity/analysishttpdiff.go +++ b/internal/experiment/webconnectivity/analysishttpdiff.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/measurexlite" "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/runtimex" ) @@ -208,7 +209,7 @@ func (tk *TestKeys) httpDiffTitleMatch( } control := ctrl.Title measurementBody := response.Body.Value - measurement := webconnectivity.GetTitle(measurementBody) + measurement := measurexlite.WebGetTitle(measurementBody) if control == "" || measurement == "" { return } diff --git a/internal/measurexlite/web.go b/internal/measurexlite/web.go new file mode 100644 index 0000000..adc03da --- /dev/null +++ 
b/internal/measurexlite/web.go @@ -0,0 +1,19 @@ +package measurexlite + +// +// Code to process web results (e.g., from web connectivity) +// + +import "regexp" + +// WebGetTitle returns the title or an empty string. +func WebGetTitle(measurementBody string) string { + // MK used {1,128} but we're making it larger here to get longer titles + // e.g. 's one + re := regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`) + v := re.FindStringSubmatch(measurementBody) + if len(v) < 2 { + return "" + } + return v[1] +} diff --git a/internal/measurexlite/web_test.go b/internal/measurexlite/web_test.go new file mode 100644 index 0000000..115ae34 --- /dev/null +++ b/internal/measurexlite/web_test.go @@ -0,0 +1,63 @@ +package measurexlite + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/ooni/probe-cli/v3/internal/randx" +) + +func TestWebGetTitle(t *testing.T) { + type args struct { + body string + } + tests := []struct { + name string + args args + wantOut string + }{{ + name: "with empty input", + args: args{ + body: "", + }, + wantOut: "", + }, { + name: "with body containing no titles", + args: args{ + body: "<HTML/>", + }, + wantOut: "", + }, { + name: "success with UTF-7 body", + args: args{ + body: "<HTML><TITLE>La community di MSN</TITLE></HTML>", + }, + wantOut: "La community di MSN", + }, { + name: "success with UTF-8 body", + args: args{ + body: "<HTML><TITLE>La comunità di MSN</TITLE></HTML>", + }, + wantOut: "La comunità di MSN", + }, { + name: "when the title is too long", + args: args{ + body: "<HTML><TITLE>" + randx.Letters(1024) + "</TITLE></HTML>", + }, + wantOut: "", + }, { + name: "success with case variations", + args: args{ + body: "<HTML><TiTLe>La commUNity di MSN</tITLE></HTML>", + }, + wantOut: "La commUNity di MSN", + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotOut := WebGetTitle(tt.args.body) + if diff := cmp.Diff(tt.wantOut, gotOut); diff != "" { + t.Fatal(diff) + } + }) + } +}