refactor: move WebGetTitle inside measurexlite (#895)
Part of https://github.com/ooni/probe/issues/2240
This commit is contained in:
parent
bb6563f363
commit
7c1b2bbcb0
|
@ -12,7 +12,7 @@ import (
|
|||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity"
|
||||
"github.com/ooni/probe-cli/v3/internal/measurexlite"
|
||||
"github.com/ooni/probe-cli/v3/internal/model"
|
||||
"github.com/ooni/probe-cli/v3/internal/netxlite"
|
||||
"github.com/ooni/probe-cli/v3/internal/tracex"
|
||||
|
@ -100,7 +100,7 @@ func httpDo(ctx context.Context, config *httpConfig) {
|
|||
Failure: httpMapFailure(err),
|
||||
StatusCode: int64(resp.StatusCode),
|
||||
Headers: headers,
|
||||
Title: webconnectivity.GetTitle(string(data)),
|
||||
Title: measurexlite.WebGetTitle(string(data)),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,11 +2,11 @@ package webconnectivity
|
|||
|
||||
import (
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter"
|
||||
"github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity/internal"
|
||||
"github.com/ooni/probe-cli/v3/internal/measurexlite"
|
||||
"github.com/ooni/probe-cli/v3/internal/model"
|
||||
)
|
||||
|
||||
|
@ -184,18 +184,6 @@ func HTTPHeadersMatch(tk urlgetter.TestKeys, ctrl ControlResponse) *bool {
|
|||
return &good
|
||||
}
|
||||
|
||||
// getTitleRegexp matches an HTML title element case-insensitively and
// captures its content, which must be between 1 and 512 characters long
// and must not contain '<'.
//
// MK used {1,128} but we're making it larger here to get longer titles
// e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one
//
// The expression is compiled once at package initialization instead of
// being recompiled on every GetTitle call.
var getTitleRegexp = regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)

// GetTitle returns the title or an empty string.
//
// The title is the content of the leftmost <title>...</title> element in
// measurementBody that getTitleRegexp matches. The empty string is returned
// when nothing matches, e.g., when there is no title element, the title is
// empty, or the title is longer than 512 characters.
func GetTitle(measurementBody string) string {
	v := getTitleRegexp.FindStringSubmatch(measurementBody)
	// On a match, v[0] is the whole element and v[1] is the captured title.
	if len(v) < 2 {
		return ""
	}
	return v[1]
}
|
||||
|
||||
// HTTPTitleMatch returns whether the measurement and the control titles
|
||||
// reasonably match, or nil if not applicable.
|
||||
func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) {
|
||||
|
@ -214,7 +202,7 @@ func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) {
|
|||
}
|
||||
control := ctrl.HTTPRequest.Title
|
||||
measurementBody := response.Body.Value
|
||||
measurement := GetTitle(measurementBody)
|
||||
measurement := measurexlite.WebGetTitle(measurementBody)
|
||||
if measurement == "" {
|
||||
return
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"strings"
|
||||
|
||||
"github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity"
|
||||
"github.com/ooni/probe-cli/v3/internal/measurexlite"
|
||||
"github.com/ooni/probe-cli/v3/internal/model"
|
||||
"github.com/ooni/probe-cli/v3/internal/runtimex"
|
||||
)
|
||||
|
@ -208,7 +209,7 @@ func (tk *TestKeys) httpDiffTitleMatch(
|
|||
}
|
||||
control := ctrl.Title
|
||||
measurementBody := response.Body.Value
|
||||
measurement := webconnectivity.GetTitle(measurementBody)
|
||||
measurement := measurexlite.WebGetTitle(measurementBody)
|
||||
if control == "" || measurement == "" {
|
||||
return
|
||||
}
|
||||
|
|
19
internal/measurexlite/web.go
Normal file
19
internal/measurexlite/web.go
Normal file
|
@ -0,0 +1,19 @@
|
|||
package measurexlite
|
||||
|
||||
//
|
||||
// Code to process web results (e.g., from web connectivity)
|
||||
//
|
||||
|
||||
import "regexp"
|
||||
|
||||
// webTitleRegexp matches an HTML title element case-insensitively and
// captures its content, which must be between 1 and 512 characters long
// and must not contain '<'.
//
// MK used {1,128} but we're making it larger here to get longer titles
// e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one
//
// The expression is compiled once at package initialization instead of
// being recompiled on every WebGetTitle call.
var webTitleRegexp = regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)

// WebGetTitle returns the title or an empty string.
//
// The title is the content of the leftmost <title>...</title> element in
// measurementBody that webTitleRegexp matches. The empty string is returned
// when nothing matches, e.g., when there is no title element, the title is
// empty, or the title is longer than 512 characters.
func WebGetTitle(measurementBody string) string {
	v := webTitleRegexp.FindStringSubmatch(measurementBody)
	// On a match, v[0] is the whole element and v[1] is the captured title.
	if len(v) < 2 {
		return ""
	}
	return v[1]
}
|
63
internal/measurexlite/web_test.go
Normal file
63
internal/measurexlite/web_test.go
Normal file
|
@ -0,0 +1,63 @@
|
|||
package measurexlite
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/ooni/probe-cli/v3/internal/randx"
|
||||
)
|
||||
|
||||
func TestWebGetTitle(t *testing.T) {
|
||||
type args struct {
|
||||
body string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
wantOut string
|
||||
}{{
|
||||
name: "with empty input",
|
||||
args: args{
|
||||
body: "",
|
||||
},
|
||||
wantOut: "",
|
||||
}, {
|
||||
name: "with body containing no titles",
|
||||
args: args{
|
||||
body: "<HTML/>",
|
||||
},
|
||||
wantOut: "",
|
||||
}, {
|
||||
name: "success with UTF-7 body",
|
||||
args: args{
|
||||
body: "<HTML><TITLE>La community di MSN</TITLE></HTML>",
|
||||
},
|
||||
wantOut: "La community di MSN",
|
||||
}, {
|
||||
name: "success with UTF-8 body",
|
||||
args: args{
|
||||
body: "<HTML><TITLE>La comunità di MSN</TITLE></HTML>",
|
||||
},
|
||||
wantOut: "La comunità di MSN",
|
||||
}, {
|
||||
name: "when the title is too long",
|
||||
args: args{
|
||||
body: "<HTML><TITLE>" + randx.Letters(1024) + "</TITLE></HTML>",
|
||||
},
|
||||
wantOut: "",
|
||||
}, {
|
||||
name: "success with case variations",
|
||||
args: args{
|
||||
body: "<HTML><TiTLe>La commUNity di MSN</tITLE></HTML>",
|
||||
},
|
||||
wantOut: "La commUNity di MSN",
|
||||
}}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotOut := WebGetTitle(tt.args.body)
|
||||
if diff := cmp.Diff(tt.wantOut, gotOut); diff != "" {
|
||||
t.Fatal(diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user