refactor: move WebGetTitle inside measurexlite (#895)

Part of https://github.com/ooni/probe/issues/2240
This commit is contained in:
Simone Basso
2022-08-28 20:26:40 +02:00
committed by GitHub
parent bb6563f363
commit 7c1b2bbcb0
5 changed files with 88 additions and 17 deletions
@@ -2,11 +2,11 @@ package webconnectivity
import (
"reflect"
"regexp"
"strings"
"github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter"
"github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity/internal"
"github.com/ooni/probe-cli/v3/internal/measurexlite"
"github.com/ooni/probe-cli/v3/internal/model"
)
@@ -184,18 +184,6 @@ func HTTPHeadersMatch(tk urlgetter.TestKeys, ctrl ControlResponse) *bool {
return &good
}
// getTitleRegexp matches an attribute-less <title> element, case
// insensitively, capturing between 1 and 512 characters that are not '<'.
//
// MK used {1,128} but we're making it larger here to get longer titles,
// e.g. the one of <http://www.isa.gov.il/Pages/default.aspx>.
//
// The pattern is compiled once at package scope so that GetTitle does not
// pay the compilation cost on every call.
var getTitleRegexp = regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)

// GetTitle returns the title or an empty string.
//
// The title is the text captured by the first match of getTitleRegexp inside
// measurementBody. An empty string is returned when nothing matches, which
// includes an empty title, a title longer than 512 characters, a title that
// contains '<', and a <title> tag carrying attributes.
func GetTitle(measurementBody string) string {
	v := getTitleRegexp.FindStringSubmatch(measurementBody)
	// A successful match yields the whole match plus one capture group.
	if len(v) < 2 {
		return ""
	}
	return v[1]
}
// HTTPTitleMatch returns whether the measurement and the control titles
// reasonably match, or nil if not applicable.
func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) {
@@ -214,7 +202,7 @@ func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) {
}
control := ctrl.HTTPRequest.Title
measurementBody := response.Body.Value
measurement := GetTitle(measurementBody)
measurement := measurexlite.WebGetTitle(measurementBody)
if measurement == "" {
return
}