fix(webconnectivity): gather longer HTML titles (#542)

This allows us to extract the HTML title of http://www.isa.gov.il/Pages/default.aspx, which is longer than the previous 128-character limit.

Discovered when working on https://github.com/ooni/probe/issues/1707.
This commit is contained in:
Simone Basso 2021-10-13 13:50:22 +02:00 committed by GitHub
parent 9e365661f0
commit 299834174a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -186,7 +186,9 @@ func HTTPHeadersMatch(tk urlgetter.TestKeys, ctrl ControlResponse) *bool {
// GetTitle returns the title or an empty string.
func GetTitle(measurementBody string) string {
re := regexp.MustCompile(`(?i)<title>([^<]{1,128})</title>`) // like MK
// MK used {1,128} but we're making it larger here to get longer titles
// e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one
re := regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)
v := re.FindStringSubmatch(measurementBody)
if len(v) < 2 {
return ""