From 299834174a6c1814a5f9c08c5abea76042779839 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Wed, 13 Oct 2021 13:50:22 +0200 Subject: [PATCH] fix(webconnectivity): gather longer HTML titles (#542) Allows us to get http://www.isa.gov.il/Pages/default.aspx's one. Discovered when working on https://github.com/ooni/probe/issues/1707. --- internal/engine/experiment/webconnectivity/httpanalysis.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/engine/experiment/webconnectivity/httpanalysis.go b/internal/engine/experiment/webconnectivity/httpanalysis.go index f81baf1..757acac 100644 --- a/internal/engine/experiment/webconnectivity/httpanalysis.go +++ b/internal/engine/experiment/webconnectivity/httpanalysis.go @@ -186,7 +186,9 @@ func HTTPHeadersMatch(tk urlgetter.TestKeys, ctrl ControlResponse) *bool { // GetTitle returns the title or an empty string. func GetTitle(measurementBody string) string { - re := regexp.MustCompile(`(?i)([^<]{1,128})`) // like MK + // MK used {1,128} but we're making it larger here to get longer titles + // e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one + re := regexp.MustCompile(`(?i)([^<]{1,512})`) v := re.FindStringSubmatch(measurementBody) if len(v) < 2 { return ""