273b70bacc
## Checklist

- [x] I have read the [contribution guidelines](https://github.com/ooni/probe-cli/blob/master/CONTRIBUTING.md)
- [x] reference issue for this pull request: https://github.com/ooni/probe/issues/1885
- [x] related ooni/spec pull request: N/A

Location of the issue tracker: https://github.com/ooni/probe

## Description

This PR contains a set of changes that move important interfaces and data types into the `./internal/model` package. The criterion for including an interface or data type here is roughly that the type should be important and used by several packages. We are especially interested in moving more interfaces here to increase modularity.

An additional side effect is that, by reading this package, one should be able to understand more quickly how different parts of the codebase interact with each other.

This is what I want to move into `internal/model`:

- [x] the most important interfaces from `internal/netxlite`
- [x] everything that was previously part of `internal/engine/model`
- [x] the mocks from `internal/netxlite/mocks`, which should also be moved here as a subpackage
253 lines
7.5 KiB
Go
253 lines
7.5 KiB
Go
package webconnectivity
|
|
|
|
import (
|
|
"reflect"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/ooni/probe-cli/v3/internal/engine/experiment/urlgetter"
|
|
"github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity/internal"
|
|
"github.com/ooni/probe-cli/v3/internal/model"
|
|
)
|
|
|
|
// HTTPAnalysisResult holds the outcome of the client-side analysis, which
// is obtained by comparing the measurement with the control. The pointer
// fields are serialized as JSON null when they are nil.
type HTTPAnalysisResult struct {
	// BodyLengthMatch is the result of the body length comparison.
	BodyLengthMatch *bool `json:"body_length_match"`

	// BodyProportion is the proportion between the two body lengths.
	BodyProportion float64 `json:"body_proportion"`

	// StatusCodeMatch is the result of the status code comparison.
	StatusCodeMatch *bool `json:"status_code_match"`

	// HeadersMatch is the result of the headers comparison.
	HeadersMatch *bool `json:"headers_match"`

	// TitleMatch is the result of the HTML title comparison.
	TitleMatch *bool `json:"title_match"`
}
|
|
|
|
// Log logs the results of the analysis
|
|
func (har HTTPAnalysisResult) Log(logger model.Logger) {
|
|
logger.Infof("BodyLengthMatch: %+v", internal.BoolPointerToString(har.BodyLengthMatch))
|
|
logger.Infof("BodyProportion: %+v", har.BodyProportion)
|
|
logger.Infof("StatusCodeMatch: %+v", internal.BoolPointerToString(har.StatusCodeMatch))
|
|
logger.Infof("HeadersMatch: %+v", internal.BoolPointerToString(har.HeadersMatch))
|
|
logger.Infof("TitleMatch: %+v", internal.BoolPointerToString(har.TitleMatch))
|
|
}
|
|
|
|
// HTTPAnalysis performs follow-up analysis on the webconnectivity measurement by
|
|
// comparing the measurement test keys and the control.
|
|
func HTTPAnalysis(tk urlgetter.TestKeys, ctrl ControlResponse) (out HTTPAnalysisResult) {
|
|
out.BodyLengthMatch, out.BodyProportion = HTTPBodyLengthChecks(tk, ctrl)
|
|
out.StatusCodeMatch = HTTPStatusCodeMatch(tk, ctrl)
|
|
out.HeadersMatch = HTTPHeadersMatch(tk, ctrl)
|
|
out.TitleMatch = HTTPTitleMatch(tk, ctrl)
|
|
return
|
|
}
|
|
|
|
// HTTPBodyLengthChecks returns whether the measured body is reasonably
|
|
// long as much as the control body as well as the proportion between
|
|
// the two bodies. This check may return nil, nil when such a
|
|
// comparison would actually not be applicable.
|
|
func HTTPBodyLengthChecks(
|
|
tk urlgetter.TestKeys, ctrl ControlResponse) (match *bool, proportion float64) {
|
|
control := ctrl.HTTPRequest.BodyLength
|
|
if control <= 0 {
|
|
return
|
|
}
|
|
if len(tk.Requests) <= 0 {
|
|
return
|
|
}
|
|
response := tk.Requests[0].Response
|
|
if response.BodyIsTruncated {
|
|
return
|
|
}
|
|
measurement := int64(len(response.Body.Value))
|
|
if measurement <= 0 {
|
|
return
|
|
}
|
|
const bodyProportionFactor = 0.7
|
|
if measurement >= control {
|
|
proportion = float64(control) / float64(measurement)
|
|
} else {
|
|
proportion = float64(measurement) / float64(control)
|
|
}
|
|
v := proportion > bodyProportionFactor
|
|
match = &v
|
|
return
|
|
}
|
|
|
|
// HTTPStatusCodeMatch returns whether the status code of the measurement
|
|
// matches the status code of the control, or nil if such comparison
|
|
// is actually not applicable.
|
|
func HTTPStatusCodeMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) {
|
|
control := ctrl.HTTPRequest.StatusCode
|
|
if len(tk.Requests) < 1 {
|
|
return // no real status code
|
|
}
|
|
measurement := tk.Requests[0].Response.Code
|
|
if control <= 0 {
|
|
return // no real status code
|
|
}
|
|
if measurement <= 0 {
|
|
return // no real status code
|
|
}
|
|
value := control == measurement
|
|
if value {
|
|
// if the status codes are equal, they clearly match
|
|
out = &value
|
|
return
|
|
}
|
|
// This fix is part of Web Connectivity in MK and in Python since
|
|
// basically forever; my recollection is that we want to work around
|
|
// cases where the test helper is failing(?!). Unlike previous
|
|
// implementations, this implementation avoids a false positive
|
|
// when both measurement and control statuses are 500.
|
|
if control/100 == 5 {
|
|
return
|
|
}
|
|
out = &value
|
|
return
|
|
}
|
|
|
|
// HTTPHeadersMatch returns whether uncommon headers match between control and
|
|
// measurement, or nil if check is not applicable.
|
|
func HTTPHeadersMatch(tk urlgetter.TestKeys, ctrl ControlResponse) *bool {
|
|
if len(tk.Requests) <= 0 {
|
|
return nil
|
|
}
|
|
if tk.Requests[0].Response.Code <= 0 {
|
|
return nil
|
|
}
|
|
if ctrl.HTTPRequest.StatusCode <= 0 {
|
|
return nil
|
|
}
|
|
control := ctrl.HTTPRequest.Headers
|
|
// Implementation note: using map because we only care about the
|
|
// keys being different and we ignore the values.
|
|
measurement := tk.Requests[0].Response.Headers
|
|
const (
|
|
inMeasurement = 1 << 0
|
|
inControl = 1 << 1
|
|
inBoth = inMeasurement | inControl
|
|
)
|
|
commonHeaders := map[string]bool{
|
|
"date": true,
|
|
"content-type": true,
|
|
"server": true,
|
|
"cache-control": true,
|
|
"vary": true,
|
|
"set-cookie": true,
|
|
"location": true,
|
|
"expires": true,
|
|
"x-powered-by": true,
|
|
"content-encoding": true,
|
|
"last-modified": true,
|
|
"accept-ranges": true,
|
|
"pragma": true,
|
|
"x-frame-options": true,
|
|
"etag": true,
|
|
"x-content-type-options": true,
|
|
"age": true,
|
|
"via": true,
|
|
"p3p": true,
|
|
"x-xss-protection": true,
|
|
"content-language": true,
|
|
"cf-ray": true,
|
|
"strict-transport-security": true,
|
|
"link": true,
|
|
"x-varnish": true,
|
|
}
|
|
matching := make(map[string]int)
|
|
ours := make(map[string]bool)
|
|
for key := range measurement {
|
|
key = strings.ToLower(key)
|
|
if _, ok := commonHeaders[key]; !ok {
|
|
matching[key] |= inMeasurement
|
|
}
|
|
ours[key] = true
|
|
}
|
|
theirs := make(map[string]bool)
|
|
for key := range control {
|
|
key = strings.ToLower(key)
|
|
if _, ok := commonHeaders[key]; !ok {
|
|
matching[key] |= inControl
|
|
}
|
|
theirs[key] = true
|
|
}
|
|
// if they are equal we're done
|
|
if good := reflect.DeepEqual(ours, theirs); good {
|
|
return &good
|
|
}
|
|
// compute the intersection of uncommon headers
|
|
var intersection int
|
|
for _, value := range matching {
|
|
if (value & inBoth) == inBoth {
|
|
intersection++
|
|
}
|
|
}
|
|
good := intersection > 0
|
|
return &good
|
|
}
|
|
|
|
// getTitleRegexp matches an HTML title element with no attributes and
// captures its content. The element name is matched case-insensitively
// and the content must be between 1 and 512 characters, none of which
// may be '<'.
//
// MK used {1,128} but we're making it larger here to get longer titles
// e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one.
//
// The expression is compiled once at package scope instead of on every
// call to GetTitle; a compiled Regexp is safe for concurrent use.
var getTitleRegexp = regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)

// GetTitle returns the content of the first <title> element found in
// measurementBody, or an empty string when no such element is found.
func GetTitle(measurementBody string) string {
	match := getTitleRegexp.FindStringSubmatch(measurementBody)
	if len(match) < 2 {
		return ""
	}
	return match[1]
}
|
|
|
|
// HTTPTitleMatch returns whether the measurement and the control titles
|
|
// reasonably match, or nil if not applicable.
|
|
func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) {
|
|
if len(tk.Requests) <= 0 {
|
|
return
|
|
}
|
|
response := tk.Requests[0].Response
|
|
if response.Code <= 0 {
|
|
return
|
|
}
|
|
if response.BodyIsTruncated {
|
|
return
|
|
}
|
|
if ctrl.HTTPRequest.StatusCode <= 0 {
|
|
return
|
|
}
|
|
control := ctrl.HTTPRequest.Title
|
|
measurementBody := response.Body.Value
|
|
measurement := GetTitle(measurementBody)
|
|
if measurement == "" {
|
|
return
|
|
}
|
|
const (
|
|
inMeasurement = 1 << 0
|
|
inControl = 1 << 1
|
|
inBoth = inMeasurement | inControl
|
|
)
|
|
words := make(map[string]int)
|
|
// We don't consider to match words that are shorter than 5
|
|
// characters (5 is the average word length for english)
|
|
//
|
|
// The original implementation considered the word order but
|
|
// considering different languages it seems we could have less
|
|
// false positives by ignoring the word order.
|
|
const minWordLength = 5
|
|
for _, word := range strings.Split(measurement, " ") {
|
|
if len(word) >= minWordLength {
|
|
words[strings.ToLower(word)] |= inMeasurement
|
|
}
|
|
}
|
|
for _, word := range strings.Split(control, " ") {
|
|
if len(word) >= minWordLength {
|
|
words[strings.ToLower(word)] |= inControl
|
|
}
|
|
}
|
|
good := true
|
|
for _, score := range words {
|
|
if (score & inBoth) != inBoth {
|
|
good = false
|
|
break
|
|
}
|
|
}
|
|
return &good
|
|
}
|