From 3b24b1196d8c2e04a2df78ce67ed4bb5d09dd955 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Mon, 5 Sep 2022 13:33:59 +0200 Subject: [PATCH] fix(webconnectivity@v0.5): fetch HTTP only using system-resolver addrs (#935) While there, change the emoji logger to emit whitespace on info logs. This makes warnings stand out even more. Closes https://github.com/ooni/probe/issues/2258 --- .../experiment/webconnectivity/control.go | 18 +++- .../experiment/webconnectivity/dnscache.go | 28 ++++++- .../webconnectivity/dnsresolvers.go | 83 ++++++++++++------- .../experiment/webconnectivity/measurer.go | 2 +- internal/logx/logx.go | 2 +- internal/logx/logx_test.go | 2 +- 6 files changed, 92 insertions(+), 43 deletions(-) diff --git a/internal/experiment/webconnectivity/control.go b/internal/experiment/webconnectivity/control.go index 33eae4c..994fcbc 100644 --- a/internal/experiment/webconnectivity/control.go +++ b/internal/experiment/webconnectivity/control.go @@ -23,11 +23,11 @@ type EndpointMeasurementsStarter interface { // nonblocking read fails. Hence, you must create a [sema] channel with buffer equal // to N and N elements inside it to allow N flows to perform HTTP measurements. Passing // a nil [sema] causes no flow to attempt HTTP measurements. - startCleartextFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) + startCleartextFlowsWithSema(ctx context.Context, sema <-chan any, addresses []DNSEntry) // startSecureFlowsWithSema starts a TCP+TLS measurement flow for each IP addr. See // the docs of startCleartextFlowsWithSema for more info on the [sema] arg. - startSecureFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) + startSecureFlowsWithSema(ctx context.Context, sema <-chan any, addresses []DNSEntry) } // Control issues a Control request and saves the results @@ -157,12 +157,22 @@ func (c *Control) maybeStartExtraMeasurements(ctx context.Context, thAddrs []str } // obtain the TH-only addresses - var thOnly []string + var thOnlyAddrs []string for addr, flags := range mapping { if (flags & inProbe) != 0 { continue // discovered by the probe => already tested } - thOnly = append(thOnly, addr) + thOnlyAddrs = append(thOnlyAddrs, addr) + } + + c.Logger.Infof("measuring additional addrs from TH: %+v", thOnlyAddrs) + + var thOnly []DNSEntry + for _, addr := range thOnlyAddrs { + thOnly = append(thOnly, DNSEntry{ + Addr: addr, + Flags: 0, // neither system, nor udp, nor doh + }) } // Start extra measurements for TH-only addresses. Because we already diff --git a/internal/experiment/webconnectivity/dnscache.go b/internal/experiment/webconnectivity/dnscache.go index 7b62c26..18dce1f 100644 --- a/internal/experiment/webconnectivity/dnscache.go +++ b/internal/experiment/webconnectivity/dnscache.go @@ -2,6 +2,26 @@ package webconnectivity import "sync" +// DNSEntry is an entry in the DNS cache. +type DNSEntry struct { + // Addr is the cached address + Addr string + + // Flags contains flags + Flags int64 +} + +const ( + // DNSAddrFlagSystemResolver means we discovered this addr using the system resolver. + DNSAddrFlagSystemResolver = 1 << iota + + // DNSAddrFlagUDP means we discovered this addr using the UDP resolver. + DNSAddrFlagUDP + + // DNSAddrFlagHTTPS means we discovered this addr using the DNS-over-HTTPS resolver. + DNSAddrFlagHTTPS +) + // DNSCache wraps a model.Resolver to provide DNS caching. // // The zero value is invalid; please, use NewDNSCache to construct. @@ -10,11 +30,11 @@ type DNSCache struct { mu *sync.Mutex // values contains already resolved values. - values map[string][]string + values map[string][]DNSEntry } // Get gets values from the cache -func (c *DNSCache) Get(domain string) ([]string, bool) { +func (c *DNSCache) Get(domain string) ([]DNSEntry, bool) { c.mu.Lock() values, found := c.values[domain] c.mu.Unlock() @@ -22,7 +42,7 @@ func (c *DNSCache) Get(domain string) ([]string, bool) { } // Set inserts into the cache -func (c *DNSCache) Set(domain string, values []string) { +func (c *DNSCache) Set(domain string, values []DNSEntry) { c.mu.Lock() c.values[domain] = values c.mu.Unlock() @@ -32,6 +52,6 @@ func (c *DNSCache) Set(domain string, values []string) { func NewDNSCache() *DNSCache { return &DNSCache{ mu: &sync.Mutex{}, - values: map[string][]string{}, + values: map[string][]DNSEntry{}, } } diff --git a/internal/experiment/webconnectivity/dnsresolvers.go b/internal/experiment/webconnectivity/dnsresolvers.go index 4627f1d..58a8871 100644 --- a/internal/experiment/webconnectivity/dnsresolvers.go +++ b/internal/experiment/webconnectivity/dnsresolvers.go @@ -82,7 +82,7 @@ func (t *DNSResolvers) Start(ctx context.Context) { } // run performs a DNS lookup and returns the looked up addrs -func (t *DNSResolvers) run(parentCtx context.Context) []string { +func (t *DNSResolvers) run(parentCtx context.Context) []DNSEntry { // create output channels for the lookup systemOut := make(chan []string) udpOut := make(chan []string) @@ -117,39 +117,42 @@ func (t *DNSResolvers) run(parentCtx context.Context) []string { }) // merge the resolved IP addresses - merged := map[string]bool{} + merged := map[string]*DNSEntry{} for _, addr := range systemAddrs { - merged[addr] = true + if _, found := merged[addr]; !found { + merged[addr] = &DNSEntry{} + } + merged[addr].Addr = addr + merged[addr].Flags |= DNSAddrFlagSystemResolver } for _, addr := range udpAddrs { - merged[addr] = true + if _, found := merged[addr]; !found { + merged[addr] = &DNSEntry{} + } + merged[addr].Addr = addr + merged[addr].Flags |= DNSAddrFlagUDP } for _, addr := range httpsAddrs { - merged[addr] = true - } - - // rearrange addresses to have IPv4 first - sorted := []string{} - for addr := range merged { - if v6, err := netxlite.IsIPv6(addr); err == nil && !v6 { - sorted = append(sorted, addr) + if _, found := merged[addr]; !found { + merged[addr] = &DNSEntry{} } + merged[addr].Addr = addr + merged[addr].Flags |= DNSAddrFlagHTTPS } - for addr := range merged { - if v6, err := netxlite.IsIPv6(addr); err == nil && v6 { - sorted = append(sorted, addr) - } + // implementation note: we don't remove bogons because accessing + // them can lead us to discover block pages + var entries []DNSEntry + for _, entry := range merged { + entries = append(entries, *entry) } - // TODO(bassosimone): remove bogons - - return sorted + return entries } // Run runs this task in the current goroutine. func (t *DNSResolvers) Run(parentCtx context.Context) { var ( - addresses []string + addresses []DNSEntry found bool ) @@ -162,9 +165,11 @@ func (t *DNSResolvers) Run(parentCtx context.Context) { // insert the addresses we just looked us into the cache t.DNSCache.Set(t.Domain, addresses) - } - log.Infof("using resolved addrs: %+v", addresses) + log.Infof("using resolved addrs: %+v", addresses) + } else { + log.Infof("using previously-cached addrs: %+v", addresses) + } // fan out a number of child async tasks to use the IP addrs t.startCleartextFlows(parentCtx, addresses) @@ -409,14 +414,14 @@ func (t *DNSResolvers) dohSplitQueries( } // startCleartextFlows starts a TCP measurement flow for each IP addr. -func (t *DNSResolvers) startCleartextFlows(ctx context.Context, addresses []string) { +func (t *DNSResolvers) startCleartextFlows(ctx context.Context, addresses []DNSEntry) { sema := make(chan any, 1) sema <- true // allow a single flow to fetch the HTTP body t.startCleartextFlowsWithSema(ctx, sema, addresses) } // startCleartextFlowsWithSema implements EndpointMeasurementsStarter. -func (t *DNSResolvers) startCleartextFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) { +func (t *DNSResolvers) startCleartextFlowsWithSema(ctx context.Context, sema <-chan any, addresses []DNSEntry) { if t.URL.Scheme != "http" { // Do not bother with measuring HTTP when the user // has asked us to measure an HTTPS URL. @@ -427,12 +432,16 @@ func (t *DNSResolvers) startCleartextFlowsWithSema(ctx context.Context, sema <-c port = urlPort } for _, addr := range addresses { + maybeNilSema := sema + if (addr.Flags & DNSAddrFlagSystemResolver) == 0 { + maybeNilSema = nil // see https://github.com/ooni/probe/issues/2258 + } task := &CleartextFlow{ - Address: net.JoinHostPort(addr, port), + Address: net.JoinHostPort(addr.Addr, port), DNSCache: t.DNSCache, IDGenerator: t.IDGenerator, Logger: t.Logger, - Sema: sema, + Sema: maybeNilSema, TestKeys: t.TestKeys, ZeroTime: t.ZeroTime, WaitGroup: t.WaitGroup, @@ -449,7 +458,7 @@ func (t *DNSResolvers) startCleartextFlowsWithSema(ctx context.Context, sema <-c } // startSecureFlows starts a TCP+TLS measurement flow for each IP addr. -func (t *DNSResolvers) startSecureFlows(ctx context.Context, addresses []string) { +func (t *DNSResolvers) startSecureFlows(ctx context.Context, addresses []DNSEntry) { sema := make(chan any, 1) if t.URL.Scheme == "https" { // Allows just a single worker to fetch the response body but do that @@ -461,7 +470,7 @@ func (t *DNSResolvers) startSecureFlows(ctx context.Context, addresses []string) } // startSecureFlowsWithSema implements EndpointMeasurementsStarter. -func (t *DNSResolvers) startSecureFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) { +func (t *DNSResolvers) startSecureFlowsWithSema(ctx context.Context, sema <-chan any, addresses []DNSEntry) { port := "443" if urlPort := t.URL.Port(); urlPort != "" { if t.URL.Scheme != "https" { @@ -472,12 +481,16 @@ func (t *DNSResolvers) startSecureFlowsWithSema(ctx context.Context, sema <-chan port = urlPort } for _, addr := range addresses { + maybeNilSema := sema + if (addr.Flags & DNSAddrFlagSystemResolver) == 0 { + maybeNilSema = nil // see https://github.com/ooni/probe/issues/2258 + } task := &SecureFlow{ - Address: net.JoinHostPort(addr, port), + Address: net.JoinHostPort(addr.Addr, port), DNSCache: t.DNSCache, IDGenerator: t.IDGenerator, Logger: t.Logger, - Sema: sema, + Sema: maybeNilSema, TestKeys: t.TestKeys, ZeroTime: t.ZeroTime, WaitGroup: t.WaitGroup, @@ -496,10 +509,16 @@ func (t *DNSResolvers) startSecureFlowsWithSema(ctx context.Context, sema <-chan } // maybeStartControlFlow starts the control flow iff .Session and .THAddr are set. -func (t *DNSResolvers) maybeStartControlFlow(ctx context.Context, addresses []string) { +func (t *DNSResolvers) maybeStartControlFlow(ctx context.Context, addresses []DNSEntry) { + // note: for subsequent requests we don't set .Session and .THAddr hence + // we are not going to query the test helper more than once if t.Session != nil && t.THAddr != "" { + var addrs []string + for _, addr := range addresses { + addrs = append(addrs, addr.Addr) + } ctrl := &Control{ - Addresses: addresses, + Addresses: addrs, ExtraMeasurementsStarter: t, // allows starting follow-up measurement flows Logger: t.Logger, TestKeys: t.TestKeys, diff --git a/internal/experiment/webconnectivity/measurer.go b/internal/experiment/webconnectivity/measurer.go index e5b4e96..41eef70 100644 --- a/internal/experiment/webconnectivity/measurer.go +++ b/internal/experiment/webconnectivity/measurer.go @@ -36,7 +36,7 @@ func (m *Measurer) ExperimentName() string { // ExperimentVersion implements model.ExperimentMeasurer. func (m *Measurer) ExperimentVersion() string { - return "0.5.3" + return "0.5.4" } // Run implements model.ExperimentMeasurer. diff --git a/internal/logx/logx.go b/internal/logx/logx.go index 55bf841..c4123db 100644 --- a/internal/logx/logx.go +++ b/internal/logx/logx.go @@ -45,7 +45,7 @@ func (h *Handler) HandleLog(e *log.Entry) (err error) { case log.DebugLevel: level = "🧐" case log.InfoLevel: - level = "🗒️" + level = " " case log.WarnLevel: level = "🔥" default: diff --git a/internal/logx/logx_test.go b/internal/logx/logx_test.go index 4e99aeb..3f7d532 100644 --- a/internal/logx/logx_test.go +++ b/internal/logx/logx_test.go @@ -74,7 +74,7 @@ func TestLogHandlerHandleLog(t *testing.T) { Name: "info level with emoji", Emoji: true, Level: log.InfoLevel, - ExpectSeverity: "🗒️", + ExpectSeverity: " ", }, { Name: "warn level with emoji", Emoji: true,