From a3a27b1ebfc29c70282ba94fc73009686cc8ab8d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Sun, 5 Sep 2021 18:50:05 +0200 Subject: [PATCH] fix(netxlite): make default resolver converge faster (#456) * fix(netxlite): make default resolver converge faster Closes https://github.com/ooni/probe/issues/1726 * Update internal/netxlite/resolver.go * fix(ndt7): adapt tests after previous change Because now we're running the DNS resolution inside a goroutine with a child context, the returned error string is different. The previous error said we canceled the whole dialing operation, while now we see directly that the context was canceled. --- internal/engine/experiment/ndt7/dial_test.go | 8 +-- internal/engine/experiment/ndt7/ndt7_test.go | 16 ++--- internal/netxlite/resolver.go | 43 +++++++++++- internal/netxlite/resolver_test.go | 74 +++++++++++++++++++- 4 files changed, 125 insertions(+), 16 deletions(-) diff --git a/internal/engine/experiment/ndt7/dial_test.go b/internal/engine/experiment/ndt7/dial_test.go index c809953..56f3629 100644 --- a/internal/engine/experiment/ndt7/dial_test.go +++ b/internal/engine/experiment/ndt7/dial_test.go @@ -18,8 +18,8 @@ func TestDialDownloadWithCancelledContext(t *testing.T) { cancel() // immediately halt mgr := newDialManager("wss://hostname.fake", log.Log, "miniooni/0.1.0-dev") conn, err := mgr.dialDownload(ctx) - if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") { - t.Fatal("not the error we expected") + if err == nil || !strings.HasSuffix(err.Error(), "context canceled") { + t.Fatal("not the error we expected", err) } if conn != nil { t.Fatal("expected nil conn here") @@ -31,8 +31,8 @@ func TestDialUploadWithCancelledContext(t *testing.T) { cancel() // immediately halt mgr := newDialManager("wss://hostname.fake", log.Log, "miniooni/0.1.0-dev") conn, err := mgr.dialUpload(ctx) - if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") { - t.Fatal("not the error we expected") + if err == nil || !strings.HasSuffix(err.Error(), "context canceled") { + t.Fatal("not the error we expected", err) } if conn != nil { t.Fatal("expected nil conn here") diff --git a/internal/engine/experiment/ndt7/ndt7_test.go b/internal/engine/experiment/ndt7/ndt7_test.go index 232875a..81dc06b 100644 --- a/internal/engine/experiment/ndt7/ndt7_test.go +++ b/internal/engine/experiment/ndt7/ndt7_test.go @@ -52,8 +52,8 @@ func TestDoDownloadWithCancelledContext(t *testing.T) { err := m.doDownload( ctx, sess, model.NewPrinterCallbacks(log.Log), new(TestKeys), "ws://host.name") - if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") { - t.Fatal("not the error we expected") + if err == nil || !strings.HasSuffix(err.Error(), "context canceled") { + t.Fatal("not the error we expected", err) } } @@ -69,8 +69,8 @@ func TestDoUploadWithCancelledContext(t *testing.T) { err := m.doUpload( ctx, sess, model.NewPrinterCallbacks(log.Log), new(TestKeys), "ws://host.name") - if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") { - t.Fatal("not the error we expected") + if err == nil || !strings.HasSuffix(err.Error(), "context canceled") { + t.Fatal("not the error we expected", err) } } @@ -132,8 +132,8 @@ func TestFailDownload(t *testing.T) { new(model.Measurement), model.NewPrinterCallbacks(log.Log), ) - if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") { - t.Fatal(err) + if err == nil || !strings.HasSuffix(err.Error(), "context canceled") { + t.Fatal("not the error we expected", err) } } @@ -153,8 +153,8 @@ func TestFailUpload(t *testing.T) { new(model.Measurement), model.NewPrinterCallbacks(log.Log), ) - if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") { - t.Fatal(err) + if err == nil || !strings.HasSuffix(err.Error(), "context canceled") { + t.Fatal("not the error we expected", err) } } diff --git a/internal/netxlite/resolver.go b/internal/netxlite/resolver.go index cd79a92..f81dc5d 100644 --- a/internal/netxlite/resolver.go +++ b/internal/netxlite/resolver.go @@ -40,13 +40,52 @@ func NewResolver(config *ResolverConfig) Resolver { } // resolverSystem is the system resolver. -type resolverSystem struct{} +type resolverSystem struct { + testableTimeout time.Duration + testableLookupHost func(ctx context.Context, domain string) ([]string, error) +} var _ Resolver = &resolverSystem{} // LookupHost implements Resolver.LookupHost. func (r *resolverSystem) LookupHost(ctx context.Context, hostname string) ([]string, error) { - return net.DefaultResolver.LookupHost(ctx, hostname) + // This code forces adding a shorter timeout to the domain name + // resolutions when using the system resolver. We have seen cases + // in which such a timeout becomes too large. One such case is + // described in https://github.com/ooni/probe/issues/1726. + addrsch, errch := make(chan []string, 1), make(chan error, 1) + ctx, cancel := context.WithTimeout(ctx, r.timeout()) + defer cancel() + go func() { + addrs, err := r.lookupHost()(ctx, hostname) + if err != nil { + errch <- err + return + } + addrsch <- addrs + }() + select { + case <-ctx.Done(): + return nil, ctx.Err() + case addrs := <-addrsch: + return addrs, nil + case err := <-errch: + return nil, err + } +} + +func (r *resolverSystem) timeout() time.Duration { + if r.testableTimeout > 0 { + return r.testableTimeout + } + return 15 * time.Second +} + +func (r *resolverSystem) lookupHost() func(ctx context.Context, domain string) ([]string, error) { + if r.testableLookupHost != nil { + return r.testableLookupHost + } + return net.DefaultResolver.LookupHost } // Network implements Resolver.Network. diff --git a/internal/netxlite/resolver_test.go b/internal/netxlite/resolver_test.go index 601350a..274cdb1 100644 --- a/internal/netxlite/resolver_test.go +++ b/internal/netxlite/resolver_test.go @@ -4,7 +4,9 @@ import ( "context" "errors" "strings" + "sync" "testing" + "time" "github.com/apex/log" "github.com/google/go-cmp/cmp" @@ -12,7 +14,7 @@ import ( ) func TestResolverSystemNetworkAddress(t *testing.T) { - r := resolverSystem{} + r := &resolverSystem{} if r.Network() != "system" { t.Fatal("invalid Network") } @@ -22,7 +24,8 @@ func TestResolverSystemNetworkAddress(t *testing.T) { } func TestResolverSystemWorksAsIntended(t *testing.T) { - r := resolverSystem{} + r := &resolverSystem{} + defer r.CloseIdleConnections() addrs, err := r.LookupHost(context.Background(), "dns.google.com") if err != nil { t.Fatal(err) @@ -32,6 +35,73 @@ func TestResolverSystemWorksAsIntended(t *testing.T) { } } +func TestResolverSystemDefaultTimeout(t *testing.T) { + r := &resolverSystem{} + if r.timeout() != 15*time.Second { + t.Fatal("unexpected default timeout") + } +} + +func TestResolverSystemWithTimeoutAndSuccess(t *testing.T) { + wg := &sync.WaitGroup{} + wg.Add(1) + r := &resolverSystem{ + testableTimeout: 1 * time.Microsecond, + testableLookupHost: func(ctx context.Context, domain string) ([]string, error) { + defer wg.Done() + time.Sleep(1 * time.Millisecond) + return []string{"8.8.8.8"}, nil + }, + } + ctx := context.Background() + addrs, err := r.LookupHost(ctx, "example.antani") + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatal("not the error we expected", err) + } + if addrs != nil { + t.Fatal("invalid addrs") + } + wg.Wait() +} + +func TestResolverSystemWithTimeoutAndFailure(t *testing.T) { + wg := &sync.WaitGroup{} + wg.Add(1) + r := &resolverSystem{ + testableTimeout: 1 * time.Microsecond, + testableLookupHost: func(ctx context.Context, domain string) ([]string, error) { + defer wg.Done() + time.Sleep(1 * time.Millisecond) + return nil, errors.New("no such host") + }, + } + ctx := context.Background() + addrs, err := r.LookupHost(ctx, "example.antani") + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatal("not the error we expected", err) + } + if addrs != nil { + t.Fatal("invalid addrs") + } + wg.Wait() +} + +func TestResolverSystemWithNXDOMAIN(t *testing.T) { + r := &resolverSystem{ + testableLookupHost: func(ctx context.Context, domain string) ([]string, error) { + return nil, errors.New("no such host") + }, + } + ctx := context.Background() + addrs, err := r.LookupHost(ctx, "example.antani") + if err == nil || !strings.HasSuffix(err.Error(), "no such host") { + t.Fatal("not the error we expected", err) + } + if addrs != nil { + t.Fatal("invalid addrs") + } +} + func TestResolverLoggerWithSuccess(t *testing.T) { expected := []string{"1.1.1.1"} r := resolverLogger{