fix(netxlite): make default resolver converge faster (#456)

* fix(netxlite): make default resolver converge faster

Closes https://github.com/ooni/probe/issues/1726

* Update internal/netxlite/resolver.go

* fix(ndt7): adapt tests after previous change

Because now we're running the DNS resolution inside a goroutine
with a child context, the returned error string is different.

The previous error said we canceled the whole dialing operation,
while now we see directly that the context was canceled.
This commit is contained in:
Simone Basso 2021-09-05 18:50:05 +02:00 committed by GitHub
parent a3654f60b7
commit a3a27b1ebf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 125 additions and 16 deletions

View File

@ -18,8 +18,8 @@ func TestDialDownloadWithCancelledContext(t *testing.T) {
cancel() // immediately halt
mgr := newDialManager("wss://hostname.fake", log.Log, "miniooni/0.1.0-dev")
conn, err := mgr.dialDownload(ctx)
if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") {
t.Fatal("not the error we expected")
if err == nil || !strings.HasSuffix(err.Error(), "context canceled") {
t.Fatal("not the error we expected", err)
}
if conn != nil {
t.Fatal("expected nil conn here")
@ -31,8 +31,8 @@ func TestDialUploadWithCancelledContext(t *testing.T) {
cancel() // immediately halt
mgr := newDialManager("wss://hostname.fake", log.Log, "miniooni/0.1.0-dev")
conn, err := mgr.dialUpload(ctx)
if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") {
t.Fatal("not the error we expected")
if err == nil || !strings.HasSuffix(err.Error(), "context canceled") {
t.Fatal("not the error we expected", err)
}
if conn != nil {
t.Fatal("expected nil conn here")

View File

@ -52,8 +52,8 @@ func TestDoDownloadWithCancelledContext(t *testing.T) {
err := m.doDownload(
ctx, sess, model.NewPrinterCallbacks(log.Log), new(TestKeys),
"ws://host.name")
if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") {
t.Fatal("not the error we expected")
if err == nil || !strings.HasSuffix(err.Error(), "context canceled") {
t.Fatal("not the error we expected", err)
}
}
@ -69,8 +69,8 @@ func TestDoUploadWithCancelledContext(t *testing.T) {
err := m.doUpload(
ctx, sess, model.NewPrinterCallbacks(log.Log), new(TestKeys),
"ws://host.name")
if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") {
t.Fatal("not the error we expected")
if err == nil || !strings.HasSuffix(err.Error(), "context canceled") {
t.Fatal("not the error we expected", err)
}
}
@ -132,8 +132,8 @@ func TestFailDownload(t *testing.T) {
new(model.Measurement),
model.NewPrinterCallbacks(log.Log),
)
if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") {
t.Fatal(err)
if err == nil || !strings.HasSuffix(err.Error(), "context canceled") {
t.Fatal("not the error we expected", err)
}
}
@ -153,8 +153,8 @@ func TestFailUpload(t *testing.T) {
new(model.Measurement),
model.NewPrinterCallbacks(log.Log),
)
if err == nil || !strings.HasSuffix(err.Error(), "operation was canceled") {
t.Fatal(err)
if err == nil || !strings.HasSuffix(err.Error(), "context canceled") {
t.Fatal("not the error we expected", err)
}
}

View File

@ -40,13 +40,52 @@ func NewResolver(config *ResolverConfig) Resolver {
}
// resolverSystem is the system resolver.
type resolverSystem struct{}
type resolverSystem struct {
testableTimeout time.Duration
testableLookupHost func(ctx context.Context, domain string) ([]string, error)
}
var _ Resolver = &resolverSystem{}
// LookupHost implements Resolver.LookupHost.
func (r *resolverSystem) LookupHost(ctx context.Context, hostname string) ([]string, error) {
return net.DefaultResolver.LookupHost(ctx, hostname)
// This code forces adding a shorter timeout to the domain name
// resolutions when using the system resolver. We have seen cases
// in which such a timeout becomes too large. One such case is
// described in https://github.com/ooni/probe/issues/1726.
addrsch, errch := make(chan []string, 1), make(chan error, 1)
ctx, cancel := context.WithTimeout(ctx, r.timeout())
defer cancel()
go func() {
addrs, err := r.lookupHost()(ctx, hostname)
if err != nil {
errch <- err
return
}
addrsch <- addrs
}()
select {
case <-ctx.Done():
return nil, ctx.Err()
case addrs := <-addrsch:
return addrs, nil
case err := <-errch:
return nil, err
}
}
func (r *resolverSystem) timeout() time.Duration {
if r.testableTimeout > 0 {
return r.testableTimeout
}
return 15 * time.Second
}
func (r *resolverSystem) lookupHost() func(ctx context.Context, domain string) ([]string, error) {
if r.testableLookupHost != nil {
return r.testableLookupHost
}
return net.DefaultResolver.LookupHost
}
// Network implements Resolver.Network.

View File

@ -4,7 +4,9 @@ import (
"context"
"errors"
"strings"
"sync"
"testing"
"time"
"github.com/apex/log"
"github.com/google/go-cmp/cmp"
@ -12,7 +14,7 @@ import (
)
func TestResolverSystemNetworkAddress(t *testing.T) {
r := resolverSystem{}
r := &resolverSystem{}
if r.Network() != "system" {
t.Fatal("invalid Network")
}
@ -22,7 +24,8 @@ func TestResolverSystemNetworkAddress(t *testing.T) {
}
func TestResolverSystemWorksAsIntended(t *testing.T) {
r := resolverSystem{}
r := &resolverSystem{}
defer r.CloseIdleConnections()
addrs, err := r.LookupHost(context.Background(), "dns.google.com")
if err != nil {
t.Fatal(err)
@ -32,6 +35,73 @@ func TestResolverSystemWorksAsIntended(t *testing.T) {
}
}
func TestResolverSystemDefaultTimeout(t *testing.T) {
r := &resolverSystem{}
if r.timeout() != 15*time.Second {
t.Fatal("unexpected default timeout")
}
}
func TestResolverSystemWithTimeoutAndSuccess(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
r := &resolverSystem{
testableTimeout: 1 * time.Microsecond,
testableLookupHost: func(ctx context.Context, domain string) ([]string, error) {
defer wg.Done()
time.Sleep(1 * time.Millisecond)
return []string{"8.8.8.8"}, nil
},
}
ctx := context.Background()
addrs, err := r.LookupHost(ctx, "example.antani")
if !errors.Is(err, context.DeadlineExceeded) {
t.Fatal("not the error we expected", err)
}
if addrs != nil {
t.Fatal("invalid addrs")
}
wg.Wait()
}
func TestResolverSystemWithTimeoutAndFailure(t *testing.T) {
wg := &sync.WaitGroup{}
wg.Add(1)
r := &resolverSystem{
testableTimeout: 1 * time.Microsecond,
testableLookupHost: func(ctx context.Context, domain string) ([]string, error) {
defer wg.Done()
time.Sleep(1 * time.Millisecond)
return nil, errors.New("no such host")
},
}
ctx := context.Background()
addrs, err := r.LookupHost(ctx, "example.antani")
if !errors.Is(err, context.DeadlineExceeded) {
t.Fatal("not the error we expected", err)
}
if addrs != nil {
t.Fatal("invalid addrs")
}
wg.Wait()
}
func TestResolverSystemWithNXDOMAIN(t *testing.T) {
r := &resolverSystem{
testableLookupHost: func(ctx context.Context, domain string) ([]string, error) {
return nil, errors.New("no such host")
},
}
ctx := context.Background()
addrs, err := r.LookupHost(ctx, "example.antani")
if err == nil || !strings.HasSuffix(err.Error(), "no such host") {
t.Fatal("not the error we expected", err)
}
if addrs != nil {
t.Fatal("invalid addrs")
}
}
func TestResolverLoggerWithSuccess(t *testing.T) {
expected := []string{"1.1.1.1"}
r := resolverLogger{