From cf6dbe48e028826ab493794ef7d7d32a4bee3f14 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Sat, 28 May 2022 15:10:30 +0200 Subject: [PATCH] netxlite: call getaddrinfo and handle platform-specific oddities (#764) This commit changes our system resolver to call getaddrinfo directly when CGO is enabled. This change allows us to: 1. obtain the CNAME easily 2. obtain the real getaddrinfo retval 3. handle platform specific oddities such as `EAI_NODATA` returned on Android devices See https://github.com/ooni/probe/issues/2029 and https://github.com/ooni/probe/issues/2029#issuecomment-1140258729 in particular. See https://github.com/ooni/probe/issues/2033 for documentation regarding the desire to see `getaddrinfo`'s retval. See https://github.com/ooni/probe/issues/2118 for possible follow-up changes. --- .github/workflows/netxlite.yml | 5 +- .../experiment/sniblocking/sniblocking.go | 2 +- .../sniblocking/sniblocking_test.go | 12 +- .../experiment/webconnectivity/dnsanalysis.go | 8 +- .../webconnectivity/dnsanalysis_test.go | 21 +- .../experiment/webconnectivity/summary.go | 9 +- .../webconnectivity/summary_test.go | 20 +- internal/netxlite/certifi.go | 2 +- internal/netxlite/classify.go | 25 +- internal/netxlite/classify_test.go | 6 + internal/netxlite/doc.go | 17 ++ internal/netxlite/errno.go | 4 +- internal/netxlite/errno_darwin.go | 2 +- internal/netxlite/errno_darwin_test.go | 2 +- internal/netxlite/errno_freebsd.go | 2 +- internal/netxlite/errno_freebsd_test.go | 2 +- internal/netxlite/errno_linux.go | 2 +- internal/netxlite/errno_linux_test.go | 2 +- internal/netxlite/errno_openbsd.go | 2 +- internal/netxlite/errno_openbsd_test.go | 2 +- internal/netxlite/errno_windows.go | 65 ++--- internal/netxlite/errno_windows_test.go | 8 +- internal/netxlite/getaddrinfo.go | 56 +++++ internal/netxlite/getaddrinfo_bsd.go | 58 +++++ internal/netxlite/getaddrinfo_bsd_test.go | 105 ++++++++ internal/netxlite/getaddrinfo_cgo.go | 229 ++++++++++++++++++ internal/netxlite/getaddrinfo_cgo_test.go | 89 +++++++ internal/netxlite/getaddrinfo_linux.go | 161 ++++++++++++ internal/netxlite/getaddrinfo_linux_test.go | 181 ++++++++++++++ internal/netxlite/getaddrinfo_otherwise.go | 12 + internal/netxlite/getaddrinfo_test.go | 80 ++++++ internal/netxlite/getaddrinfo_windows.go | 28 +++ internal/netxlite/getaddrinfo_windows_test.go | 81 +++++++ internal/netxlite/integration_test.go | 6 +- internal/netxlite/internal/generrno/main.go | 5 + internal/netxlite/tproxy.go | 7 +- 36 files changed, 1259 insertions(+), 59 deletions(-) create mode 100644 internal/netxlite/getaddrinfo.go create mode 100644 internal/netxlite/getaddrinfo_bsd.go create mode 100644 internal/netxlite/getaddrinfo_bsd_test.go create mode 100644 internal/netxlite/getaddrinfo_cgo.go create mode 100644 internal/netxlite/getaddrinfo_cgo_test.go create mode 100644 internal/netxlite/getaddrinfo_linux.go create mode 100644 internal/netxlite/getaddrinfo_linux_test.go create mode 100644 internal/netxlite/getaddrinfo_otherwise.go create mode 100644 internal/netxlite/getaddrinfo_test.go create mode 100644 internal/netxlite/getaddrinfo_windows.go create mode 100644 internal/netxlite/getaddrinfo_windows_test.go diff --git a/.github/workflows/netxlite.yml b/.github/workflows/netxlite.yml index ecb3aab..683720b 100644 --- a/.github/workflows/netxlite.yml +++ b/.github/workflows/netxlite.yml @@ -1,6 +1,9 @@ -# netxlite runs unit and integration tests on our fundamental net library +# Runs unit and integration tests for our fundamental networking library. name: netxlite on: + # Because we link libc explicitly for getaddrinfo, we SHOULD run + # these checks for every PR to ensure we still compile. + pull_request: push: branches: - "master" diff --git a/internal/engine/experiment/sniblocking/sniblocking.go b/internal/engine/experiment/sniblocking/sniblocking.go index ba0e769..4e4df18 100644 --- a/internal/engine/experiment/sniblocking/sniblocking.go +++ b/internal/engine/experiment/sniblocking/sniblocking.go @@ -68,7 +68,7 @@ func (tk *TestKeys) classify() string { return classAnomalyTestHelperUnreachable case netxlite.FailureConnectionReset: return classInterferenceReset - case netxlite.FailureDNSNXDOMAINError: + case netxlite.FailureDNSNXDOMAINError, netxlite.FailureAndroidDNSCacheNoData: return classAnomalyTestHelperUnreachable case netxlite.FailureEOFError: return classInterferenceClosed diff --git a/internal/engine/experiment/sniblocking/sniblocking_test.go b/internal/engine/experiment/sniblocking/sniblocking_test.go index bcbae62..63ec9c7 100644 --- a/internal/engine/experiment/sniblocking/sniblocking_test.go +++ b/internal/engine/experiment/sniblocking/sniblocking_test.go @@ -12,11 +12,6 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" ) -const ( - softwareName = "ooniprobe-example" - softwareVersion = "0.0.1" -) - func TestTestKeysClassify(t *testing.T) { asStringPtr := func(s string) *string { return &s @@ -41,6 +36,13 @@ func TestTestKeysClassify(t *testing.T) { t.Fatal("unexpected result") } }) + t.Run("with tk.Target.Failure == android_dns_cache_no_data", func(t *testing.T) { + tk := new(TestKeys) + tk.Target.Failure = asStringPtr(netxlite.FailureAndroidDNSCacheNoData) + if tk.classify() != classAnomalyTestHelperUnreachable { + t.Fatal("unexpected result") + } + }) t.Run("with tk.Target.Failure == connection_reset", func(t *testing.T) { tk := new(TestKeys) tk.Target.Failure = asStringPtr(netxlite.FailureConnectionReset) diff --git a/internal/engine/experiment/webconnectivity/dnsanalysis.go b/internal/engine/experiment/webconnectivity/dnsanalysis.go index daa656d..f1ee86a 100644 --- a/internal/engine/experiment/webconnectivity/dnsanalysis.go +++ b/internal/engine/experiment/webconnectivity/dnsanalysis.go @@ -44,7 +44,13 @@ func DNSAnalysis(URL *url.URL, measurement DNSLookupResult, switch *control.DNS.Failure { case DNSNameError: // the control returns this on NXDOMAIN error switch *measurement.Failure { - case netxlite.FailureDNSNXDOMAINError: + // When the Android getaddrinfo cache says "no data" (meaning basically + // "I don't know, mate") _and_ the test helper says NXDOMAIN, we can + // be ~confident that there's also NXDOMAIN on the Android side. + // + // See also https://github.com/ooni/probe/issues/2029. + case netxlite.FailureDNSNXDOMAINError, + netxlite.FailureAndroidDNSCacheNoData: out.DNSConsistency = &DNSConsistent } } diff --git a/internal/engine/experiment/webconnectivity/dnsanalysis_test.go b/internal/engine/experiment/webconnectivity/dnsanalysis_test.go index 906f75c..0fe2d74 100644 --- a/internal/engine/experiment/webconnectivity/dnsanalysis_test.go +++ b/internal/engine/experiment/webconnectivity/dnsanalysis_test.go @@ -14,6 +14,7 @@ func TestDNSAnalysis(t *testing.T) { measurementFailure := netxlite.FailureDNSNXDOMAINError controlFailure := webconnectivity.DNSNameError eofFailure := io.EOF.Error() + androidEaiNoData := netxlite.FailureAndroidDNSCacheNoData type args struct { URL *url.URL measurement webconnectivity.DNSLookupResult @@ -57,7 +58,7 @@ func TestDNSAnalysis(t *testing.T) { DNSConsistency: &webconnectivity.DNSInconsistent, }, }, { - name: "when the failures are compatible", + name: "when the failures are compatible (NXDOMAIN case)", args: args{ URL: &url.URL{ Host: "www.kerneltrap.org", @@ -74,6 +75,24 @@ func TestDNSAnalysis(t *testing.T) { wantOut: webconnectivity.DNSAnalysisResult{ DNSConsistency: &webconnectivity.DNSConsistent, }, + }, { + name: "when the failures are compatible (Android EAI_NODATA case)", + args: args{ + URL: &url.URL{ + Host: "www.kerneltrap.org", + }, + measurement: webconnectivity.DNSLookupResult{ + Failure: &androidEaiNoData, + }, + control: webconnectivity.ControlResponse{ + DNS: webconnectivity.ControlDNSResult{ + Failure: &controlFailure, + }, + }, + }, + wantOut: webconnectivity.DNSAnalysisResult{ + DNSConsistency: &webconnectivity.DNSConsistent, + }, }, { name: "when the ASNs are equal", args: args{ diff --git a/internal/engine/experiment/webconnectivity/summary.go b/internal/engine/experiment/webconnectivity/summary.go index d6e8e2a..28e8447 100644 --- a/internal/engine/experiment/webconnectivity/summary.go +++ b/internal/engine/experiment/webconnectivity/summary.go @@ -125,9 +125,14 @@ func Summarize(tk *TestKeys) (out Summary) { return } // If DNS failed with NXDOMAIN and the control DNS is consistent, then it - // means this website does not exist anymore. + // means this website does not exist anymore. We need to include the weird + // cache failure on Android into this analysis because that failure means + // NXDOMAIN (well, most likely) if the TH reported NXDOMAIN. + // + // See https://github.com/ooni/probe/issues/2029 for the Android issue. if tk.DNSExperimentFailure != nil && - *tk.DNSExperimentFailure == netxlite.FailureDNSNXDOMAINError && + (*tk.DNSExperimentFailure == netxlite.FailureDNSNXDOMAINError || + *tk.DNSExperimentFailure == netxlite.FailureAndroidDNSCacheNoData) && tk.DNSConsistency != nil && *tk.DNSConsistency == DNSConsistent { // TODO(bassosimone): MK flags this as accessible. This result is debatable. We // are doing what MK does. But we most likely want to make it better later. diff --git a/internal/engine/experiment/webconnectivity/summary_test.go b/internal/engine/experiment/webconnectivity/summary_test.go index ee1af7e..5808e9f 100644 --- a/internal/engine/experiment/webconnectivity/summary_test.go +++ b/internal/engine/experiment/webconnectivity/summary_test.go @@ -26,6 +26,7 @@ func TestSummarize(t *testing.T) { probeSSLInvalidHost = netxlite.FailureSSLInvalidHostname probeSSLInvalidCert = netxlite.FailureSSLInvalidCertificate probeSSLUnknownAuth = netxlite.FailureSSLUnknownAuthority + probeAndroidEaiNoData = netxlite.FailureAndroidDNSCacheNoData tcpIP = "tcp_ip" trueValue = true ) @@ -68,7 +69,7 @@ func TestSummarize(t *testing.T) { Status: webconnectivity.StatusAnomalyControlUnreachable, }, }, { - name: "with non-existing website", + name: "with non-existing website (NXDOMAIN case)", args: args{ tk: &webconnectivity.TestKeys{ DNSExperimentFailure: &probeNXDOMAIN, @@ -84,6 +85,23 @@ func TestSummarize(t *testing.T) { Status: webconnectivity.StatusSuccessNXDOMAIN | webconnectivity.StatusExperimentDNS, }, + }, { + name: "with non-existing website (Android EAI_NODATA case)", + args: args{ + tk: &webconnectivity.TestKeys{ + DNSExperimentFailure: &probeAndroidEaiNoData, + DNSAnalysisResult: webconnectivity.DNSAnalysisResult{ + DNSConsistency: &webconnectivity.DNSConsistent, + }, + }, + }, + wantOut: webconnectivity.Summary{ + BlockingReason: nil, + Blocking: false, + Accessible: &trueValue, + Status: webconnectivity.StatusSuccessNXDOMAIN | + webconnectivity.StatusExperimentDNS, + }, }, { name: "with NXDOMAIN measured only by the probe", args: args{ diff --git a/internal/netxlite/certifi.go b/internal/netxlite/certifi.go index 11aa95c..a316f8b 100644 --- a/internal/netxlite/certifi.go +++ b/internal/netxlite/certifi.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// 2022-05-19 20:30:44.840082 +0200 CEST m=+0.374984084 +// 2022-05-28 13:27:21.630174629 +0200 CEST m=+0.293627763 // https://curl.haxx.se/ca/cacert.pem package netxlite diff --git a/internal/netxlite/classify.go b/internal/netxlite/classify.go index 254ff61..bfd7f25 100644 --- a/internal/netxlite/classify.go +++ b/internal/netxlite/classify.go @@ -249,18 +249,34 @@ const ( // DNSOverHTTPSTransport and DNSOverUDPTransport). Their suffix matches the equivalent // unexported errors used by the Go standard library. var ( - ErrOODNSNoSuchHost = fmt.Errorf("ooniresolver: %s", DNSNoSuchHostSuffix) + // ErrOODNSNoSuchHost means NXDOMAIN. + ErrOODNSNoSuchHost = fmt.Errorf("ooniresolver: %s", DNSNoSuchHostSuffix) + + // ErrOODNSMisbehaving is the error typically returned by the `netgo`resolver + // when it cannot really make sense of the error. ErrOODNSMisbehaving = fmt.Errorf("ooniresolver: %s", DNSServerMisbehavingSuffix) - ErrOODNSNoAnswer = fmt.Errorf("ooniresolver: %s", DNSNoAnswerSuffix) + + // ErrOODNSNoAnswer means that we've got a valid DNS response that + // did not contain any answer for the original query. This could happen + // when we query for AAAA and the domain only has A records. + ErrOODNSNoAnswer = fmt.Errorf("ooniresolver: %s", DNSNoAnswerSuffix) ) // These errors are not part of the Go standard library but we can // return them in our custom resolvers. var ( - ErrOODNSRefused = errors.New("ooniresolver: refused") + // ErrOODNSRefused indicates that the response's Rcode was "refused" + ErrOODNSRefused = errors.New("ooniresolver: refused") + + // ErrOODNSServfail indicates that the response's Rcode was "servfail" ErrOODNSServfail = errors.New("ooniresolver: servfail") ) +// ErrAndroidDNSCacheNoData is the kind of error returned by our getaddrinfo +// code on Android when we see EAI_NODATA, an error condition that could mean +// anything as explained in getaddrinfo_linux.go. +var ErrAndroidDNSCacheNoData = errors.New(FailureAndroidDNSCacheNoData) + // classifyResolverError maps DNS resolution errors to // OONI failure strings. // @@ -291,6 +307,9 @@ func classifyResolverError(err error) string { if errors.Is(err, ErrDNSReplyWithWrongQueryID) { return FailureDNSReplyWithWrongQueryID } + if errors.Is(err, ErrAndroidDNSCacheNoData) { + return FailureAndroidDNSCacheNoData + } return classifyGenericError(err) } diff --git a/internal/netxlite/classify_test.go b/internal/netxlite/classify_test.go index 01049de..d5d976e 100644 --- a/internal/netxlite/classify_test.go +++ b/internal/netxlite/classify_test.go @@ -281,6 +281,12 @@ func TestClassifyResolverError(t *testing.T) { } }) + t.Run("for EAI_NODATA returned by Android's getaddrinfo", func(t *testing.T) { + if classifyResolverError(ErrAndroidDNSCacheNoData) != FailureAndroidDNSCacheNoData { + t.Fatal("unexpected result") + } + }) + t.Run("for another kind of error", func(t *testing.T) { if classifyResolverError(io.EOF) != FailureEOFError { t.Fatal("unexpected result") diff --git a/internal/netxlite/doc.go b/internal/netxlite/doc.go index 7a8f9cc..fa369d2 100644 --- a/internal/netxlite/doc.go +++ b/internal/netxlite/doc.go @@ -54,4 +54,21 @@ // // Operations 1, 2, 3, and 4 are used when we perform measurements, // while 5 and 6 are mostly used when speaking with our backend. +// +// Getaddrinfo usage +// +// When compiled with CGO_ENABLED=1, this package will link with libc +// and call getaddrinfo directly. While this design choice means we will +// need to maintain more code, it also allows us to save the correct +// getaddrinfo return value, which is hidden by the Go resolver. Also, +// this strategy allows us to deal with the Android EAI_NODATA implementation +// quirk (see https://github.com/ooni/probe/issues/2029). +// +// We currently use net.Resolver when CGO_ENABLED=0. A future version of +// netxlite MIGHT change this and use a custom UDP resolver in such a +// case, to avoid depending on the assumption that /etc/resolver.conf is +// present on the target system. See https://github.com/ooni/probe/issues/2118 +// for more details regarding ongoing plans to bypass net.Resolver when +// CGO_ENABLED=0. (If you're reading this piece of documentation and notice +// it's not updated, please submit a pull request to update it :-). package netxlite diff --git a/internal/netxlite/errno.go b/internal/netxlite/errno.go index 92ed71b..e21b1c5 100644 --- a/internal/netxlite/errno.go +++ b/internal/netxlite/errno.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.752543 +0200 CEST m=+0.582472918 +// Generated: 2022-05-28 13:27:22.097503116 +0200 CEST m=+0.338871155 package netxlite @@ -13,6 +13,7 @@ const ( FailureAddressInUse = "address_in_use" FailureAddressNotAvailable = "address_not_available" FailureAlreadyConnected = "already_connected" + FailureAndroidDNSCacheNoData = "android_dns_cache_no_data" FailureBadAddress = "bad_address" FailureBadFileDescriptor = "bad_file_descriptor" FailureConnectionAborted = "connection_aborted" @@ -63,6 +64,7 @@ var failuresMap = map[string]string{ "address_in_use": "address_in_use", "address_not_available": "address_not_available", "already_connected": "already_connected", + "android_dns_cache_no_data": "android_dns_cache_no_data", "bad_address": "bad_address", "bad_file_descriptor": "bad_file_descriptor", "connection_aborted": "connection_aborted", diff --git a/internal/netxlite/errno_darwin.go b/internal/netxlite/errno_darwin.go index c012c55..bd5a109 100644 --- a/internal/netxlite/errno_darwin.go +++ b/internal/netxlite/errno_darwin.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.170591 +0200 CEST m=+0.000503793 +// Generated: 2022-05-28 13:27:21.764075578 +0200 CEST m=+0.005443607 package netxlite diff --git a/internal/netxlite/errno_darwin_test.go b/internal/netxlite/errno_darwin_test.go index 03a784e..1add0b2 100644 --- a/internal/netxlite/errno_darwin_test.go +++ b/internal/netxlite/errno_darwin_test.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.466378 +0200 CEST m=+0.296299710 +// Generated: 2022-05-28 13:27:21.820244729 +0200 CEST m=+0.061612769 package netxlite diff --git a/internal/netxlite/errno_freebsd.go b/internal/netxlite/errno_freebsd.go index a320470..4e5f8f7 100644 --- a/internal/netxlite/errno_freebsd.go +++ b/internal/netxlite/errno_freebsd.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.509171 +0200 CEST m=+0.339094543 +// Generated: 2022-05-28 13:27:21.843034214 +0200 CEST m=+0.084402243 package netxlite diff --git a/internal/netxlite/errno_freebsd_test.go b/internal/netxlite/errno_freebsd_test.go index ede9abf..dcc90e1 100644 --- a/internal/netxlite/errno_freebsd_test.go +++ b/internal/netxlite/errno_freebsd_test.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.559112 +0200 CEST m=+0.389036168 +// Generated: 2022-05-28 13:27:21.881328637 +0200 CEST m=+0.122696672 package netxlite diff --git a/internal/netxlite/errno_linux.go b/internal/netxlite/errno_linux.go index 7d0b032..f1616ec 100644 --- a/internal/netxlite/errno_linux.go +++ b/internal/netxlite/errno_linux.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.642498 +0200 CEST m=+0.472425168 +// Generated: 2022-05-28 13:27:21.967785506 +0200 CEST m=+0.209153549 package netxlite diff --git a/internal/netxlite/errno_linux_test.go b/internal/netxlite/errno_linux_test.go index f4ab895..bbdfe68 100644 --- a/internal/netxlite/errno_linux_test.go +++ b/internal/netxlite/errno_linux_test.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.684349 +0200 CEST m=+0.514276960 +// Generated: 2022-05-28 13:27:22.010048884 +0200 CEST m=+0.251416941 package netxlite diff --git a/internal/netxlite/errno_openbsd.go b/internal/netxlite/errno_openbsd.go index 8dc2a76..a1e05ef 100644 --- a/internal/netxlite/errno_openbsd.go +++ b/internal/netxlite/errno_openbsd.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.579849 +0200 CEST m=+0.409773835 +// Generated: 2022-05-28 13:27:21.904104276 +0200 CEST m=+0.145472305 package netxlite diff --git a/internal/netxlite/errno_openbsd_test.go b/internal/netxlite/errno_openbsd_test.go index f84fc34..e714ec8 100644 --- a/internal/netxlite/errno_openbsd_test.go +++ b/internal/netxlite/errno_openbsd_test.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.622731 +0200 CEST m=+0.452657918 +// Generated: 2022-05-28 13:27:21.942808293 +0200 CEST m=+0.184176336 package netxlite diff --git a/internal/netxlite/errno_windows.go b/internal/netxlite/errno_windows.go index 8a1c308..ed3a18c 100644 --- a/internal/netxlite/errno_windows.go +++ b/internal/netxlite/errno_windows.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.704221 +0200 CEST m=+0.534149543 +// Generated: 2022-05-28 13:27:22.034720951 +0200 CEST m=+0.276088980 package netxlite @@ -15,36 +15,37 @@ import ( // is system dependent. You're currently looking at // the list of errors for windows. const ( - ECONNREFUSED = windows.WSAECONNREFUSED - ECONNRESET = windows.WSAECONNRESET - EHOSTUNREACH = windows.WSAEHOSTUNREACH - ETIMEDOUT = windows.WSAETIMEDOUT - EAFNOSUPPORT = windows.WSAEAFNOSUPPORT - EADDRINUSE = windows.WSAEADDRINUSE - EADDRNOTAVAIL = windows.WSAEADDRNOTAVAIL - EISCONN = windows.WSAEISCONN - EFAULT = windows.WSAEFAULT - EBADF = windows.WSAEBADF - ECONNABORTED = windows.WSAECONNABORTED - EALREADY = windows.WSAEALREADY - EDESTADDRREQ = windows.WSAEDESTADDRREQ - EINTR = windows.WSAEINTR - EINVAL = windows.WSAEINVAL - EMSGSIZE = windows.WSAEMSGSIZE - ENETDOWN = windows.WSAENETDOWN - ENETRESET = windows.WSAENETRESET - ENETUNREACH = windows.WSAENETUNREACH - ENOBUFS = windows.WSAENOBUFS - ENOPROTOOPT = windows.WSAENOPROTOOPT - ENOTSOCK = windows.WSAENOTSOCK - ENOTCONN = windows.WSAENOTCONN - EWOULDBLOCK = windows.WSAEWOULDBLOCK - EACCES = windows.WSAEACCES - EPROTONOSUPPORT = windows.WSAEPROTONOSUPPORT - EPROTOTYPE = windows.WSAEPROTOTYPE - WSANO_DATA = windows.WSANO_DATA - WSANO_RECOVERY = windows.WSANO_RECOVERY - WSATRY_AGAIN = windows.WSATRY_AGAIN + ECONNREFUSED = windows.WSAECONNREFUSED + ECONNRESET = windows.WSAECONNRESET + EHOSTUNREACH = windows.WSAEHOSTUNREACH + ETIMEDOUT = windows.WSAETIMEDOUT + EAFNOSUPPORT = windows.WSAEAFNOSUPPORT + EADDRINUSE = windows.WSAEADDRINUSE + EADDRNOTAVAIL = windows.WSAEADDRNOTAVAIL + EISCONN = windows.WSAEISCONN + EFAULT = windows.WSAEFAULT + EBADF = windows.WSAEBADF + ECONNABORTED = windows.WSAECONNABORTED + EALREADY = windows.WSAEALREADY + EDESTADDRREQ = windows.WSAEDESTADDRREQ + EINTR = windows.WSAEINTR + EINVAL = windows.WSAEINVAL + EMSGSIZE = windows.WSAEMSGSIZE + ENETDOWN = windows.WSAENETDOWN + ENETRESET = windows.WSAENETRESET + ENETUNREACH = windows.WSAENETUNREACH + ENOBUFS = windows.WSAENOBUFS + ENOPROTOOPT = windows.WSAENOPROTOOPT + ENOTSOCK = windows.WSAENOTSOCK + ENOTCONN = windows.WSAENOTCONN + EWOULDBLOCK = windows.WSAEWOULDBLOCK + EACCES = windows.WSAEACCES + EPROTONOSUPPORT = windows.WSAEPROTONOSUPPORT + EPROTOTYPE = windows.WSAEPROTOTYPE + WSANO_DATA = windows.WSANO_DATA + WSANO_RECOVERY = windows.WSANO_RECOVERY + WSATRY_AGAIN = windows.WSATRY_AGAIN + WSAHOST_NOT_FOUND = windows.WSAHOST_NOT_FOUND ) // classifySyscallError converts a syscall error to the @@ -116,6 +117,8 @@ func classifySyscallError(err error) string { return FailureDNSNonRecoverableFailure case windows.WSATRY_AGAIN: return FailureDNSTemporaryFailure + case windows.WSAHOST_NOT_FOUND: + return FailureDNSNXDOMAINError } return "" } diff --git a/internal/netxlite/errno_windows_test.go b/internal/netxlite/errno_windows_test.go index 92cf3f5..cfdbfa0 100644 --- a/internal/netxlite/errno_windows_test.go +++ b/internal/netxlite/errno_windows_test.go @@ -1,5 +1,5 @@ // Code generated by go generate; DO NOT EDIT. -// Generated: 2022-05-19 20:30:45.733431 +0200 CEST m=+0.563360501 +// Generated: 2022-05-28 13:27:22.067609692 +0200 CEST m=+0.308977732 package netxlite @@ -198,6 +198,12 @@ func TestClassifySyscallError(t *testing.T) { } }) + t.Run("for WSAHOST_NOT_FOUND", func(t *testing.T) { + if v := classifySyscallError(windows.WSAHOST_NOT_FOUND); v != FailureDNSNXDOMAINError { + t.Fatalf("expected '%s', got '%s'", FailureDNSNXDOMAINError, v) + } + }) + t.Run("for the zero errno value", func(t *testing.T) { if v := classifySyscallError(syscall.Errno(0)); v != "" { t.Fatalf("expected empty string, got '%s'", v) diff --git a/internal/netxlite/getaddrinfo.go b/internal/netxlite/getaddrinfo.go new file mode 100644 index 0000000..e8318cb --- /dev/null +++ b/internal/netxlite/getaddrinfo.go @@ -0,0 +1,56 @@ +package netxlite + +import ( + "context" + "errors" +) + +// getaddrinfoLookupHost performs a DNS lookup and returns the +// results. If we were compiled with CGO_ENABLED=0, then this +// function calls net.DefaultResolver.LookupHost. Otherwise, +// we call getaddrinfo. In such a case, if getaddrinfo returns a nonzero +// return value, we'll return as error an instance of the +// ErrGetaddrinfo error. This error will contain the specific +// code returned by getaddrinfo in its .Code field. +func getaddrinfoLookupHost(ctx context.Context, domain string) ([]string, error) { + addrs, _, err := getaddrinfoLookupANY(ctx, domain) + return addrs, err +} + +// ErrGetaddrinfo represents a getaddrinfo failure. +type ErrGetaddrinfo struct { + // Err is the error proper. + Underlying error + + // Code is getaddrinfo's return code. + Code int64 +} + +// newErrGetaddrinfo creates a new instance of the ErrGetaddrinfo type. +func newErrGetaddrinfo(code int64, err error) *ErrGetaddrinfo { + return &ErrGetaddrinfo{ + Underlying: err, + Code: code, + } +} + +// Error returns the underlying error's string. +func (err *ErrGetaddrinfo) Error() string { + return err.Underlying.Error() +} + +// Unwrap allows to get the underlying error value. +func (err *ErrGetaddrinfo) Unwrap() error { + return err.Underlying +} + +// ErrorToGetaddrinfoRetval converts an arbitrary error to +// the return value of getaddrinfo. If err is nil or is not +// an instance of ErrGetaddrinfo, we just return zero. +func ErrorToGetaddrinfoRetval(err error) int64 { + var aierr *ErrGetaddrinfo + if err != nil && errors.As(err, &aierr) { + return aierr.Code + } + return 0 +} diff --git a/internal/netxlite/getaddrinfo_bsd.go b/internal/netxlite/getaddrinfo_bsd.go new file mode 100644 index 0000000..8f6a07b --- /dev/null +++ b/internal/netxlite/getaddrinfo_bsd.go @@ -0,0 +1,58 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build cgo && (darwin || dragonfly || freebsd || openbsd) + +package netxlite + +/* +#include +*/ +import "C" +import ( + "syscall" +) + +const getaddrinfoAIFlags = (C.AI_CANONNAME | C.AI_V4MAPPED | C.AI_ALL) & C.AI_MASK + +// Making constants available to Go code so we can run tests +const ( + aiCanonname = C.AI_CANONNAME + aiV4Mapped = C.AI_V4MAPPED + aiAll = C.AI_ALL + aiMask = C.AI_MASK + eaiSystem = C.EAI_SYSTEM + eaiNoName = C.EAI_NONAME + eaiBadFlags = C.EAI_BADFLAGS +) + +// toError is the function that converts the return value from +// the getaddrinfo function into a proper Go error. +// +// This function is adapted from cgoLookupIPCNAME +// https://github.com/golang/go/blob/go1.17.6/src/net/cgo_unix.go#L145 +// +// SPDX-License-Identifier: BSD-3-Clause. +func (state *getaddrinfoState) toError(code int64, err error, goos string) error { + switch code { + case C.EAI_SYSTEM: + if err == nil { + // err should not be nil, but sometimes getaddrinfo returns + // code == C.EAI_SYSTEM with err == nil on Linux. + // The report claims that it happens when we have too many + // open files, so use syscall.EMFILE (too many open files in system). + // Most system calls would return ENFILE (too many open files), + // so at the least EMFILE should be easy to recognize if this + // comes up again. golang.org/issue/6232. + err = syscall.EMFILE + } + return newErrGetaddrinfo(code, err) + case C.EAI_NONAME: + err = ErrOODNSNoSuchHost // so it becomes FailureDNSNXDOMAIN + return newErrGetaddrinfo(code, err) + default: + err = ErrOODNSMisbehaving // so it becomes FailureDNSServerMisbehaving + return newErrGetaddrinfo(code, err) + } +} diff --git a/internal/netxlite/getaddrinfo_bsd_test.go b/internal/netxlite/getaddrinfo_bsd_test.go new file mode 100644 index 0000000..a768984 --- /dev/null +++ b/internal/netxlite/getaddrinfo_bsd_test.go @@ -0,0 +1,105 @@ +//go:build cgo && (darwin || dragonfly || freebsd || openbsd) + +package netxlite + +import ( + "errors" + "syscall" + "testing" +) + +func TestGetaddrinfoAIFlags(t *testing.T) { + var wrong bool + wrong = getaddrinfoAIFlags != (aiCanonname|aiV4Mapped|aiAll)&aiMask + if wrong { + t.Fatal("wrong flags for platform") + } +} + +func TestGetaddrinfoStateToError(t *testing.T) { + type args struct { + code int64 + err error + goos string + } + type expects struct { + message string // message obtained using .Error + code int64 + err error + } + var inputs = []struct { + name string + args args + expects expects + }{{ + name: "with C.EAI_SYSTEM and non-nil error", + args: args{ + code: eaiSystem, + err: syscall.EAGAIN, + goos: "darwin", + }, + expects: expects{ + message: syscall.EAGAIN.Error(), + code: eaiSystem, + err: syscall.EAGAIN, + }, + }, { + name: "with C.EAI_SYSTEM and nil error", + args: args{ + code: eaiSystem, + err: nil, + goos: "darwin", + }, + expects: expects{ + message: syscall.EMFILE.Error(), + code: eaiSystem, + err: syscall.EMFILE, + }, + }, { + name: "with C.EAI_NONAME", + args: args{ + code: eaiNoName, + err: nil, + goos: "darwin", + }, + expects: expects{ + message: ErrOODNSNoSuchHost.Error(), + code: eaiNoName, + err: ErrOODNSNoSuchHost, + }, + }, { + name: "with an unhandled error", + args: args{ + code: eaiBadFlags, + err: nil, + goos: "darwin", + }, + expects: expects{ + message: ErrOODNSMisbehaving.Error(), + code: eaiBadFlags, + err: ErrOODNSMisbehaving, + }, + }} + for _, input := range inputs { + t.Run(input.name, func(t *testing.T) { + state := newGetaddrinfoState(getaddrinfoNumSlots) + err := state.toError(input.args.code, input.args.err, input.args.goos) + if err == nil { + t.Fatal("expected non-nil error here") + } + if err.Error() != input.expects.message { + t.Fatal("unexpected error message") + } + var gaierr *ErrGetaddrinfo + if !errors.As(err, &gaierr) { + t.Fatal("cannot convert error to ErrGetaddrinfo") + } + if gaierr.Code != input.expects.code { + t.Fatal("unexpected code") + } + if !errors.Is(gaierr.Underlying, input.expects.err) { + t.Fatal("unexpected underlying error") + } + }) + } +} diff --git a/internal/netxlite/getaddrinfo_cgo.go b/internal/netxlite/getaddrinfo_cgo.go new file mode 100644 index 0000000..06c172f --- /dev/null +++ b/internal/netxlite/getaddrinfo_cgo.go @@ -0,0 +1,229 @@ +//go:build: cgo + +package netxlite + +/* +// On Unix systems, getaddrinfo is part of libc. On Windows, +// instead, we need to explicitly link with winsock2. +#cgo windows LDFLAGS: -lws2_32 + +#ifndef _WIN32 +#include // for getaddrinfo +#else +#include // for getaddrinfo +#endif +*/ +import "C" + +import ( + "context" + "errors" + "net" + "runtime" + "syscall" + "unsafe" +) + +func getaddrinfoLookupANY(ctx context.Context, domain string) ([]string, string, error) { + return getaddrinfoSingleton.LookupANY(ctx, domain) +} + +// getaddrinfoSingleton is the getaddrinfo singleton. +var getaddrinfoSingleton = newGetaddrinfoState(getaddrinfoNumSlots) + +// getaddrinfoSlot is a slot for calling getaddrinfo. The Go standard lib +// limits the maximum number of parallel calls to getaddrinfo. They do that +// to avoid using too many threads if the system resolver for some +// reason doesn't respond. We need to do the same. Because OONI does not +// need to be as general as the Go stdlib, we'll use a small-enough number +// of slots, rather than checking for rlimits, like the stdlib does, +// e.g., on Unix. This struct represents one of these slots. +type getaddrinfoSlot struct{} + +// getaddrinfoState is the state associated to getaddrinfo. +type getaddrinfoState struct { + // sema is the semaphore that only allows a maximum number of + // getaddrinfo slots to be active at any given time. + sema chan *getaddrinfoSlot + + // lookupANY is the function that actually implements + // the lookup ANY lookup using getaddrinfo. + lookupANY func(domain string) ([]string, string, error) +} + +// getaddrinfoNumSlots is the maximum number of parallel calls +// to getaddrinfo we may have at any given time. +const getaddrinfoNumSlots = 8 + +// newGetaddrinfoState creates the getaddrinfo state. +func newGetaddrinfoState(numSlots int) *getaddrinfoState { + state := &getaddrinfoState{ + sema: make(chan *getaddrinfoSlot, numSlots), + lookupANY: nil, + } + state.lookupANY = state.doLookupANY + return state +} + +// lookupANY invokes getaddrinfo and returns the results. +func (state *getaddrinfoState) LookupANY(ctx context.Context, domain string) ([]string, string, error) { + if err := state.grabSlot(ctx); err != nil { + return nil, "", err + } + defer state.releaseSlot() + return state.doLookupANY(domain) +} + +// grabSlot grabs a slot for calling getaddrinfo. This function may block until +// a slot becomes available (or until the context is done). +func (state *getaddrinfoState) grabSlot(ctx context.Context) error { + // Implementation note: the channel has getaddrinfoNumSlots capacity, hence + // the first getaddrinfoNumSlots channel writes will succeed and all the + // subsequent ones will block. To unblock a pending request, we release a + // slot by reading from the channel. + select { + case state.sema <- &getaddrinfoSlot{}: + return nil + case <-ctx.Done(): + return ctx.Err() + } +} + +// releaseSlot releases a previously acquired slot. +func (state *getaddrinfoState) releaseSlot() { + <-state.sema +} + +// doLookupANY calls getaddrinfo. We assume that you've already grabbed a +// slot and you're defer-releasing it when you're done. +// +// This function is adapted from cgoLookupIPCNAME +// https://github.com/golang/go/blob/go1.17.6/src/net/cgo_unix.go#L145 +// +// SPDX-License-Identifier: BSD-3-Clause. +func (state *getaddrinfoState) doLookupANY(domain string) ([]string, string, error) { + var hints C.struct_addrinfo // zero-initialized by Go + hints.ai_flags = getaddrinfoAIFlags + hints.ai_socktype = C.SOCK_STREAM + hints.ai_family = C.AF_UNSPEC + h := make([]byte, len(domain)+1) + copy(h, domain) + var res *C.struct_addrinfo + // From https://pkg.go.dev/cmd/cgo: + // + // "Any C function (even void functions) may be called in a multiple + // assignment context to retrieve both the return value (if any) and the + // C errno variable as an error" + code, err := C.getaddrinfo((*C.char)(unsafe.Pointer(&h[0])), nil, &hints, &res) + if code != 0 { + return nil, "", state.toError(int64(code), err, runtime.GOOS) + } + defer C.freeaddrinfo(res) + return state.toAddressList(res) +} + +// toAddressList is the function that converts the return value from +// the getaddrinfo function into a list of strings. +// +// This function is adapted from cgoLookupIPCNAME +// https://github.com/golang/go/blob/go1.17.6/src/net/cgo_unix.go#L145 +// +// SPDX-License-Identifier: BSD-3-Clause. +func (state *getaddrinfoState) toAddressList(res *C.struct_addrinfo) ([]string, string, error) { + var ( + addrs []string + canonname string + ) + for r := res; r != nil; r = r.ai_next { + if r.ai_canonname != nil { + canonname = C.GoString(r.ai_canonname) + } + // We only asked for SOCK_STREAM, but check anyhow. + if r.ai_socktype != C.SOCK_STREAM { + continue + } + addr, err := state.addrinfoToString(r) + if err != nil { + continue + } + addrs = append(addrs, addr) + } + if len(addrs) < 1 { + return nil, canonname, ErrOODNSNoAnswer + } + return addrs, canonname, nil +} + +// errGetaddrinfoUnknownFamily indicates we don't know the address family. +var errGetaddrinfoUnknownFamily = errors.New("unknown address family") + +// addrinfoToString is the function that converts a single entry +// in the struct_addrinfos linked list into a string. +// +// This function is adapted from cgoLookupIPCNAME +// https://github.com/golang/go/blob/go1.17.6/src/net/cgo_unix.go#L145 +// +// SPDX-License-Identifier: BSD-3-Clause. +func (state *getaddrinfoState) addrinfoToString(r *C.struct_addrinfo) (string, error) { + switch r.ai_family { + case C.AF_INET: + sa := (*syscall.RawSockaddrInet4)(unsafe.Pointer(r.ai_addr)) + addr := net.IPAddr{IP: state.copyIP(sa.Addr[:])} + return addr.String(), nil + case C.AF_INET6: + sa := (*syscall.RawSockaddrInet6)(unsafe.Pointer(r.ai_addr)) + addr := net.IPAddr{ + IP: state.copyIP(sa.Addr[:]), + Zone: state.ifnametoindex(int(sa.Scope_id)), + } + return addr.String(), nil + default: + return "", errGetaddrinfoUnknownFamily + } +} + +// staticAddrinfoWithInvalidFamily is an helper to construct an addrinfo struct +// that we use in testing. (We cannot call CGO directly from tests.) +func staticAddrinfoWithInvalidFamily() *C.struct_addrinfo { + var value C.struct_addrinfo // zeroed by Go + value.ai_socktype = C.SOCK_STREAM // this is what the code expects + value.ai_family = 0 // but 0 is not AF_INET{,6} + return &value +} + +// staticAddrinfoWithInvalidSocketType is an helper to construct an addrinfo struct +// that we use in testing. (We cannot call CGO directly from tests.) +func staticAddrinfoWithInvalidSocketType() *C.struct_addrinfo { + var value C.struct_addrinfo // zeroed by Go + value.ai_socktype = C.SOCK_DGRAM // not SOCK_STREAM + return &value +} + +// copyIP copies a net.IP. +// +// This function is adapted from copyIP +// https://github.com/golang/go/blob/go1.17.6/src/net/cgo_unix.go#L344 +// +// SPDX-License-Identifier: BSD-3-Clause. +func (state *getaddrinfoState) copyIP(x net.IP) net.IP { + if len(x) < 16 { + return x.To16() + } + y := make(net.IP, len(x)) + copy(y, x) + return y +} + +// ifnametoindex converts an IPv6 scope index into an interface name. +// +// This function is adapted from ipv6ZoneCache.update +// https://github.com/golang/go/blob/go1.17.6/src/net/interface.go#L194 +// +// SPDX-License-Identifier: BSD-3-Clause. +func (state *getaddrinfoState) ifnametoindex(idx int) string { + iface, err := net.InterfaceByIndex(idx) // internally uses caching + if err != nil { + return "" + } + return iface.Name +} diff --git a/internal/netxlite/getaddrinfo_cgo_test.go b/internal/netxlite/getaddrinfo_cgo_test.go new file mode 100644 index 0000000..0562072 --- /dev/null +++ b/internal/netxlite/getaddrinfo_cgo_test.go @@ -0,0 +1,89 @@ +//go:build: cgo + +package netxlite + +import ( + "context" + "errors" + "net" + "testing" + "time" +) + +func TestGetaddrinfoStateAddrinfoToStringWithInvalidFamily(t *testing.T) { + aip := staticAddrinfoWithInvalidFamily() + state := newGetaddrinfoState(getaddrinfoNumSlots) + addr, err := state.addrinfoToString(aip) + if !errors.Is(err, errGetaddrinfoUnknownFamily) { + t.Fatal("unexpected err", err) + } + if addr != "" { + t.Fatal("expected empty addr here") + } +} + +func TestGetaddrinfoStateIfnametoindex(t *testing.T) { + ifaces, err := net.Interfaces() + if err != nil { + t.Fatal(err) + } + state := newGetaddrinfoState(getaddrinfoNumSlots) + for _, iface := range ifaces { + name := state.ifnametoindex(iface.Index) + if name != iface.Name { + t.Fatal("unexpected name") + } + } +} + +func TestGetaddrinfoStateLookupANYWithNoSlots(t *testing.T) { + const ( + noslots = 0 + timeout = 10 * time.Millisecond + ) + state := newGetaddrinfoState(noslots) + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + addresses, cname, err := state.LookupANY(ctx, "dns.google") + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatal("unexpected err", err) + } + if len(addresses) > 0 { + t.Fatal("expected no addresses", addresses) + } + if cname != "" { + t.Fatal("expected empty cname", cname) + } +} + +func TestGetaddrinfoStateToAddressList(t *testing.T) { + t.Run("with invalid sockety type", func(t *testing.T) { + state := newGetaddrinfoState(0) // number of slots not relevant + aip := staticAddrinfoWithInvalidSocketType() + addresses, cname, err := state.toAddressList(aip) + if !errors.Is(err, ErrOODNSNoAnswer) { + t.Fatal("unexpected err", err) + } + if len(addresses) > 0 { + t.Fatal("expected no addresses", addresses) + } + if cname != "" { + t.Fatal("expected empty cname", cname) + } + }) + + t.Run("with invalid family", func(t *testing.T) { + state := newGetaddrinfoState(0) // number of slots not relevant + aip := staticAddrinfoWithInvalidFamily() + addresses, cname, err := state.toAddressList(aip) + if !errors.Is(err, ErrOODNSNoAnswer) { + t.Fatal("unexpected err", err) + } + if len(addresses) > 0 { + t.Fatal("expected no addresses", addresses) + } + if cname != "" { + t.Fatal("expected empty cname", cname) + } + }) +} diff --git a/internal/netxlite/getaddrinfo_linux.go b/internal/netxlite/getaddrinfo_linux.go new file mode 100644 index 0000000..f7e3fed --- /dev/null +++ b/internal/netxlite/getaddrinfo_linux.go @@ -0,0 +1,161 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build cgo && linux + +package netxlite + +/* +// Both glibc and musl expose the EAI_NODATA error if we +// ask them to expose it through this define. See below for +// more details on how each of the supported libcs hides +// (or does not hide) the EAI_NODATA define. +#cgo CFLAGS: -D_GNU_SOURCE +#include +*/ +import "C" + +import ( + "runtime" + "syscall" +) + +// Implementation note: the original Go codebase separated linux and android +// but we want them to be in the same file, so we can implement tests for both +// operating system and increase our confidence that the behavior will be the +// one we'd like to see on Android systems. + +var getaddrinfoAIFlags = getaddrinfoGetPlatformSpecificAIFlags(runtime.GOOS) + +// This function returns the platforms-specific AI flags that go1.17.6 +// used to set when we merged resolver's code into ooni/probe-cli +// +// SPDX-License-Identifier: BSD-3-Clause. +func getaddrinfoGetPlatformSpecificAIFlags(goos string) C.int { + switch goos { + case "android": + return C.AI_CANONNAME + default: + // NOTE(rsc): In theory there are approximately balanced + // arguments for and against including AI_ADDRCONFIG + // in the flags (it includes IPv4 results only on IPv4 systems, + // and similarly for IPv6), but in practice setting it causes + // getaddrinfo to return the wrong canonical name on Linux. + // So definitely leave it out. + return C.AI_CANONNAME | C.AI_V4MAPPED | C.AI_ALL + } +} + +// Making constants available to Go code so we can run tests (it seems +// it's not possible to import C directly in tests, sadly). +const ( + aiCanonname = C.AI_CANONNAME + aiV4Mapped = C.AI_V4MAPPED + aiAll = C.AI_ALL + eaiSystem = C.EAI_SYSTEM + eaiNoName = C.EAI_NONAME + eaiBadFlags = C.EAI_BADFLAGS + eaiNoData = C.EAI_NODATA +) + +// toError is the function that converts the return value from +// the getaddrinfo function into a proper Go error. +// +// This function is adapted from cgoLookupIPCNAME +// https://github.com/golang/go/blob/go1.17.6/src/net/cgo_unix.go#L145 +// +// SPDX-License-Identifier: BSD-3-Clause. +func (state *getaddrinfoState) toError(code int64, err error, goos string) error { + switch code { + case C.EAI_SYSTEM: + if err == nil { + // err should not be nil, but sometimes getaddrinfo returns + // code == C.EAI_SYSTEM with err == nil on Linux. + // The report claims that it happens when we have too many + // open files, so use syscall.EMFILE (too many open files in system). + // Most system calls would return ENFILE (too many open files), + // so at the least EMFILE should be easy to recognize if this + // comes up again. golang.org/issue/6232. + err = syscall.EMFILE + } + return newErrGetaddrinfo(code, err) + case C.EAI_NONAME: + return newErrGetaddrinfo(code, ErrOODNSNoSuchHost) + case C.EAI_NODATA: + return state.toErrorNODATA(err, goos) + default: + return newErrGetaddrinfo(code, ErrOODNSMisbehaving) + } +} + +// toErrorNODATA maps the EAI_NODATA value to the proper return value +// depending on the underlying operating system. +// +// As of 2022-05-28, this is the status of the major C libraries whose +// getaddrinfo return value we may end up processing here: +// +// 1. musl libc (statically linked Linux builds for official OONI +// Probe packages we build): EAI_NODATA is defined in netdb.h in a +// section guarded by _GNU_SOURCE and _BSD_SOURCE and the code +// does not otherwise ever use this definition. +// +// 2. GNU libc (which is what you would get if you compile OONI Probe +// for yourself in a GNU/Linux system): the codebase defines EAI_NODATA +// inside netdb.h protected by __USE_GNU, which is defined to 1 in +// include/features.h if the user defines _GNU_SOURCE. Additionally, +// the getaddrinfo implementation returns EAI_NODATA when a name +// exists but there's no associated address for such a name. There +// was a bug, fixed in glibc 2.27, were EAI_NONAME was returned +// when EAI_NODATA would actually have been more proper: +// +// https://sourceware.org/bugzilla/show_bug.cgi?id=21922 +// +// 3. Android libc: EAI_NODATA is defined in netdb.h and is not +// protected by any feature flag. The getaddrinfo function (as +// of 4ebdeebef74) calls android_getaddrinfofornet, which in turns +// calls android_getaddrinfofornetcontext. This function will +// eventually call android_getaddrinfo_proxy. If this function +// returns any status code different from EAI_SYSTEM, then bionic +// will return its return value. Otherwise, the code ends up +// calling explore_fqdn, which in turn calls nsdispatch, which +// is what NetBSD is still doing today. +// +// So, android_getaddrinfo_proxy was introduced a long time +// ago on October 28, 2010 by this commit: +// +// https://github.com/aosp-mirror/platform_bionic/commit/a1dbf0b453801620565e5911f354f82706b0200d +// +// Then a subsequent commit changed android_getaddrinfo_proxy +// to basically default to EAI_NODATA on proxy errors: +// +// https://github.com/aosp-mirror/platform_bionic/commit/c63e59039d28c352e3053bb81319e960c392dbd4 +// +// As of today and 4ebdeebef74, android_getaddrinfo_proxy returns +// one of the following possible return codes: +// +// a) 0 on success; +// +// b) EAI_SYSTEM if it cannot speak to the proxy (which causes the code +// to fall through to the original NetBSD implementation); +// +// c) EAI_NODATA in all the other cases. +// +// The above discussion about Android provides us with a theory that explains the +// https://github.com/ooni/probe/issues/2029 issue. That said, we are still missing +// some bits, e.g., why some Android 6 phones did not experience this problem. +// +// We originally proposed to handle the EAI_NODATA error on Android like it was a +// EAI_NONAME error. However, this mapping seems very inaccurate. Any error inside +// the DNS proxy could cause EAI_NODATA (_unless_ we're "lucky" for some reason +// and the original NetBSD code runs). Therefore, the sanest choice is to introduce +// a new OONI error describing this error condition `android_dns_cache_no_data` +// and handle this error as a special case when checking for NXDOMAIN. +func (state *getaddrinfoState) toErrorNODATA(err error, goos string) error { + switch goos { + case "android": + return newErrGetaddrinfo(C.EAI_NODATA, ErrAndroidDNSCacheNoData) + default: + return newErrGetaddrinfo(C.EAI_NODATA, ErrOODNSNoAnswer) + } +} diff --git a/internal/netxlite/getaddrinfo_linux_test.go b/internal/netxlite/getaddrinfo_linux_test.go new file mode 100644 index 0000000..19e64ce --- /dev/null +++ b/internal/netxlite/getaddrinfo_linux_test.go @@ -0,0 +1,181 @@ +//go:build cgo && linux + +package netxlite + +import ( + "errors" + "runtime" + "syscall" + "testing" +) + +func TestGetaddrinfoAIFlags(t *testing.T) { + var wrong bool + switch runtime.GOOS { + case "android": + wrong = getaddrinfoAIFlags != aiCanonname + default: + wrong = getaddrinfoAIFlags != (aiCanonname | aiV4Mapped | aiAll) + } + if wrong { + t.Fatal("wrong flags for platform") + } +} + +func TestGetaddrinfoGetPlatformSpecificAiFlags(t *testing.T) { + type args struct { + goos string + } + type expects struct { + flags int64 + } + var inputs = []struct { + name string + args args + expects expects + }{{ + name: "using the Android platform", + args: args{ + goos: "android", + }, + expects: expects{ + flags: aiCanonname, + }, + }, { + name: "using Linux", + args: args{ + goos: "linux", + }, + expects: expects{ + flags: aiCanonname | aiV4Mapped | aiAll, + }, + }, { + name: "when the platform name is empty", + args: args{ + goos: "", + }, + expects: expects{ + flags: aiCanonname | aiV4Mapped | aiAll, + }, + }} + for _, input := range inputs { + t.Run(input.name, func(t *testing.T) { + flags := getaddrinfoGetPlatformSpecificAIFlags(input.args.goos) + if int64(flags) != input.expects.flags { + t.Fatal("invalid flags") + } + }) + } +} + +func TestGetaddrinfoStateToError(t *testing.T) { + type args struct { + code int64 + err error + goos string + } + type expects struct { + message string // message obtained using .Error + code int64 + err error + } + var inputs = []struct { + name string + args args + expects expects + }{{ + name: "with C.EAI_SYSTEM and non-nil error", + args: args{ + code: eaiSystem, + err: syscall.EAGAIN, + goos: "linux", + }, + expects: expects{ + message: syscall.EAGAIN.Error(), + code: eaiSystem, + err: syscall.EAGAIN, + }, + }, { + name: "with C.EAI_SYSTEM and nil error", + args: args{ + code: eaiSystem, + err: nil, + goos: "linux", + }, + expects: expects{ + message: syscall.EMFILE.Error(), + code: eaiSystem, + err: syscall.EMFILE, + }, + }, { + name: "with C.EAI_NONAME", + args: args{ + code: eaiNoName, + err: nil, + goos: "linux", + }, + expects: expects{ + message: ErrOODNSNoSuchHost.Error(), + code: eaiNoName, + err: ErrOODNSNoSuchHost, + }, + }, { + name: "with C.EAI_NODATA on Linux", + args: args{ + code: eaiNoData, + err: nil, + goos: "linux", + }, + expects: expects{ + message: ErrOODNSNoAnswer.Error(), + code: eaiNoData, + err: ErrOODNSNoAnswer, + }, + }, { + name: "with C.EAI_NODATA on Android", + args: args{ + code: eaiNoData, + err: nil, + goos: "android", + }, + expects: expects{ + message: ErrAndroidDNSCacheNoData.Error(), + code: eaiNoData, + err: ErrAndroidDNSCacheNoData, + }, + }, { + name: "with an unhandled error", + args: args{ + code: eaiBadFlags, + err: nil, + goos: "linux", + }, + expects: expects{ + message: ErrOODNSMisbehaving.Error(), + code: eaiBadFlags, + err: ErrOODNSMisbehaving, + }, + }} + for _, input := range inputs { + t.Run(input.name, func(t *testing.T) { + state := newGetaddrinfoState(getaddrinfoNumSlots) + err := state.toError(input.args.code, input.args.err, input.args.goos) + if err == nil { + t.Fatal("expected non-nil error here") + } + if err.Error() != input.expects.message { + t.Fatal("unexpected error message") + } + var gaierr *ErrGetaddrinfo + if !errors.As(err, &gaierr) { + t.Fatal("cannot convert error to ErrGetaddrinfo") + } + if gaierr.Code != input.expects.code { + t.Fatal("unexpected code") + } + if !errors.Is(gaierr.Underlying, input.expects.err) { + t.Fatal("unexpected underlying error") + } + }) + } +} diff --git a/internal/netxlite/getaddrinfo_otherwise.go b/internal/netxlite/getaddrinfo_otherwise.go new file mode 100644 index 0000000..ccd70be --- /dev/null +++ b/internal/netxlite/getaddrinfo_otherwise.go @@ -0,0 +1,12 @@ +//go:build !cgo + +package netxlite + +import ( + "context" + "net" +) + +func getaddrinfoLookupANY(ctx context.Context, domain string) ([]string, string, error) { + return net.DefaultResolver.LookupHost(ctx, domain) +} diff --git a/internal/netxlite/getaddrinfo_test.go b/internal/netxlite/getaddrinfo_test.go new file mode 100644 index 0000000..b954fa1 --- /dev/null +++ b/internal/netxlite/getaddrinfo_test.go @@ -0,0 +1,80 @@ +package netxlite + +import ( + "context" + "errors" + "io" + "testing" +) + +func TestGetaddrinfoLookupHost(t *testing.T) { + addrs, err := getaddrinfoLookupHost(context.Background(), "127.0.0.1") + if err != nil { + t.Fatal(err) + } + if len(addrs) != 1 || addrs[0] != "127.0.0.1" { + t.Fatal("unexpected addrs", addrs) + } +} + +func TestErrorToGetaddrinfoRetval(t *testing.T) { + type args struct { + err error + } + tests := []struct { + name string + args args + want int64 + }{{ + name: "with valid getaddrinfo error", + args: args{ + newErrGetaddrinfo(144, nil), + }, + want: 144, + }, { + name: "with another kind of error", + args: args{io.EOF}, + want: 0, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := ErrorToGetaddrinfoRetval(tt.args.err); got != tt.want { + t.Errorf("ErrorToGetaddrinfoRetval() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_newErrGetaddrinfo(t *testing.T) { + type args struct { + code int64 + err error + } + tests := []struct { + name string + args args + }{{ + name: "common case", + args: args{ + code: 17, + err: io.EOF, + }, + }} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := newErrGetaddrinfo(tt.args.code, tt.args.err) + if err == nil { + t.Fatal("expected non-nil error") + } + if !errors.Is(err, tt.args.err) { + t.Fatal("Unwrap() is not working correctly") + } + if err.Error() != tt.args.err.Error() { + t.Fatal("Error() is not working correctly") + } + if err.Code != tt.args.code { + t.Fatal("Code has not been copied correctly") + } + }) + } +} diff --git a/internal/netxlite/getaddrinfo_windows.go b/internal/netxlite/getaddrinfo_windows.go new file mode 100644 index 0000000..2e0a1dc --- /dev/null +++ b/internal/netxlite/getaddrinfo_windows.go @@ -0,0 +1,28 @@ +//go:build cgo && windows + +package netxlite + +//#include +import "C" + +import "syscall" + +const getaddrinfoAIFlags = C.AI_CANONNAME + +// Making constants available to Go code so we can run tests (it seems +// it's not possible to import C directly in tests, sadly). +const ( + aiCanonname = C.AI_CANONNAME +) + +// toError is the function that converts the return value from +// the getaddrinfo function into a proper Go error. +func (state *getaddrinfoState) toError(code int64, err error, goos string) error { + if err == nil { + // Implementation note: on Windows getaddrinfo directly + // returns what is basically a winsock2 error. So if there + // is no other error, just cast code to a syscall err. + err = syscall.Errno(code) + } + return newErrGetaddrinfo(int64(code), err) +} diff --git a/internal/netxlite/getaddrinfo_windows_test.go b/internal/netxlite/getaddrinfo_windows_test.go new file mode 100644 index 0000000..3ed44f5 --- /dev/null +++ b/internal/netxlite/getaddrinfo_windows_test.go @@ -0,0 +1,81 @@ +//go:build cgo && windows + +package netxlite + +import ( + "errors" + "syscall" + "testing" +) + +func TestGetaddrinfoAIFlags(t *testing.T) { + var wrong bool + wrong = getaddrinfoAIFlags != aiCanonname + if wrong { + t.Fatal("wrong flags for platform") + } +} + +func TestGetaddrinfoStateToError(t *testing.T) { + type args struct { + code int64 + err error + goos string + } + type expects struct { + message string // message obtained using .Error + code int64 + err error + } + var inputs = []struct { + name string + args args + expects expects + }{{ + name: "with nonzero return code and error", + args: args{ + code: int64(WSAHOST_NOT_FOUND), + err: syscall.EAGAIN, + goos: "windows", + }, + expects: expects{ + message: syscall.EAGAIN.Error(), + code: int64(WSAHOST_NOT_FOUND), + err: syscall.EAGAIN, + }, + }, { + name: "with return code and nil error", + args: args{ + code: int64(WSAHOST_NOT_FOUND), + err: nil, + goos: "windows", + }, + expects: expects{ + message: WSAHOST_NOT_FOUND.Error(), + code: int64(WSAHOST_NOT_FOUND), + err: WSAHOST_NOT_FOUND, + }, + }} + for _, input := range inputs { + t.Run(input.name, func(t *testing.T) { + state := newGetaddrinfoState(getaddrinfoNumSlots) + err := state.toError(input.args.code, input.args.err, input.args.goos) + if err == nil { + t.Fatal("expected non-nil error here") + } + if err.Error() != input.expects.message { + t.Fatal("unexpected error message") + } + var gaierr *ErrGetaddrinfo + if !errors.As(err, &gaierr) { + t.Fatal("cannot convert error to ErrGetaddrinfo") + } + if gaierr.Code != input.expects.code { + t.Fatal("unexpected code") + } + if !errors.Is(gaierr.Underlying, input.expects.err) { + t.Fatal("unexpected underlying error") + } + }) + } +} diff --git a/internal/netxlite/integration_test.go b/internal/netxlite/integration_test.go index 19a35e1..57052ae 100644 --- a/internal/netxlite/integration_test.go +++ b/internal/netxlite/integration_test.go @@ -17,6 +17,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/netxlite/filtering" "github.com/ooni/probe-cli/v3/internal/netxlite/quictesting" + "github.com/ooni/probe-cli/v3/internal/randx" "github.com/ooni/probe-cli/v3/internal/runtimex" utls "gitlab.com/yawning/utls.git" ) @@ -71,7 +72,10 @@ func TestMeasureWithSystemResolver(t *testing.T) { const timeout = time.Nanosecond ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() - addrs, err := r.LookupHost(ctx, "ooni.org") + // Implementation note: Windows' resolver has caching so back to back tests + // will fail unless we query for something that could bypass the cache itself + // e.g. a domain containing a few random letters + addrs, err := r.LookupHost(ctx, randx.Letters(7)+".ooni.org") if err == nil || err.Error() != netxlite.FailureGenericTimeoutError { t.Fatal("not the error we expected", err) } diff --git a/internal/netxlite/internal/generrno/main.go b/internal/netxlite/internal/generrno/main.go index 4c75357..94d48ee 100644 --- a/internal/netxlite/internal/generrno/main.go +++ b/internal/netxlite/internal/generrno/main.go @@ -149,6 +149,7 @@ var Specs = []*ErrorSpec{ NewWindowsError("NO_DATA", "DNS_no_answer"), // [ ] WSANO_DATA NewWindowsError("NO_RECOVERY", "DNS_non_recoverable_failure"), // [*] WSANO_RECOVERY NewWindowsError("TRY_AGAIN", "DNS_temporary_failure"), // [*] WSATRY_AGAIN + NewWindowsError("HOST_NOT_FOUND", "DNS_NXDOMAIN_error"), // [*] WSAHOST_NOT_FOUND // Implementation note: we need to specify acronyms we // want to be upper case in uppercase here. For example, @@ -169,6 +170,10 @@ var Specs = []*ErrorSpec{ NewLibraryError("SSL_invalid_certificate"), NewLibraryError("JSON_parse_error"), NewLibraryError("connection_already_closed"), + + // QUIRKS: the following errors exist to clearly flag strange + // underlying behavior implemented by platforms. + NewLibraryError("Android_DNS_cache_no_data"), } // mapSystemToLibrary maps the operating system name to the name diff --git a/internal/netxlite/tproxy.go b/internal/netxlite/tproxy.go index db366f1..1cbb281 100644 --- a/internal/netxlite/tproxy.go +++ b/internal/netxlite/tproxy.go @@ -30,7 +30,12 @@ func (*TProxyStdlib) ListenUDP(network string, laddr *net.UDPAddr) (model.UDPLik // LookupHost calls net.DefaultResolver.LookupHost. func (*TProxyStdlib) LookupHost(ctx context.Context, domain string) ([]string, error) { - return net.DefaultResolver.LookupHost(ctx, domain) + // Implementation note: if possible, we try to call getaddrinfo + // directly, which allows us to gather the underlying error. The + // specifics of whether "it's possible" depend on whether we've + // been compiled linking to libc as well as whether we think that + // a platform is ready for using getaddrinfo directly. + return getaddrinfoLookupHost(ctx, domain) } // NewSimpleDialer returns a &net.Dialer{Timeout: timeout} instance.