net/dns/resolver: count errors when racing DNS queries, fail earlier

Previously, if all N racing queries failed, we still waited for the
context timeout (5 seconds) before returning the error.

This makes (*forwarder).forward fail fast when the network's
unavailable.

Change-Id: Ibbb3efea7ed34acd3f3b29b5fee00ba8c7492569
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick 2022-04-19 10:58:52 -07:00 committed by Brad Fitzpatrick
parent d9efbd97cb
commit e3a4952527

View File

@ -615,6 +615,10 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
return err return err
} }
// Guarantee that the ctx we use below is done when this function returns.
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Drop DNS service discovery spam, primarily for battery life // Drop DNS service discovery spam, primarily for battery life
// on mobile. Things like Spotify on iOS generate this traffic, // on mobile. Things like Spotify on iOS generate this traffic,
// when browsing for LAN devices. But even when filtering this // when browsing for LAN devices. But even when filtering this
@ -655,12 +659,8 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
} }
defer fq.closeOnCtxDone.Close() defer fq.closeOnCtxDone.Close()
resc := make(chan []byte, 1) resc := make(chan []byte, 1) // it's fine buffered or not
var ( errc := make(chan error, 1) // it's fine buffered or not too
mu sync.Mutex
firstErr error
)
for i := range resolvers { for i := range resolvers {
go func(rr *resolverAndDelay) { go func(rr *resolverAndDelay) {
if rr.startDelay > 0 { if rr.startDelay > 0 {
@ -674,39 +674,48 @@ func (f *forwarder) forwardWithDestChan(ctx context.Context, query packet, respo
} }
resb, err := f.send(ctx, fq, *rr) resb, err := f.send(ctx, fq, *rr)
if err != nil { if err != nil {
mu.Lock() select {
defer mu.Unlock() case errc <- err:
if firstErr == nil { case <-ctx.Done():
firstErr = err
} }
return return
} }
select { select {
case resc <- resb: case resc <- resb:
default: case <-ctx.Done():
} }
}(&resolvers[i]) }(&resolvers[i])
} }
select { var firstErr error
case v := <-resc: var numErr int
for {
select { select {
case v := <-resc:
select {
case <-ctx.Done():
metricDNSFwdErrorContext.Add(1)
return ctx.Err()
case responseChan <- packet{v, query.addr}:
metricDNSFwdSuccess.Add(1)
return nil
}
case err := <-errc:
if firstErr == nil {
firstErr = err
}
numErr++
if numErr == len(resolvers) {
return firstErr
}
case <-ctx.Done(): case <-ctx.Done():
metricDNSFwdErrorContext.Add(1) metricDNSFwdErrorContext.Add(1)
if firstErr != nil {
metricDNSFwdErrorContextGotError.Add(1)
return firstErr
}
return ctx.Err() return ctx.Err()
case responseChan <- packet{v, query.addr}:
metricDNSFwdSuccess.Add(1)
return nil
} }
case <-ctx.Done():
mu.Lock()
defer mu.Unlock()
metricDNSFwdErrorContext.Add(1)
if firstErr != nil {
metricDNSFwdErrorContextGotError.Add(1)
return firstErr
}
return ctx.Err()
} }
} }