mirror of
https://github.com/tailscale/tailscale.git
synced 2024-12-02 14:35:39 +00:00
8b11937eaf
This raises the maximum DNS response message size from 512 to 4095. This should be large enough for almost all situations that do not need TCP. We still do not recognize EDNS, so we will still forward requests that claim support for a larger response size than 4095 (that will be solved later). For now, when a response comes back that is too large to fit in our receive buffer, we now set the truncation flag in the DNS header, which is an improvement from before but will prompt attempts to use TCP which isn't supported yet. On Windows, WSARecvFrom into a buffer that's too small returns an error in addition to the data. On other OSes, the extra data is silently discarded. In this case, we prefer the latter so need to catch the error on Windows. Partially addresses #1123 Signed-off-by: Adrian Dewhurst <adrian@tailscale.com>
526 lines
13 KiB
Go
526 lines
13 KiB
Go
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package resolver
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"hash/crc32"
|
|
"math/rand"
|
|
"net"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
dns "golang.org/x/net/dns/dnsmessage"
|
|
"inet.af/netaddr"
|
|
"tailscale.com/logtail/backoff"
|
|
"tailscale.com/types/logger"
|
|
"tailscale.com/util/dnsname"
|
|
)
|
|
|
|
const (
	// headerBytes is the size, in bytes, of a DNS message header.
	headerBytes = 12

	// connCount is the number of UDP connections to use for forwarding.
	connCount = 32

	// cleanupInterval is the interval between purges of timed-out entries from txMap.
	cleanupInterval = 30 * time.Second

	// responseTimeout is the maximal amount of time to wait for a DNS response.
	// Forwarding records older than this are dropped by cleanMap.
	responseTimeout = 5 * time.Second
)

// errNoUpstreams is returned by forward when no resolvers are
// configured for the queried name.
var errNoUpstreams = errors.New("upstream nameservers not set")
|
|
|
|
type forwardingRecord struct {
|
|
src netaddr.IPPort
|
|
createdAt time.Time
|
|
}
|
|
|
|
// txid identifies a DNS transaction.
//
// The standard DNS Request ID is only 16 bits wide, so it is extended:
//   - the lower 32 bits hold the zero-extended DNS Request ID;
//   - the upper 32 bits hold the CRC32 checksum of the question section
//     of the request (see getTxID).
//
// This makes the probability of a txid collision negligible.
type txid uint64
|
|
|
|
// getTxID computes the txid of the given DNS packet.
|
|
func getTxID(packet []byte) txid {
|
|
if len(packet) < headerBytes {
|
|
return 0
|
|
}
|
|
|
|
dnsid := binary.BigEndian.Uint16(packet[0:2])
|
|
qcount := binary.BigEndian.Uint16(packet[4:6])
|
|
if qcount == 0 {
|
|
return txid(dnsid)
|
|
}
|
|
|
|
offset := headerBytes
|
|
for i := uint16(0); i < qcount; i++ {
|
|
// Note: this relies on the fact that names are not compressed in questions,
|
|
// so they are guaranteed to end with a NUL byte.
|
|
//
|
|
// Justification:
|
|
// RFC 1035 doesn't seem to explicitly prohibit compressing names in questions,
|
|
// but this is exceedingly unlikely to be done in practice. A DNS request
|
|
// with multiple questions is ill-defined (which questions do the header flags apply to?)
|
|
// and a single question would have to contain a pointer to an *answer*,
|
|
// which would be excessively smart, pointless (an answer can just as well refer to the question)
|
|
// and perhaps even prohibited: a draft RFC (draft-ietf-dnsind-local-compression-05) states:
|
|
//
|
|
// > It is important that these pointers always point backwards.
|
|
//
|
|
// This is said in summarizing RFC 1035, although that phrase does not appear in the original RFC.
|
|
// Additionally, (https://cr.yp.to/djbdns/notes.html) states:
|
|
//
|
|
// > The precise rule is that a name can be compressed if it is a response owner name,
|
|
// > the name in NS data, the name in CNAME data, the name in PTR data, the name in MX data,
|
|
// > or one of the names in SOA data.
|
|
namebytes := bytes.IndexByte(packet[offset:], 0)
|
|
// ... | name | NUL | type | class
|
|
// ?? 1 2 2
|
|
offset = offset + namebytes + 5
|
|
if len(packet) < offset {
|
|
// Corrupt packet; don't crash.
|
|
return txid(dnsid)
|
|
}
|
|
}
|
|
|
|
hash := crc32.ChecksumIEEE(packet[headerBytes:offset])
|
|
return (txid(hash) << 32) | txid(dnsid)
|
|
}
|
|
|
|
type route struct {
|
|
suffix dnsname.FQDN
|
|
resolvers []netaddr.IPPort
|
|
}
|
|
|
|
// forwarder forwards DNS packets to a number of upstream nameservers.
|
|
type forwarder struct {
|
|
logf logger.Logf
|
|
|
|
// responses is a channel by which responses are returned.
|
|
responses chan packet
|
|
// closed signals all goroutines to stop.
|
|
closed chan struct{}
|
|
// wg signals when all goroutines have stopped.
|
|
wg sync.WaitGroup
|
|
|
|
// conns are the UDP connections used for forwarding.
|
|
// A random one is selected for each request, regardless of the target upstream.
|
|
conns []*fwdConn
|
|
|
|
mu sync.Mutex
|
|
// routes are per-suffix resolvers to use.
|
|
routes []route // most specific routes first
|
|
txMap map[txid]forwardingRecord // txids to in-flight requests
|
|
}
|
|
|
|
// init seeds math/rand from the current time; the forwarder uses it
// to pick a connection for each send.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
|
|
|
|
func newForwarder(logf logger.Logf, responses chan packet) *forwarder {
|
|
ret := &forwarder{
|
|
logf: logger.WithPrefix(logf, "forward: "),
|
|
responses: responses,
|
|
closed: make(chan struct{}),
|
|
conns: make([]*fwdConn, connCount),
|
|
txMap: make(map[txid]forwardingRecord),
|
|
}
|
|
|
|
ret.wg.Add(connCount + 1)
|
|
for idx := range ret.conns {
|
|
ret.conns[idx] = newFwdConn(ret.logf, idx)
|
|
go ret.recv(ret.conns[idx])
|
|
}
|
|
go ret.cleanMap()
|
|
|
|
return ret
|
|
}
|
|
|
|
func (f *forwarder) Close() {
|
|
select {
|
|
case <-f.closed:
|
|
return
|
|
default:
|
|
// continue
|
|
}
|
|
close(f.closed)
|
|
|
|
for _, conn := range f.conns {
|
|
conn.close()
|
|
}
|
|
|
|
f.wg.Wait()
|
|
}
|
|
|
|
func (f *forwarder) rebindFromNetworkChange() {
|
|
for _, c := range f.conns {
|
|
c.mu.Lock()
|
|
c.reconnectLocked()
|
|
c.mu.Unlock()
|
|
}
|
|
}
|
|
|
|
func (f *forwarder) setRoutes(routes []route) {
|
|
f.mu.Lock()
|
|
f.routes = routes
|
|
f.mu.Unlock()
|
|
}
|
|
|
|
// send sends packet to dst. It is best effort.
|
|
func (f *forwarder) send(packet []byte, dst netaddr.IPPort) {
|
|
connIdx := rand.Intn(connCount)
|
|
conn := f.conns[connIdx]
|
|
conn.send(packet, dst)
|
|
}
|
|
|
|
func (f *forwarder) recv(conn *fwdConn) {
|
|
defer f.wg.Done()
|
|
|
|
for {
|
|
select {
|
|
case <-f.closed:
|
|
return
|
|
default:
|
|
}
|
|
// The 1 extra byte is to detect packet truncation.
|
|
out := make([]byte, maxResponseBytes+1)
|
|
n := conn.read(out)
|
|
var truncated bool
|
|
if n > maxResponseBytes {
|
|
n = maxResponseBytes
|
|
truncated = true
|
|
}
|
|
if n == 0 {
|
|
continue
|
|
}
|
|
if n < headerBytes {
|
|
f.logf("recv: packet too small (%d bytes)", n)
|
|
}
|
|
|
|
out = out[:n]
|
|
txid := getTxID(out)
|
|
|
|
if truncated {
|
|
const dnsFlagTruncated = 0x200
|
|
flags := binary.BigEndian.Uint16(out[2:4])
|
|
flags |= dnsFlagTruncated
|
|
binary.BigEndian.PutUint16(out[2:4], flags)
|
|
|
|
// TODO(#2067): Remove any incomplete records? RFC 1035 section 6.2
|
|
// states that truncation should head drop so that the authority
|
|
// section can be preserved if possible. However, the UDP read with
|
|
// a too-small buffer has already dropped the end, so that's the
|
|
// best we can do.
|
|
}
|
|
|
|
f.mu.Lock()
|
|
|
|
record, found := f.txMap[txid]
|
|
// At most one nameserver will return a response:
|
|
// the first one to do so will delete txid from the map.
|
|
if !found {
|
|
f.mu.Unlock()
|
|
continue
|
|
}
|
|
delete(f.txMap, txid)
|
|
|
|
f.mu.Unlock()
|
|
|
|
pkt := packet{out, record.src}
|
|
select {
|
|
case <-f.closed:
|
|
return
|
|
case f.responses <- pkt:
|
|
// continue
|
|
}
|
|
}
|
|
}
|
|
|
|
// cleanMap periodically deletes timed-out forwarding records from f.txMap to bound growth.
|
|
func (f *forwarder) cleanMap() {
|
|
defer f.wg.Done()
|
|
|
|
t := time.NewTicker(cleanupInterval)
|
|
defer t.Stop()
|
|
|
|
var now time.Time
|
|
for {
|
|
select {
|
|
case <-f.closed:
|
|
return
|
|
case now = <-t.C:
|
|
// continue
|
|
}
|
|
|
|
f.mu.Lock()
|
|
for k, v := range f.txMap {
|
|
if now.Sub(v.createdAt) > responseTimeout {
|
|
delete(f.txMap, k)
|
|
}
|
|
}
|
|
f.mu.Unlock()
|
|
}
|
|
}
|
|
|
|
// forward forwards the query to all upstream nameservers and returns the first response.
|
|
func (f *forwarder) forward(query packet) error {
|
|
domain, err := nameFromQuery(query.bs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
txid := getTxID(query.bs)
|
|
|
|
f.mu.Lock()
|
|
routes := f.routes
|
|
f.mu.Unlock()
|
|
|
|
var resolvers []netaddr.IPPort
|
|
for _, route := range routes {
|
|
if route.suffix != "." && !route.suffix.Contains(domain) {
|
|
continue
|
|
}
|
|
resolvers = route.resolvers
|
|
break
|
|
}
|
|
if len(resolvers) == 0 {
|
|
return errNoUpstreams
|
|
}
|
|
|
|
f.mu.Lock()
|
|
f.txMap[txid] = forwardingRecord{
|
|
src: query.addr,
|
|
createdAt: time.Now(),
|
|
}
|
|
f.mu.Unlock()
|
|
|
|
// TODO(#2066): EDNS size clamping
|
|
|
|
for _, resolver := range resolvers {
|
|
f.send(query.bs, resolver)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// A fwdConn manages a single connection used to forward DNS requests.
|
|
// Net link changes can cause a *net.UDPConn to become permanently unusable, particularly on macOS.
|
|
// fwdConn detects such situations and transparently creates new connections.
|
|
type fwdConn struct {
|
|
// logf allows a fwdConn to log.
|
|
logf logger.Logf
|
|
|
|
// change allows calls to read to block until a the network connection has been replaced.
|
|
change *sync.Cond
|
|
|
|
// mu protects fields that follow it; it is also change's Locker.
|
|
mu sync.Mutex
|
|
// closed tracks whether fwdConn has been permanently closed.
|
|
closed bool
|
|
// conn is the current active connection.
|
|
conn net.PacketConn
|
|
}
|
|
|
|
func newFwdConn(logf logger.Logf, idx int) *fwdConn {
|
|
c := new(fwdConn)
|
|
c.logf = logger.WithPrefix(logf, fmt.Sprintf("fwdConn %d: ", idx))
|
|
c.change = sync.NewCond(&c.mu)
|
|
// c.conn is created lazily in send
|
|
return c
|
|
}
|
|
|
|
// send sends packet to dst using c's connection.
|
|
// It is best effort. It is UDP, after all. Failures are logged.
|
|
func (c *fwdConn) send(packet []byte, dst netaddr.IPPort) {
|
|
var b *backoff.Backoff // lazily initialized, since it is not needed in the common case
|
|
backOff := func(err error) {
|
|
if b == nil {
|
|
b = backoff.NewBackoff("dns-fwdConn-send", c.logf, 30*time.Second)
|
|
}
|
|
b.BackOff(context.Background(), err)
|
|
}
|
|
|
|
for {
|
|
// Gather the current connection.
|
|
// We can't hold the lock while we call WriteTo.
|
|
c.mu.Lock()
|
|
conn := c.conn
|
|
closed := c.closed
|
|
if closed {
|
|
c.mu.Unlock()
|
|
return
|
|
}
|
|
if conn == nil {
|
|
c.reconnectLocked()
|
|
c.mu.Unlock()
|
|
continue
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
_, err := conn.WriteTo(packet, dst.UDPAddr())
|
|
if err == nil {
|
|
// Success
|
|
return
|
|
}
|
|
if errors.Is(err, net.ErrClosed) {
|
|
// We intentionally closed this connection.
|
|
// It has been replaced by a new connection. Try again.
|
|
continue
|
|
}
|
|
// Something else went wrong.
|
|
// We have three choices here: try again, give up, or create a new connection.
|
|
var opErr *net.OpError
|
|
if !errors.As(err, &opErr) {
|
|
// Weird. All errors from the net package should be *net.OpError. Bail.
|
|
c.logf("send: non-*net.OpErr %v (%T)", err, err)
|
|
return
|
|
}
|
|
if opErr.Temporary() || opErr.Timeout() {
|
|
// I doubt that either of these can happen (this is UDP),
|
|
// but go ahead and try again.
|
|
backOff(err)
|
|
continue
|
|
}
|
|
if errors.Is(err, syscall.EHOSTUNREACH) {
|
|
// "No route to host." The network stack is fine, but
|
|
// can't talk to this destination. Not much we can do
|
|
// about that, don't spam logs.
|
|
return
|
|
}
|
|
if networkIsDown(err) {
|
|
// Fail.
|
|
c.logf("send: network is down")
|
|
return
|
|
}
|
|
if networkIsUnreachable(err) {
|
|
// This can be caused by a link change.
|
|
// Replace the existing connection with a new one.
|
|
c.mu.Lock()
|
|
// It's possible that multiple senders discovered simultaneously
|
|
// that the network is unreachable. Avoid reconnecting multiple times:
|
|
// Only reconnect if the current connection is the one that we
|
|
// discovered to be problematic.
|
|
if c.conn == conn {
|
|
backOff(err)
|
|
c.reconnectLocked()
|
|
}
|
|
c.mu.Unlock()
|
|
// Try again with our new network connection.
|
|
continue
|
|
}
|
|
// Unrecognized error. Fail.
|
|
c.logf("send: unrecognized error: %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
// read waits for a response from c's connection.
|
|
// It returns the number of bytes read, which may be 0
|
|
// in case of an error or a closed connection.
|
|
func (c *fwdConn) read(out []byte) int {
|
|
for {
|
|
// Gather the current connection.
|
|
// We can't hold the lock while we call ReadFrom.
|
|
c.mu.Lock()
|
|
conn := c.conn
|
|
closed := c.closed
|
|
if closed {
|
|
c.mu.Unlock()
|
|
return 0
|
|
}
|
|
if conn == nil {
|
|
// There is no current connection.
|
|
// Wait for the connection to change, then try again.
|
|
c.change.Wait()
|
|
c.mu.Unlock()
|
|
continue
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
n, _, err := conn.ReadFrom(out)
|
|
if err == nil || packetWasTruncated(err) {
|
|
// Success.
|
|
return n
|
|
}
|
|
if errors.Is(err, net.ErrClosed) {
|
|
// We intentionally closed this connection.
|
|
// It has been replaced by a new connection. Try again.
|
|
continue
|
|
}
|
|
|
|
c.logf("read: unrecognized error: %v", err)
|
|
return 0
|
|
}
|
|
}
|
|
|
|
// reconnectLocked replaces the current connection with a new one.
|
|
// c.mu must be locked.
|
|
func (c *fwdConn) reconnectLocked() {
|
|
c.closeConnLocked()
|
|
// Make a new connection.
|
|
conn, err := net.ListenPacket("udp", "")
|
|
if err != nil {
|
|
c.logf("ListenPacket failed: %v", err)
|
|
} else {
|
|
c.conn = conn
|
|
}
|
|
// Broadcast that a new connection is available.
|
|
c.change.Broadcast()
|
|
}
|
|
|
|
// closeCurrentConn closes the current connection.
|
|
// c.mu must be locked.
|
|
func (c *fwdConn) closeConnLocked() {
|
|
if c.conn == nil {
|
|
return
|
|
}
|
|
c.conn.Close() // unblocks all readers/writers, they'll pick up the next connection.
|
|
c.conn = nil
|
|
}
|
|
|
|
// close permanently closes c.
|
|
func (c *fwdConn) close() {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if c.closed {
|
|
return
|
|
}
|
|
c.closed = true
|
|
c.closeConnLocked()
|
|
// Unblock any remaining readers.
|
|
c.change.Broadcast()
|
|
}
|
|
|
|
// nameFromQuery extracts the normalized query name from bs.
|
|
func nameFromQuery(bs []byte) (dnsname.FQDN, error) {
|
|
var parser dns.Parser
|
|
|
|
hdr, err := parser.Start(bs)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if hdr.Response {
|
|
return "", errNotQuery
|
|
}
|
|
|
|
q, err := parser.Question()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
n := q.Name.Data[:q.Name.Length]
|
|
return dnsname.ToFQDN(rawNameToLower(n))
|
|
}
|