// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package resolver import ( "bytes" "context" "encoding/binary" "errors" "fmt" "hash/crc32" "math/rand" "net" "sync" "syscall" "time" dns "golang.org/x/net/dns/dnsmessage" "inet.af/netaddr" "tailscale.com/logtail/backoff" "tailscale.com/types/logger" "tailscale.com/util/dnsname" ) // headerBytes is the number of bytes in a DNS message header. const headerBytes = 12 // connCount is the number of UDP connections to use for forwarding. const connCount = 32 const ( // cleanupInterval is the interval between purged of timed-out entries from txMap. cleanupInterval = 30 * time.Second // responseTimeout is the maximal amount of time to wait for a DNS response. responseTimeout = 5 * time.Second ) var errNoUpstreams = errors.New("upstream nameservers not set") type forwardingRecord struct { src netaddr.IPPort createdAt time.Time } // txid identifies a DNS transaction. // // As the standard DNS Request ID is only 16 bits, we extend it: // the lower 32 bits are the zero-extended bits of the DNS Request ID; // the upper 32 bits are the CRC32 checksum of the first question in the request. // This makes probability of txid collision negligible. type txid uint64 // getTxID computes the txid of the given DNS packet. func getTxID(packet []byte) txid { if len(packet) < headerBytes { return 0 } dnsid := binary.BigEndian.Uint16(packet[0:2]) qcount := binary.BigEndian.Uint16(packet[4:6]) if qcount == 0 { return txid(dnsid) } offset := headerBytes for i := uint16(0); i < qcount; i++ { // Note: this relies on the fact that names are not compressed in questions, // so they are guaranteed to end with a NUL byte. // // Justification: // RFC 1035 doesn't seem to explicitly prohibit compressing names in questions, // but this is exceedingly unlikely to be done in practice. A DNS request // with multiple questions is ill-defined (which questions do the header flags apply to?) // and a single question would have to contain a pointer to an *answer*, // which would be excessively smart, pointless (an answer can just as well refer to the question) // and perhaps even prohibited: a draft RFC (draft-ietf-dnsind-local-compression-05) states: // // > It is important that these pointers always point backwards. // // This is said in summarizing RFC 1035, although that phrase does not appear in the original RFC. // Additionally, (https://cr.yp.to/djbdns/notes.html) states: // // > The precise rule is that a name can be compressed if it is a response owner name, // > the name in NS data, the name in CNAME data, the name in PTR data, the name in MX data, // > or one of the names in SOA data. namebytes := bytes.IndexByte(packet[offset:], 0) // ... | name | NUL | type | class // ?? 1 2 2 offset = offset + namebytes + 5 if len(packet) < offset { // Corrupt packet; don't crash. return txid(dnsid) } } hash := crc32.ChecksumIEEE(packet[headerBytes:offset]) return (txid(hash) << 32) | txid(dnsid) } type route struct { suffix dnsname.FQDN resolvers []netaddr.IPPort } // forwarder forwards DNS packets to a number of upstream nameservers. type forwarder struct { logf logger.Logf // responses is a channel by which responses are returned. responses chan packet // closed signals all goroutines to stop. closed chan struct{} // wg signals when all goroutines have stopped. wg sync.WaitGroup // conns are the UDP connections used for forwarding. // A random one is selected for each request, regardless of the target upstream. conns []*fwdConn mu sync.Mutex // routes are per-suffix resolvers to use. routes []route // most specific routes first txMap map[txid]forwardingRecord // txids to in-flight requests } func init() { rand.Seed(time.Now().UnixNano()) } func newForwarder(logf logger.Logf, responses chan packet) *forwarder { ret := &forwarder{ logf: logger.WithPrefix(logf, "forward: "), responses: responses, closed: make(chan struct{}), conns: make([]*fwdConn, connCount), txMap: make(map[txid]forwardingRecord), } ret.wg.Add(connCount + 1) for idx := range ret.conns { ret.conns[idx] = newFwdConn(ret.logf, idx) go ret.recv(ret.conns[idx]) } go ret.cleanMap() return ret } func (f *forwarder) Close() { select { case <-f.closed: return default: // continue } close(f.closed) for _, conn := range f.conns { conn.close() } f.wg.Wait() } func (f *forwarder) rebindFromNetworkChange() { for _, c := range f.conns { c.mu.Lock() c.reconnectLocked() c.mu.Unlock() } } func (f *forwarder) setRoutes(routes []route) { f.mu.Lock() f.routes = routes f.mu.Unlock() } // send sends packet to dst. It is best effort. func (f *forwarder) send(packet []byte, dst netaddr.IPPort) { connIdx := rand.Intn(connCount) conn := f.conns[connIdx] conn.send(packet, dst) } func (f *forwarder) recv(conn *fwdConn) { defer f.wg.Done() for { select { case <-f.closed: return default: } // The 1 extra byte is to detect packet truncation. out := make([]byte, maxResponseBytes+1) n := conn.read(out) var truncated bool if n > maxResponseBytes { n = maxResponseBytes truncated = true } if n == 0 { continue } if n < headerBytes { f.logf("recv: packet too small (%d bytes)", n) } out = out[:n] txid := getTxID(out) if truncated { const dnsFlagTruncated = 0x200 flags := binary.BigEndian.Uint16(out[2:4]) flags |= dnsFlagTruncated binary.BigEndian.PutUint16(out[2:4], flags) // TODO(#2067): Remove any incomplete records? RFC 1035 section 6.2 // states that truncation should head drop so that the authority // section can be preserved if possible. However, the UDP read with // a too-small buffer has already dropped the end, so that's the // best we can do. } f.mu.Lock() record, found := f.txMap[txid] // At most one nameserver will return a response: // the first one to do so will delete txid from the map. if !found { f.mu.Unlock() continue } delete(f.txMap, txid) f.mu.Unlock() pkt := packet{out, record.src} select { case <-f.closed: return case f.responses <- pkt: // continue } } } // cleanMap periodically deletes timed-out forwarding records from f.txMap to bound growth. func (f *forwarder) cleanMap() { defer f.wg.Done() t := time.NewTicker(cleanupInterval) defer t.Stop() var now time.Time for { select { case <-f.closed: return case now = <-t.C: // continue } f.mu.Lock() for k, v := range f.txMap { if now.Sub(v.createdAt) > responseTimeout { delete(f.txMap, k) } } f.mu.Unlock() } } // forward forwards the query to all upstream nameservers and returns the first response. func (f *forwarder) forward(query packet) error { domain, err := nameFromQuery(query.bs) if err != nil { return err } txid := getTxID(query.bs) f.mu.Lock() routes := f.routes f.mu.Unlock() var resolvers []netaddr.IPPort for _, route := range routes { if route.suffix != "." && !route.suffix.Contains(domain) { continue } resolvers = route.resolvers break } if len(resolvers) == 0 { return errNoUpstreams } f.mu.Lock() f.txMap[txid] = forwardingRecord{ src: query.addr, createdAt: time.Now(), } f.mu.Unlock() // TODO(#2066): EDNS size clamping for _, resolver := range resolvers { f.send(query.bs, resolver) } return nil } // A fwdConn manages a single connection used to forward DNS requests. // Net link changes can cause a *net.UDPConn to become permanently unusable, particularly on macOS. // fwdConn detects such situations and transparently creates new connections. type fwdConn struct { // logf allows a fwdConn to log. logf logger.Logf // change allows calls to read to block until a the network connection has been replaced. change *sync.Cond // mu protects fields that follow it; it is also change's Locker. mu sync.Mutex // closed tracks whether fwdConn has been permanently closed. closed bool // conn is the current active connection. conn net.PacketConn } func newFwdConn(logf logger.Logf, idx int) *fwdConn { c := new(fwdConn) c.logf = logger.WithPrefix(logf, fmt.Sprintf("fwdConn %d: ", idx)) c.change = sync.NewCond(&c.mu) // c.conn is created lazily in send return c } // send sends packet to dst using c's connection. // It is best effort. It is UDP, after all. Failures are logged. func (c *fwdConn) send(packet []byte, dst netaddr.IPPort) { var b *backoff.Backoff // lazily initialized, since it is not needed in the common case backOff := func(err error) { if b == nil { b = backoff.NewBackoff("dns-fwdConn-send", c.logf, 30*time.Second) } b.BackOff(context.Background(), err) } for { // Gather the current connection. // We can't hold the lock while we call WriteTo. c.mu.Lock() conn := c.conn closed := c.closed if closed { c.mu.Unlock() return } if conn == nil { c.reconnectLocked() c.mu.Unlock() continue } c.mu.Unlock() _, err := conn.WriteTo(packet, dst.UDPAddr()) if err == nil { // Success return } if errors.Is(err, net.ErrClosed) { // We intentionally closed this connection. // It has been replaced by a new connection. Try again. continue } // Something else went wrong. // We have three choices here: try again, give up, or create a new connection. var opErr *net.OpError if !errors.As(err, &opErr) { // Weird. All errors from the net package should be *net.OpError. Bail. c.logf("send: non-*net.OpErr %v (%T)", err, err) return } if opErr.Temporary() || opErr.Timeout() { // I doubt that either of these can happen (this is UDP), // but go ahead and try again. backOff(err) continue } if errors.Is(err, syscall.EHOSTUNREACH) { // "No route to host." The network stack is fine, but // can't talk to this destination. Not much we can do // about that, don't spam logs. return } if networkIsDown(err) { // Fail. c.logf("send: network is down") return } if networkIsUnreachable(err) { // This can be caused by a link change. // Replace the existing connection with a new one. c.mu.Lock() // It's possible that multiple senders discovered simultaneously // that the network is unreachable. Avoid reconnecting multiple times: // Only reconnect if the current connection is the one that we // discovered to be problematic. if c.conn == conn { backOff(err) c.reconnectLocked() } c.mu.Unlock() // Try again with our new network connection. continue } // Unrecognized error. Fail. c.logf("send: unrecognized error: %v", err) return } } // read waits for a response from c's connection. // It returns the number of bytes read, which may be 0 // in case of an error or a closed connection. func (c *fwdConn) read(out []byte) int { for { // Gather the current connection. // We can't hold the lock while we call ReadFrom. c.mu.Lock() conn := c.conn closed := c.closed if closed { c.mu.Unlock() return 0 } if conn == nil { // There is no current connection. // Wait for the connection to change, then try again. c.change.Wait() c.mu.Unlock() continue } c.mu.Unlock() n, _, err := conn.ReadFrom(out) if err == nil || packetWasTruncated(err) { // Success. return n } if errors.Is(err, net.ErrClosed) { // We intentionally closed this connection. // It has been replaced by a new connection. Try again. continue } c.logf("read: unrecognized error: %v", err) return 0 } } // reconnectLocked replaces the current connection with a new one. // c.mu must be locked. func (c *fwdConn) reconnectLocked() { c.closeConnLocked() // Make a new connection. conn, err := net.ListenPacket("udp", "") if err != nil { c.logf("ListenPacket failed: %v", err) } else { c.conn = conn } // Broadcast that a new connection is available. c.change.Broadcast() } // closeCurrentConn closes the current connection. // c.mu must be locked. func (c *fwdConn) closeConnLocked() { if c.conn == nil { return } c.conn.Close() // unblocks all readers/writers, they'll pick up the next connection. c.conn = nil } // close permanently closes c. func (c *fwdConn) close() { c.mu.Lock() defer c.mu.Unlock() if c.closed { return } c.closed = true c.closeConnLocked() // Unblock any remaining readers. c.change.Broadcast() } // nameFromQuery extracts the normalized query name from bs. func nameFromQuery(bs []byte) (dnsname.FQDN, error) { var parser dns.Parser hdr, err := parser.Start(bs) if err != nil { return "", err } if hdr.Response { return "", errNotQuery } q, err := parser.Question() if err != nil { return "", err } n := q.Name.Data[:q.Name.Length] return dnsname.ToFQDN(rawNameToLower(n)) }