diff --git a/net/tstun/wrap.go b/net/tstun/wrap.go index 5de7bd215..168092711 100644 --- a/net/tstun/wrap.go +++ b/net/tstun/wrap.go @@ -692,6 +692,27 @@ func (t *Wrapper) SetFilter(filt *filter.Filter) { t.filter.Store(filt) } +// InjectInboundDirect makes the Wrapper device behave as if a packet +// with the given contents was received from the network. +// It takes ownership of one reference count on the packet. The injected +// packet will not pass through inbound filters. +// +// This path is typically used to deliver synthesized packets to the +// host networking stack. +func (t *Wrapper) InjectInboundPacketBuffer(pkt *stack.PacketBuffer) error { + buf := make([]byte, PacketStartOffset + pkt.Size()) + + n := copy(buf[PacketStartOffset:], pkt.NetworkHeader().View()) + n += copy(buf[PacketStartOffset+n:], pkt.TransportHeader().View()) + n += copy(buf[PacketStartOffset+n:], pkt.Data().AsRange().AsView()) + if n != pkt.Size() { + panic("unexpected: revisit assumptions") + } + pkt.DecRef() + + return t.InjectInboundDirect(buf, PacketStartOffset) +} + // InjectInboundDirect makes the Wrapper device behave as if a packet // with the given contents was received from the network. // It blocks and does not take ownership of the packet. diff --git a/tsnet/tsnet.go b/tsnet/tsnet.go index 0b0a51971..f000825f2 100644 --- a/tsnet/tsnet.go +++ b/tsnet/tsnet.go @@ -196,12 +196,12 @@ func (s *Server) start() error { return err } - tunDev, magicConn, ok := eng.(wgengine.InternalsGetter).GetInternals() + tunDev, magicConn, d, ok := eng.(wgengine.InternalsGetter).GetInternals() if !ok { return fmt.Errorf("%T is not a wgengine.InternalsGetter", eng) } - ns, err := netstack.Create(logf, tunDev, eng, magicConn, s.dialer) + ns, err := netstack.Create(logf, tunDev, eng, magicConn, s.dialer, d) if err != nil { return fmt.Errorf("netstack.Create: %w", err) } diff --git a/wgengine/netstack/netstack.go b/wgengine/netstack/netstack.go index 9e229bfe8..fa56d3c71 100644 --- a/wgengine/netstack/netstack.go +++ b/wgengine/netstack/netstack.go @@ -56,6 +56,11 @@ var debugNetstack = envknob.Bool("TS_DEBUG_NETSTACK") +var ( + magicDNSIP = tsaddr.TailscaleServiceIP() + magicDNSIPv6 = tsaddr.TailscaleServiceIPv6() +) + func init() { var debugNetstackLeakMode = envknob.String("TS_DEBUG_NETSTACK_LEAK_MODE") // Note: netstacks refsvfs2 package that will eventually replace refs @@ -229,8 +234,9 @@ func (ns *Impl) Start() error { udpFwd := udp.NewForwarder(ns.ipstack, ns.acceptUDP) ns.ipstack.SetTransportProtocolHandler(tcp.ProtocolNumber, ns.wrapProtoHandler(tcpFwd.HandlePacket)) ns.ipstack.SetTransportProtocolHandler(udp.ProtocolNumber, ns.wrapProtoHandler(udpFwd.HandlePacket)) - go ns.injectOutbound() + go ns.inject() ns.tundev.PostFilterIn = ns.injectInbound + ns.tundev.PreFilterFromTunToNetstack = ns.handleLocalPackets return nil } @@ -358,6 +364,35 @@ func (ns *Impl) updateIPs(nm *netmap.NetworkMap) { } } +// handleLocalPackets is hooked into the tun datapath for packets leaving +// the host and arriving at tailscaled. This method returns filter.DropSilently +// to intercept a packet for handling, for instance traffic to quad-100. +func (ns *Impl) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper) filter.Response { + // If it's not traffic to the service IP (i.e. magicDNS) we don't + // care; resume processing. + if dst := p.Dst.IP(); dst != magicDNSIP && dst != magicDNSIPv6 { + return filter.Accept + } + + var pn tcpip.NetworkProtocolNumber + switch p.IPVersion { + case 4: + pn = header.IPv4ProtocolNumber + case 6: + pn = header.IPv6ProtocolNumber + } + if debugPackets { + ns.logf("[v2] service packet in (from %v): % x", p.Src, p.Buffer()) + } + vv := buffer.View(append([]byte(nil), p.Buffer()...)).ToVectorisedView() + packetBuf := stack.NewPacketBuffer(stack.PacketBufferOptions{ + Data: vv, + }) + ns.linkEP.InjectInbound(pn, packetBuf) + packetBuf.DecRef() + return filter.DropSilently +} + func (ns *Impl) DialContextTCP(ctx context.Context, ipp netaddr.IPPort) (*gonet.TCPConn, error) { remoteAddress := tcpip.FullAddress{ NIC: nicID, @@ -390,7 +425,9 @@ func (ns *Impl) DialContextUDP(ctx context.Context, ipp netaddr.IPPort) (*gonet. return gonet.DialUDP(ns.ipstack, nil, remoteAddress, ipType) } -func (ns *Impl) injectOutbound() { +// The inject goroutine reads in packets that netstack generated, and delivers +// them to the correct path. +func (ns *Impl) inject() { for { pkt := ns.linkEP.ReadContext(ns.ctx) if pkt == nil { @@ -406,13 +443,50 @@ func (ns *Impl) injectOutbound() { ns.logf("[v2] packet Write out: % x", stack.PayloadSince(pkt.NetworkHeader())) } - // pkt has a non-zero refcount, InjectOutboundPacketBuffer takes - // ownership of one count and will decrement on completion. - if err := ns.tundev.InjectOutboundPacketBuffer(pkt); err != nil { - log.Printf("netstack inject outbound: %v", err) - return + // In the normal case, netstack synthesizes the bytes for + // traffic which should transit back into WG and go to peers. + // However, some uses of netstack (presently, magic DNS) + // send traffic destined for the local device, hence must + // be injected 'inbound'. + sendToHost := false + + // Determine if the packet is from a service IP, in which case it + // needs to go back into the machines network (inbound) instead of + // out. + // TODO(tom): Work out a way to avoid parsing packets to determine if + // its from the service IP. Maybe gvisor netstack magic. I + // went through the fields of PacketBuffer, and nop :/ + // TODO(tom): Figure out if its safe to modify packet.Parsed to fill in + // the IP src/dest even if its missing the rest of the pkt. + // That way we dont have to do this twitchy-af byte-yeeting. + if b := pkt.NetworkHeader().View(); len(b) >= 20 { // min ipv4 header + switch b[0] >> 4 { // ip proto field + case 4: + if srcIP := netaddr.IPv4(b[12], b[13], b[14], b[15]); magicDNSIP == srcIP { + sendToHost = true + } + case 6: + if len(b) >= 40 { // min ipv6 header + if srcIP, ok := netaddr.FromStdIP(net.IP(b[8:24])); ok && magicDNSIPv6 == srcIP { + sendToHost = true + } + } + } } + // pkt has a non-zero refcount, so injection methods takes + // ownership of one count and will decrement on completion. + if sendToHost { + if err := ns.tundev.InjectInboundPacketBuffer(pkt); err != nil { + log.Printf("netstack inject inbound: %v", err) + return + } + } else { + if err := ns.tundev.InjectOutboundPacketBuffer(pkt); err != nil { + log.Printf("netstack inject outbound: %v", err) + return + } + } } } @@ -436,8 +510,8 @@ func (ns *Impl) peerAPIPortAtomic(ip netaddr.IP) *uint32 { var viaRange = tsaddr.TailscaleViaRange() -// shouldProcessInbound reports whether an inbound packet should be -// handled by netstack. +// shouldProcessInbound reports whether an inbound packet (a packet from a +// WireGuard peer) should be handled by netstack. func (ns *Impl) shouldProcessInbound(p *packet.Parsed, t *tstun.Wrapper) bool { // Handle incoming peerapi connections in netstack. if ns.lb != nil && p.IPProto == ipproto.TCP { @@ -558,6 +632,11 @@ func (ns *Impl) isInboundTSSH(p *packet.Parsed) bool { ns.isLocalIP(p.Dst.IP()) } +// injectInbound is installed as a packet hook on the 'inbound' (from a +// WireGuard peer) path. Returning filter.Accept releases the packet to +// continue normally (typically being delivered to the host networking stack), +// whereas returning filter.DropSilently is done when netstack intercepts the +// packet and no further processing towards to host should be done. func (ns *Impl) injectInbound(p *packet.Parsed, t *tstun.Wrapper) filter.Response { if !ns.shouldProcessInbound(p, t) { // Let the host network stack (if any) deal with it. @@ -779,10 +858,41 @@ func (ns *Impl) acceptUDP(r *udp.ForwarderRequest) { return } + // Handle magicDNS traffic (via UDP) here. + if dst := dstAddr.IP(); dst == magicDNSIP || dst == magicDNSIPv6 { + if dstAddr.Port() != 53 { + return // Only MagicDNS traffic runs on the service IPs for now. + } + + c := gonet.NewUDPConn(ns.ipstack, &wq, ep) + go ns.handleMagicDNSUDP(srcAddr, c) + return + } + c := gonet.NewUDPConn(ns.ipstack, &wq, ep) go ns.forwardUDP(c, &wq, srcAddr, dstAddr) } +func (ns *Impl) handleMagicDNSUDP(srcAddr netaddr.IPPort, c *gonet.UDPConn) { + // In practice, implementations are advised not to exceed 512 bytes + // due to fragmenting. Just to be sure, we bump all the way to the MTU. + const maxUDPReqSize = mtu + + defer c.Close() + q := make([]byte, maxUDPReqSize) + n, err := c.Read(q) + if err != nil { + ns.logf("dns udp read: %v", err) + return + } + resp, err := ns.dns.Query(context.Background(), q[:n], srcAddr) + if err != nil { + ns.logf("dns udp query: %v", err) + return + } + c.Write(resp) +} + // forwardUDP proxies between client (with addr clientAddr) and dstAddr. // // dstAddr may be either a local Tailscale IP, in which we case we proxy to diff --git a/wgengine/netstack/netstack_test.go b/wgengine/netstack/netstack_test.go index a21d832c6..b5aac19d5 100644 --- a/wgengine/netstack/netstack_test.go +++ b/wgengine/netstack/netstack_test.go @@ -39,12 +39,12 @@ func TestInjectInboundLeak(t *testing.T) { if !ok { t.Fatal("not an InternalsGetter") } - tunWrap, magicSock, ok := ig.GetInternals() + tunWrap, magicSock, d, ok := ig.GetInternals() if !ok { t.Fatal("failed to get internals") } - ns, err := Create(logf, tunWrap, eng, magicSock, dialer) + ns, err := Create(logf, tunWrap, eng, magicSock, dialer, d) if err != nil { t.Fatal(err) } diff --git a/wgengine/userspace.go b/wgengine/userspace.go index d7f67d4d3..f732f766b 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -460,9 +460,16 @@ func echoRespondToAll(p *packet.Parsed, t *tstun.Wrapper) filter.Response { // tailscaled directly. Other packets are allowed to proceed into the // main ACL filter. func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper) filter.Response { - if verdict := e.handleDNS(p, t); verdict == filter.Drop { + // Handle traffic to the service IP. + // TODO(tom): Netstack handles this when it is installed. Rip all + // this out once netstack is used on all platforms. + switch p.Dst.IP() { + case magicDNSIP, magicDNSIPv6: + err := e.dns.EnqueuePacket(append([]byte(nil), p.Payload()...), p.IPProto, p.Src, p.Dst) + if err != nil { + e.logf("dns: enqueue: %v", err) + } metricMagicDNSPacketIn.Add(1) - // local DNS handled the packet. return filter.Drop } @@ -485,21 +492,10 @@ func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.Wrapper) return filter.Accept } -// handleDNS is an outbound pre-filter resolving Tailscale domains. -func (e *userspaceEngine) handleDNS(p *packet.Parsed, t *tstun.Wrapper) filter.Response { - switch p.Dst.IP() { - case magicDNSIP, magicDNSIPv6: - err := e.dns.EnqueuePacket(append([]byte(nil), p.Payload()...), p.IPProto, p.Src, p.Dst) - if err != nil { - e.logf("dns: enqueue: %v", err) - } - return filter.Drop - default: - return filter.Accept - } -} - // pollResolver reads packets from the DNS resolver and injects them inbound. +// +// TODO(tom): Remove this fallback path (via NextPacket()) once +// all platforms use netstack. func (e *userspaceEngine) pollResolver() { for { bs, err := e.dns.NextPacket()