tailscale/cmd/derper/mesh.go
Brad Fitzpatrick 3d7fb6c21d derp/derphttp: fix race in mesh watcher
The derphttp client automatically reconnects upon failure.

RunWatchConnectionLoop called derphttp.Client.WatchConnectionChanges
once, but that wrapper method called the underlying
derp.Client.WatchConnectionChanges exactly once on derphttp.Client's
currently active connection. If there's a failure, we need to re-subscribe
upon all reconnections.

This removes the derphttp.Client.WatchConnectionChanges method, which
was basically impossible to use correctly, and changes it to be a
boolean field on derphttp.Client alongside MeshKey and IsProber. Then
it moves the call to the underlying derp.Client.WatchConnectionChanges
to derphttp's client connection code, so it's resubscribed on any
reconnect.

Some paranoia is then added to make sure people hold the API right,
not calling derphttp.Client.RunWatchConnectionLoop on an
already-started Client without having set the bool to true. (But still
auto-setting it to true if that's the first method that's been called
on that derphttp.Client, as is commonly the case, and prevents
existing code from breaking)

Fixes tailscale/corp#9916
Supercedes tailscale/tailscale#9719

Co-authored-by: Val <valerie@tailscale.com>
Co-authored-by: Irbe Krumina <irbe@tailscale.com>
Co-authored-by: Anton Tolchanov <anton@tailscale.com>
Signed-off-by: Brad Fitzpatrick <brad@danga.com>
2023-10-31 19:24:25 +00:00

77 lines
2.0 KiB
Go

// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package main
import (
"context"
"errors"
"fmt"
"log"
"net"
"net/netip"
"strings"
"time"
"tailscale.com/derp"
"tailscale.com/derp/derphttp"
"tailscale.com/types/key"
"tailscale.com/types/logger"
)
func startMesh(s *derp.Server) error {
if *meshWith == "" {
return nil
}
if !s.HasMeshKey() {
return errors.New("--mesh-with requires --mesh-psk-file")
}
for _, host := range strings.Split(*meshWith, ",") {
if err := startMeshWithHost(s, host); err != nil {
return err
}
}
return nil
}
func startMeshWithHost(s *derp.Server, host string) error {
logf := logger.WithPrefix(log.Printf, fmt.Sprintf("mesh(%q): ", host))
c, err := derphttp.NewClient(s.PrivateKey(), "https://"+host+"/derp", logf)
if err != nil {
return err
}
c.MeshKey = s.MeshKey()
c.WatchConnectionChanges = true
// For meshed peers within a region, connect via VPC addresses.
c.SetURLDialer(func(ctx context.Context, network, addr string) (net.Conn, error) {
host, port, err := net.SplitHostPort(addr)
if err != nil {
return nil, err
}
var d net.Dialer
var r net.Resolver
if base, ok := strings.CutSuffix(host, ".tailscale.com"); ok && port == "443" {
subCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
defer cancel()
vpcHost := base + "-vpc.tailscale.com"
ips, _ := r.LookupIP(subCtx, "ip", vpcHost)
if len(ips) > 0 {
vpcAddr := net.JoinHostPort(ips[0].String(), port)
c, err := d.DialContext(subCtx, network, vpcAddr)
if err == nil {
log.Printf("connected to %v (%v) instead of %v", vpcHost, ips[0], base)
return c, nil
}
log.Printf("failed to connect to %v (%v): %v; trying non-VPC route", vpcHost, ips[0], err)
}
}
return d.DialContext(ctx, network, addr)
})
add := func(k key.NodePublic, _ netip.AddrPort) { s.AddPacketForwarder(k, c) }
remove := func(k key.NodePublic) { s.RemovePacketForwarder(k, c) }
go c.RunWatchConnectionLoop(context.Background(), s.PublicKey(), logf, add, remove)
return nil
}