mirror of
https://github.com/tailscale/tailscale.git
synced 2025-01-07 08:07:42 +00:00
wgengine/wgcfg: recover from mismatched PublicKey/Endpoints
In rare circumstances (tailscale/corp#3016), the PublicKey and Endpoints can diverge. This by itself doesn't cause any harm, but our early exit in response did, because it prevented us from recovering from it. Remove the early exit. Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
This commit is contained in:
parent
9fc4e876e3
commit
85184a58ed
@ -57,4 +57,5 @@ func (src *Peer) Clone() *Peer {
|
|||||||
DiscoKey key.DiscoPublic
|
DiscoKey key.DiscoPublic
|
||||||
AllowedIPs []netaddr.IPPrefix
|
AllowedIPs []netaddr.IPPrefix
|
||||||
PersistentKeepalive uint16
|
PersistentKeepalive uint16
|
||||||
|
WGEndpoint key.NodePublic
|
||||||
}{})
|
}{})
|
||||||
|
@ -28,6 +28,11 @@ type Peer struct {
|
|||||||
DiscoKey key.DiscoPublic // present only so we can handle restarts within wgengine, not passed to WireGuard
|
DiscoKey key.DiscoPublic // present only so we can handle restarts within wgengine, not passed to WireGuard
|
||||||
AllowedIPs []netaddr.IPPrefix
|
AllowedIPs []netaddr.IPPrefix
|
||||||
PersistentKeepalive uint16
|
PersistentKeepalive uint16
|
||||||
|
// wireguard-go's endpoint for this peer. It should always equal Peer.PublicKey.
|
||||||
|
// We represent it explicitly so that we can detect if they diverge and recover.
|
||||||
|
// There is no need to set WGEndpoint explicitly when constructing a Peer by hand.
|
||||||
|
// It is only populated when reading Peers from wireguard-go.
|
||||||
|
WGEndpoint key.NodePublic
|
||||||
}
|
}
|
||||||
|
|
||||||
// PeerWithKey returns the Peer with key k and reports whether it was found.
|
// PeerWithKey returns the Peer with key k and reports whether it was found.
|
||||||
|
@ -62,7 +62,7 @@ func ReconfigDevice(d *device.Device, cfg *Config, logf logger.Logf) (err error)
|
|||||||
r.Close()
|
r.Close()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
toErr := cfg.ToUAPI(w, prev)
|
toErr := cfg.ToUAPI(logf, w, prev)
|
||||||
w.Close()
|
w.Close()
|
||||||
setErr := <-errc
|
setErr := <-errc
|
||||||
return multierr.New(setErr, toErr)
|
return multierr.New(setErr, toErr)
|
||||||
|
@ -68,14 +68,14 @@ func TestDeviceConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
prev := new(Config)
|
prev := new(Config)
|
||||||
gotbuf := new(strings.Builder)
|
gotbuf := new(strings.Builder)
|
||||||
err = got.ToUAPI(gotbuf, prev)
|
err = got.ToUAPI(t.Logf, gotbuf, prev)
|
||||||
gotStr := gotbuf.String()
|
gotStr := gotbuf.String()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("got.ToUAPI(): error: %v", err)
|
t.Errorf("got.ToUAPI(): error: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
wantbuf := new(strings.Builder)
|
wantbuf := new(strings.Builder)
|
||||||
err = want.ToUAPI(wantbuf, prev)
|
err = want.ToUAPI(t.Logf, wantbuf, prev)
|
||||||
wantStr := wantbuf.String()
|
wantStr := wantbuf.String()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("want.ToUAPI(): error: %v", err)
|
t.Errorf("want.ToUAPI(): error: %v", err)
|
||||||
|
@ -151,9 +151,11 @@ func (cfg *Config) handlePeerLine(peer *Peer, k, value mem.RO, valueBytes []byte
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("invalid endpoint %q for peer %q, expected a hex public key", value.StringCopy(), peer.PublicKey.ShortString())
|
return fmt.Errorf("invalid endpoint %q for peer %q, expected a hex public key", value.StringCopy(), peer.PublicKey.ShortString())
|
||||||
}
|
}
|
||||||
if nk != peer.PublicKey {
|
// nk ought to equal peer.PublicKey.
|
||||||
return fmt.Errorf("unexpected endpoint %q for peer %q, expected the peer's public key", value.StringCopy(), peer.PublicKey.ShortString())
|
// Under some rare circumstances, it might not. See corp issue #3016.
|
||||||
}
|
// Even if that happens, don't stop early, so that we can recover from it.
|
||||||
|
// Instead, note the value of nk so we can fix as needed.
|
||||||
|
peer.WGEndpoint = nk
|
||||||
case k.EqualString("persistent_keepalive_interval"):
|
case k.EqualString("persistent_keepalive_interval"):
|
||||||
n, err := mem.ParseUint(value, 10, 16)
|
n, err := mem.ParseUint(value, 10, 16)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -80,7 +80,7 @@ func BenchmarkFromUAPI(b *testing.B) {
|
|||||||
|
|
||||||
buf := new(bytes.Buffer)
|
buf := new(bytes.Buffer)
|
||||||
w := bufio.NewWriter(buf)
|
w := bufio.NewWriter(buf)
|
||||||
if err := cfg1.ToUAPI(w, &Config{}); err != nil {
|
if err := cfg1.ToUAPI(b.Logf, w, &Config{}); err != nil {
|
||||||
b.Fatal(err)
|
b.Fatal(err)
|
||||||
}
|
}
|
||||||
w.Flush()
|
w.Flush()
|
||||||
|
@ -11,13 +11,14 @@
|
|||||||
|
|
||||||
"inet.af/netaddr"
|
"inet.af/netaddr"
|
||||||
"tailscale.com/types/key"
|
"tailscale.com/types/key"
|
||||||
|
"tailscale.com/types/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ToUAPI writes cfg in UAPI format to w.
|
// ToUAPI writes cfg in UAPI format to w.
|
||||||
// Prev is the previous device Config.
|
// Prev is the previous device Config.
|
||||||
// Prev is required so that we can remove now-defunct peers
|
// Prev is required so that we can remove now-defunct peers
|
||||||
// without having to remove and re-add all peers.
|
// without having to remove and re-add all peers.
|
||||||
func (cfg *Config) ToUAPI(w io.Writer, prev *Config) error {
|
func (cfg *Config) ToUAPI(logf logger.Logf, w io.Writer, prev *Config) error {
|
||||||
var stickyErr error
|
var stickyErr error
|
||||||
set := func(key, value string) {
|
set := func(key, value string) {
|
||||||
if stickyErr != nil {
|
if stickyErr != nil {
|
||||||
@ -54,7 +55,16 @@ func (cfg *Config) ToUAPI(w io.Writer, prev *Config) error {
|
|||||||
// Avoid setting endpoints if the correct one is already known
|
// Avoid setting endpoints if the correct one is already known
|
||||||
// to WireGuard, because doing so generates a bit more work in
|
// to WireGuard, because doing so generates a bit more work in
|
||||||
// calling magicsock's ParseEndpoint for effectively a no-op.
|
// calling magicsock's ParseEndpoint for effectively a no-op.
|
||||||
if !wasPresent {
|
if oldPeer.WGEndpoint != p.PublicKey {
|
||||||
|
if wasPresent {
|
||||||
|
// We had an endpoint, and it was wrong.
|
||||||
|
// By construction, this should not happen.
|
||||||
|
// If it does, keep going so that we can recover from it,
|
||||||
|
// but log so that we know about it,
|
||||||
|
// because it is an indicator of other failed invariants.
|
||||||
|
// See corp issue 3016.
|
||||||
|
logf("[unexpected] endpoint changed from %s to %s", oldPeer.WGEndpoint, p.PublicKey)
|
||||||
|
}
|
||||||
set("endpoint", p.PublicKey.UntypedHexString())
|
set("endpoint", p.PublicKey.UntypedHexString())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user