control/keyfallback: add baked-in fallback for control key

Similar to how we bake in the DERPMap to ensure that we can reach the
DERP servers if DNS isn't working, also bake in the control key for the
default control server, for use when the control server is down.

Updates #13890

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: I18ef0381e266bd3db10063685993bc3cb76b2f42
This commit is contained in:
Andrew Dunham 2024-10-23 00:24:39 -05:00
parent b2665d9b89
commit fc4048014e
7 changed files with 196 additions and 6 deletions

View File

@ -659,6 +659,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/control/controlclient from tailscale.com/ipn/ipnlocal+
tailscale.com/control/controlhttp from tailscale.com/control/controlclient
tailscale.com/control/controlknobs from tailscale.com/control/controlclient+
tailscale.com/control/keyfallback from tailscale.com/control/controlclient
tailscale.com/derp from tailscale.com/derp/derphttp+
tailscale.com/derp/derphttp from tailscale.com/ipn/localapi+
tailscale.com/disco from tailscale.com/derp+

View File

@ -250,6 +250,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/control/controlclient from tailscale.com/cmd/tailscaled+
tailscale.com/control/controlhttp from tailscale.com/control/controlclient
tailscale.com/control/controlknobs from tailscale.com/control/controlclient+
tailscale.com/control/keyfallback from tailscale.com/control/controlclient
tailscale.com/derp from tailscale.com/derp/derphttp+
tailscale.com/derp/derphttp from tailscale.com/cmd/tailscaled+
tailscale.com/disco from tailscale.com/derp+

View File

@ -29,9 +29,11 @@
"go4.org/mem"
"tailscale.com/control/controlknobs"
"tailscale.com/control/keyfallback"
"tailscale.com/envknob"
"tailscale.com/health"
"tailscale.com/hostinfo"
"tailscale.com/ipn"
"tailscale.com/ipn/ipnstate"
"tailscale.com/logtail"
"tailscale.com/net/dnscache"
@ -87,9 +89,10 @@ type Direct struct {
dialPlan ControlDialPlanner // can be nil
mu sync.Mutex // mutex guards the following fields
serverLegacyKey key.MachinePublic // original ("legacy") nacl crypto_box-based public key; only used for signRegisterRequest on Windows now
serverNoiseKey key.MachinePublic
mu sync.Mutex // mutex guards the following fields
serverLegacyKey key.MachinePublic // original ("legacy") nacl crypto_box-based public key; only used for signRegisterRequest on Windows now
serverNoiseKey key.MachinePublic
usedFallbackNoiseKey bool // true if we used the baked-in fallback key
sfGroup singleflight.Group[struct{}, *NoiseClient] // protects noiseClient creation.
noiseClient *NoiseClient
@ -498,6 +501,7 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
tryingNewKey := c.tryingNewKey
serverKey := c.serverLegacyKey
serverNoiseKey := c.serverNoiseKey
usedFallback := c.usedFallbackNoiseKey
authKey, isWrapped, wrappedSig, wrappedKey := tka.DecodeWrappedAuthkey(c.authKey, c.logf)
hi := c.hostInfoLocked()
backendLogID := hi.BackendLogID
@ -528,7 +532,7 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
}
c.logf("doLogin(regen=%v, hasUrl=%v)", regen, opt.URL != "")
if serverKey.IsZero() {
if serverKey.IsZero() || usedFallback {
keys, err := loadServerPubKeys(ctx, c.httpc, c.serverURL)
if err != nil && c.interceptedDial != nil && c.interceptedDial.Load() {
c.health.SetUnhealthy(macOSScreenTime, nil)
@ -536,13 +540,21 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
c.health.SetHealthy(macOSScreenTime)
}
if err != nil {
return regen, opt.URL, nil, err
if k2, err := c.getFallbackServerPubKeys(); err == nil {
keys = k2
usedFallback = true
} else {
return regen, opt.URL, nil, err
}
} else {
usedFallback = false
c.logf("control server key from %s: ts2021=%s", c.serverURL, keys.PublicKey.ShortString())
}
c.logf("control server key from %s: ts2021=%s, legacy=%v", c.serverURL, keys.PublicKey.ShortString(), keys.LegacyPublicKey.ShortString())
c.mu.Lock()
c.serverLegacyKey = keys.LegacyPublicKey
c.serverNoiseKey = keys.PublicKey
c.usedFallbackNoiseKey = usedFallback
c.mu.Unlock()
serverKey = keys.LegacyPublicKey
serverNoiseKey = keys.PublicKey
@ -751,6 +763,22 @@ func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, new
return false, resp.AuthURL, nil, nil
}
// getFallbackServerPubKeys returns the control server public keys that were
// baked into the binary (see package keyfallback), for use when the keys
// could not be fetched from the control server.
//
// The baked-in keys are only valid for the default control server, so an
// error is returned if c.serverURL does not refer to the default control
// server, or if the embedded key data cannot be decoded.
func (c *Direct) getFallbackServerPubKeys() (*tailcfg.OverTLSPublicKeyResponse, error) {
	// Only use the fallback key if we're dialing the default control
	// server; it is meaningless for any other server.
	if !ipn.IsLoginServerSynonym(c.serverURL) {
		return nil, errors.New("not using default control server")
	}

	kf, err := keyfallback.Get()
	if err != nil {
		return nil, err
	}
	c.logf("using fallback server key: ts2021=%s", kf.PublicKey.ShortString())
	return kf, nil
}
// newEndpoints acquires c.mu and sets the local port and endpoints and reports
// whether they've changed.
//

View File

@ -0,0 +1,4 @@
{
"legacyPublicKey": "mkey:9e5156a4c65121306dd2d8ed8f92cb8d738e2533011344b522c5d28409bc4970",
"publicKey": "mkey:7d2792f9c98d753d2042471536801949104c247f95eac770f8fb321595e2173b"
}

View File

@ -0,0 +1,32 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// Package keyfallback contains a fallback mechanism for starting up Tailscale
// when the control server cannot be reached to obtain the primary Noise key.
//
// The data is backed by a JSON file `control-key.json` that is updated by
// `update.go`:
//
// (cd control/keyfallback; go run update.go)
package keyfallback
import (
_ "embed"
"encoding/json"
"tailscale.com/tailcfg"
)
// Get decodes and returns the fallback control server public keys that were
// embedded into the binary at compile time. The keys are only valid for the
// main Tailscale control server instance.
func Get() (*tailcfg.OverTLSPublicKeyResponse, error) {
	var keys tailcfg.OverTLSPublicKeyResponse
	err := json.Unmarshal(controlKeyJSON, &keys)
	if err != nil {
		return nil, err
	}
	return &keys, nil
}
// controlKeyJSON is the raw content of control-key.json, embedded into the
// binary at build time and decoded by Get.
//
//go:embed control-key.json
var controlKeyJSON []byte

View File

@ -0,0 +1,77 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package keyfallback
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"reflect"
"testing"
"time"
"tailscale.com/ipn"
"tailscale.com/tailcfg"
"tailscale.com/tstest/nettest"
"tailscale.com/util/must"
)
// TestHasValidControlKey verifies that the baked-in control key data can be
// decoded and that its PublicKey is not the zero value.
func TestHasValidControlKey(t *testing.T) {
	t.Parallel()

	got, err := Get()
	switch {
	case err != nil:
		t.Fatalf("Get: %v", err)
	case got.PublicKey.IsZero():
		t.Fatalf("zero key")
	}
}
// TestKeyIsUpToDate fetches the control key from the control server and
// compares it to the baked-in key, to verify that it's up-to-date. If the
// control server is unreachable, the test is skipped.
func TestKeyIsUpToDate(t *testing.T) {
	nettest.SkipIfNoNetwork(t)

	// Optimistically fetch the control key and check if it's up to date,
	// but skip if we don't have network access (e.g. running tests on an
	// airplane).
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	keyURL := fmt.Sprintf("%v/key?v=%d", ipn.DefaultControlURL, tailcfg.CurrentCapabilityVersion)
	req := must.Get(http.NewRequestWithContext(ctx, "GET", keyURL, nil))
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// Report the test as skipped rather than passed, so that a run
		// which never compared the keys is distinguishable from one that
		// did.
		t.Skipf("fetch control key: %v", err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		t.Fatalf("fetch control key: bad status; got %v, want 200", res.Status)
	}
	// Cap the amount read; a valid key response is far smaller than this.
	b, err := io.ReadAll(io.LimitReader(res.Body, 64<<10))
	if err != nil {
		t.Fatalf("read control key: %v", err)
	}

	// Verify that the key is up to date and matches the baked-in key.
	out := &tailcfg.OverTLSPublicKeyResponse{}
	if err := json.Unmarshal(b, out); err != nil {
		t.Fatalf("unmarshal control key: %v", err)
	}

	keys, err := Get()
	if err != nil {
		t.Fatalf("Get: %v", err)
	}
	if !reflect.DeepEqual(keys, out) {
		t.Errorf("control key is out of date")
		t.Logf("old key: %v", keys)
		t.Logf("new key: %v", out)
	}
}

View File

@ -0,0 +1,47 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build ignore
package main
import (
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"time"

	"tailscale.com/ipn"
	"tailscale.com/tailcfg"
)
// main fetches the current public keys from the default control server,
// checks that the response decodes to a non-zero public key, and writes the
// raw response body to control-key.json in the current directory.
func main() {
	keyURL := fmt.Sprintf("%v/key?v=%d", ipn.DefaultControlURL, tailcfg.CurrentCapabilityVersion)

	// http.Get uses http.DefaultClient, which has no timeout; use a bounded
	// client so an unresponsive server cannot hang the update forever.
	client := &http.Client{Timeout: 30 * time.Second}
	res, err := client.Get(keyURL)
	if err != nil {
		log.Fatalf("fetch control key: %v", err)
	}
	defer res.Body.Close()

	// Check the status before reading the body so that an error response is
	// reported as such rather than as a read or unmarshal failure.
	if res.StatusCode != http.StatusOK {
		log.Fatalf("fetch control key: bad status; got %v, want 200", res.Status)
	}
	b, err := io.ReadAll(io.LimitReader(res.Body, 64<<10))
	if err != nil {
		log.Fatalf("read control key: %v", err)
	}

	// Unmarshal to make sure it's valid.
	var out tailcfg.OverTLSPublicKeyResponse
	if err := json.Unmarshal(b, &out); err != nil {
		log.Fatalf("unmarshal control key: %v", err)
	}
	if out.PublicKey.IsZero() {
		log.Fatalf("control key is zero")
	}

	// Write the response exactly as received, not a re-encoded copy.
	if err := os.WriteFile("control-key.json", b, 0644); err != nil {
		log.Fatalf("write control key: %v", err)
	}
}