tailscale/tsnet/tsnet.go
Claire Wang a45c9f982a wgengine/netstack: change netstack API to require LocalBackend
The macOS client was forgetting to call netstack.Impl.SetLocalBackend.
Change the API so that it can't be started without one, eliminating this
class of bug. Then update all the callers.

Updates #6764

Change-Id: I2b3a4f31fdfd9fdbbbbfe25a42db0c505373562f
Signed-off-by: Claire Wang <claire@tailscale.com>
Co-authored-by: Brad Fitzpatrick <bradfitz@tailscale.com>
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
2022-12-23 14:01:26 -08:00

596 lines
15 KiB
Go

// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tsnet provides Tailscale as a library.
//
// It is an experimental work in progress.
package tsnet
import (
"context"
"errors"
"fmt"
"io"
"log"
"math"
"net"
"net/http"
"net/netip"
"os"
"path/filepath"
"strings"
"sync"
"time"
"tailscale.com/client/tailscale"
"tailscale.com/control/controlclient"
"tailscale.com/envknob"
"tailscale.com/hostinfo"
"tailscale.com/ipn"
"tailscale.com/ipn/ipnlocal"
"tailscale.com/ipn/localapi"
"tailscale.com/ipn/store"
"tailscale.com/ipn/store/mem"
"tailscale.com/logpolicy"
"tailscale.com/logtail"
"tailscale.com/logtail/filch"
"tailscale.com/net/nettest"
"tailscale.com/net/tsdial"
"tailscale.com/smallzstd"
"tailscale.com/types/logger"
"tailscale.com/util/mak"
"tailscale.com/wgengine"
"tailscale.com/wgengine/monitor"
"tailscale.com/wgengine/netstack"
)
// Server is an embedded Tailscale server.
//
// Its exported fields may be changed until the first call to Listen.
type Server struct {
// Dir specifies the name of the directory to use for
// state. If empty, a directory is selected automatically
// under os.UserConfigDir (https://golang.org/pkg/os/#UserConfigDir).
// based on the name of the binary.
Dir string
// Store specifies the state store to use.
//
// If nil, a new FileStore is initialized at `Dir/tailscaled.state`.
// See tailscale.com/ipn/store for supported stores.
//
// Logs will automatically be uploaded to uploaded to log.tailscale.io,
// where the configuration file for logging will be saved at
// `Dir/tailscaled.log.conf`.
Store ipn.StateStore
// Hostname is the hostname to present to the control server.
// If empty, the binary name is used.
Hostname string
// Logf, if non-nil, specifies the logger to use. By default,
// log.Printf is used.
Logf logger.Logf
// Ephemeral, if true, specifies that the instance should register
// as an Ephemeral node (https://tailscale.com/kb/1111/ephemeral-nodes/).
Ephemeral bool
// AuthKey, if non-empty, is the auth key to create the node
// and will be preferred over the TS_AUTHKEY environment
// variable. If the node is already created (from state
// previously stored in in Store), then this field is not
// used.
AuthKey string
// ControlURL optionally specifies the coordination server URL.
// If empty, the Tailscale default is used.
ControlURL string
initOnce sync.Once
initErr error
lb *ipnlocal.LocalBackend
netstack *netstack.Impl
linkMon *monitor.Mon
localAPIListener net.Listener
rootPath string // the state directory
hostname string
shutdownCtx context.Context
shutdownCancel context.CancelFunc
localClient *tailscale.LocalClient
logbuffer *filch.Filch
logtail *logtail.Logger
mu sync.Mutex
listeners map[listenKey]*listener
dialer *tsdial.Dialer
}
// Dial connects to the address on the tailnet.
// It will start the server if it has not been started yet.
func (s *Server) Dial(ctx context.Context, network, address string) (net.Conn, error) {
if err := s.Start(); err != nil {
return nil, err
}
return s.dialer.UserDial(ctx, network, address)
}
// HTTPClient returns an HTTP client that is configured to connect over Tailscale.
//
// This is useful if you need to have your tsnet services connect to other devices on
// your tailnet.
func (s *Server) HTTPClient() *http.Client {
return &http.Client{
Transport: &http.Transport{
DialContext: s.Dial,
},
}
}
// LocalClient returns a LocalClient that speaks to s.
//
// It will start the server if it has not been started yet. If the server's
// already been started successfully, it doesn't return an error.
func (s *Server) LocalClient() (*tailscale.LocalClient, error) {
if err := s.Start(); err != nil {
return nil, err
}
return s.localClient, nil
}
// Start connects the server to the tailnet.
// Optional: any calls to Dial/Listen will also call Start.
func (s *Server) Start() error {
hostinfo.SetPackage("tsnet")
s.initOnce.Do(s.doInit)
return s.initErr
}
// Close stops the server.
//
// It must not be called before or concurrently with Start.
func (s *Server) Close() error {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
// Perform a best-effort final flush.
s.logtail.Shutdown(ctx)
s.logbuffer.Close()
}()
if _, isMemStore := s.Store.(*mem.Store); isMemStore && s.Ephemeral {
wg.Add(1)
go func() {
defer wg.Done()
// Perform a best-effort logout.
s.lb.LogoutSync(ctx)
}()
}
if s.netstack != nil {
s.netstack.Close()
s.netstack = nil
}
s.shutdownCancel()
s.lb.Shutdown()
s.linkMon.Close()
s.dialer.Close()
s.localAPIListener.Close()
s.mu.Lock()
defer s.mu.Unlock()
for _, ln := range s.listeners {
ln.Close()
}
s.listeners = nil
wg.Wait()
return nil
}
func (s *Server) doInit() {
s.shutdownCtx, s.shutdownCancel = context.WithCancel(context.Background())
if err := s.start(); err != nil {
s.initErr = fmt.Errorf("tsnet: %w", err)
}
}
func (s *Server) getAuthKey() string {
if v := s.AuthKey; v != "" {
return v
}
return os.Getenv("TS_AUTHKEY")
}
func (s *Server) start() (reterr error) {
var closePool closeOnErrorPool
defer closePool.closeAllIfError(&reterr)
exe, err := os.Executable()
if err != nil {
return err
}
prog := strings.TrimSuffix(strings.ToLower(filepath.Base(exe)), ".exe")
s.hostname = s.Hostname
if s.hostname == "" {
s.hostname = prog
}
s.rootPath = s.Dir
if s.Store != nil {
_, isMemStore := s.Store.(*mem.Store)
if isMemStore && !s.Ephemeral {
return fmt.Errorf("in-memory store is only supported for Ephemeral nodes")
}
}
logf := s.logf
if s.rootPath == "" {
confDir, err := os.UserConfigDir()
if err != nil {
return err
}
s.rootPath, err = getTSNetDir(logf, confDir, prog)
if err != nil {
return err
}
if err := os.MkdirAll(s.rootPath, 0700); err != nil {
return err
}
}
if fi, err := os.Stat(s.rootPath); err != nil {
return err
} else if !fi.IsDir() {
return fmt.Errorf("%v is not a directory", s.rootPath)
}
cfgPath := filepath.Join(s.rootPath, "tailscaled.log.conf")
lpc, err := logpolicy.ConfigFromFile(cfgPath)
switch {
case os.IsNotExist(err):
lpc = logpolicy.NewConfig(logtail.CollectionNode)
if err := lpc.Save(cfgPath); err != nil {
return fmt.Errorf("logpolicy.Config.Save for %v: %w", cfgPath, err)
}
case err != nil:
return fmt.Errorf("logpolicy.LoadConfig for %v: %w", cfgPath, err)
}
if err := lpc.Validate(logtail.CollectionNode); err != nil {
return fmt.Errorf("logpolicy.Config.Validate for %v: %w", cfgPath, err)
}
logid := lpc.PublicID.String()
s.logbuffer, err = filch.New(filepath.Join(s.rootPath, "tailscaled"), filch.Options{ReplaceStderr: false})
if err != nil {
return fmt.Errorf("error creating filch: %w", err)
}
closePool.add(s.logbuffer)
c := logtail.Config{
Collection: lpc.Collection,
PrivateID: lpc.PrivateID,
Stderr: io.Discard, // log everything to Buffer
Buffer: s.logbuffer,
NewZstdEncoder: func() logtail.Encoder {
w, err := smallzstd.NewEncoder(nil)
if err != nil {
panic(err)
}
return w
},
HTTPC: &http.Client{Transport: logpolicy.NewLogtailTransport(logtail.DefaultHost)},
}
s.logtail = logtail.NewLogger(c, logf)
closePool.addFunc(func() { s.logtail.Shutdown(context.Background()) })
s.linkMon, err = monitor.New(logf)
if err != nil {
return err
}
closePool.add(s.linkMon)
s.dialer = &tsdial.Dialer{Logf: logf} // mutated below (before used)
eng, err := wgengine.NewUserspaceEngine(logf, wgengine.Config{
ListenPort: 0,
LinkMonitor: s.linkMon,
Dialer: s.dialer,
})
if err != nil {
return err
}
closePool.add(s.dialer)
tunDev, magicConn, dns, ok := eng.(wgengine.InternalsGetter).GetInternals()
if !ok {
return fmt.Errorf("%T is not a wgengine.InternalsGetter", eng)
}
ns, err := netstack.Create(logf, tunDev, eng, magicConn, s.dialer, dns)
if err != nil {
return fmt.Errorf("netstack.Create: %w", err)
}
ns.ProcessLocalIPs = true
ns.ForwardTCPIn = s.forwardTCP
s.netstack = ns
s.dialer.UseNetstackForIP = func(ip netip.Addr) bool {
_, ok := eng.PeerForIP(ip)
return ok
}
s.dialer.NetstackDialTCP = func(ctx context.Context, dst netip.AddrPort) (net.Conn, error) {
return ns.DialContextTCP(ctx, dst)
}
if s.Store == nil {
stateFile := filepath.Join(s.rootPath, "tailscaled.state")
logf("tsnet running state path %s", stateFile)
s.Store, err = store.New(logf, stateFile)
if err != nil {
return err
}
}
loginFlags := controlclient.LoginDefault
if s.Ephemeral {
loginFlags = controlclient.LoginEphemeral
}
lb, err := ipnlocal.NewLocalBackend(logf, logid, s.Store, "", s.dialer, eng, loginFlags)
if err != nil {
return fmt.Errorf("NewLocalBackend: %v", err)
}
lb.SetVarRoot(s.rootPath)
logf("tsnet starting with hostname %q, varRoot %q", s.hostname, s.rootPath)
s.lb = lb
if err := ns.Start(lb); err != nil {
return fmt.Errorf("failed to start netstack: %w", err)
}
closePool.addFunc(func() { s.lb.Shutdown() })
lb.SetDecompressor(func() (controlclient.Decompressor, error) {
return smallzstd.NewDecoder(nil)
})
prefs := ipn.NewPrefs()
prefs.Hostname = s.hostname
prefs.WantRunning = true
prefs.ControlURL = s.ControlURL
authKey := s.getAuthKey()
err = lb.Start(ipn.Options{
UpdatePrefs: prefs,
AuthKey: authKey,
})
if err != nil {
return fmt.Errorf("starting backend: %w", err)
}
st := lb.State()
if st == ipn.NeedsLogin || envknob.Bool("TSNET_FORCE_LOGIN") {
logf("LocalBackend state is %v; running StartLoginInteractive...", st)
s.lb.StartLoginInteractive()
} else if authKey != "" {
logf("Authkey is set; but state is %v. Ignoring authkey. Re-run with TSNET_FORCE_LOGIN=1 to force use of authkey.", st)
}
go s.printAuthURLLoop()
// Run the localapi handler, to allow fetching LetsEncrypt certs.
lah := localapi.NewHandler(lb, logf, logid)
lah.PermitWrite = true
lah.PermitRead = true
// Create an in-process listener.
// nettest.Listen provides a in-memory pipe based implementation for net.Conn.
// TODO(maisem): Rename nettest package to remove "test".
lal := nettest.Listen("local-tailscaled.sock:80")
s.localAPIListener = lal
s.localClient = &tailscale.LocalClient{Dial: lal.Dial}
go func() {
if err := http.Serve(lal, lah); err != nil {
logf("localapi serve error: %v", err)
}
}()
closePool.add(s.localAPIListener)
return nil
}
type closeOnErrorPool []func()
func (p *closeOnErrorPool) add(c io.Closer) { *p = append(*p, func() { c.Close() }) }
func (p *closeOnErrorPool) addFunc(fn func()) { *p = append(*p, fn) }
func (p closeOnErrorPool) closeAllIfError(errp *error) {
if *errp != nil {
for _, closeFn := range p {
closeFn()
}
}
}
func (s *Server) logf(format string, a ...interface{}) {
if s.logtail != nil {
s.logtail.Logf(format, a...)
}
if s.Logf != nil {
s.Logf(format, a...)
return
}
log.Printf(format, a...)
}
// printAuthURLLoop loops once every few seconds while the server is still running and
// is in NeedsLogin state, printing out the auth URL.
func (s *Server) printAuthURLLoop() {
for {
if s.shutdownCtx.Err() != nil {
return
}
if st := s.lb.State(); st != ipn.NeedsLogin {
s.logf("printAuthURLLoop: state is %v; stopping", st)
return
}
st := s.lb.StatusWithoutPeers()
if st.AuthURL != "" {
s.logf("To start this tsnet server, restart with TS_AUTHKEY set, or go to: %s", st.AuthURL)
}
select {
case <-time.After(5 * time.Second):
case <-s.shutdownCtx.Done():
return
}
}
}
func (s *Server) forwardTCP(c net.Conn, port uint16) {
s.mu.Lock()
ln, ok := s.listeners[listenKey{"tcp", "", port}]
s.mu.Unlock()
if !ok {
c.Close()
return
}
t := time.NewTimer(time.Second)
defer t.Stop()
select {
case ln.conn <- c:
case <-t.C:
c.Close()
}
}
// getTSNetDir usually just returns filepath.Join(confDir, "tsnet-"+prog)
// with no error.
//
// One special case is that it renames old "tslib-" directories to
// "tsnet-", and that rename might return an error.
//
// TODO(bradfitz): remove this maybe 6 months after 2022-03-17,
// once people (notably Tailscale corp services) have updated.
func getTSNetDir(logf logger.Logf, confDir, prog string) (string, error) {
oldPath := filepath.Join(confDir, "tslib-"+prog)
newPath := filepath.Join(confDir, "tsnet-"+prog)
fi, err := os.Lstat(oldPath)
if os.IsNotExist(err) {
// Common path.
return newPath, nil
}
if err != nil {
return "", err
}
if !fi.IsDir() {
return "", fmt.Errorf("expected old tslib path %q to be a directory; got %v", oldPath, fi.Mode())
}
// At this point, oldPath exists and is a directory. But does
// the new path exist?
fi, err = os.Lstat(newPath)
if err == nil && fi.IsDir() {
// New path already exists somehow. Ignore the old one and
// don't try to migrate it.
return newPath, nil
}
if err != nil && !os.IsNotExist(err) {
return "", err
}
if err := os.Rename(oldPath, newPath); err != nil {
return "", err
}
logf("renamed old tsnet state storage directory %q to %q", oldPath, newPath)
return newPath, nil
}
// APIClient returns a tailscale.Client that can be used to make authenticated
// requests to the Tailscale control server.
// It requires the user to set tailscale.I_Acknowledge_This_API_Is_Unstable.
func (s *Server) APIClient() (*tailscale.Client, error) {
if !tailscale.I_Acknowledge_This_API_Is_Unstable {
return nil, errors.New("use of Client without setting I_Acknowledge_This_API_Is_Unstable")
}
if err := s.Start(); err != nil {
return nil, err
}
c := tailscale.NewClient("-", nil)
c.HTTPClient = &http.Client{Transport: s.lb.KeyProvingNoiseRoundTripper()}
return c, nil
}
// Listen announces only on the Tailscale network.
// It will start the server if it has not been started yet.
func (s *Server) Listen(network, addr string) (net.Listener, error) {
switch network {
case "", "tcp", "tcp4", "tcp6":
default:
return nil, errors.New("unsupported network type")
}
host, portStr, err := net.SplitHostPort(addr)
if err != nil {
return nil, fmt.Errorf("tsnet: %w", err)
}
port, err := net.LookupPort(network, portStr)
if err != nil || port < 0 || port > math.MaxUint16 {
return nil, fmt.Errorf("invalid port: %w", err)
}
if err := s.Start(); err != nil {
return nil, err
}
key := listenKey{network, host, uint16(port)}
ln := &listener{
s: s,
key: key,
addr: addr,
conn: make(chan net.Conn),
}
s.mu.Lock()
if _, ok := s.listeners[key]; ok {
s.mu.Unlock()
return nil, fmt.Errorf("tsnet: listener already open for %s, %s", network, addr)
}
mak.Set(&s.listeners, key, ln)
s.mu.Unlock()
return ln, nil
}
type listenKey struct {
network string
host string
port uint16
}
type listener struct {
s *Server
key listenKey
addr string
conn chan net.Conn
}
func (ln *listener) Accept() (net.Conn, error) {
c, ok := <-ln.conn
if !ok {
return nil, fmt.Errorf("tsnet: %w", net.ErrClosed)
}
return c, nil
}
func (ln *listener) Addr() net.Addr { return addr{ln} }
func (ln *listener) Close() error {
ln.s.mu.Lock()
defer ln.s.mu.Unlock()
if v, ok := ln.s.listeners[ln.key]; ok && v == ln {
delete(ln.s.listeners, ln.key)
close(ln.conn)
}
return nil
}
// Server returns the tsnet Server associated with the listener.
func (ln *listener) Server() *Server { return ln.s }
type addr struct{ ln *listener }
func (a addr) Network() string { return a.ln.key.network }
func (a addr) String() string { return a.ln.addr }