diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt index 9f0bffa89..b0a84d134 100644 --- a/cmd/derper/depaware.txt +++ b/cmd/derper/depaware.txt @@ -138,6 +138,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa tailscale.com/types/structs from tailscale.com/ipn+ tailscale.com/types/tkatype from tailscale.com/client/tailscale+ tailscale.com/types/views from tailscale.com/ipn+ + tailscale.com/util/cibuild from tailscale.com/health tailscale.com/util/clientmetric from tailscale.com/net/netmon+ tailscale.com/util/cloudenv from tailscale.com/hostinfo+ W tailscale.com/util/cmpver from tailscale.com/net/tshttpproxy diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index 328a49980..268dd1b3e 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -142,6 +142,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/types/structs from tailscale.com/ipn+ tailscale.com/types/tkatype from tailscale.com/types/key+ tailscale.com/types/views from tailscale.com/tailcfg+ + tailscale.com/util/cibuild from tailscale.com/health tailscale.com/util/clientmetric from tailscale.com/net/netcheck+ tailscale.com/util/cloudenv from tailscale.com/net/dnscache+ tailscale.com/util/cmpver from tailscale.com/net/tshttpproxy+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 06ad2f3f7..9eb570924 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -358,6 +358,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/types/structs from tailscale.com/control/controlclient+ tailscale.com/types/tkatype from tailscale.com/tka+ tailscale.com/types/views from tailscale.com/ipn/ipnlocal+ + tailscale.com/util/cibuild from tailscale.com/health tailscale.com/util/clientmetric from tailscale.com/control/controlclient+ tailscale.com/util/cloudenv from tailscale.com/net/dns/resolver+ tailscale.com/util/cmpver from tailscale.com/net/dns+ diff --git a/cmd/tailscaled/tailscaled.go b/cmd/tailscaled/tailscaled.go index a32a84e6f..9a915a8b4 100644 --- a/cmd/tailscaled/tailscaled.go +++ b/cmd/tailscaled/tailscaled.go @@ -651,6 +651,7 @@ func tryEngine(logf logger.Logf, sys *tsd.System, name string) (onlyNetstack boo conf := wgengine.Config{ ListenPort: args.port, NetMon: sys.NetMon.Get(), + HealthTracker: sys.HealthTracker(), Dialer: sys.Dialer.Get(), SetSubsystem: sys.Set, ControlKnobs: sys.ControlKnobs(), diff --git a/health/health.go b/health/health.go index 61455a33e..7ef9abe36 100644 --- a/health/health.go +++ b/health/health.go @@ -9,6 +9,7 @@ "errors" "fmt" "net/http" + "os" "runtime" "sort" "sync" @@ -18,6 +19,7 @@ "tailscale.com/envknob" "tailscale.com/tailcfg" "tailscale.com/types/opt" + "tailscale.com/util/cibuild" "tailscale.com/util/mak" "tailscale.com/util/multierr" "tailscale.com/util/set" @@ -152,6 +154,11 @@ func (t *Tracker) nil() bool { if t != nil { return false } + if cibuild.On() { + stack := make([]byte, 1<<10) + stack = stack[:runtime.Stack(stack, false)] + fmt.Fprintf(os.Stderr, "## WARNING: (non-fatal) nil health.Tracker (being strict in CI):\n%s\n", stack) + } // TODO(bradfitz): open source our "unexpected" package // and use it here to capture samples of stacks where // t is nil. diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 954d90618..c4c86c210 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -327,6 +327,16 @@ type LocalBackend struct { outgoingFiles map[string]*ipn.OutgoingFile } +// HealthTracker returns the health tracker for the backend. +func (b *LocalBackend) HealthTracker() *health.Tracker { + return b.health +} + +// NetMon returns the network monitor for the backend. +func (b *LocalBackend) NetMon() *netmon.Monitor { + return b.sys.NetMon.Get() +} + type updateStatus struct { started bool } diff --git a/ipn/ipnlocal/network-lock.go b/ipn/ipnlocal/network-lock.go index 17d9eafd0..015177c5b 100644 --- a/ipn/ipnlocal/network-lock.go +++ b/ipn/ipnlocal/network-lock.go @@ -20,7 +20,6 @@ "path/filepath" "time" - "tailscale.com/health" "tailscale.com/health/healthmsg" "tailscale.com/ipn" "tailscale.com/ipn/ipnstate" @@ -59,11 +58,11 @@ type tkaState struct { // b.mu must be held. func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) { if b.tka == nil && !b.capTailnetLock { - health.Global.SetTKAHealth(nil) + b.health.SetTKAHealth(nil) return } if b.tka == nil { - health.Global.SetTKAHealth(nil) + b.health.SetTKAHealth(nil) return // TKA not enabled. } @@ -117,9 +116,9 @@ func (b *LocalBackend) tkaFilterNetmapLocked(nm *netmap.NetworkMap) { // Check that we ourselves are not locked out, report a health issue if so. if nm.SelfNode.Valid() && b.tka.authority.NodeKeyAuthorized(nm.SelfNode.Key(), nm.SelfNode.KeySignature().AsSlice()) != nil { - health.Global.SetTKAHealth(errors.New(healthmsg.LockedOut)) + b.health.SetTKAHealth(errors.New(healthmsg.LockedOut)) } else { - health.Global.SetTKAHealth(nil) + b.health.SetTKAHealth(nil) } } @@ -188,7 +187,7 @@ func (b *LocalBackend) tkaSyncIfNeeded(nm *netmap.NetworkMap, prefs ipn.PrefsVie b.logf("Disablement failed, leaving TKA enabled. Error: %v", err) } else { isEnabled = false - health.Global.SetTKAHealth(nil) + b.health.SetTKAHealth(nil) } } else { return fmt.Errorf("[bug] unreachable invariant of wantEnabled w/ isEnabled") diff --git a/ipn/ipnserver/server.go b/ipn/ipnserver/server.go index 787880f9e..817d4cabf 100644 --- a/ipn/ipnserver/server.go +++ b/ipn/ipnserver/server.go @@ -199,7 +199,7 @@ func (s *Server) serveHTTP(w http.ResponseWriter, r *http.Request) { defer onDone() if strings.HasPrefix(r.URL.Path, "/localapi/") { - lah := localapi.NewHandler(lb, s.logf, s.netMon, s.backendLogID) + lah := localapi.NewHandler(lb, s.logf, s.backendLogID) lah.PermitRead, lah.PermitWrite = s.localAPIPermissions(ci) lah.PermitCert = s.connCanFetchCerts(ci) lah.ConnIdentity = ci diff --git a/ipn/localapi/debugderp.go b/ipn/localapi/debugderp.go index 64ff571e5..85eb031e6 100644 --- a/ipn/localapi/debugderp.go +++ b/ipn/localapi/debugderp.go @@ -140,7 +140,7 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) { } checkSTUN4 := func(derpNode *tailcfg.DERPNode) { - u4, err := nettype.MakePacketListenerWithNetIP(netns.Listener(h.logf, h.netMon)).ListenPacket(ctx, "udp4", ":0") + u4, err := nettype.MakePacketListenerWithNetIP(netns.Listener(h.logf, h.b.NetMon())).ListenPacket(ctx, "udp4", ":0") if err != nil { st.Errors = append(st.Errors, fmt.Sprintf("Error creating IPv4 STUN listener: %v", err)) return @@ -249,7 +249,7 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) { serverPubKeys := make(map[key.NodePublic]bool) for i := range 5 { func() { - rc := derphttp.NewRegionClient(fakePrivKey, h.logf, h.netMon, func() *tailcfg.DERPRegion { + rc := derphttp.NewRegionClient(fakePrivKey, h.logf, h.b.NetMon(), func() *tailcfg.DERPRegion { return &tailcfg.DERPRegion{ RegionID: reg.RegionID, RegionCode: reg.RegionCode, diff --git a/ipn/localapi/localapi.go b/ipn/localapi/localapi.go index 7c8b97552..9a31dd304 100644 --- a/ipn/localapi/localapi.go +++ b/ipn/localapi/localapi.go @@ -36,7 +36,6 @@ "tailscale.com/clientupdate" "tailscale.com/drive" "tailscale.com/envknob" - "tailscale.com/health" "tailscale.com/hostinfo" "tailscale.com/ipn" "tailscale.com/ipn/ipnauth" @@ -156,8 +155,8 @@ // NewHandler creates a new LocalAPI HTTP handler. All parameters except netMon // are required (if non-nil it's used to do faster interface lookups). -func NewHandler(b *ipnlocal.LocalBackend, logf logger.Logf, netMon *netmon.Monitor, logID logid.PublicID) *Handler { - return &Handler{b: b, logf: logf, netMon: netMon, backendLogID: logID, clock: tstime.StdClock{}} +func NewHandler(b *ipnlocal.LocalBackend, logf logger.Logf, logID logid.PublicID) *Handler { + return &Handler{b: b, logf: logf, backendLogID: logID, clock: tstime.StdClock{}} } type Handler struct { @@ -188,7 +187,6 @@ type Handler struct { b *ipnlocal.LocalBackend logf logger.Logf - netMon *netmon.Monitor // optional; nil means interfaces will be looked up on-demand backendLogID logid.PublicID clock tstime.Clock } @@ -358,7 +356,7 @@ func (h *Handler) serveBugReport(w http.ResponseWriter, r *http.Request) { } hi, _ := json.Marshal(hostinfo.New()) h.logf("user bugreport hostinfo: %s", hi) - if err := health.Global.OverallError(); err != nil { + if err := h.b.HealthTracker().OverallError(); err != nil { h.logf("user bugreport health: %s", err.Error()) } else { h.logf("user bugreport health: ok") @@ -748,7 +746,7 @@ func (h *Handler) serveDebugPortmap(w http.ResponseWriter, r *http.Request) { done := make(chan bool, 1) var c *portmapper.Client - c = portmapper.NewClient(logger.WithPrefix(logf, "portmapper: "), h.netMon, debugKnobs, h.b.ControlKnobs(), func() { + c = portmapper.NewClient(logger.WithPrefix(logf, "portmapper: "), h.b.NetMon(), debugKnobs, h.b.ControlKnobs(), func() { logf("portmapping changed.") logf("have mapping: %v", c.HaveMapping()) diff --git a/tsnet/tsnet.go b/tsnet/tsnet.go index d782834c2..1dd60eef9 100644 --- a/tsnet/tsnet.go +++ b/tsnet/tsnet.go @@ -233,7 +233,7 @@ func (s *Server) Loopback() (addr string, proxyCred, localAPICred string, err er // out the CONNECT code from tailscaled/proxy.go that uses // httputil.ReverseProxy and adding auth support. go func() { - lah := localapi.NewHandler(s.lb, s.logf, s.netMon, s.logid) + lah := localapi.NewHandler(s.lb, s.logf, s.logid) lah.PermitWrite = true lah.PermitRead = true lah.RequiredPassword = s.localAPICred @@ -517,11 +517,12 @@ func (s *Server) start() (reterr error) { sys := new(tsd.System) s.dialer = &tsdial.Dialer{Logf: logf} // mutated below (before used) eng, err := wgengine.NewUserspaceEngine(logf, wgengine.Config{ - ListenPort: s.Port, - NetMon: s.netMon, - Dialer: s.dialer, - SetSubsystem: sys.Set, - ControlKnobs: sys.ControlKnobs(), + ListenPort: s.Port, + NetMon: s.netMon, + Dialer: s.dialer, + SetSubsystem: sys.Set, + ControlKnobs: sys.ControlKnobs(), + HealthTracker: sys.HealthTracker(), }) if err != nil { return err @@ -606,7 +607,7 @@ func (s *Server) start() (reterr error) { go s.printAuthURLLoop() // Run the localapi handler, to allow fetching LetsEncrypt certs. - lah := localapi.NewHandler(lb, logf, s.netMon, s.logid) + lah := localapi.NewHandler(lb, logf, s.logid) lah.PermitWrite = true lah.PermitRead = true diff --git a/wgengine/magicsock/derp.go b/wgengine/magicsock/derp.go index 66235d22e..7762a4e18 100644 --- a/wgengine/magicsock/derp.go +++ b/wgengine/magicsock/derp.go @@ -165,7 +165,7 @@ func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int) if testenv.InTest() && !checkControlHealthDuringNearestDERPInTests { connectedToControl = true } else { - connectedToControl = health.Global.GetInPollNetMap() + connectedToControl = c.health.GetInPollNetMap() } if !connectedToControl { c.mu.Lock() @@ -201,12 +201,12 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) { defer c.mu.Unlock() if !c.wantDerpLocked() { c.myDerp = 0 - health.Global.SetMagicSockDERPHome(0, c.homeless) + c.health.SetMagicSockDERPHome(0, c.homeless) return false } if c.homeless { c.myDerp = 0 - health.Global.SetMagicSockDERPHome(0, c.homeless) + c.health.SetMagicSockDERPHome(0, c.homeless) return false } if derpNum == c.myDerp { @@ -217,7 +217,7 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) { metricDERPHomeChange.Add(1) } c.myDerp = derpNum - health.Global.SetMagicSockDERPHome(derpNum, c.homeless) + c.health.SetMagicSockDERPHome(derpNum, c.homeless) if c.privateKey.IsZero() { // No private key yet, so DERP connections won't come up anyway. @@ -400,7 +400,7 @@ func (c *Conn) derpWriteChanOfAddr(addr netip.AddrPort, peer key.NodePublic) cha } return derpMap.Regions[regionID] }) - dc.HealthTracker = health.Global + dc.HealthTracker = c.health dc.SetCanAckPings(true) dc.NotePreferred(c.myDerp == regionID) @@ -526,8 +526,8 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d return n } - defer health.Global.SetDERPRegionConnectedState(regionID, false) - defer health.Global.SetDERPRegionHealth(regionID, "") + defer c.health.SetDERPRegionConnectedState(regionID, false) + defer c.health.SetDERPRegionHealth(regionID, "") // peerPresent is the set of senders we know are present on this // connection, based on messages we've received from the server. @@ -539,7 +539,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d for { msg, connGen, err := dc.RecvDetail() if err != nil { - health.Global.SetDERPRegionConnectedState(regionID, false) + c.health.SetDERPRegionConnectedState(regionID, false) // Forget that all these peers have routes. for peer := range peerPresent { delete(peerPresent, peer) @@ -577,14 +577,14 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d now := time.Now() if lastPacketTime.IsZero() || now.Sub(lastPacketTime) > frameReceiveRecordRate { - health.Global.NoteDERPRegionReceivedFrame(regionID) + c.health.NoteDERPRegionReceivedFrame(regionID) lastPacketTime = now } switch m := msg.(type) { case derp.ServerInfoMessage: - health.Global.SetDERPRegionConnectedState(regionID, true) - health.Global.SetDERPRegionHealth(regionID, "") // until declared otherwise + c.health.SetDERPRegionConnectedState(regionID, true) + c.health.SetDERPRegionHealth(regionID, "") // until declared otherwise c.logf("magicsock: derp-%d connected; connGen=%v", regionID, connGen) continue case derp.ReceivedPacket: @@ -624,7 +624,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netip.AddrPort, d }() continue case derp.HealthMessage: - health.Global.SetDERPRegionHealth(regionID, m.Problem) + c.health.SetDERPRegionHealth(regionID, m.Problem) continue case derp.PeerGoneMessage: switch m.Reason { diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 370dd6270..5f066eb44 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -91,6 +91,7 @@ type Conn struct { testOnlyPacketListener nettype.PacketListener noteRecvActivity func(key.NodePublic) // or nil, see Options.NoteRecvActivity netMon *netmon.Monitor // or nil + health *health.Tracker // or nil controlKnobs *controlknobs.Knobs // or nil // ================================================================ @@ -369,9 +370,13 @@ type Options struct { NoteRecvActivity func(key.NodePublic) // NetMon is the network monitor to use. - // With one, the portmapper won't be used. + // If nil, the portmapper won't be used. NetMon *netmon.Monitor + // HealthTracker optionally specifies the health tracker to + // report errors and warnings to. + HealthTracker *health.Tracker + // ControlKnobs are the set of control knobs to use. // If nil, they're ignored and not updated. ControlKnobs *controlknobs.Knobs @@ -463,6 +468,7 @@ func NewConn(opts Options) (*Conn, error) { c.portMapper.SetGatewayLookupFunc(opts.NetMon.GatewayAndSelfIP) } c.netMon = opts.NetMon + c.health = opts.HealthTracker c.onPortUpdate = opts.OnPortUpdate c.getPeerByKey = opts.PeerByKeyFunc @@ -666,7 +672,7 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) { // NOTE(andrew-d): I don't love that we're depending on the // health package here, but I'd rather do that and not store // the exact same state in two different places. - GetLastDERPActivity: health.Global.GetDERPRegionReceivedTime, + GetLastDERPActivity: c.health.GetDERPRegionReceivedTime, }) if err != nil { return nil, err @@ -2471,7 +2477,7 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur } ruc.setConnLocked(pconn, network, c.bind.BatchSize()) if network == "udp4" { - health.Global.SetUDP4Unbound(false) + c.health.SetUDP4Unbound(false) } return nil } @@ -2482,7 +2488,7 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur // we get a link change and we can try binding again. ruc.setConnLocked(newBlockForeverConn(), "", c.bind.BatchSize()) if network == "udp4" { - health.Global.SetUDP4Unbound(true) + c.health.SetUDP4Unbound(true) } return fmt.Errorf("failed to bind any ports (tried %v)", ports) } diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index b075fd4a4..8202e2c2c 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -3113,21 +3113,23 @@ func TestMaybeSetNearestDERP(t *testing.T) { } for _, tt := range testCases { t.Run(tt.name, func(t *testing.T) { + ht := new(health.Tracker) c := newConn() c.logf = t.Logf c.myDerp = tt.old c.derpMap = derpMap + c.health = ht report := &netcheck.Report{PreferredDERP: tt.reportDERP} - oldConnected := health.Global.GetInPollNetMap() + oldConnected := ht.GetInPollNetMap() if tt.connectedToControl != oldConnected { if tt.connectedToControl { - health.Global.GotStreamedMapResponse() - t.Cleanup(health.Global.SetOutOfPollNetMap) + ht.GotStreamedMapResponse() + t.Cleanup(ht.SetOutOfPollNetMap) } else { - health.Global.SetOutOfPollNetMap() - t.Cleanup(health.Global.GotStreamedMapResponse) + ht.SetOutOfPollNetMap() + t.Cleanup(ht.GotStreamedMapResponse) } } diff --git a/wgengine/userspace.go b/wgengine/userspace.go index d33cfc6ef..7bc7341f8 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -98,6 +98,7 @@ type userspaceEngine struct { dns *dns.Manager magicConn *magicsock.Conn netMon *netmon.Monitor + health *health.Tracker netMonOwned bool // whether we created netMon (and thus need to close it) netMonUnregister func() // unsubscribes from changes; used regardless of netMonOwned birdClient BIRDClient // or nil @@ -188,6 +189,9 @@ type Config struct { // If nil, a new network monitor is created. NetMon *netmon.Monitor + // HealthTracker, if non-nil, is the health tracker to use. + HealthTracker *health.Tracker + // Dialer is the dialer to use for outbound connections. // If nil, a new Dialer is created Dialer *tsdial.Dialer @@ -310,6 +314,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) birdClient: conf.BIRDClient, controlKnobs: conf.ControlKnobs, reconfigureVPN: conf.ReconfigureVPN, + health: conf.HealthTracker, } if e.birdClient != nil { @@ -372,6 +377,7 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) IdleFunc: e.tundev.IdleDuration, NoteRecvActivity: e.noteRecvActivity, NetMon: e.netMon, + HealthTracker: e.health, ControlKnobs: conf.ControlKnobs, OnPortUpdate: onPortUpdate, PeerByKeyFunc: e.PeerByKey, @@ -970,7 +976,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, e.logf("wgengine: Reconfig: configuring router") e.networkLogger.ReconfigRoutes(routerCfg) err := e.router.Set(routerCfg) - health.Global.SetRouterHealth(err) + e.health.SetRouterHealth(err) if err != nil { return err } @@ -979,7 +985,7 @@ func (e *userspaceEngine) Reconfig(cfg *wgcfg.Config, routerCfg *router.Config, // assigned address. e.logf("wgengine: Reconfig: configuring DNS") err = e.dns.Set(*dnsCfg) - health.Global.SetDNSHealth(err) + e.health.SetDNSHealth(err) if err != nil { return err } @@ -1183,7 +1189,7 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) { e.logf("[v1] LinkChange: minor") } - health.Global.SetAnyInterfaceUp(up) + e.health.SetAnyInterfaceUp(up) e.magicConn.SetNetworkUp(up) if !up || changed { if err := e.dns.FlushCaches(); err != nil {