From 62a1e9a44f7cdc51551410f0b54c1355573aa16c Mon Sep 17 00:00:00 2001 From: Will Norris Date: Tue, 28 Mar 2023 20:27:52 -0700 Subject: [PATCH] log/sockstatlog: add delay before writing logs to disk Split apart polling of sockstats and logging them to disk. Add a 3 second delay before writing logs to disk to prevent an infinite upload loop when uploading stats to logcatcher. Fixes #7719 Signed-off-by: Will Norris --- log/sockstatlog/logger.go | 77 +++++++++++++++++++++++++++++++-------- logtail/logtail.go | 9 ++++- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/log/sockstatlog/logger.go b/log/sockstatlog/logger.go index 1906e16e3..167a37e46 100644 --- a/log/sockstatlog/logger.go +++ b/log/sockstatlog/logger.go @@ -1,7 +1,9 @@ // Copyright (c) Tailscale Inc & AUTHORS // SPDX-License-Identifier: BSD-3-Clause -// Package sockstatlog provides a logger for capturing and storing network socket stats. +// Package sockstatlog provides a logger for capturing network socket stats for debugging. +// Stats are collected at a frequency of 10 Hz and logged to disk. +// Stats are only uploaded to the log server on demand. package sockstatlog import ( @@ -25,19 +27,29 @@ "tailscale.com/util/mak" ) -// pollPeriod specifies how often to poll for socket stats. -const pollPeriod = time.Second / 10 +// pollInterval specifies how often to poll for socket stats. +const pollInterval = time.Second / 10 + +// logInterval specifies how often to log sockstat events to disk. +// This delay is added to prevent an infinite loop when logs are uploaded, +// which itself creates additional sockstat events. +const logInterval = 3 * time.Second // Logger logs statistics about network sockets. type Logger struct { + // enabled identifies whether the logger is enabled. enabled atomic.Bool ctx context.Context cancelFn context.CancelFunc - ticker *time.Ticker - logf logger.Logf + // eventCh is used to pass events from the poller to the logger. + eventCh chan event + logf logger.Logf + + // logger is the underlying logtail logger than manages log files on disk + // and uploading to the log server. logger *logtail.Logger filch *filch.Filch tr http.RoundTripper @@ -73,6 +85,7 @@ func SockstatLogID(logID logid.PublicID) logid.PrivateID { // NewLogger returns a new Logger that will store stats in logdir. // On platforms that do not support sockstat logging, a nil Logger will be returned. // The returned Logger is not yet enabled, and must be shut down with Shutdown when it is no longer needed. +// Logs will be uploaded to the log server using a new log ID derived from the provided backend logID. func NewLogger(logdir string, logf logger.Logf, logID logid.PublicID) (*Logger, error) { if !sockstats.IsAvailable { return nil, nil @@ -88,10 +101,9 @@ func NewLogger(logdir string, logf logger.Logf, logID logid.PublicID) (*Logger, } logger := &Logger{ - ticker: time.NewTicker(pollPeriod), - logf: logf, - filch: filch, - tr: logpolicy.NewLogtailTransport(logtail.DefaultHost), + logf: logf, + filch: filch, + tr: logpolicy.NewLogtailTransport(logtail.DefaultHost), } logger.logger = logtail.NewLogger(logtail.Config{ BaseURL: logpolicy.LogURL(), @@ -124,8 +136,14 @@ func (l *Logger) SetLoggingEnabled(v bool) { old := l.enabled.Load() if old != v && l.enabled.CompareAndSwap(old, v) { if v { + if l.eventCh == nil { + // eventCh should be large enough for the number of events that will occur within logInterval. + // Add an extra second's worth of events to ensure we don't drop any. + l.eventCh = make(chan event, (logInterval+time.Second)/pollInterval) + } l.ctx, l.cancelFn = context.WithCancel(context.Background()) go l.poll() + go l.logEvents() } else { l.cancelFn() } @@ -137,19 +155,21 @@ func (l *Logger) Write(p []byte) (int, error) { } // poll fetches the current socket stats at the configured time interval, -// calculates the delta since the last poll, and logs any non-zero values. +// calculates the delta since the last poll, +// and writes any non-zero values to the logger event channel. // This method does not return. func (l *Logger) poll() { // last is the last set of socket stats we saw. var lastStats *sockstats.SockStats var lastTime time.Time - enc := json.NewEncoder(l) + ticker := time.NewTicker(pollInterval) for { select { case <-l.ctx.Done(): + ticker.Stop() return - case t := <-l.ticker.C: + case t := <-ticker.C: stats := sockstats.Get() if lastStats != nil { diffstats := delta(lastStats, stats) @@ -162,9 +182,7 @@ func (l *Logger) poll() { if stats.CurrentInterfaceCellular { e.IsCellularInterface = 1 } - if err := enc.Encode(e); err != nil { - l.logf("sockstatlog: error encoding log: %v", err) - } + l.eventCh <- e } } lastTime = t @@ -173,6 +191,34 @@ func (l *Logger) poll() { } } +// logEvents reads events from the event channel at logInterval and logs them to disk. +// This method does not return. +func (l *Logger) logEvents() { + enc := json.NewEncoder(l) + flush := func() { + for { + select { + case e := <-l.eventCh: + if err := enc.Encode(e); err != nil { + l.logf("sockstatlog: error encoding log: %v", err) + } + default: + return + } + } + } + ticker := time.NewTicker(logInterval) + for { + select { + case <-l.ctx.Done(): + ticker.Stop() + return + case <-ticker.C: + flush() + } + } +} + func (l *Logger) LogID() string { if l.logger == nil { return "" @@ -189,7 +235,6 @@ func (l *Logger) Shutdown() { if l.cancelFn != nil { l.cancelFn() } - l.ticker.Stop() l.filch.Close() l.logger.Shutdown(context.Background()) diff --git a/logtail/logtail.go b/logtail/logtail.go index cd01d5126..33f9754e2 100644 --- a/logtail/logtail.go +++ b/logtail/logtail.go @@ -150,6 +150,7 @@ func NewLogger(cfg Config, logf tslogger.Logf) *Logger { timeNow: cfg.TimeNow, bo: backoff.NewBackoff("logtail", stdLogf, 30*time.Second), metricsDelta: cfg.MetricsDelta, + sockstatsLabel: sockstats.LabelLogtailLogger, procID: procID, includeProcSequence: cfg.IncludeProcSequence, @@ -192,6 +193,7 @@ type Logger struct { metricsDelta func() string // or nil privateID logid.PrivateID httpDoCalls atomic.Int32 + sockstatsLabel sockstats.Label procID uint32 includeProcSequence bool @@ -220,6 +222,11 @@ func (l *Logger) SetLinkMonitor(lm *monitor.Mon) { l.linkMonitor = lm } +// SetSockstatsLabel sets the label used in sockstat logs to identify network traffic from this logger. +func (l *Logger) SetSockstatsLabel(label sockstats.Label) { + l.sockstatsLabel = label +} + // PrivateID returns the logger's private log ID. // // It exists for internal use only. @@ -428,7 +435,7 @@ func (l *Logger) awaitInternetUp(ctx context.Context) { // origlen of -1 indicates that the body is not compressed. func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (uploaded bool, err error) { const maxUploadTime = 45 * time.Second - ctx = sockstats.WithSockStats(ctx, sockstats.LabelLogtailLogger) + ctx = sockstats.WithSockStats(ctx, l.sockstatsLabel) ctx, cancel := context.WithTimeout(ctx, maxUploadTime) defer cancel()