net/sockstats: expose debug info

Exposes some internal state of the sockstats package via the C2N and
PeerAPI endpoints, so that it can be used for debugging. For now this
includes the estimated radio on percentage and a second-by-second view
of the times the radio was active.

Also fixes another off-by-one error in the radio on percentage that
was leading to >100% values (if n seconds have passed since we started
to monitor, there may be n + 1 possible seconds where the radio could
have been on).

Updates tailscale/corp#9230

Signed-off-by: Mihai Parparita <mihai@tailscale.com>
This commit is contained in:
Mihai Parparita 2023-04-19 13:45:02 -07:00 committed by Mihai Parparita
parent 7c386ca6d2
commit d0906cda97
6 changed files with 88 additions and 21 deletions

View File

@ -17,6 +17,7 @@
"time"
"tailscale.com/envknob"
"tailscale.com/net/sockstats"
"tailscale.com/tailcfg"
"tailscale.com/util/clientmetric"
"tailscale.com/util/goroutines"
@ -94,7 +95,8 @@ func (b *LocalBackend) handleC2N(w http.ResponseWriter, r *http.Request) {
return
}
b.sockstatLogger.Flush()
fmt.Fprintln(w, b.sockstatLogger.LogID())
fmt.Fprintf(w, "logid: %s\n", b.sockstatLogger.LogID())
fmt.Fprintf(w, "debug info: %v\n", sockstats.DebugInfo())
default:
http.Error(w, "unknown c2n path", http.StatusBadRequest)
}

View File

@ -947,6 +947,12 @@ func (h *peerAPIHandler) handleServeSockStats(w http.ResponseWriter, r *http.Req
fmt.Fprintln(w, "</tfoot>")
fmt.Fprintln(w, "</table>")
fmt.Fprintln(w, "<h2>Debug Info</h2>")
fmt.Fprintln(w, "<pre>")
fmt.Fprintln(w, html.EscapeString(sockstats.DebugInfo()))
fmt.Fprintln(w, "</pre>")
}
type incomingFile struct {

View File

@ -119,3 +119,9 @@ type LinkMonitor interface {
func SetLinkMonitor(lm LinkMonitor) {
setLinkMonitor(lm)
}
// DebugInfo returns a string containing debug information about the tracked
// statistics.
func DebugInfo() string {
return debugInfo()
}

View File

@ -31,3 +31,7 @@ func getValidation() *ValidationSockStats {
func setLinkMonitor(lm LinkMonitor) {
}
func debugInfo() string {
return ""
}

View File

@ -288,6 +288,19 @@ func setLinkMonitor(lm LinkMonitor) {
})
}
func debugInfo() string {
var b strings.Builder
fmt.Fprintf(&b, "radio high percent: %d\n", radio.radioHighPercent())
fmt.Fprintf(&b, "radio activity for the last hour (one minute per line):\n")
for i, a := range radio.radioActive() {
fmt.Fprintf(&b, "%d", a)
if i%60 == 59 {
fmt.Fprintf(&b, "\n")
}
}
return b.String()
}
func isLikelyCellularInterface(ifName string) bool {
return strings.HasPrefix(ifName, "rmnet") || // Android
strings.HasPrefix(ifName, "ww") || // systemd naming scheme for WWAN
@ -331,18 +344,56 @@ func (rm *radioMonitor) active() {
radioLowIdle = 12 // seconds radio idles in low power state before transitioning to off
)
// radioActive returns a slice of 1s samples (one per second) for the past hour
// indicating whether the radio was active (1) or idle (0).
func (rm *radioMonitor) radioActive() (active [radioSampleSize]int64) {
rm.forEachSample(func(c int, isActive bool) {
if isActive {
active[c] = 1
}
})
return
}
// radioHighPercent returns the percentage of time (as an int from 0 to 100)
// that the cellular radio was in high power mode during the past hour.
// If the radio has been monitored for less than an hour,
// the percentage is calculated based on the time monitored.
func (rm *radioMonitor) radioHighPercent() int64 {
var highPowerSec int64 // total seconds radio was in high power (active or idle)
lastActive := -1 // counter when radio was last active
periodLength := rm.forEachSample(func(c int, isActive bool) {
if isActive {
// radio on and active
highPowerSec++
lastActive = c
} else if lastActive != -1 && c-lastActive < radioHighIdle {
// radio on but idle
highPowerSec++
}
})
if periodLength == 0 {
return 0
}
if highPowerSec == 0 {
return 0
}
return highPowerSec * 100 / periodLength
}
// forEachSample calls f for each sample in the past hour (or less if less time
// has passed -- the evaluated period is returned)
func (rm *radioMonitor) forEachSample(f func(c int, isActive bool)) (periodLength int64) {
now := rm.now().Unix()
var periodLength int64 = radioSampleSize
periodLength = radioSampleSize
if t := now - rm.startTime; t < periodLength {
if t <= 0 {
return 0
}
periodLength = t
periodLength = t + 1 // we want an inclusive range (with the current second)
}
periodStart := now - periodLength // start of current reporting period
@ -353,25 +404,14 @@ func (rm *radioMonitor) radioHighPercent() int64 {
rm.usage[split:],
rm.usage[:split],
}
var highPowerSec int64 // total seconds radio was in high power (active or idle)
var c int // counter
var lastActive int // counter when radio was last active
var c int // counter
for _, slice := range slices {
for _, v := range slice {
c++ // increment first so we don't have zero values
if v >= periodStart {
// radio on and active
highPowerSec++
lastActive = c
} else if lastActive > 0 && c-lastActive < radioHighIdle {
// radio on but idle
highPowerSec++
}
f(c, v >= periodStart)
c++
}
}
if highPowerSec == 0 {
return 0
}
return highPowerSec * 100 / periodLength
return periodLength
}

View File

@ -37,10 +37,19 @@ func(_ *testTime, _ *radioMonitor) {},
"active, 10 sec idle",
func(tt *testTime, rm *radioMonitor) {
rm.active()
tt.Add(10 * time.Second)
tt.Add(9 * time.Second)
},
50, // radio on 5 seconds of 10 seconds
},
{
"active, spanning two seconds",
func(tt *testTime, rm *radioMonitor) {
rm.active()
tt.Add(1100 * time.Millisecond)
rm.active()
},
100, // radio on for 2 seconds
},
{
"400 iterations: 2 sec active, 1 min idle",
func(tt *testTime, rm *radioMonitor) {
@ -57,7 +66,7 @@ func(tt *testTime, rm *radioMonitor) {
{
"activity at end of time window",
func(tt *testTime, rm *radioMonitor) {
tt.Add(2 * time.Second)
tt.Add(1 * time.Second)
rm.active()
},
50,