mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-25 19:15:34 +00:00
net/sockstats: expose debug info
Exposes some internal state of the sockstats package via the C2N and PeerAPI endpoints, so that it can be used for debugging. For now this includes the estimated radio on percentage and a second-by-second view of the times the radio was active. Also fixes another off-by-one error in the radio on percentage that was leading to >100% values (if n seconds have passed since we started to monitor, there may be n + 1 possible seconds where the radio could have been on). Updates tailscale/corp#9230 Signed-off-by: Mihai Parparita <mihai@tailscale.com>
This commit is contained in:
parent
7c386ca6d2
commit
d0906cda97
@ -17,6 +17,7 @@
|
||||
"time"
|
||||
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/net/sockstats"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/util/clientmetric"
|
||||
"tailscale.com/util/goroutines"
|
||||
@ -94,7 +95,8 @@ func (b *LocalBackend) handleC2N(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
b.sockstatLogger.Flush()
|
||||
fmt.Fprintln(w, b.sockstatLogger.LogID())
|
||||
fmt.Fprintf(w, "logid: %s\n", b.sockstatLogger.LogID())
|
||||
fmt.Fprintf(w, "debug info: %v\n", sockstats.DebugInfo())
|
||||
default:
|
||||
http.Error(w, "unknown c2n path", http.StatusBadRequest)
|
||||
}
|
||||
|
@ -947,6 +947,12 @@ func (h *peerAPIHandler) handleServeSockStats(w http.ResponseWriter, r *http.Req
|
||||
fmt.Fprintln(w, "</tfoot>")
|
||||
|
||||
fmt.Fprintln(w, "</table>")
|
||||
|
||||
fmt.Fprintln(w, "<h2>Debug Info</h2>")
|
||||
|
||||
fmt.Fprintln(w, "<pre>")
|
||||
fmt.Fprintln(w, html.EscapeString(sockstats.DebugInfo()))
|
||||
fmt.Fprintln(w, "</pre>")
|
||||
}
|
||||
|
||||
type incomingFile struct {
|
||||
|
@ -119,3 +119,9 @@ type LinkMonitor interface {
|
||||
func SetLinkMonitor(lm LinkMonitor) {
|
||||
setLinkMonitor(lm)
|
||||
}
|
||||
|
||||
// DebugInfo returns a string containing debug information about the tracked
|
||||
// statistics.
|
||||
func DebugInfo() string {
|
||||
return debugInfo()
|
||||
}
|
||||
|
@ -31,3 +31,7 @@ func getValidation() *ValidationSockStats {
|
||||
|
||||
func setLinkMonitor(lm LinkMonitor) {
|
||||
}
|
||||
|
||||
func debugInfo() string {
|
||||
return ""
|
||||
}
|
||||
|
@ -288,6 +288,19 @@ func setLinkMonitor(lm LinkMonitor) {
|
||||
})
|
||||
}
|
||||
|
||||
func debugInfo() string {
|
||||
var b strings.Builder
|
||||
fmt.Fprintf(&b, "radio high percent: %d\n", radio.radioHighPercent())
|
||||
fmt.Fprintf(&b, "radio activity for the last hour (one minute per line):\n")
|
||||
for i, a := range radio.radioActive() {
|
||||
fmt.Fprintf(&b, "%d", a)
|
||||
if i%60 == 59 {
|
||||
fmt.Fprintf(&b, "\n")
|
||||
}
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func isLikelyCellularInterface(ifName string) bool {
|
||||
return strings.HasPrefix(ifName, "rmnet") || // Android
|
||||
strings.HasPrefix(ifName, "ww") || // systemd naming scheme for WWAN
|
||||
@ -331,18 +344,56 @@ func (rm *radioMonitor) active() {
|
||||
radioLowIdle = 12 // seconds radio idles in low power state before transitioning to off
|
||||
)
|
||||
|
||||
// radioActive returns a slice of 1s samples (one per second) for the past hour
|
||||
// indicating whether the radio was active (1) or idle (0).
|
||||
func (rm *radioMonitor) radioActive() (active [radioSampleSize]int64) {
|
||||
rm.forEachSample(func(c int, isActive bool) {
|
||||
if isActive {
|
||||
active[c] = 1
|
||||
}
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// radioHighPercent returns the percentage of time (as an int from 0 to 100)
|
||||
// that the cellular radio was in high power mode during the past hour.
|
||||
// If the radio has been monitored for less than an hour,
|
||||
// the percentage is calculated based on the time monitored.
|
||||
func (rm *radioMonitor) radioHighPercent() int64 {
|
||||
var highPowerSec int64 // total seconds radio was in high power (active or idle)
|
||||
lastActive := -1 // counter when radio was last active
|
||||
|
||||
periodLength := rm.forEachSample(func(c int, isActive bool) {
|
||||
if isActive {
|
||||
// radio on and active
|
||||
highPowerSec++
|
||||
lastActive = c
|
||||
} else if lastActive != -1 && c-lastActive < radioHighIdle {
|
||||
// radio on but idle
|
||||
highPowerSec++
|
||||
}
|
||||
})
|
||||
|
||||
if periodLength == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
if highPowerSec == 0 {
|
||||
return 0
|
||||
}
|
||||
return highPowerSec * 100 / periodLength
|
||||
}
|
||||
|
||||
// forEachSample calls f for each sample in the past hour (or less if less time
|
||||
// has passed -- the evaluated period is returned)
|
||||
func (rm *radioMonitor) forEachSample(f func(c int, isActive bool)) (periodLength int64) {
|
||||
now := rm.now().Unix()
|
||||
var periodLength int64 = radioSampleSize
|
||||
periodLength = radioSampleSize
|
||||
if t := now - rm.startTime; t < periodLength {
|
||||
if t <= 0 {
|
||||
return 0
|
||||
}
|
||||
periodLength = t
|
||||
periodLength = t + 1 // we want an inclusive range (with the current second)
|
||||
}
|
||||
periodStart := now - periodLength // start of current reporting period
|
||||
|
||||
@ -353,25 +404,14 @@ func (rm *radioMonitor) radioHighPercent() int64 {
|
||||
rm.usage[split:],
|
||||
rm.usage[:split],
|
||||
}
|
||||
var highPowerSec int64 // total seconds radio was in high power (active or idle)
|
||||
|
||||
var c int // counter
|
||||
var lastActive int // counter when radio was last active
|
||||
for _, slice := range slices {
|
||||
for _, v := range slice {
|
||||
c++ // increment first so we don't have zero values
|
||||
if v >= periodStart {
|
||||
// radio on and active
|
||||
highPowerSec++
|
||||
lastActive = c
|
||||
} else if lastActive > 0 && c-lastActive < radioHighIdle {
|
||||
// radio on but idle
|
||||
highPowerSec++
|
||||
}
|
||||
f(c, v >= periodStart)
|
||||
c++
|
||||
}
|
||||
}
|
||||
|
||||
if highPowerSec == 0 {
|
||||
return 0
|
||||
}
|
||||
return highPowerSec * 100 / periodLength
|
||||
return periodLength
|
||||
}
|
||||
|
@ -37,10 +37,19 @@ func(_ *testTime, _ *radioMonitor) {},
|
||||
"active, 10 sec idle",
|
||||
func(tt *testTime, rm *radioMonitor) {
|
||||
rm.active()
|
||||
tt.Add(10 * time.Second)
|
||||
tt.Add(9 * time.Second)
|
||||
},
|
||||
50, // radio on 5 seconds of 10 seconds
|
||||
},
|
||||
{
|
||||
"active, spanning two seconds",
|
||||
func(tt *testTime, rm *radioMonitor) {
|
||||
rm.active()
|
||||
tt.Add(1100 * time.Millisecond)
|
||||
rm.active()
|
||||
},
|
||||
100, // radio on for 2 seconds
|
||||
},
|
||||
{
|
||||
"400 iterations: 2 sec active, 1 min idle",
|
||||
func(tt *testTime, rm *radioMonitor) {
|
||||
@ -57,7 +66,7 @@ func(tt *testTime, rm *radioMonitor) {
|
||||
{
|
||||
"activity at end of time window",
|
||||
func(tt *testTime, rm *radioMonitor) {
|
||||
tt.Add(2 * time.Second)
|
||||
tt.Add(1 * time.Second)
|
||||
rm.active()
|
||||
},
|
||||
50,
|
||||
|
Loading…
Reference in New Issue
Block a user