cmd/stunstamp: remove sqlite DB and API (#12604)

stunstamp now sends data to Prometheus via remote write, and Prometheus
can serve the same data. Retaining and cleaning up old data in sqlite
leads to long probing pauses, and it's not worth investing more effort
to optimize the schema and/or concurrency model.

Updates tailscale/corp#20344

Signed-off-by: Jordan Whited <jordan@tailscale.com>
Jordan Whited authored on 2024-06-25 10:21:40 -07:00, committed by GitHub
parent 3485e4bf5a
commit 94415e8029
6 changed files with 1 addition and 336 deletions
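
The query path this commit removes is replaced by Prometheus itself: stunstamp pushes samples via remote write, and the same data can be read back through Prometheus' HTTP API. A minimal sketch of such a read in Go follows; the Prometheus address and the metric name are assumptions for illustration, not taken from this commit.

// prometheus_read.go (illustrative only): fetch the last hour of RTT samples
// from Prometheus' HTTP API. localhost:9090 and the stunstamp_rtt_ns metric
// name are hypothetical.
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
)

func main() {
	q := url.Values{}
	// An instant query with a range selector returns a matrix of samples.
	q.Set("query", `stunstamp_rtt_ns[1h]`)
	resp, err := http.Get("http://localhost:9090/api/v1/query?" + q.Encode())
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(body)) // {"status":"success","data":{"resultType":"matrix",...}}
}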

@@ -1,142 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package main
import (
"compress/gzip"
"encoding/json"
"errors"
"net/http"
"net/url"
"strconv"
"strings"
"time"
sq "github.com/Masterminds/squirrel"
)
type api struct {
db *db
mux *http.ServeMux
}
func newAPI(db *db) *api {
a := &api{
db: db,
}
mux := http.NewServeMux()
mux.HandleFunc("/query", a.query)
a.mux = mux
return a
}
type apiResult struct {
At int `json:"at"` // time.Time.Unix()
RegionID int `json:"regionID"`
Hostname string `json:"hostname"`
Af int `json:"af"` // 4 or 6
Addr string `json:"addr"`
Source int `json:"source"` // timestampSourceUserspace (0) or timestampSourceKernel (1)
StableConn bool `json:"stableConn"`
DstPort int `json:"dstPort"`
RttNS *int `json:"rttNS"`
}
func getTimeBounds(vals url.Values) (from time.Time, to time.Time, err error) {
lastForm, ok := vals["last"]
if ok && len(lastForm) > 0 {
dur, err := time.ParseDuration(lastForm[0])
if err != nil {
return time.Time{}, time.Time{}, err
}
now := time.Now()
return now.Add(-dur), now, nil
}
fromForm, ok := vals["from"]
if ok && len(fromForm) > 0 {
fromUnixSec, err := strconv.Atoi(fromForm[0])
if err != nil {
return time.Time{}, time.Time{}, err
}
from = time.Unix(int64(fromUnixSec), 0)
toForm, ok := vals["to"]
if ok && len(toForm) > 0 {
toUnixSec, err := strconv.Atoi(toForm[0])
if err != nil {
return time.Time{}, time.Time{}, err
}
to = time.Unix(int64(toUnixSec), 0)
} else {
return time.Time{}, time.Time{}, errors.New("from specified without to")
}
return from, to, nil
}
// no time bounds specified, default to last 1h
now := time.Now()
return now.Add(-time.Hour), now, nil
}
func (a *api) ServeHTTP(w http.ResponseWriter, r *http.Request) {
a.mux.ServeHTTP(w, r)
}
func (a *api) query(w http.ResponseWriter, r *http.Request) {
err := r.ParseForm()
if err != nil {
http.Error(w, err.Error(), 500)
return
}
from, to, err := getTimeBounds(r.Form)
if err != nil {
http.Error(w, err.Error(), 500)
return
}
sb := sq.Select("at_unix", "region_id", "hostname", "af", "address", "timestamp_source", "stable_conn", "dst_port", "rtt_ns").From("rtt")
sb = sb.Where(sq.And{
sq.GtOrEq{"at_unix": from.Unix()},
sq.LtOrEq{"at_unix": to.Unix()},
})
query, args, err := sb.ToSql()
if err != nil {
return
}
rows, err := a.db.Query(query, args...)
if err != nil {
http.Error(w, err.Error(), 500)
return
}
results := make([]apiResult, 0)
for rows.Next() {
rtt := 0
result := apiResult{
RttNS: &rtt,
}
err = rows.Scan(&result.At, &result.RegionID, &result.Hostname, &result.Af, &result.Addr, &result.Source, &result.StableConn, &result.DstPort, &result.RttNS)
if err != nil {
http.Error(w, err.Error(), 500)
return
}
results = append(results, result)
}
if rows.Err() != nil {
http.Error(w, rows.Err().Error(), 500)
return
}
if strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") {
gz := gzip.NewWriter(w)
defer gz.Close()
w.Header().Set("Content-Encoding", "gzip")
err = json.NewEncoder(gz).Encode(&results)
} else {
err = json.NewEncoder(w).Encode(&results)
}
if err != nil {
http.Error(w, err.Error(), 500)
return
}
}
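
For reference, the handler above accepted a "last" duration or "from"/"to" unix-second bounds and answered with a JSON array of apiResult, gzip-compressed when the client advertised support. A minimal client sketch against the removed endpoint; the listen address is hypothetical, and Go's default transport transparently handles the gzip case.

// Illustrative only: querying the /query endpoint that this commit deletes.
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// apiResult mirrors the response schema defined above.
type apiResult struct {
	At         int    `json:"at"`
	RegionID   int    `json:"regionID"`
	Hostname   string `json:"hostname"`
	Af         int    `json:"af"`
	Addr       string `json:"addr"`
	Source     int    `json:"source"`
	StableConn bool   `json:"stableConn"`
	DstPort    int    `json:"dstPort"`
	RttNS      *int   `json:"rttNS"`
}

func main() {
	// "last" takes a time.ParseDuration() string; alternatively pass
	// "from" and "to" as unix seconds.
	resp, err := http.Get("http://localhost:8080/query?last=1h") // hypothetical -api addr
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	var results []apiResult
	if err := json.NewDecoder(resp.Body).Decode(&results); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("got %d measurements\n", len(results))
}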

@@ -38,11 +38,8 @@ import (
var (
flagDERPMap = flag.String("derp-map", "https://login.tailscale.com/derpmap/default", "URL to DERP map")
flagOut = flag.String("out", "", "output sqlite filename")
flagInterval = flag.Duration("interval", time.Minute, "interval to probe at in time.ParseDuration() format")
flagAPI = flag.String("api", "", "listen addr for HTTP API")
flagIPv6 = flag.Bool("ipv6", false, "probe IPv6 addresses")
flagRetention = flag.Duration("retention", time.Hour*24*7, "sqlite retention period in time.ParseDuration() format")
flagRemoteWriteURL = flag.String("rw-url", "", "prometheus remote write URL")
flagInstance = flag.String("instance", "", "instance label value; defaults to hostname if unspecified")
flagDstPorts = flag.String("dst-ports", "", "comma-separated list of destination ports to monitor")
@@ -639,15 +636,9 @@ func main() {
if len(*flagDERPMap) < 1 {
log.Fatal("derp-map flag is unset")
}
if len(*flagOut) < 1 {
log.Fatal("out flag is unset")
}
if *flagInterval < minInterval || *flagInterval > maxBufferDuration {
log.Fatalf("interval must be >= %s and <= %s", minInterval, maxBufferDuration)
}
if *flagRetention < *flagInterval {
log.Fatal("retention must be >= interval")
}
if len(*flagRemoteWriteURL) < 1 {
log.Fatal("rw-url flag is unset")
}
@@ -693,49 +684,6 @@ func main() {
}
}
db, err := newDB(*flagOut)
if err != nil {
log.Fatalf("error opening output file for writing: %v", err)
}
defer db.Close()
_, err = db.Exec("PRAGMA journal_mode=WAL")
if err != nil {
log.Fatalf("error enabling WAL mode: %v", err)
}
// No indices or primary key. Keep it simple for now. Reads will be full
// scans. We can AUTOINCREMENT rowid in the future and hold an in-memory
// index to at_unix if needed as reads are almost always going to be
// time-bound (e.g. WHERE at_unix >= ?). At the time of authorship we have
// ~300 data points per-interval w/o ipv6 w/kernel timestamping resulting
// in ~2.6m rows in 24h w/a 10s probe interval.
_, err = db.Exec(`
CREATE TABLE IF NOT EXISTS rtt(at_unix INT, region_id INT, hostname TEXT, af INT, address TEXT, timestamp_source INT, stable_conn INT, dst_port INT, rtt_ns INT)
`)
if err != nil {
log.Fatalf("error initializing db: %v", err)
}
wg := sync.WaitGroup{}
httpErrCh := make(chan error, 1)
var httpServer *http.Server
if len(*flagAPI) > 0 {
api := newAPI(db)
httpServer = &http.Server{
Addr: *flagAPI,
Handler: api,
ReadTimeout: time.Second * 60,
WriteTimeout: time.Second * 60,
}
wg.Add(1)
go func() {
err := httpServer.ListenAndServe()
httpErrCh <- err
wg.Done()
}()
}
tsCh := make(chan []prompb.TimeSeries, maxBufferDuration / *flagInterval)
remoteWriteDoneCh := make(chan struct{})
rwc := newRemoteWriteClient(*flagRemoteWriteURL)
@@ -745,9 +693,6 @@ CREATE TABLE IF NOT EXISTS rtt(at_unix INT, region_id INT, hostname TEXT, af INT
}()
shutdown := func() {
if httpServer != nil {
httpServer.Close()
}
close(tsCh)
select {
case <-time.After(time.Second * 10): // give goroutine some time to flush
@@ -766,7 +711,6 @@ CREATE TABLE IF NOT EXISTS rtt(at_unix INT, region_id INT, hostname TEXT, af INT
cancel()
}
wg.Wait()
return
}
@@ -787,20 +731,9 @@ CREATE TABLE IF NOT EXISTS rtt(at_unix INT, region_id INT, hostname TEXT, af INT
defer derpMapTicker.Stop()
probeTicker := time.NewTicker(*flagInterval)
defer probeTicker.Stop()
cleanupTicker := time.NewTicker(time.Hour)
defer cleanupTicker.Stop()
for {
select {
case <-cleanupTicker.C:
older := time.Now().Add(-*flagRetention)
log.Printf("cleaning up measurements older than %v", older)
_, err := db.Exec("DELETE FROM rtt WHERE at_unix < ?", older.Unix())
if err != nil {
log.Printf("error cleaning up old data: %v", err)
shutdown()
return
}
case <-probeTicker.C:
results, err := probeNodes(nodeMetaByAddr, stableConns, dstPorts)
if err != nil {
@@ -819,32 +752,6 @@ CREATE TABLE IF NOT EXISTS rtt(at_unix INT, region_id INT, hostname TEXT, af INT
tsCh <- ts
}
}
tx, err := db.Begin()
if err != nil {
log.Printf("error beginning sqlite tx: %v", err)
shutdown()
return
}
for _, result := range results {
af := 4
if result.key.meta.addr.Is6() {
af = 6
}
_, err = tx.Exec("INSERT INTO rtt(at_unix, region_id, hostname, af, address, timestamp_source, stable_conn, dst_port, rtt_ns) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)",
result.at.Unix(), result.key.meta.regionID, result.key.meta.hostname, af, result.key.meta.addr.String(), result.key.timestampSource, result.key.connStability, result.key.dstPort, result.rtt)
if err != nil {
tx.Rollback()
log.Printf("error adding result to tx: %v", err)
shutdown()
return
}
}
err = tx.Commit()
if err != nil {
log.Printf("error committing tx: %v", err)
shutdown()
return
}
case dm := <-dmCh:
staleMeta, err := nodeMetaFromDERPMap(dm, nodeMetaByAddr, *flagIPv6)
if err != nil {
@@ -874,10 +781,6 @@ CREATE TABLE IF NOT EXISTS rtt(at_unix INT, region_id INT, hostname TEXT, af INT
dmCh <- updatedDM
}
}()
case err := <-httpErrCh:
log.Printf("http server error: %v", err)
shutdown()
return
case <-sigCh:
shutdown()
return
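
What remains after this commit is the remote-write path: probe results are converted to prompb.TimeSeries and pushed over tsCh to the remote write client. A rough sketch of what a single RTT sample might look like as a series; the metric and label names are assumptions, the actual conversion is not shown in this diff, and the prompb types are assumed to come from github.com/prometheus/prometheus/prompb.

// Illustrative only: one RTT measurement as a remote-write time series.
package main

import (
	"fmt"
	"strconv"
	"time"

	"github.com/prometheus/prometheus/prompb"
)

func rttTimeSeries(at time.Time, hostname string, regionID int, rttNS int64) prompb.TimeSeries {
	return prompb.TimeSeries{
		Labels: []prompb.Label{
			{Name: "__name__", Value: "stunstamp_rtt_ns"}, // hypothetical metric name
			{Name: "hostname", Value: hostname},
			{Name: "region_id", Value: strconv.Itoa(regionID)},
		},
		Samples: []prompb.Sample{
			// Remote write timestamps are unix milliseconds.
			{Value: float64(rttNS), Timestamp: at.UnixMilli()},
		},
	}
}

func main() {
	ts := rttTimeSeries(time.Now(), "derp1.example.com", 1, 12_345_678)
	fmt.Printf("series with %d labels and %d samples\n", len(ts.Labels), len(ts.Samples))
}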

@@ -1,26 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !(windows && 386)
package main
import (
"database/sql"
_ "modernc.org/sqlite"
)
type db struct {
*sql.DB
}
func newDB(path string) (*db, error) {
d, err := sql.Open("sqlite", *flagOut)
if err != nil {
return nil, err
}
return &db{
DB: d,
}, nil
}

@@ -1,17 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package main
import (
"database/sql"
"errors"
)
type db struct {
*sql.DB
}
func newDB(path string) (*db, error) {
return nil, errors.New("unsupported platform")
}