mirror of
https://github.com/tailscale/tailscale.git
synced 2025-01-07 16:17:41 +00:00
bb8ce48a6b
When tailscaled starts up, these lines run: func run() error { // ... pol := logpolicy.New("tailnode.log.tailscale.io") pol.SetVerbosityLevel(args.verbose) // ... } If there are old log entries present, they immediately start getting uploaded. This races with the call to pol.SetVerbosityLevel. This manifested itself as a test failure in tailscale.com/tstest/integration when run with -race: WARNING: DATA RACE Read at 0x00c0001bc970 by goroutine 24: tailscale.com/logtail.(*Logger).Write() /Users/josh/t/corp/oss/logtail/logtail.go:517 +0x27c log.(*Logger).Output() /Users/josh/go/ts/src/log/log.go:184 +0x2b8 log.Printf() /Users/josh/go/ts/src/log/log.go:323 +0x94 tailscale.com/logpolicy.newLogtailTransport.func1() /Users/josh/t/corp/oss/logpolicy/logpolicy.go:509 +0x36c net/http.(*Transport).dial() /Users/josh/go/ts/src/net/http/transport.go:1168 +0x238 net/http.(*Transport).dialConn() /Users/josh/go/ts/src/net/http/transport.go:1606 +0x21d0 net/http.(*Transport).dialConnFor() /Users/josh/go/ts/src/net/http/transport.go:1448 +0xe4 Previous write at 0x00c0001bc970 by main goroutine: tailscale.com/logtail.(*Logger).SetVerbosityLevel() /Users/josh/t/corp/oss/logtail/logtail.go:131 +0x98 tailscale.com/logpolicy.(*Policy).SetVerbosityLevel() /Users/josh/t/corp/oss/logpolicy/logpolicy.go:463 +0x60 main.run() /Users/josh/t/corp/oss/cmd/tailscaled/tailscaled.go:178 +0x50 main.main() /Users/josh/t/corp/oss/cmd/tailscaled/tailscaled.go:163 +0x71c Goroutine 24 (running) created at: net/http.(*Transport).queueForDial() /Users/josh/go/ts/src/net/http/transport.go:1417 +0x4d8 net/http.(*Transport).getConn() /Users/josh/go/ts/src/net/http/transport.go:1371 +0x5b8 net/http.(*Transport).roundTrip() /Users/josh/go/ts/src/net/http/transport.go:585 +0x7f4 net/http.(*Transport).RoundTrip() /Users/josh/go/ts/src/net/http/roundtrip.go:17 +0x30 net/http.send() /Users/josh/go/ts/src/net/http/client.go:251 +0x4f0 net/http.(*Client).send() /Users/josh/go/ts/src/net/http/client.go:175 +0x148 
net/http.(*Client).do() /Users/josh/go/ts/src/net/http/client.go:717 +0x1d0 net/http.(*Client).Do() /Users/josh/go/ts/src/net/http/client.go:585 +0x358 tailscale.com/logtail.(*Logger).upload() /Users/josh/t/corp/oss/logtail/logtail.go:367 +0x334 tailscale.com/logtail.(*Logger).uploading() /Users/josh/t/corp/oss/logtail/logtail.go:289 +0xec Rather than complicate the logpolicy API, allow the verbosity to be adjusted concurrently. Signed-off-by: Josh Bleecher Snyder <josh@tailscale.com>
550 lines
14 KiB
Go
550 lines
14 KiB
Go
// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Package logtail sends logs to log.tailscale.io.
|
|
package logtail
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"strconv"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"tailscale.com/logtail/backoff"
|
|
"tailscale.com/net/interfaces"
|
|
tslogger "tailscale.com/types/logger"
|
|
"tailscale.com/wgengine/monitor"
|
|
)
|
|
|
|
// DefaultHost is the host name logs are uploaded to whenever
// Config.BaseURL is left empty.
const DefaultHost = "log.tailscale.io"
|
|
|
|
// Encoder compresses log payloads before they are uploaded.
// A Logger obtains one from Config.NewZstdEncoder.
type Encoder interface {
	// EncodeAll returns the encoded form of src. The uploader passes a nil
	// dst and uses only the returned slice.
	// NOTE(review): dst is presumably an append target — confirm against the
	// concrete encoder.
	EncodeAll(src, dst []byte) []byte

	// Close releases the encoder. Logger.Shutdown calls it after the upload
	// goroutine has exited.
	Close() error
}
|
|
|
|
type Config struct {
|
|
Collection string // collection name, a domain name
|
|
PrivateID PrivateID // machine-specific private identifier
|
|
BaseURL string // if empty defaults to "https://log.tailscale.io"
|
|
HTTPC *http.Client // if empty defaults to http.DefaultClient
|
|
SkipClientTime bool // if true, client_time is not written to logs
|
|
LowMemory bool // if true, logtail minimizes memory use
|
|
TimeNow func() time.Time // if set, subsitutes uses of time.Now
|
|
Stderr io.Writer // if set, logs are sent here instead of os.Stderr
|
|
StderrLevel int // max verbosity level to write to stderr; 0 means the non-verbose messages only
|
|
Buffer Buffer // temp storage, if nil a MemoryBuffer
|
|
NewZstdEncoder func() Encoder // if set, used to compress logs for transmission
|
|
|
|
// DrainLogs, if non-nil, disables automatic uploading of new logs,
|
|
// so that logs are only uploaded when a token is sent to DrainLogs.
|
|
DrainLogs <-chan struct{}
|
|
}
|
|
|
|
func NewLogger(cfg Config, logf tslogger.Logf) *Logger {
|
|
if cfg.BaseURL == "" {
|
|
cfg.BaseURL = "https://" + DefaultHost
|
|
}
|
|
if cfg.HTTPC == nil {
|
|
cfg.HTTPC = http.DefaultClient
|
|
}
|
|
if cfg.TimeNow == nil {
|
|
cfg.TimeNow = time.Now
|
|
}
|
|
if cfg.Stderr == nil {
|
|
cfg.Stderr = os.Stderr
|
|
}
|
|
if cfg.Buffer == nil {
|
|
pendingSize := 256
|
|
if cfg.LowMemory {
|
|
pendingSize = 64
|
|
}
|
|
cfg.Buffer = NewMemoryBuffer(pendingSize)
|
|
}
|
|
l := &Logger{
|
|
stderr: cfg.Stderr,
|
|
stderrLevel: int64(cfg.StderrLevel),
|
|
httpc: cfg.HTTPC,
|
|
url: cfg.BaseURL + "/c/" + cfg.Collection + "/" + cfg.PrivateID.String(),
|
|
lowMem: cfg.LowMemory,
|
|
buffer: cfg.Buffer,
|
|
skipClientTime: cfg.SkipClientTime,
|
|
sent: make(chan struct{}, 1),
|
|
sentinel: make(chan int32, 16),
|
|
drainLogs: cfg.DrainLogs,
|
|
timeNow: cfg.TimeNow,
|
|
bo: backoff.NewBackoff("logtail", logf, 30*time.Second),
|
|
|
|
shutdownStart: make(chan struct{}),
|
|
shutdownDone: make(chan struct{}),
|
|
}
|
|
if cfg.NewZstdEncoder != nil {
|
|
l.zstdEncoder = cfg.NewZstdEncoder()
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
l.uploadCancel = cancel
|
|
|
|
go l.uploading(ctx)
|
|
l.Write([]byte("logtail started"))
|
|
return l
|
|
}
|
|
|
|
// Logger writes logs, splitting them as configured between local
|
|
// logging facilities and uploading to a log server.
|
|
type Logger struct {
|
|
stderr io.Writer
|
|
stderrLevel int64 // accessed atomically
|
|
httpc *http.Client
|
|
url string
|
|
lowMem bool
|
|
skipClientTime bool
|
|
linkMonitor *monitor.Mon
|
|
buffer Buffer
|
|
sent chan struct{} // signal to speed up drain
|
|
drainLogs <-chan struct{} // if non-nil, external signal to attempt a drain
|
|
sentinel chan int32
|
|
timeNow func() time.Time
|
|
bo *backoff.Backoff
|
|
zstdEncoder Encoder
|
|
uploadCancel func()
|
|
|
|
shutdownStart chan struct{} // closed when shutdown begins
|
|
shutdownDone chan struct{} // closd when shutdown complete
|
|
}
|
|
|
|
// SetVerbosityLevel controls the verbosity level that should be
|
|
// written to stderr. 0 is the default (not verbose). Levels 1 or higher
|
|
// are increasingly verbose.
|
|
func (l *Logger) SetVerbosityLevel(level int) {
|
|
atomic.StoreInt64(&l.stderrLevel, int64(level))
|
|
}
|
|
|
|
// SetLinkMonitor sets the optional the link monitor.
|
|
//
|
|
// It should not be changed concurrently with log writes and should
|
|
// only be set once.
|
|
func (l *Logger) SetLinkMonitor(lm *monitor.Mon) {
|
|
l.linkMonitor = lm
|
|
}
|
|
|
|
// Shutdown gracefully shuts down the logger while completing any
|
|
// remaining uploads.
|
|
//
|
|
// It will block, continuing to try and upload unless the passed
|
|
// context object interrupts it by being done.
|
|
// If the shutdown is interrupted, an error is returned.
|
|
func (l *Logger) Shutdown(ctx context.Context) error {
|
|
done := make(chan struct{})
|
|
go func() {
|
|
select {
|
|
case <-ctx.Done():
|
|
l.uploadCancel()
|
|
<-l.shutdownDone
|
|
case <-l.shutdownDone:
|
|
}
|
|
close(done)
|
|
}()
|
|
|
|
close(l.shutdownStart)
|
|
io.WriteString(l, "logger closing down\n")
|
|
<-done
|
|
|
|
if l.zstdEncoder != nil {
|
|
return l.zstdEncoder.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Close shuts down this logger object, the background log uploader
|
|
// process, and any associated goroutines.
|
|
//
|
|
// Deprecated: use Shutdown
|
|
func (l *Logger) Close() {
|
|
l.Shutdown(context.Background())
|
|
}
|
|
|
|
// drainBlock is called by drainPending when there are no logs to drain.
|
|
//
|
|
// In typical operation, every call to the Write method unblocks and triggers
|
|
// a buffer.TryReadline, so logs are written with very low latency.
|
|
//
|
|
// If the caller provides a DrainLogs channel, then unblock-drain-on-Write
|
|
// is disabled, and it is up to the caller to trigger unblock the drain.
|
|
func (l *Logger) drainBlock() (shuttingDown bool) {
|
|
if l.drainLogs == nil {
|
|
select {
|
|
case <-l.shutdownStart:
|
|
return true
|
|
case <-l.sent:
|
|
}
|
|
} else {
|
|
select {
|
|
case <-l.shutdownStart:
|
|
return true
|
|
case <-l.drainLogs:
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// drainPending drains and encodes a batch of logs from the buffer for upload.
|
|
// If no logs are available, drainPending blocks until logs are available.
|
|
func (l *Logger) drainPending() (res []byte) {
|
|
buf := new(bytes.Buffer)
|
|
entries := 0
|
|
|
|
var batchDone bool
|
|
const maxLen = 256 << 10
|
|
for buf.Len() < maxLen && !batchDone {
|
|
b, err := l.buffer.TryReadLine()
|
|
if err == io.EOF {
|
|
break
|
|
} else if err != nil {
|
|
b = []byte(fmt.Sprintf("reading ringbuffer: %v", err))
|
|
batchDone = true
|
|
} else if b == nil {
|
|
if entries > 0 {
|
|
break
|
|
}
|
|
|
|
batchDone = l.drainBlock()
|
|
continue
|
|
}
|
|
|
|
if len(b) == 0 {
|
|
continue
|
|
}
|
|
if b[0] != '{' || !json.Valid(b) {
|
|
// This is probably a log added to stderr by filch
|
|
// outside of the logtail logger. Encode it.
|
|
// Do not add a client time, as it could have been
|
|
// been written a long time ago.
|
|
b = l.encodeText(b, true)
|
|
}
|
|
|
|
switch {
|
|
case entries == 0:
|
|
buf.Write(b)
|
|
case entries == 1:
|
|
buf2 := new(bytes.Buffer)
|
|
buf2.WriteByte('[')
|
|
buf2.Write(buf.Bytes())
|
|
buf2.WriteByte(',')
|
|
buf2.Write(b)
|
|
buf.Reset()
|
|
buf.Write(buf2.Bytes())
|
|
default:
|
|
buf.WriteByte(',')
|
|
buf.Write(b)
|
|
}
|
|
entries++
|
|
}
|
|
|
|
if entries > 1 {
|
|
buf.WriteByte(']')
|
|
}
|
|
if buf.Len() == 0 {
|
|
return nil
|
|
}
|
|
return buf.Bytes()
|
|
}
|
|
|
|
// This is the goroutine that repeatedly uploads logs in the background.
|
|
func (l *Logger) uploading(ctx context.Context) {
|
|
defer close(l.shutdownDone)
|
|
|
|
for {
|
|
body := l.drainPending()
|
|
origlen := -1 // sentinel value: uncompressed
|
|
// Don't attempt to compress tiny bodies; not worth the CPU cycles.
|
|
if l.zstdEncoder != nil && len(body) > 256 {
|
|
zbody := l.zstdEncoder.EncodeAll(body, nil)
|
|
// Only send it compressed if the bandwidth savings are sufficient.
|
|
// Just the extra headers associated with enabling compression
|
|
// are 50 bytes by themselves.
|
|
if len(body)-len(zbody) > 64 {
|
|
origlen = len(body)
|
|
body = zbody
|
|
}
|
|
}
|
|
|
|
for len(body) > 0 {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
default:
|
|
}
|
|
uploaded, err := l.upload(ctx, body, origlen)
|
|
if err != nil {
|
|
if !l.internetUp() {
|
|
fmt.Fprintf(l.stderr, "logtail: internet down; waiting\n")
|
|
l.awaitInternetUp(ctx)
|
|
continue
|
|
}
|
|
fmt.Fprintf(l.stderr, "logtail: upload: %v\n", err)
|
|
}
|
|
l.bo.BackOff(ctx, err)
|
|
if uploaded {
|
|
break
|
|
}
|
|
}
|
|
|
|
select {
|
|
case <-l.shutdownStart:
|
|
return
|
|
default:
|
|
}
|
|
}
|
|
}
|
|
|
|
func (l *Logger) internetUp() bool {
|
|
if l.linkMonitor == nil {
|
|
// No way to tell, so assume it is.
|
|
return true
|
|
}
|
|
return l.linkMonitor.InterfaceState().AnyInterfaceUp()
|
|
}
|
|
|
|
func (l *Logger) awaitInternetUp(ctx context.Context) {
|
|
upc := make(chan bool, 1)
|
|
defer l.linkMonitor.RegisterChangeCallback(func(changed bool, st *interfaces.State) {
|
|
if st.AnyInterfaceUp() {
|
|
select {
|
|
case upc <- true:
|
|
default:
|
|
}
|
|
}
|
|
})()
|
|
if l.internetUp() {
|
|
return
|
|
}
|
|
select {
|
|
case <-upc:
|
|
fmt.Fprintf(l.stderr, "logtail: internet back up\n")
|
|
case <-ctx.Done():
|
|
}
|
|
}
|
|
|
|
// upload uploads body to the log server.
|
|
// origlen indicates the pre-compression body length.
|
|
// origlen of -1 indicates that the body is not compressed.
|
|
func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (uploaded bool, err error) {
|
|
req, err := http.NewRequest("POST", l.url, bytes.NewReader(body))
|
|
if err != nil {
|
|
// I know of no conditions under which this could fail.
|
|
// Report it very loudly.
|
|
// TODO record logs to disk
|
|
panic("logtail: cannot build http request: " + err.Error())
|
|
}
|
|
if origlen != -1 {
|
|
req.Header.Add("Content-Encoding", "zstd")
|
|
req.Header.Add("Orig-Content-Length", strconv.Itoa(origlen))
|
|
}
|
|
req.Header["User-Agent"] = nil // not worth writing one; save some bytes
|
|
|
|
maxUploadTime := 45 * time.Second
|
|
ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
|
|
defer cancel()
|
|
req = req.WithContext(ctx)
|
|
|
|
compressedNote := "not-compressed"
|
|
if origlen != -1 {
|
|
compressedNote = "compressed"
|
|
}
|
|
|
|
resp, err := l.httpc.Do(req)
|
|
if err != nil {
|
|
return false, fmt.Errorf("log upload of %d bytes %s failed: %v", len(body), compressedNote, err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != 200 {
|
|
uploaded = resp.StatusCode == 400 // the server saved the logs anyway
|
|
b, _ := ioutil.ReadAll(io.LimitReader(resp.Body, 1<<20))
|
|
return uploaded, fmt.Errorf("log upload of %d bytes %s failed %d: %q", len(body), compressedNote, resp.StatusCode, b)
|
|
}
|
|
|
|
// Try to read to EOF, in case server's response is
|
|
// chunked. We want to reuse the TCP connection if it's
|
|
// HTTP/1. On success, we expect 0 bytes.
|
|
// TODO(bradfitz): can remove a few days after 2020-04-04 once
|
|
// server is fixed.
|
|
if resp.ContentLength == -1 {
|
|
resp.Body.Read(make([]byte, 1))
|
|
}
|
|
return true, nil
|
|
}
|
|
|
|
// Flush uploads all logs to the server.
|
|
// It blocks until complete or there is an unrecoverable error.
|
|
func (l *Logger) Flush() error {
|
|
return nil
|
|
}
|
|
|
|
func (l *Logger) send(jsonBlob []byte) (int, error) {
|
|
n, err := l.buffer.Write(jsonBlob)
|
|
if l.drainLogs == nil {
|
|
select {
|
|
case l.sent <- struct{}{}:
|
|
default:
|
|
}
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
// TODO: instead of allocating, this should probably just append
|
|
// directly into the output log buffer.
|
|
func (l *Logger) encodeText(buf []byte, skipClientTime bool) []byte {
|
|
now := l.timeNow()
|
|
|
|
// Factor in JSON encoding overhead to try to only do one alloc
|
|
// in the make below (so appends don't resize the buffer).
|
|
overhead := 13
|
|
if !skipClientTime {
|
|
overhead += 67
|
|
}
|
|
// TODO: do a pass over buf and count how many backslashes will be needed?
|
|
// For now just factor in a dozen.
|
|
overhead += 12
|
|
|
|
b := make([]byte, 0, len(buf)+overhead)
|
|
b = append(b, '{')
|
|
|
|
if !skipClientTime {
|
|
b = append(b, `"logtail": {"client_time": "`...)
|
|
b = now.AppendFormat(b, time.RFC3339Nano)
|
|
b = append(b, "\"}, "...)
|
|
}
|
|
|
|
b = append(b, "\"text\": \""...)
|
|
for i, c := range buf {
|
|
switch c {
|
|
case '\b':
|
|
b = append(b, '\\', 'b')
|
|
case '\f':
|
|
b = append(b, '\\', 'f')
|
|
case '\n':
|
|
b = append(b, '\\', 'n')
|
|
case '\r':
|
|
b = append(b, '\\', 'r')
|
|
case '\t':
|
|
b = append(b, '\\', 't')
|
|
case '"':
|
|
b = append(b, '\\', '"')
|
|
case '\\':
|
|
b = append(b, '\\', '\\')
|
|
default:
|
|
// TODO: what about binary gibberish or non UTF-8?
|
|
b = append(b, c)
|
|
}
|
|
if l.lowMem && i > 254 {
|
|
// TODO: this can break a UTF-8 character
|
|
// mid-encoding. We don't tend to log
|
|
// non-ASCII stuff ourselves, but e.g. client
|
|
// names might be.
|
|
b = append(b, "…"...)
|
|
break
|
|
}
|
|
}
|
|
b = append(b, "\"}\n"...)
|
|
return b
|
|
}
|
|
|
|
func (l *Logger) encode(buf []byte) []byte {
|
|
if buf[0] != '{' {
|
|
return l.encodeText(buf, l.skipClientTime) // text fast-path
|
|
}
|
|
|
|
now := l.timeNow()
|
|
|
|
obj := make(map[string]interface{})
|
|
if err := json.Unmarshal(buf, &obj); err != nil {
|
|
for k := range obj {
|
|
delete(obj, k)
|
|
}
|
|
obj["text"] = string(buf)
|
|
}
|
|
if txt, isStr := obj["text"].(string); l.lowMem && isStr && len(txt) > 254 {
|
|
// TODO(crawshaw): trim to unicode code point
|
|
obj["text"] = txt[:254] + "…"
|
|
}
|
|
|
|
hasLogtail := obj["logtail"] != nil
|
|
if hasLogtail {
|
|
obj["error_has_logtail"] = obj["logtail"]
|
|
obj["logtail"] = nil
|
|
}
|
|
if !l.skipClientTime {
|
|
obj["logtail"] = map[string]string{
|
|
"client_time": now.Format(time.RFC3339Nano),
|
|
}
|
|
}
|
|
|
|
b, err := json.Marshal(obj)
|
|
if err != nil {
|
|
fmt.Fprintf(l.stderr, "logtail: re-encoding JSON failed: %v\n", err)
|
|
// I know of no conditions under which this could fail.
|
|
// Report it very loudly.
|
|
panic("logtail: re-encoding JSON failed: " + err.Error())
|
|
}
|
|
b = append(b, '\n')
|
|
return b
|
|
}
|
|
|
|
// Write logs an encoded JSON blob.
|
|
//
|
|
// If the []byte passed to Write is not an encoded JSON blob,
|
|
// then contents is fit into a JSON blob and written.
|
|
//
|
|
// This is intended as an interface for the stdlib "log" package.
|
|
func (l *Logger) Write(buf []byte) (int, error) {
|
|
if len(buf) == 0 {
|
|
return 0, nil
|
|
}
|
|
level, buf := parseAndRemoveLogLevel(buf)
|
|
if l.stderr != nil && l.stderr != ioutil.Discard && int64(level) <= atomic.LoadInt64(&l.stderrLevel) {
|
|
if buf[len(buf)-1] == '\n' {
|
|
l.stderr.Write(buf)
|
|
} else {
|
|
// The log package always line-terminates logs,
|
|
// so this is an uncommon path.
|
|
withNL := append(buf[:len(buf):len(buf)], '\n')
|
|
l.stderr.Write(withNL)
|
|
}
|
|
}
|
|
b := l.encode(buf)
|
|
_, err := l.send(b)
|
|
return len(buf), err
|
|
}
|
|
|
|
// Verbosity tags recognized in log messages.
var (
	openBracketV = []byte("[v") // shared prefix of every tag; cheap pre-check
	v1           = []byte("[v1] ")
	v2           = []byte("[v2] ")
)

// parseAndRemoveLogLevel reports the verbosity level tagged in buf and
// returns buf with every occurrence of that tag removed.
//
// Level 0 is the normal (or unknown) level; 1 and above are increasingly
// verbose. Empty input, input starting with '{', and input without a
// recognized tag are returned unchanged at level 0. When both tags are
// present, "[v1] " wins and "[v2] " is left in place.
func parseAndRemoveLogLevel(buf []byte) (level int, cleanBuf []byte) {
	untagged := len(buf) == 0 || buf[0] == '{' || !bytes.Contains(buf, openBracketV)
	if untagged {
		return 0, buf
	}

	switch {
	case bytes.Contains(buf, v1):
		return 1, bytes.ReplaceAll(buf, v1, nil)
	case bytes.Contains(buf, v2):
		return 2, bytes.ReplaceAll(buf, v2, nil)
	default:
		return 0, buf
	}
}
|