control/controlclient, ipn: add client audit logging (#14950)

updates tailscale/corp#26435

Adds client support for sending audit logs to control via /machine/audit-log.
Specifically implements audit logging for user initiated disconnections.

This will require further work to optimize the persistent storage and exclusion
via build tags for mobile:
tailscale/corp#27011
tailscale/corp#27012

Signed-off-by: Jonathan Nobels <jonathan@tailscale.com>
This commit is contained in:
Jonathan Nobels
2025-03-12 10:37:03 -04:00
committed by GitHub
parent 06ae52d309
commit 52710945f5
13 changed files with 1204 additions and 13 deletions

View File

@@ -119,6 +119,7 @@ type Auto struct {
updateCh chan struct{} // readable when we should inform the server of a change
observer Observer // called to update Client status; always non-nil
observerQueue execqueue.ExecQueue
shutdownFn func() // to be called prior to shutdown or nil
unregisterHealthWatch func()
@@ -189,6 +190,7 @@ func NewNoStart(opts Options) (_ *Auto, err error) {
mapDone: make(chan struct{}),
updateDone: make(chan struct{}),
observer: opts.Observer,
shutdownFn: opts.Shutdown,
}
c.authCtx, c.authCancel = context.WithCancel(context.Background())
c.authCtx = sockstats.WithSockStats(c.authCtx, sockstats.LabelControlClientAuto, opts.Logf)
@@ -755,6 +757,7 @@ func (c *Auto) Shutdown() {
return
}
c.logf("client.Shutdown ...")
shutdownFn := c.shutdownFn
direct := c.direct
c.closed = true
@@ -767,6 +770,10 @@ func (c *Auto) Shutdown() {
c.unpauseWaiters = nil
c.mu.Unlock()
if shutdownFn != nil {
shutdownFn()
}
c.unregisterHealthWatch()
<-c.authDone
<-c.mapDone

View File

@@ -4,6 +4,8 @@
package controlclient
import (
"errors"
"fmt"
"io"
"reflect"
"slices"
@@ -147,3 +149,42 @@ func TestCanSkipStatus(t *testing.T) {
t.Errorf("Status fields = %q; this code was only written to handle fields %q", f, want)
}
}
// TestRetryableErrors verifies that the controlclient API errors report the
// expected retryability, both for the bare sentinel errors and for errors
// that wrap them via fmt.Errorf with %w.
func TestRetryableErrors(t *testing.T) {
	errorTests := []struct {
		err  error
		want bool
	}{
		{errNoNoiseClient, true},
		{errNoNodeKey, true},
		{fmt.Errorf("%w: %w", errNoNoiseClient, errors.New("no noise")), true},
		{fmt.Errorf("%w: %w", errHTTPPostFailure, errors.New("bad post")), true},
		{fmt.Errorf("%w: %w", errNoNodeKey, errors.New("not node key")), true},
		{errBadHTTPResponse(429, "too may requests"), true},
		{errBadHTTPResponse(500, "internal server eror"), true},
		{errBadHTTPResponse(502, "bad gateway"), true},
		{errBadHTTPResponse(503, "service unavailable"), true},
		{errBadHTTPResponse(504, "gateway timeout"), true},
		{errBadHTTPResponse(1234, "random error"), false},
		// An error with no Retryable method anywhere in its chain must be
		// treated as non-retryable.
		{errors.New("plain error"), false},
	}

	for _, tt := range errorTests {
		t.Run(tt.err.Error(), func(t *testing.T) {
			// Report the actual result on failure; printing the error itself
			// under "got" hides which way the check went wrong.
			if got := isRetryableErrorForTest(tt.err); got != tt.want {
				t.Fatalf("isRetryableErrorForTest(%v) = %v, want %v", tt.err, got, tt.want)
			}
		})
	}
}
// retryableForTest is the minimal behavior the tests look for in an error
// chain: a method reporting whether the failed operation may be retried.
type retryableForTest interface {
	Retryable() bool
}

// isRetryableErrorForTest reports whether err, or any error it wraps,
// exposes a Retryable method that returns true. Errors with no such method
// in their chain (including a nil error) are reported as not retryable.
func isRetryableErrorForTest(err error) bool {
	var target retryableForTest
	return errors.As(err, &target) && target.Retryable()
}

View File

@@ -156,6 +156,11 @@ type Options struct {
// If we receive a new DialPlan from the server, this value will be
// updated.
DialPlan ControlDialPlanner
// Shutdown is an optional function that will be called before client shutdown is
// attempted. It is used to allow the client to clean up any resources or complete any
// tasks that are dependent on a live client.
Shutdown func()
}
// ControlDialPlanner is the interface optionally supplied when creating a
@@ -1662,11 +1667,11 @@ func (c *Auto) SetDeviceAttrs(ctx context.Context, attrs tailcfg.AttrUpdate) err
func (c *Direct) SetDeviceAttrs(ctx context.Context, attrs tailcfg.AttrUpdate) error {
nc, err := c.getNoiseClient()
if err != nil {
return err
return fmt.Errorf("%w: %w", errNoNoiseClient, err)
}
nodeKey, ok := c.GetPersist().PublicNodeKeyOK()
if !ok {
return errors.New("no node key")
return errNoNodeKey
}
if c.panicOnUse {
panic("tainted client")
@@ -1697,6 +1702,47 @@ func (c *Direct) SetDeviceAttrs(ctx context.Context, attrs tailcfg.AttrUpdate) e
return nil
}
// SendAuditLog implements [auditlog.Transport]. It forwards auditLog to the
// underlying Direct client, which posts it synchronously to the control
// plane, and returns whatever error that call produces.
//
// See docs on [tailcfg.AuditLogRequest] and [auditlog.Logger] for background.
func (c *Auto) SendAuditLog(ctx context.Context, auditLog tailcfg.AuditLogRequest) (err error) {
	err = c.direct.sendAuditLog(ctx, auditLog)
	return err
}
// sendAuditLog posts a single audit log entry to the control plane at
// /machine/audit-log and waits for the response.
//
// Only the Action and Details of auditLog are forwarded; Version and NodeKey
// are filled in from the current capability version and this client's node
// key.
//
// Errors are returned as follows:
//   - errNoNoiseClient (wrapping the cause) if no noise client is available
//   - errNoNodeKey if the persisted state has no public node key
//   - errHTTPPostFailure (wrapping the cause) if the POST itself fails
//   - the result of errBadHTTPResponse for any response status other than 200
//
// It panics if the client is marked panicOnUse.
func (c *Direct) sendAuditLog(ctx context.Context, auditLog tailcfg.AuditLogRequest) (err error) {
	noise, ncErr := c.getNoiseClient()
	if ncErr != nil {
		return fmt.Errorf("%w: %w", errNoNoiseClient, ncErr)
	}

	nodeKey, haveKey := c.GetPersist().PublicNodeKeyOK()
	if !haveKey {
		return errNoNodeKey
	}

	if c.panicOnUse {
		panic("tainted client")
	}

	payload := &tailcfg.AuditLogRequest{
		Version: tailcfg.CurrentCapabilityVersion,
		NodeKey: nodeKey,
		Action:  auditLog.Action,
		Details: auditLog.Details,
	}

	resp, postErr := noise.post(ctx, "/machine/audit-log", nodeKey, payload)
	if postErr != nil {
		return fmt.Errorf("%w: %w", errHTTPPostFailure, postErr)
	}
	defer resp.Body.Close()

	if resp.StatusCode == 200 {
		return nil
	}

	// Best effort: the body is only used to enrich the error message, so a
	// failed read simply yields a shorter message.
	body, _ := io.ReadAll(resp.Body)
	return errBadHTTPResponse(resp.StatusCode, string(body))
}
func addLBHeader(req *http.Request, nodeKey key.NodePublic) {
if !nodeKey.IsZero() {
req.Header.Add(tailcfg.LBHeader, nodeKey.String())

View File

@@ -0,0 +1,51 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
package controlclient
import (
"errors"
"fmt"
"net/http"
)
// apiResponseError is the error type returned by controlclient API
// requests.
//
// It pairs an underlying error with a flag telling callers whether the
// failed request is worth retrying; the flag is exposed through the
// Retryable method.
type apiResponseError struct {
	err       error // underlying cause; also supplies the error message
	retryable bool  // whether the failed request may be retried
}

// Error implements [error] by returning the underlying error's message.
func (e *apiResponseError) Error() string {
	msg := e.err.Error()
	return msg
}

// Retryable reports whether the request that produced this error may be
// retried.
func (e *apiResponseError) Retryable() bool {
	return e.retryable
}

// Unwrap returns the underlying error so that errors.Is and errors.As can
// inspect the wrapped chain.
func (e *apiResponseError) Unwrap() error {
	return e.err
}
// Sentinel errors for failures that occur before or while sending a request.
// All of them are marked retryable.
var (
	// errNoNodeKey indicates that no node key was available.
	errNoNodeKey = &apiResponseError{
		err:       errors.New("no node key"),
		retryable: true,
	}
	// errNoNoiseClient indicates that no noise client was available.
	errNoNoiseClient = &apiResponseError{
		err:       errors.New("no noise client"),
		retryable: true,
	}
	// errHTTPPostFailure indicates that the HTTP POST itself failed.
	errHTTPPostFailure = &apiResponseError{
		err:       errors.New("http failure"),
		retryable: true,
	}
)
// errBadHTTPResponse builds an *apiResponseError for an unexpected HTTP
// response. The message has the form "http error <code>: <msg>".
//
// The error is marked retryable for status codes 429, 500, 502, 503 and 504;
// every other code yields a non-retryable error.
func errBadHTTPResponse(code int, msg string) error {
	cause := fmt.Errorf("http error %d: %s", code, msg)

	switch code {
	case http.StatusTooManyRequests,
		http.StatusInternalServerError,
		http.StatusBadGateway,
		http.StatusServiceUnavailable,
		http.StatusGatewayTimeout:
		return &apiResponseError{err: cause, retryable: true}
	default:
		return &apiResponseError{err: cause, retryable: false}
	}
}