package integration

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"net/netip"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cenkalti/backoff/v5"
	"github.com/google/go-cmp/cmp"
	v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
	policyv2 "github.com/juanfont/headscale/hscontrol/policy/v2"
	"github.com/juanfont/headscale/hscontrol/types"
	"github.com/juanfont/headscale/hscontrol/util"
	"github.com/juanfont/headscale/integration/integrationutil"
	"github.com/juanfont/headscale/integration/tsic"
	"github.com/oauth2-proxy/mockoidc"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/exp/maps"
	"golang.org/x/exp/slices"
	"tailscale.com/tailcfg"
	"tailscale.com/types/ptr"
)

const (
	// derpPingTimeout defines the timeout for individual DERP ping operations.
	// Used in DERP connectivity tests to verify relay server communication.
	derpPingTimeout = 2 * time.Second

	// derpPingCount defines the number of ping attempts for DERP connectivity tests.
	// Higher count provides better reliability assessment of DERP connectivity.
	derpPingCount = 10

	// TimestampFormat is the standard timestamp format used across all integration tests.
	// Format: "2006-01-02T15-04-05.999999999" provides high precision timestamps
	// suitable for debugging and log correlation in integration tests.
	TimestampFormat = "2006-01-02T15-04-05.999999999"

	// TimestampFormatRunID is used for generating unique run identifiers.
	// Format: "20060102-150405" provides compact date-time for file/directory names.
	TimestampFormatRunID = "20060102-150405"
)

// NodeSystemStatus represents the status of a node across different systems.
type NodeSystemStatus struct {
	Batcher          bool
	BatcherConnCount int
	MapResponses     bool
	NodeStore        bool
}

// requireNotNil validates that an object is not nil and fails the test if it is.
// This helper provides consistent error messaging for nil checks in integration tests.
func requireNotNil(t *testing.T, object interface{}) {
	t.Helper()
	require.NotNil(t, object)
}

// requireNoErrHeadscaleEnv validates that headscale environment creation succeeded.
// Provides specific error context for headscale environment setup failures.
func requireNoErrHeadscaleEnv(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to create headscale environment")
}

// requireNoErrGetHeadscale validates that headscale server retrieval succeeded.
// Provides specific error context for headscale server access failures.
func requireNoErrGetHeadscale(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to get headscale")
}

// requireNoErrListClients validates that client listing operations succeeded.
// Provides specific error context for client enumeration failures.
func requireNoErrListClients(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to list clients")
}

// requireNoErrListClientIPs validates that client IP retrieval succeeded.
// Provides specific error context for client IP address enumeration failures.
func requireNoErrListClientIPs(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to get client IPs")
}

// requireNoErrSync validates that client synchronization operations succeeded.
// Provides specific error context for client sync failures across the network.
func requireNoErrSync(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to have all clients sync up")
}

// requireNoErrListFQDN validates that FQDN listing operations succeeded.
// Provides specific error context for DNS name enumeration failures.
func requireNoErrListFQDN(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to list FQDNs")
}

// requireNoErrLogout validates that tailscale node logout operations succeeded.
// Provides specific error context for client logout failures.
func requireNoErrLogout(t *testing.T, err error) {
	t.Helper()
	require.NoError(t, err, "failed to log out tailscale nodes")
}

// collectExpectedNodeIDs extracts node IDs from a list of TailscaleClients for validation purposes.
func collectExpectedNodeIDs(t *testing.T, clients []TailscaleClient) []types.NodeID {
	t.Helper()

	expectedNodes := make([]types.NodeID, 0, len(clients))
	for _, client := range clients {
		status := client.MustStatus()
		nodeID, err := strconv.ParseUint(string(status.Self.ID), 10, 64)
		require.NoError(t, err)
		expectedNodes = append(expectedNodes, types.NodeID(nodeID))
	}

	return expectedNodes
}
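
// A minimal usage sketch (hypothetical test body, assuming the usual Scenario and
// ControlServer helpers provided elsewhere in this package):
//
//	allClients, err := scenario.ListTailscaleClients()
//	requireNoErrListClients(t, err)
//	headscale, err := scenario.Headscale()
//	requireNoErrGetHeadscale(t, err)
//	expectedNodes := collectExpectedNodeIDs(t, allClients)
//	validateInitialConnection(t, headscale, expectedNodes)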

// validateInitialConnection performs comprehensive validation after initial client login.
// Validates that all nodes are online and have proper NetInfo/DERP configuration,
// essential for ensuring successful initial connection state in relogin tests.
func validateInitialConnection(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()
	requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after initial login", 120*time.Second)
	requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after initial login", 3*time.Minute)
}

// validateLogoutComplete performs comprehensive validation after client logout.
// Ensures all nodes are properly offline across all headscale systems,
// critical for validating clean logout state in relogin tests.
func validateLogoutComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()
	requireAllClientsOnline(t, headscale, expectedNodes, false, "all nodes should be offline after logout", 120*time.Second)
}

// validateReloginComplete performs comprehensive validation after client relogin.
// Validates that all nodes are back online with proper NetInfo/DERP configuration,
// ensuring successful relogin state restoration in integration tests.
func validateReloginComplete(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID) {
	t.Helper()
	requireAllClientsOnline(t, headscale, expectedNodes, true, "all clients should be connected after relogin", 120*time.Second)
	requireAllClientsNetInfoAndDERP(t, headscale, expectedNodes, "all clients should have NetInfo and DERP after relogin", 3*time.Minute)
}
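
// A relogin-style test would typically chain these validators around the
// logout/login steps, for example (sketch only, assuming allClients and
// expectedNodes were collected as above):
//
//	validateInitialConnection(t, headscale, expectedNodes)
//	for _, client := range allClients {
//		err := client.Logout()
//		requireNoErrLogout(t, err)
//	}
//	validateLogoutComplete(t, headscale, expectedNodes)
//	// ... log the clients back in (e.g. with a new preauth key) ...
//	validateReloginComplete(t, headscale, expectedNodes)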

// requireAllClientsOnline verifies that all expected nodes are in the given
// online/offline state across all headscale systems (batcher, map responses, nodestore).
func requireAllClientsOnline(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, expectedOnline bool, message string, timeout time.Duration) {
	t.Helper()

	startTime := time.Now()

	stateStr := "offline"
	if expectedOnline {
		stateStr = "online"
	}

	t.Logf("requireAllClientsOnline: Starting %s validation for %d nodes at %s - %s", stateStr, len(expectedNodes), startTime.Format(TimestampFormat), message)

	if expectedOnline {
		// For online validation, use the existing logic with the full timeout.
		requireAllClientsOnlineWithSingleTimeout(t, headscale, expectedNodes, expectedOnline, message, timeout)
	} else {
		// For offline validation, use a staged approach with component-specific timeouts.
		requireAllClientsOfflineStaged(t, headscale, expectedNodes, message, timeout)
	}

	endTime := time.Now()
	t.Logf("requireAllClientsOnline: Completed %s validation for %d nodes at %s - Duration: %s - %s", stateStr, len(expectedNodes), endTime.Format(TimestampFormat), endTime.Sub(startTime), message)
}

// requireAllClientsOnlineWithSingleTimeout is the original validation logic for the online state.
func requireAllClientsOnlineWithSingleTimeout(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, expectedOnline bool, message string, timeout time.Duration) {
	t.Helper()

	var prevReport string
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		// Get batcher state.
		debugInfo, err := headscale.DebugBatcher()
		assert.NoError(c, err, "Failed to get batcher debug info")
		if err != nil {
			return
		}

		// Get map responses.
		mapResponses, err := headscale.GetAllMapReponses()
		assert.NoError(c, err, "Failed to get map responses")
		if err != nil {
			return
		}

		// Get nodestore state.
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		// Validate that all expected nodes are present in nodeStore.
		for _, nodeID := range expectedNodes {
			_, exists := nodeStore[nodeID]
			assert.True(c, exists, "Expected node %d not found in nodeStore", nodeID)
		}

		// Check that we have map responses for expected nodes.
		mapResponseCount := len(mapResponses)
		expectedCount := len(expectedNodes)
		assert.GreaterOrEqual(c, mapResponseCount, expectedCount, "MapResponses insufficient - expected at least %d responses, got %d", expectedCount, mapResponseCount)

		// Build status map for each node.
		nodeStatus := make(map[types.NodeID]NodeSystemStatus)

		// Initialize all expected nodes.
		for _, nodeID := range expectedNodes {
			nodeStatus[nodeID] = NodeSystemStatus{}
		}

		// Check batcher state for expected nodes.
		for _, nodeID := range expectedNodes {
			nodeIDStr := fmt.Sprintf("%d", nodeID)
			if nodeInfo, exists := debugInfo.ConnectedNodes[nodeIDStr]; exists {
				if status, exists := nodeStatus[nodeID]; exists {
					status.Batcher = nodeInfo.Connected
					status.BatcherConnCount = nodeInfo.ActiveConnections
					nodeStatus[nodeID] = status
				}
			} else {
				// Node not found in batcher, mark as disconnected.
				if status, exists := nodeStatus[nodeID]; exists {
					status.Batcher = false
					status.BatcherConnCount = 0
					nodeStatus[nodeID] = status
				}
			}
		}

		// Check map responses using BuildExpectedOnlineMap.
		onlineFromMaps := make(map[types.NodeID]bool)
		onlineMap := integrationutil.BuildExpectedOnlineMap(mapResponses)

		// For single node scenarios, we can't validate peer visibility since there are no peers.
		if len(expectedNodes) == 1 {
			// For a single node, just check that we have map responses for the node.
			for nodeID := range nodeStatus {
				if _, exists := onlineMap[nodeID]; exists {
					onlineFromMaps[nodeID] = true
				} else {
					onlineFromMaps[nodeID] = false
				}
			}
		} else {
			// Multi-node scenario: check peer visibility.
			for nodeID := range nodeStatus {
				// Initialize as offline - will be set to true only if visible in all relevant peer maps.
				onlineFromMaps[nodeID] = false

				// Count how many peer maps should show this node.
				expectedPeerMaps := 0
				foundOnlinePeerMaps := 0

				for id, peerMap := range onlineMap {
					if id == nodeID {
						continue // Skip self-references
					}

					expectedPeerMaps++

					if online, exists := peerMap[nodeID]; exists && online {
						foundOnlinePeerMaps++
					}
				}

				// Node is considered online if it appears online in all peer maps
				// (or if there are no peer maps to check).
				if expectedPeerMaps == 0 || foundOnlinePeerMaps == expectedPeerMaps {
					onlineFromMaps[nodeID] = true
				}
			}
		}
		assert.Lenf(c, onlineFromMaps, expectedCount, "MapResponses missing nodes in status check")

		// Update status with map response data.
		for nodeID, online := range onlineFromMaps {
			if status, exists := nodeStatus[nodeID]; exists {
				status.MapResponses = online
				nodeStatus[nodeID] = status
			}
		}

		// Check nodestore state for expected nodes.
		for _, nodeID := range expectedNodes {
			if node, exists := nodeStore[nodeID]; exists {
				if status, exists := nodeStatus[nodeID]; exists {
					// Check if the node is online in the nodestore.
					status.NodeStore = node.IsOnline != nil && *node.IsOnline
					nodeStatus[nodeID] = status
				}
			}
		}

		// Verify all systems show nodes in the expected state and report failures.
		allMatch := true
		var failureReport strings.Builder

		ids := types.NodeIDs(maps.Keys(nodeStatus))
		slices.Sort(ids)
		for _, nodeID := range ids {
			status := nodeStatus[nodeID]
			systemsMatch := (status.Batcher == expectedOnline) &&
				(status.MapResponses == expectedOnline) &&
				(status.NodeStore == expectedOnline)

			if !systemsMatch {
				allMatch = false
				stateStr := "offline"
				if expectedOnline {
					stateStr = "online"
				}

				failureReport.WriteString(fmt.Sprintf("node:%d is not fully %s (timestamp: %s):\n", nodeID, stateStr, time.Now().Format(TimestampFormat)))
				failureReport.WriteString(fmt.Sprintf(" - batcher: %t (expected: %t)\n", status.Batcher, expectedOnline))
				failureReport.WriteString(fmt.Sprintf(" - conn count: %d\n", status.BatcherConnCount))
				failureReport.WriteString(fmt.Sprintf(" - mapresponses: %t (expected: %t, down with at least one peer)\n", status.MapResponses, expectedOnline))
				failureReport.WriteString(fmt.Sprintf(" - nodestore: %t (expected: %t)\n", status.NodeStore, expectedOnline))
			}
		}

		if !allMatch {
			if diff := cmp.Diff(prevReport, failureReport.String()); diff != "" {
				t.Logf("Node state validation report changed at %s:", time.Now().Format(TimestampFormat))
				t.Logf("Previous report:\n%s", prevReport)
				t.Logf("Current report:\n%s", failureReport.String())
				t.Logf("Report diff:\n%s", diff)
				prevReport = failureReport.String()
			}

			failureReport.WriteString(fmt.Sprintf("validation_timestamp: %s\n", time.Now().Format(TimestampFormat)))
			// Note: timeout_remaining is not available in this context.

			assert.Fail(c, failureReport.String())
		}

		stateStr := "offline"
		if expectedOnline {
			stateStr = "online"
		}

		assert.True(c, allMatch, fmt.Sprintf("Not all %d nodes are %s across all systems (batcher, mapresponses, nodestore)", len(expectedNodes), stateStr))
	}, timeout, 2*time.Second, message)
}

// requireAllClientsOfflineStaged validates the offline state with staged timeouts for different components.
func requireAllClientsOfflineStaged(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, message string, totalTimeout time.Duration) {
	t.Helper()

	// Stage 1: Verify batcher disconnection (should be immediate).
	t.Logf("Stage 1: Verifying batcher disconnection for %d nodes", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		debugInfo, err := headscale.DebugBatcher()
		assert.NoError(c, err, "Failed to get batcher debug info")
		if err != nil {
			return
		}

		allBatcherOffline := true
		for _, nodeID := range expectedNodes {
			nodeIDStr := fmt.Sprintf("%d", nodeID)
			if nodeInfo, exists := debugInfo.ConnectedNodes[nodeIDStr]; exists && nodeInfo.Connected {
				allBatcherOffline = false
				assert.False(c, nodeInfo.Connected, "Node %d should not be connected in batcher", nodeID)
			}
		}
		assert.True(c, allBatcherOffline, "All nodes should be disconnected from batcher")
	}, 15*time.Second, 1*time.Second, "batcher disconnection validation")

	// Stage 2: Verify nodestore offline status (up to 15 seconds due to disconnect detection delay).
	t.Logf("Stage 2: Verifying nodestore offline status for %d nodes (allowing for 10s disconnect detection delay)", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		allNodeStoreOffline := true
		for _, nodeID := range expectedNodes {
			if node, exists := nodeStore[nodeID]; exists {
				isOnline := node.IsOnline != nil && *node.IsOnline
				if isOnline {
					allNodeStoreOffline = false
					assert.False(c, isOnline, "Node %d should be offline in nodestore", nodeID)
				}
			}
		}
		assert.True(c, allNodeStoreOffline, "All nodes should be offline in nodestore")
	}, 20*time.Second, 1*time.Second, "nodestore offline validation")

	// Stage 3: Verify map response propagation (longest delay due to peer update timing).
	t.Logf("Stage 3: Verifying map response propagation for %d nodes (allowing for peer map update delays)", len(expectedNodes))
	require.EventuallyWithT(t, func(c *assert.CollectT) {
		mapResponses, err := headscale.GetAllMapReponses()
		assert.NoError(c, err, "Failed to get map responses")
		if err != nil {
			return
		}

		onlineMap := integrationutil.BuildExpectedOnlineMap(mapResponses)
		allMapResponsesOffline := true
		if len(expectedNodes) == 1 {
			// Single node: check whether it still appears in map responses.
			for nodeID := range onlineMap {
				if slices.Contains(expectedNodes, nodeID) {
					allMapResponsesOffline = false
					assert.False(c, true, "Node %d should not appear in map responses", nodeID)
				}
			}
		} else {
			// Multi-node: check peer visibility.
			for _, nodeID := range expectedNodes {
				for id, peerMap := range onlineMap {
					if id == nodeID {
						continue // Skip self-references
					}
					if online, exists := peerMap[nodeID]; exists && online {
						allMapResponsesOffline = false
						assert.False(c, online, "Node %d should not be visible in node %d's map response", nodeID, id)
					}
				}
			}
		}
		assert.True(c, allMapResponsesOffline, "All nodes should be absent from peer map responses")
	}, 60*time.Second, 2*time.Second, "map response propagation validation")

	t.Logf("All stages completed: nodes are fully offline across all systems")
}

// requireAllClientsNetInfoAndDERP validates that all nodes have NetInfo in the database
// and a valid DERP server based on the NetInfo. This function follows the pattern of
// requireAllClientsOnline by using hsic.DebugNodeStore to get the database state.
func requireAllClientsNetInfoAndDERP(t *testing.T, headscale ControlServer, expectedNodes []types.NodeID, message string, timeout time.Duration) {
	t.Helper()

	startTime := time.Now()
	t.Logf("requireAllClientsNetInfoAndDERP: Starting NetInfo/DERP validation for %d nodes at %s - %s", len(expectedNodes), startTime.Format(TimestampFormat), message)

	require.EventuallyWithT(t, func(c *assert.CollectT) {
		// Get nodestore state.
		nodeStore, err := headscale.DebugNodeStore()
		assert.NoError(c, err, "Failed to get nodestore debug info")
		if err != nil {
			return
		}

		// Validate that all expected nodes are present in nodeStore.
		for _, nodeID := range expectedNodes {
			_, exists := nodeStore[nodeID]
			assert.True(c, exists, "Expected node %d not found in nodeStore during NetInfo validation", nodeID)
		}

		// Check each expected node.
		for _, nodeID := range expectedNodes {
			node, exists := nodeStore[nodeID]
			assert.True(c, exists, "Node %d not found in nodestore during NetInfo validation", nodeID)
			if !exists {
				continue
			}

			// Validate that the node has Hostinfo.
			assert.NotNil(c, node.Hostinfo, "Node %d (%s) should have Hostinfo for NetInfo validation", nodeID, node.Hostname)
			if node.Hostinfo == nil {
				t.Logf("Node %d (%s) missing Hostinfo at %s", nodeID, node.Hostname, time.Now().Format(TimestampFormat))
				continue
			}

			// Validate that the node has NetInfo.
			assert.NotNil(c, node.Hostinfo.NetInfo, "Node %d (%s) should have NetInfo in Hostinfo for DERP connectivity", nodeID, node.Hostname)
			if node.Hostinfo.NetInfo == nil {
				t.Logf("Node %d (%s) missing NetInfo at %s", nodeID, node.Hostname, time.Now().Format(TimestampFormat))
				continue
			}

			// Validate that the node has a valid DERP server (PreferredDERP should be > 0).
			preferredDERP := node.Hostinfo.NetInfo.PreferredDERP
			assert.Greater(c, preferredDERP, 0, "Node %d (%s) should have a valid DERP server (PreferredDERP > 0) for relay connectivity, got %d", nodeID, node.Hostname, preferredDERP)

			t.Logf("Node %d (%s) has valid NetInfo with DERP server %d at %s", nodeID, node.Hostname, preferredDERP, time.Now().Format(TimestampFormat))
		}
	}, timeout, 5*time.Second, message)

	endTime := time.Now()
	duration := endTime.Sub(startTime)
	t.Logf("requireAllClientsNetInfoAndDERP: Completed NetInfo/DERP validation for %d nodes at %s - Duration: %v - %s", len(expectedNodes), endTime.Format(TimestampFormat), duration, message)
}

// assertLastSeenSet validates that a node has a non-nil LastSeen timestamp.
// Critical for ensuring node activity tracking is functioning properly.
func assertLastSeenSet(t *testing.T, node *v1.Node) {
	assert.NotNil(t, node)
	assert.NotNil(t, node.GetLastSeen())
}

// assertLastSeenSetWithCollect is the CollectT variant of assertLastSeenSet,
// for use inside assert.EventuallyWithT callbacks.
func assertLastSeenSetWithCollect(c *assert.CollectT, node *v1.Node) {
	assert.NotNil(c, node)
	assert.NotNil(c, node.GetLastSeen())
}

// assertTailscaleNodesLogout verifies that all provided Tailscale clients
// are in the logged-out state (NeedsLogin).
func assertTailscaleNodesLogout(t assert.TestingT, clients []TailscaleClient) {
	if h, ok := t.(interface{ Helper() }); ok {
		h.Helper()
	}

	for _, client := range clients {
		status, err := client.Status()
		assert.NoError(t, err, "failed to get status for client %s", client.Hostname())
		assert.Equal(t, "NeedsLogin", status.BackendState,
			"client %s should be logged out", client.Hostname())
	}
}

// pingAllHelper performs ping tests between all clients and addresses, returning the success count.
// This is used to validate network connectivity in integration tests.
// Returns the total number of successful ping operations.
func pingAllHelper(t *testing.T, clients []TailscaleClient, addrs []string, opts ...tsic.PingOption) int {
	t.Helper()
	success := 0

	for _, client := range clients {
		for _, addr := range addrs {
			err := client.Ping(addr, opts...)
			if err != nil {
				t.Errorf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
			} else {
				success++
			}
		}
	}

	return success
}
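
// A typical all-to-all connectivity check looks roughly like this (sketch only,
// assuming allClients and allIps were collected via the Scenario helpers):
//
//	var allAddrs []string
//	for _, ip := range allIps {
//		allAddrs = append(allAddrs, ip.String())
//	}
//	success := pingAllHelper(t, allClients, allAddrs)
//	t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))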

// pingDerpAllHelper performs DERP-based ping tests between all clients and addresses.
// This specifically tests connectivity through DERP relay servers, which is important
// for validating NAT traversal and relay functionality. Returns the success count.
func pingDerpAllHelper(t *testing.T, clients []TailscaleClient, addrs []string) int {
	t.Helper()
	success := 0

	for _, client := range clients {
		for _, addr := range addrs {
			if isSelfClient(client, addr) {
				continue
			}

			err := client.Ping(
				addr,
				tsic.WithPingTimeout(derpPingTimeout),
				tsic.WithPingCount(derpPingCount),
				tsic.WithPingUntilDirect(false),
			)
			if err != nil {
				t.Logf("failed to ping %s from %s: %s", addr, client.Hostname(), err)
			} else {
				success++
			}
		}
	}

	return success
}

// isSelfClient determines if the given address belongs to the client itself.
// Used to avoid self-ping operations in connectivity tests by checking
// hostname and IP address matches.
func isSelfClient(client TailscaleClient, addr string) bool {
	if addr == client.Hostname() {
		return true
	}

	ips, err := client.IPs()
	if err != nil {
		return false
	}

	for _, ip := range ips {
		if ip.String() == addr {
			return true
		}
	}

	return false
}

// assertClientsState validates the status and netmap of a list of clients for general connectivity.
// Runs parallel validation of status, netcheck, and netmap for all clients to ensure
// they have proper network configuration for all-to-all connectivity tests.
func assertClientsState(t *testing.T, clients []TailscaleClient) {
	t.Helper()

	var wg sync.WaitGroup

	for _, client := range clients {
		wg.Add(1)
		c := client // Avoid loop pointer
		go func() {
			defer wg.Done()
			assertValidStatus(t, c)
			assertValidNetcheck(t, c)
			assertValidNetmap(t, c)
		}()
	}

	t.Logf("waiting for client state checks to finish")
	wg.Wait()
}

// assertValidNetmap validates that a client's netmap has all required fields for proper operation.
// Checks the self node and all peers for essential networking data including hostinfo, addresses,
// endpoints, and DERP configuration. Skips validation for Tailscale versions below 1.56.
// This test is not suitable for ACL/partial connection tests.
func assertValidNetmap(t *testing.T, client TailscaleClient) {
	t.Helper()

	if !util.TailscaleVersionNewerOrEqual("1.56", client.Version()) {
		t.Logf("%q has version %q, skipping netmap check...", client.Hostname(), client.Version())

		return
	}

	t.Logf("Checking netmap of %q", client.Hostname())

	assert.EventuallyWithT(t, func(c *assert.CollectT) {
		netmap, err := client.Netmap()
		assert.NoError(c, err, "getting netmap for %q", client.Hostname())

		assert.Truef(c, netmap.SelfNode.Hostinfo().Valid(), "%q does not have Hostinfo", client.Hostname())
		if hi := netmap.SelfNode.Hostinfo(); hi.Valid() {
			assert.LessOrEqual(c, 1, netmap.SelfNode.Hostinfo().Services().Len(), "%q does not have enough services, got: %v", client.Hostname(), netmap.SelfNode.Hostinfo().Services())
		}

		assert.NotEmptyf(c, netmap.SelfNode.AllowedIPs(), "%q does not have any allowed IPs", client.Hostname())
		assert.NotEmptyf(c, netmap.SelfNode.Addresses(), "%q does not have any addresses", client.Hostname())

		assert.Truef(c, netmap.SelfNode.Online().Get(), "%q is not online", client.Hostname())

		assert.Falsef(c, netmap.SelfNode.Key().IsZero(), "%q does not have a valid NodeKey", client.Hostname())
		assert.Falsef(c, netmap.SelfNode.Machine().IsZero(), "%q does not have a valid MachineKey", client.Hostname())
		assert.Falsef(c, netmap.SelfNode.DiscoKey().IsZero(), "%q does not have a valid DiscoKey", client.Hostname())

		for _, peer := range netmap.Peers {
			assert.NotEqualf(c, "127.3.3.40:0", peer.LegacyDERPString(), "peer (%s) has no home DERP in %q's netmap, got: %s", peer.ComputedName(), client.Hostname(), peer.LegacyDERPString())
			assert.NotEqualf(c, 0, peer.HomeDERP(), "peer (%s) has no home DERP in %q's netmap, got: %d", peer.ComputedName(), client.Hostname(), peer.HomeDERP())

			assert.Truef(c, peer.Hostinfo().Valid(), "peer (%s) of %q does not have Hostinfo", peer.ComputedName(), client.Hostname())
			if hi := peer.Hostinfo(); hi.Valid() {
				assert.LessOrEqualf(c, 3, peer.Hostinfo().Services().Len(), "peer (%s) of %q does not have enough services, got: %v", peer.ComputedName(), client.Hostname(), peer.Hostinfo().Services())

				// NetInfo is not always set.
				// assert.Truef(c, hi.NetInfo().Valid(), "peer (%s) of %q does not have NetInfo", peer.ComputedName(), client.Hostname())
				if ni := hi.NetInfo(); ni.Valid() {
					assert.NotEqualf(c, 0, ni.PreferredDERP(), "peer (%s) has no home DERP in %q's netmap, got: %d", peer.ComputedName(), client.Hostname(), peer.Hostinfo().NetInfo().PreferredDERP())
				}
			}

			assert.NotEmptyf(c, peer.Endpoints(), "peer (%s) of %q does not have any endpoints", peer.ComputedName(), client.Hostname())
			assert.NotEmptyf(c, peer.AllowedIPs(), "peer (%s) of %q does not have any allowed IPs", peer.ComputedName(), client.Hostname())
			assert.NotEmptyf(c, peer.Addresses(), "peer (%s) of %q does not have any addresses", peer.ComputedName(), client.Hostname())

			assert.Truef(c, peer.Online().Get(), "peer (%s) of %q is not online", peer.ComputedName(), client.Hostname())

			assert.Falsef(c, peer.Key().IsZero(), "peer (%s) of %q does not have a valid NodeKey", peer.ComputedName(), client.Hostname())
			assert.Falsef(c, peer.Machine().IsZero(), "peer (%s) of %q does not have a valid MachineKey", peer.ComputedName(), client.Hostname())
			assert.Falsef(c, peer.DiscoKey().IsZero(), "peer (%s) of %q does not have a valid DiscoKey", peer.ComputedName(), client.Hostname())
		}
	}, 10*time.Second, 200*time.Millisecond, "Waiting for valid netmap for %q", client.Hostname())
}

// assertValidStatus validates that a client's status has all required fields for proper operation.
// Checks self and peer status for essential data including hostinfo, tailscale IPs, endpoints,
// and network map presence. This test is not suitable for ACL/partial connection tests.
func assertValidStatus(t *testing.T, client TailscaleClient) {
	t.Helper()
	status, err := client.Status(true)
	if err != nil {
		t.Fatalf("getting status for %q: %s", client.Hostname(), err)
	}

	assert.NotEmptyf(t, status.Self.HostName, "%q does not have HostName set, likely missing Hostinfo", client.Hostname())
	assert.NotEmptyf(t, status.Self.OS, "%q does not have OS set, likely missing Hostinfo", client.Hostname())
	assert.NotEmptyf(t, status.Self.Relay, "%q does not have a relay, likely missing Hostinfo/Netinfo", client.Hostname())

	assert.NotEmptyf(t, status.Self.TailscaleIPs, "%q does not have Tailscale IPs", client.Hostname())

	// This does not seem to appear until version 1.56.
	if status.Self.AllowedIPs != nil {
		assert.NotEmptyf(t, status.Self.AllowedIPs, "%q does not have any allowed IPs", client.Hostname())
	}

	assert.NotEmptyf(t, status.Self.Addrs, "%q does not have any endpoints", client.Hostname())

	assert.Truef(t, status.Self.Online, "%q is not online", client.Hostname())

	assert.Truef(t, status.Self.InNetworkMap, "%q is not in network map", client.Hostname())

	// This isn't really relevant for Self as it won't be in its own socket/wireguard.
	// assert.Truef(t, status.Self.InMagicSock, "%q is not tracked by magicsock", client.Hostname())
	// assert.Truef(t, status.Self.InEngine, "%q is not in wireguard engine", client.Hostname())

	for _, peer := range status.Peer {
		assert.NotEmptyf(t, peer.HostName, "peer (%s) of %q does not have HostName set, likely missing Hostinfo", peer.DNSName, client.Hostname())
		assert.NotEmptyf(t, peer.OS, "peer (%s) of %q does not have OS set, likely missing Hostinfo", peer.DNSName, client.Hostname())
		assert.NotEmptyf(t, peer.Relay, "peer (%s) of %q does not have a relay, likely missing Hostinfo/Netinfo", peer.DNSName, client.Hostname())

		assert.NotEmptyf(t, peer.TailscaleIPs, "peer (%s) of %q does not have Tailscale IPs", peer.DNSName, client.Hostname())

		// This does not seem to appear until version 1.56.
		if peer.AllowedIPs != nil {
			assert.NotEmptyf(t, peer.AllowedIPs, "peer (%s) of %q does not have any allowed IPs", peer.DNSName, client.Hostname())
		}

		// Addrs does not seem to appear in the status from peers.
		// assert.NotEmptyf(t, peer.Addrs, "peer (%s) of %q does not have any endpoints", peer.DNSName, client.Hostname())

		assert.Truef(t, peer.Online, "peer (%s) of %q is not online", peer.DNSName, client.Hostname())

		assert.Truef(t, peer.InNetworkMap, "peer (%s) of %q is not in network map", peer.DNSName, client.Hostname())
		assert.Truef(t, peer.InMagicSock, "peer (%s) of %q is not tracked by magicsock", peer.DNSName, client.Hostname())

		// TODO(kradalby): InEngine is only true when a proper tunnel is set up,
		// there might be some interesting stuff to test here in the future.
		// assert.Truef(t, peer.InEngine, "peer (%s) of %q is not in wireguard engine", peer.DNSName, client.Hostname())
	}
}

// assertValidNetcheck validates that a client has a proper DERP relay configured.
// Ensures the client has discovered and selected a DERP server for relay functionality,
// which is essential for NAT traversal and connectivity in restricted networks.
func assertValidNetcheck(t *testing.T, client TailscaleClient) {
	t.Helper()
	report, err := client.Netcheck()
	if err != nil {
		t.Fatalf("getting netcheck report for %q: %s", client.Hostname(), err)
	}

	assert.NotEqualf(t, 0, report.PreferredDERP, "%q does not have a DERP relay", client.Hostname())
}

// assertCommandOutputContains executes a command with exponential backoff retry until the output
// contains the expected string or the timeout is reached (10 seconds).
// This implements eventual consistency patterns and should be used instead of time.Sleep
// before executing commands that depend on network state propagation.
//
// Timeout: 10 seconds with exponential backoff.
// Use cases: DNS resolution, route propagation, policy updates.
func assertCommandOutputContains(t *testing.T, c TailscaleClient, command []string, contains string) {
	t.Helper()

	_, err := backoff.Retry(t.Context(), func() (struct{}, error) {
		stdout, stderr, err := c.Execute(command)
		if err != nil {
			return struct{}{}, fmt.Errorf("executing command, stdout: %q stderr: %q, err: %w", stdout, stderr, err)
		}

		if !strings.Contains(stdout, contains) {
			return struct{}{}, fmt.Errorf("executing command, expected string %q not found in %q", contains, stdout)
		}

		return struct{}{}, nil
	}, backoff.WithBackOff(backoff.NewExponentialBackOff()), backoff.WithMaxElapsedTime(10*time.Second))

	assert.NoError(t, err)
}
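
// Typical usage, e.g. waiting for MagicDNS to resolve a peer's name (sketch only;
// the hostname and expected IP below are hypothetical):
//
//	assertCommandOutputContains(t, client,
//		[]string{"tailscale", "ip", "peer-hostname"},
//		"100.64.0.2",
//	)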

// dockertestMaxWait returns the maximum wait time for Docker-based test operations.
// Uses longer timeouts in CI environments to account for slower resource allocation
// and higher system load during automated testing.
func dockertestMaxWait() time.Duration {
	wait := 300 * time.Second //nolint

	if util.IsCI() {
		wait = 600 * time.Second //nolint
	}

	return wait
}

// didClientUseWebsocketForDERP analyzes client logs to determine if WebSocket was used for DERP.
// Searches for WebSocket connection indicators in client logs to validate
// the DERP relay communication method when debugging connectivity issues.
func didClientUseWebsocketForDERP(t *testing.T, client TailscaleClient) bool {
	t.Helper()

	buf := &bytes.Buffer{}
	err := client.WriteLogs(buf, buf)
	if err != nil {
		t.Fatalf("failed to fetch client logs: %s: %s", client.Hostname(), err)
	}

	count, err := countMatchingLines(buf, func(line string) bool {
		return strings.Contains(line, "websocket: connected to ")
	})
	if err != nil {
		t.Fatalf("failed to process client logs: %s: %s", client.Hostname(), err)
	}

	return count > 0
}

// countMatchingLines counts lines in a reader that match the given predicate function.
// Uses optimized buffering for log analysis and provides flexible line-by-line
// filtering for log parsing and pattern matching in integration tests.
func countMatchingLines(in io.Reader, predicate func(string) bool) (int, error) {
	count := 0
	scanner := bufio.NewScanner(in)
	{
		const logBufferInitialSize = 1024 << 10 // preallocate 1 MiB
		buff := make([]byte, logBufferInitialSize)
		scanner.Buffer(buff, len(buff))
		scanner.Split(bufio.ScanLines)
	}

	for scanner.Scan() {
		if predicate(scanner.Text()) {
			count += 1
		}
	}

	return count, scanner.Err()
}
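
// For example, counting error lines in a captured log stream (sketch only; the
// matched text is illustrative):
//
//	var logs bytes.Buffer
//	_ = client.WriteLogs(&logs, &logs)
//	n, err := countMatchingLines(&logs, func(line string) bool {
//		return strings.Contains(line, "ERROR")
//	})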

// wildcard returns a wildcard alias (*) for use in policy v2 configurations.
// Provides a convenient helper for creating permissive policy rules.
func wildcard() policyv2.Alias {
	return policyv2.Wildcard
}

// usernamep returns a pointer to a Username as an Alias for policy v2 configurations.
// Used in ACL rules to reference specific users in network access policies.
func usernamep(name string) policyv2.Alias {
	return ptr.To(policyv2.Username(name))
}

// hostp returns a pointer to a Host as an Alias for policy v2 configurations.
// Used in ACL rules to reference specific hosts in network access policies.
func hostp(name string) policyv2.Alias {
	return ptr.To(policyv2.Host(name))
}

// groupp returns a pointer to a Group as an Alias for policy v2 configurations.
// Used in ACL rules to reference user groups in network access policies.
func groupp(name string) policyv2.Alias {
	return ptr.To(policyv2.Group(name))
}

// tagp returns a pointer to a Tag as an Alias for policy v2 configurations.
// Used in ACL rules to reference node tags in network access policies.
func tagp(name string) policyv2.Alias {
	return ptr.To(policyv2.Tag(name))
}

// prefixp returns a pointer to a Prefix from a CIDR string for policy v2 configurations.
// Converts CIDR notation to the policy prefix format for network range specifications.
func prefixp(cidr string) policyv2.Alias {
	prefix := netip.MustParsePrefix(cidr)
	return ptr.To(policyv2.Prefix(prefix))
}

// aliasWithPorts creates an AliasWithPorts structure from an alias and port ranges.
// Combines network targets with specific port restrictions for fine-grained
// access control in policy v2 configurations.
func aliasWithPorts(alias policyv2.Alias, ports ...tailcfg.PortRange) policyv2.AliasWithPorts {
	return policyv2.AliasWithPorts{
		Alias: alias,
		Ports: ports,
	}
}
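
// These helpers are typically combined to build a policyv2.Policy inline in a test,
// along the lines of the following sketch (treat the exact field names of the policy
// v2 types as illustrative rather than exact):
//
//	policy := &policyv2.Policy{
//		ACLs: []policyv2.ACL{
//			{
//				Action:  "accept",
//				Sources: []policyv2.Alias{usernamep("user1@")},
//				Destinations: []policyv2.AliasWithPorts{
//					aliasWithPorts(wildcard(), tailcfg.PortRangeAny),
//				},
//			},
//		},
//	}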

// usernameOwner returns a Username as an Owner for use in TagOwners policies.
// Specifies which users can assign and manage specific tags in ACL configurations.
func usernameOwner(name string) policyv2.Owner {
	return ptr.To(policyv2.Username(name))
}

// groupOwner returns a Group as an Owner for use in TagOwners policies.
// Specifies which groups can assign and manage specific tags in ACL configurations.
func groupOwner(name string) policyv2.Owner {
	return ptr.To(policyv2.Group(name))
}

// usernameApprover returns a Username as an AutoApprover for subnet route policies.
// Specifies which users can automatically approve subnet route advertisements.
func usernameApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Username(name))
}

// groupApprover returns a Group as an AutoApprover for subnet route policies.
// Specifies which groups can automatically approve subnet route advertisements.
func groupApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Group(name))
}

// tagApprover returns a Tag as an AutoApprover for subnet route policies.
// Specifies which tagged nodes can automatically approve subnet route advertisements.
func tagApprover(name string) policyv2.AutoApprover {
	return ptr.To(policyv2.Tag(name))
}

// oidcMockUser creates a MockUser for OIDC authentication testing.
// Generates consistent test user data with configurable email verification status
// for validating OIDC integration flows in headscale authentication tests.
func oidcMockUser(username string, emailVerified bool) mockoidc.MockUser {
	return mockoidc.MockUser{
		Subject:           username,
		PreferredUsername: username,
		Email:             username + "@headscale.net",
		EmailVerified:     emailVerified,
	}
}
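
// Mock OIDC users built this way are usually handed to the scenario's mock OIDC
// provider during setup, roughly as in this sketch (how the slice is wired into the
// scenario depends on the test's OIDC options):
//
//	users := []mockoidc.MockUser{
//		oidcMockUser("user1", true),
//		oidcMockUser("user2", false),
//	}
//	// ... pass users to the scenario's mock OIDC configuration ...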

// GetUserByName retrieves a user by name from the headscale server.
// This is a common pattern used when creating preauth keys or managing users.
func GetUserByName(headscale ControlServer, username string) (*v1.User, error) {
	users, err := headscale.ListUsers()
	if err != nil {
		return nil, fmt.Errorf("failed to list users: %w", err)
	}

	for _, u := range users {
		if u.GetName() == username {
			return u, nil
		}
	}

	return nil, fmt.Errorf("user %s not found", username)
}

// FindNewClient finds a client that is in the new list but not in the original list.
// This is useful when dynamically adding nodes during tests and needing to identify
// which client was just added.
func FindNewClient(original, updated []TailscaleClient) (TailscaleClient, error) {
	for _, client := range updated {
		isOriginal := false
		for _, origClient := range original {
			if client.Hostname() == origClient.Hostname() {
				isOriginal = true
				break
			}
		}
		if !isOriginal {
			return client, nil
		}
	}

	return nil, fmt.Errorf("no new client found")
}

// AddAndLoginClient adds a new tailscale client to a user and logs it in.
// This combines the common pattern of:
// 1. Creating a new node
// 2. Finding the new node in the client list
// 3. Getting the user to create a preauth key
// 4. Logging in the new node
func (s *Scenario) AddAndLoginClient(
	t *testing.T,
	username string,
	version string,
	headscale ControlServer,
	tsOpts ...tsic.Option,
) (TailscaleClient, error) {
	t.Helper()

	// Get the original client list.
	originalClients, err := s.ListTailscaleClients(username)
	if err != nil {
		return nil, fmt.Errorf("failed to list original clients: %w", err)
	}

	// Create the new node.
	err = s.CreateTailscaleNodesInUser(username, version, 1, tsOpts...)
	if err != nil {
		return nil, fmt.Errorf("failed to create tailscale node: %w", err)
	}

	// Wait for the new node to appear in the client list.
	var newClient TailscaleClient
	_, err = backoff.Retry(t.Context(), func() (struct{}, error) {
		updatedClients, err := s.ListTailscaleClients(username)
		if err != nil {
			return struct{}{}, fmt.Errorf("failed to list updated clients: %w", err)
		}

		if len(updatedClients) != len(originalClients)+1 {
			return struct{}{}, fmt.Errorf("expected %d clients, got %d", len(originalClients)+1, len(updatedClients))
		}

		newClient, err = FindNewClient(originalClients, updatedClients)
		if err != nil {
			return struct{}{}, fmt.Errorf("failed to find new client: %w", err)
		}

		return struct{}{}, nil
	}, backoff.WithBackOff(backoff.NewConstantBackOff(500*time.Millisecond)), backoff.WithMaxElapsedTime(10*time.Second))
	if err != nil {
		return nil, fmt.Errorf("timeout waiting for new client: %w", err)
	}

	// Get the user and create a preauth key.
	user, err := GetUserByName(headscale, username)
	if err != nil {
		return nil, fmt.Errorf("failed to get user: %w", err)
	}

	authKey, err := s.CreatePreAuthKey(user.GetId(), true, false)
	if err != nil {
		return nil, fmt.Errorf("failed to create preauth key: %w", err)
	}

	// Login the new client.
	err = newClient.Login(headscale.GetEndpoint(), authKey.GetKey())
	if err != nil {
		return nil, fmt.Errorf("failed to login new client: %w", err)
	}

	return newClient, nil
}

// MustAddAndLoginClient is like AddAndLoginClient but fails the test on error.
func (s *Scenario) MustAddAndLoginClient(
	t *testing.T,
	username string,
	version string,
	headscale ControlServer,
	tsOpts ...tsic.Option,
) TailscaleClient {
	t.Helper()

	client, err := s.AddAndLoginClient(t, username, version, headscale, tsOpts...)
	require.NoError(t, err)

	return client
}
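
// In a test that grows the tailnet mid-run, this is typically used along these lines
// (sketch only; the username and version string are illustrative):
//
//	newClient := scenario.MustAddAndLoginClient(t, "user1", "unstable", headscale)
//	t.Logf("added and logged in %s", newClient.Hostname())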