mapper: produce map before poll (#2628)

Author: Kristoffer Dalby
Date: 2025-07-28 11:15:53 +02:00
Committed by: GitHub
Parent: b2a18830ed
Commit: a058bf3cd3
70 changed files with 5771 additions and 2475 deletions

View File

@@ -88,7 +88,7 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
var err error
listNodes, err = headscale.ListNodes()
assert.NoError(ct, err)
assert.Equal(ct, nodeCountBeforeLogout, len(listNodes), "Node count should match before logout count")
assert.Len(ct, listNodes, nodeCountBeforeLogout, "Node count should match before logout count")
}, 20*time.Second, 1*time.Second)
for _, node := range listNodes {
@@ -123,7 +123,7 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
var err error
listNodes, err = headscale.ListNodes()
assert.NoError(ct, err)
assert.Equal(ct, nodeCountBeforeLogout, len(listNodes), "Node count should match after HTTPS reconnection")
assert.Len(ct, listNodes, nodeCountBeforeLogout, "Node count should match after HTTPS reconnection")
}, 30*time.Second, 2*time.Second)
for _, node := range listNodes {
@@ -161,7 +161,7 @@ func TestAuthKeyLogoutAndReloginSameUser(t *testing.T) {
}
listNodes, err = headscale.ListNodes()
require.Equal(t, nodeCountBeforeLogout, len(listNodes))
require.Len(t, listNodes, nodeCountBeforeLogout)
for _, node := range listNodes {
assertLastSeenSet(t, node)
}
@@ -355,7 +355,7 @@ func TestAuthKeyLogoutAndReloginSameUserExpiredKey(t *testing.T) {
"--user",
strconv.FormatUint(userMap[userName].GetId(), 10),
"expire",
key.Key,
key.GetKey(),
})
assertNoErr(t, err)
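
Note on the assertion changes above: testify's assert.Len prints the slice contents when it fails, while assert.Equal(ct, n, len(x)) only prints two bare integers, and the generated protobuf accessor key.GetKey() is nil-safe, unlike reading key.Key directly. A minimal standalone sketch of the assert.Len difference (not code from this repository):

package example

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// On failure, assert.Len reports the actual slice alongside the expected
// length; assert.Equal against len() reports only the two numbers.
func TestLenVsEqual(t *testing.T) {
	listNodes := []string{"node-1", "node-2"}

	assert.Equal(t, 2, len(listNodes)) // failure output: just the two integers
	assert.Len(t, listNodes, 2)        // failure output includes the slice itself
}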

View File

@@ -604,7 +604,7 @@ func TestPreAuthKeyCorrectUserLoggedInCommand(t *testing.T) {
assert.EventuallyWithT(t, func(ct *assert.CollectT) {
status, err := client.Status()
assert.NoError(ct, err)
assert.NotContains(ct, []string{"Starting", "Running"}, status.BackendState,
"Expected node to be logged out, backend state: %s", status.BackendState)
}, 30*time.Second, 2*time.Second)
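
The check above relies on testify's container-first argument order: NotContains(t, container, element). A standalone sketch of the same pattern with an illustrative backend state:

package example

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// NotContains takes the container first and the element second, so this reads:
// the set of "logged in" states must not contain the current BackendState.
func TestBackendStateLoggedOut(t *testing.T) {
	backendState := "NeedsLogin" // illustrative value
	assert.NotContains(t, []string{"Starting", "Running"}, backendState,
		"Expected node to be logged out, backend state: %s", backendState)
}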

View File

@@ -147,3 +147,9 @@ func DockerAllowNetworkAdministration(config *docker.HostConfig) {
config.CapAdd = append(config.CapAdd, "NET_ADMIN")
config.Privileged = true
}
// DockerMemoryLimit sets memory limit and disables OOM kill for containers.
func DockerMemoryLimit(config *docker.HostConfig) {
config.Memory = 2 * 1024 * 1024 * 1024 // 2GB in bytes
config.OOMKillDisable = true
}
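
DockerMemoryLimit has the functional-option shape that ory/dockertest expects for host-config mutators. A hedged sketch of how such a helper is typically passed to RunWithOptions (the pool setup and image are illustrative, not taken from this diff):

package example

import (
	"log"

	"github.com/ory/dockertest/v3"
	"github.com/ory/dockertest/v3/docker"
)

// DockerMemoryLimit mirrors the helper added above: cap container memory at
// 2GB and disable the OOM killer for the container.
func DockerMemoryLimit(config *docker.HostConfig) {
	config.Memory = 2 * 1024 * 1024 * 1024 // 2GB in bytes
	config.OOMKillDisable = true
}

func runWithMemoryLimit(pool *dockertest.Pool) (*dockertest.Resource, error) {
	// RunWithOptions accepts any number of func(*docker.HostConfig) mutators,
	// which is how the dockertestutil helpers are applied to containers.
	resource, err := pool.RunWithOptions(
		&dockertest.RunOptions{Repository: "alpine", Tag: "3.20"}, // illustrative image
		DockerMemoryLimit,
	)
	if err != nil {
		log.Printf("failed to start container: %v", err)
		return nil, err
	}

	return resource, nil
}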

View File

@@ -145,9 +145,9 @@ func derpServerScenario(
assert.NoError(ct, err, "Failed to get status for client %s", client.Hostname())
for _, health := range status.Health {
assert.NotContains(ct, health, "could not connect to any relay server",
"Client %s should be connected to DERP relay", client.Hostname())
assert.NotContains(ct, health, "could not connect to the 'Headscale Embedded DERP' relay server.",
"Client %s should be connected to Headscale Embedded DERP", client.Hostname())
}
}, 30*time.Second, 2*time.Second)
@@ -166,9 +166,9 @@ func derpServerScenario(
assert.NoError(ct, err, "Failed to get status for client %s", client.Hostname())
for _, health := range status.Health {
assert.NotContains(ct, health, "could not connect to any relay server",
"Client %s should be connected to DERP relay after first run", client.Hostname())
assert.NotContains(ct, health, "could not connect to the 'Headscale Embedded DERP' relay server.",
"Client %s should be connected to Headscale Embedded DERP after first run", client.Hostname())
}
}, 30*time.Second, 2*time.Second)
@@ -191,9 +191,9 @@ func derpServerScenario(
assert.NoError(ct, err, "Failed to get status for client %s", client.Hostname())
for _, health := range status.Health {
assert.NotContains(ct, health, "could not connect to any relay server",
"Client %s should be connected to DERP relay after second run", client.Hostname())
assert.NotContains(ct, health, "could not connect to the 'Headscale Embedded DERP' relay server.",
"Client %s should be connected to Headscale Embedded DERP after second run", client.Hostname())
}
}, 30*time.Second, 2*time.Second)

View File

@@ -883,6 +883,10 @@ func TestNodeOnlineStatus(t *testing.T) {
assert.EventuallyWithT(t, func(ct *assert.CollectT) {
status, err := client.Status()
assert.NoError(ct, err)
if status == nil {
assert.Fail(ct, "status is nil")
return
}
for _, peerKey := range status.Peers() {
peerStatus := status.Peer[peerKey]
@@ -984,16 +988,11 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
}
// Wait for sync and successful pings after nodes come back up
assert.EventuallyWithT(t, func(ct *assert.CollectT) {
err = scenario.WaitForTailscaleSync()
assert.NoError(ct, err)
success := pingAllHelper(t, allClients, allAddrs)
assert.Greater(ct, success, 0, "Nodes should be able to ping after coming back up")
}, 30*time.Second, 2*time.Second)
err = scenario.WaitForTailscaleSync()
assert.NoError(t, err)
success := pingAllHelper(t, allClients, allAddrs)
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
assert.Equalf(t, len(allClients)*len(allIps), success, "%d successful pings out of %d", success, len(allClients)*len(allIps))
}
}
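
The hunk above moves the sync and ping pass out of assert.EventuallyWithT, which re-runs its entire closure on every tick, so expensive one-shot operations placed inside it are repeated until the timeout. A small standalone sketch of that retry semantics (timings and counts are illustrative):

package example

import (
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

// EventuallyWithT keeps re-evaluating the closure until every assertion inside
// it passes or the timeout elapses. Cheap status checks belong inside; a full
// sync-and-ping pass is better run once afterwards, as the diff above does.
func TestEventuallyWithTRetries(t *testing.T) {
	attempts := 0

	assert.EventuallyWithT(t, func(ct *assert.CollectT) {
		attempts++ // the closure runs once per tick
		assert.GreaterOrEqual(ct, attempts, 3)
	}, 5*time.Second, 100*time.Millisecond)
}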

View File

@@ -260,7 +260,9 @@ func WithDERPConfig(derpMap tailcfg.DERPMap) Option {
func WithTuning(batchTimeout time.Duration, mapSessionChanSize int) Option {
return func(hsic *HeadscaleInContainer) {
hsic.env["HEADSCALE_TUNING_BATCH_CHANGE_DELAY"] = batchTimeout.String()
hsic.env["HEADSCALE_TUNING_NODE_MAPSESSION_BUFFERED_CHAN_SIZE"] = strconv.Itoa(mapSessionChanSize)
hsic.env["HEADSCALE_TUNING_NODE_MAPSESSION_BUFFERED_CHAN_SIZE"] = strconv.Itoa(
mapSessionChanSize,
)
}
}
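
WithTuning is one of the functional options applied when building a HeadscaleInContainer. A minimal self-contained sketch of the pattern, using an illustrative config type rather than the real HeadscaleInContainer struct:

package example

import (
	"strconv"
	"time"
)

// Each With* option is a closure that mutates the container configuration;
// withTuning exports its two knobs as environment variables, as in the diff.
type containerConfig struct {
	env map[string]string
}

type option func(*containerConfig)

func withTuning(batchTimeout time.Duration, mapSessionChanSize int) option {
	return func(c *containerConfig) {
		c.env["HEADSCALE_TUNING_BATCH_CHANGE_DELAY"] = batchTimeout.String()
		c.env["HEADSCALE_TUNING_NODE_MAPSESSION_BUFFERED_CHAN_SIZE"] = strconv.Itoa(mapSessionChanSize)
	}
}

func newContainerConfig(opts ...option) *containerConfig {
	c := &containerConfig{env: map[string]string{}}
	for _, opt := range opts {
		opt(c)
	}

	return c
}
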
@@ -279,10 +281,16 @@ func WithDebugPort(port int) Option {
// buildEntrypoint builds the container entrypoint command based on configuration.
func (hsic *HeadscaleInContainer) buildEntrypoint() []string {
debugCmd := fmt.Sprintf("/go/bin/dlv --listen=0.0.0.0:%d --headless=true --api-version=2 --accept-multiclient --allow-non-terminal-interactive=true exec /go/bin/headscale --continue -- serve", hsic.debugPort)
entrypoint := fmt.Sprintf("/bin/sleep 3 ; update-ca-certificates ; %s ; /bin/sleep 30", debugCmd)
debugCmd := fmt.Sprintf(
"/go/bin/dlv --listen=0.0.0.0:%d --headless=true --api-version=2 --accept-multiclient --allow-non-terminal-interactive=true exec /go/bin/headscale --continue -- serve",
hsic.debugPort,
)
entrypoint := fmt.Sprintf(
"/bin/sleep 3 ; update-ca-certificates ; %s ; /bin/sleep 30",
debugCmd,
)
return []string{"/bin/bash", "-c", entrypoint}
}
@@ -447,8 +455,12 @@ func New(
log.Printf("Created %s container\n", hsic.hostname)
hsic.container = container
log.Printf("Debug ports for %s: delve=%s, metrics/pprof=49090\n", hsic.hostname, hsic.GetHostDebugPort())
log.Printf(
"Debug ports for %s: delve=%s, metrics/pprof=49090\n",
hsic.hostname,
hsic.GetHostDebugPort(),
)
// Write the CA certificates to the container
for i, cert := range hsic.caCerts {
@@ -684,14 +696,6 @@ func (t *HeadscaleInContainer) SaveDatabase(savePath string) error {
return nil
}
// First, let's see what files are actually in /tmp
tmpListing, err := t.Execute([]string{"ls", "-la", "/tmp/"})
if err != nil {
log.Printf("Warning: could not list /tmp directory: %v", err)
} else {
log.Printf("Contents of /tmp in container %s:\n%s", t.hostname, tmpListing)
}
// Also check for any .sqlite files
sqliteFiles, err := t.Execute([]string{"find", "/tmp", "-name", "*.sqlite*", "-type", "f"})
if err != nil {
@@ -718,12 +722,6 @@ func (t *HeadscaleInContainer) SaveDatabase(savePath string) error {
return errors.New("database file exists but has no schema (empty database)")
}
// Show a preview of the schema (first 500 chars)
schemaPreview := schemaCheck
if len(schemaPreview) > 500 {
schemaPreview = schemaPreview[:500] + "..."
}
tarFile, err := t.FetchPath("/tmp/integration_test_db.sqlite3")
if err != nil {
return fmt.Errorf("failed to fetch database file: %w", err)
@@ -740,7 +738,12 @@ func (t *HeadscaleInContainer) SaveDatabase(savePath string) error {
return fmt.Errorf("failed to read tar header: %w", err)
}
log.Printf("Found file in tar: %s (type: %d, size: %d)", header.Name, header.Typeflag, header.Size)
log.Printf(
"Found file in tar: %s (type: %d, size: %d)",
header.Name,
header.Typeflag,
header.Size,
)
// Extract the first regular file we find
if header.Typeflag == tar.TypeReg {
@@ -756,11 +759,20 @@ func (t *HeadscaleInContainer) SaveDatabase(savePath string) error {
return fmt.Errorf("failed to copy database file: %w", err)
}
log.Printf("Extracted database file: %s (%d bytes written, header claimed %d bytes)", dbPath, written, header.Size)
log.Printf(
"Extracted database file: %s (%d bytes written, header claimed %d bytes)",
dbPath,
written,
header.Size,
)
// Check if we actually wrote something
if written == 0 {
return fmt.Errorf("database file is empty (size: %d, header size: %d)", written, header.Size)
return fmt.Errorf(
"database file is empty (size: %d, header size: %d)",
written,
header.Size,
)
}
return nil
@@ -871,7 +883,15 @@ func (t *HeadscaleInContainer) WaitForRunning() error {
func (t *HeadscaleInContainer) CreateUser(
user string,
) (*v1.User, error) {
command := []string{"headscale", "users", "create", user, fmt.Sprintf("--email=%s@test.no", user), "--output", "json"}
command := []string{
"headscale",
"users",
"create",
user,
fmt.Sprintf("--email=%s@test.no", user),
"--output",
"json",
}
result, _, err := dockertestutil.ExecuteCommand(
t.container,
@@ -1182,13 +1202,18 @@ func (t *HeadscaleInContainer) ApproveRoutes(id uint64, routes []netip.Prefix) (
[]string{},
)
if err != nil {
return nil, fmt.Errorf("failed to execute list node command: %w", err)
return nil, fmt.Errorf(
"failed to execute approve routes command (node %d, routes %v): %w",
id,
routes,
err,
)
}
var node *v1.Node
err = json.Unmarshal([]byte(result), &node)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal nodes: %w", err)
return nil, fmt.Errorf("failed to unmarshal node response: %q, error: %w", result, err)
}
return node, nil
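
The reworked errors above include the raw payload via %q and wrap the underlying cause with %w, so callers can still match it with errors.Is or errors.As. A standalone sketch of the same style (the node type here is illustrative):

package example

import (
	"encoding/json"
	"errors"
	"fmt"
)

type node struct {
	ID uint64 `json:"id"`
}

// unmarshalNode mirrors the error style above: quote the raw response for
// debugging and wrap the cause so it stays inspectable up the call chain.
func unmarshalNode(result string) (*node, error) {
	var n node
	if err := json.Unmarshal([]byte(result), &n); err != nil {
		return nil, fmt.Errorf("failed to unmarshal node response: %q, error: %w", result, err)
	}

	return &n, nil
}

// isSyntaxError shows that the wrapped cause is still reachable via errors.As.
func isSyntaxError(err error) bool {
	var syntaxErr *json.SyntaxError

	return errors.As(err, &syntaxErr)
}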

View File

@@ -310,7 +310,7 @@ func TestHASubnetRouterFailover(t *testing.T) {
// Enable route on node 1
t.Logf("Enabling route on subnet router 1, no HA")
_, err = headscale.ApproveRoutes(
1,
MustFindNode(subRouter1.Hostname(), nodes).GetId(),
[]netip.Prefix{pref},
)
require.NoError(t, err)
@@ -366,7 +366,7 @@ func TestHASubnetRouterFailover(t *testing.T) {
// Enable route on node 2, now we will have a HA subnet router
t.Logf("Enabling route on subnet router 2, now HA, subnetrouter 1 is primary, 2 is standby")
_, err = headscale.ApproveRoutes(
2,
MustFindNode(subRouter2.Hostname(), nodes).GetId(),
[]netip.Prefix{pref},
)
require.NoError(t, err)
@@ -422,7 +422,7 @@ func TestHASubnetRouterFailover(t *testing.T) {
// be enabled.
t.Logf("Enabling route on subnet router 3, now HA, subnetrouter 1 is primary, 2 and 3 is standby")
_, err = headscale.ApproveRoutes(
3,
MustFindNode(subRouter3.Hostname(), nodes).GetId(),
[]netip.Prefix{pref},
)
require.NoError(t, err)
@@ -639,7 +639,7 @@ func TestHASubnetRouterFailover(t *testing.T) {
t.Logf("disabling route in subnet router r3 (%s)", subRouter3.Hostname())
t.Logf("expecting route to failover to r1 (%s), which is still available with r2", subRouter1.Hostname())
_, err = headscale.ApproveRoutes(nodes[2].GetId(), []netip.Prefix{})
_, err = headscale.ApproveRoutes(MustFindNode(subRouter3.Hostname(), nodes).GetId(), []netip.Prefix{})
time.Sleep(5 * time.Second)
@@ -647,9 +647,9 @@ func TestHASubnetRouterFailover(t *testing.T) {
require.NoError(t, err)
assert.Len(t, nodes, 6)
requireNodeRouteCount(t, nodes[0], 1, 1, 1)
requireNodeRouteCount(t, nodes[1], 1, 1, 0)
requireNodeRouteCount(t, nodes[2], 1, 0, 0)
requireNodeRouteCount(t, MustFindNode(subRouter1.Hostname(), nodes), 1, 1, 1)
requireNodeRouteCount(t, MustFindNode(subRouter2.Hostname(), nodes), 1, 1, 0)
requireNodeRouteCount(t, MustFindNode(subRouter3.Hostname(), nodes), 1, 0, 0)
// Verify that the route is announced from subnet router 1
clientStatus, err = client.Status()
@@ -684,7 +684,7 @@ func TestHASubnetRouterFailover(t *testing.T) {
// Disable the route of subnet router 1, making it failover to 2
t.Logf("disabling route in subnet router r1 (%s)", subRouter1.Hostname())
t.Logf("expecting route to failover to r2 (%s)", subRouter2.Hostname())
_, err = headscale.ApproveRoutes(nodes[0].GetId(), []netip.Prefix{})
_, err = headscale.ApproveRoutes(MustFindNode(subRouter1.Hostname(), nodes).GetId(), []netip.Prefix{})
time.Sleep(5 * time.Second)
@@ -692,9 +692,9 @@ func TestHASubnetRouterFailover(t *testing.T) {
require.NoError(t, err)
assert.Len(t, nodes, 6)
requireNodeRouteCount(t, nodes[0], 1, 0, 0)
requireNodeRouteCount(t, nodes[1], 1, 1, 1)
requireNodeRouteCount(t, nodes[2], 1, 0, 0)
requireNodeRouteCount(t, MustFindNode(subRouter1.Hostname(), nodes), 1, 0, 0)
requireNodeRouteCount(t, MustFindNode(subRouter2.Hostname(), nodes), 1, 1, 1)
requireNodeRouteCount(t, MustFindNode(subRouter3.Hostname(), nodes), 1, 0, 0)
// Verify that the route is announced from subnet router 1
clientStatus, err = client.Status()
@@ -729,9 +729,10 @@ func TestHASubnetRouterFailover(t *testing.T) {
// enable the route of subnet router 1, no change expected
t.Logf("enabling route in subnet router 1 (%s)", subRouter1.Hostname())
t.Logf("both online, expecting r2 (%s) to still be primary (no flapping)", subRouter2.Hostname())
r1Node := MustFindNode(subRouter1.Hostname(), nodes)
_, err = headscale.ApproveRoutes(
nodes[0].GetId(),
util.MustStringsToPrefixes(nodes[0].GetAvailableRoutes()),
r1Node.GetId(),
util.MustStringsToPrefixes(r1Node.GetAvailableRoutes()),
)
time.Sleep(5 * time.Second)
@@ -740,9 +741,9 @@ func TestHASubnetRouterFailover(t *testing.T) {
require.NoError(t, err)
assert.Len(t, nodes, 6)
requireNodeRouteCount(t, nodes[0], 1, 1, 0)
requireNodeRouteCount(t, nodes[1], 1, 1, 1)
requireNodeRouteCount(t, nodes[2], 1, 0, 0)
requireNodeRouteCount(t, MustFindNode(subRouter1.Hostname(), nodes), 1, 1, 0)
requireNodeRouteCount(t, MustFindNode(subRouter2.Hostname(), nodes), 1, 1, 1)
requireNodeRouteCount(t, MustFindNode(subRouter3.Hostname(), nodes), 1, 0, 0)
// Verify that the route is announced from subnet router 1
clientStatus, err = client.Status()
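
The test now resolves each router by hostname with MustFindNode instead of indexing into the ListNodes result, so assertions no longer depend on list ordering. A hedged sketch of what such a helper could look like, assuming the generated v1.Node type and a match on GetName (the real helper may differ):

package example

import (
	"fmt"

	v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
)

// mustFindNode is an illustrative stand-in for the MustFindNode helper used in
// the diff: it looks a node up by hostname and panics if it is missing, which
// keeps the call sites as terse as positional indexing was.
func mustFindNode(hostname string, nodes []*v1.Node) *v1.Node {
	for _, node := range nodes {
		if node.GetName() == hostname {
			return node
		}
	}

	panic(fmt.Sprintf("node with hostname %q not found", hostname))
}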

View File

@@ -223,7 +223,7 @@ func NewScenario(spec ScenarioSpec) (*Scenario, error) {
s.userToNetwork = userToNetwork
if spec.OIDCUsers != nil && len(spec.OIDCUsers) != 0 {
if len(spec.OIDCUsers) != 0 {
ttl := defaultAccessTTL
if spec.OIDCAccessTTL != 0 {
ttl = spec.OIDCAccessTTL
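
The simplified condition above works because len of a nil slice is defined to be 0 in Go, so the explicit nil check adds nothing. A tiny standalone illustration:

package main

import "fmt"

func main() {
	var oidcUsers []string // nil slice

	fmt.Println(oidcUsers == nil)    // true
	fmt.Println(len(oidcUsers))      // 0
	fmt.Println(len(oidcUsers) != 0) // false, so the nil check is redundant
}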

View File

@@ -370,10 +370,12 @@ func TestSSHUserOnlyIsolation(t *testing.T) {
}
func doSSH(t *testing.T, client TailscaleClient, peer TailscaleClient) (string, string, error) {
t.Helper()
return doSSHWithRetry(t, client, peer, true)
}
func doSSHWithoutRetry(t *testing.T, client TailscaleClient, peer TailscaleClient) (string, string, error) {
t.Helper()
return doSSHWithRetry(t, client, peer, false)
}

View File

@@ -319,6 +319,7 @@ func New(
dockertestutil.DockerRestartPolicy,
dockertestutil.DockerAllowLocalIPv6,
dockertestutil.DockerAllowNetworkAdministration,
dockertestutil.DockerMemoryLimit,
)
case "unstable":
tailscaleOptions.Repository = "tailscale/tailscale"
@@ -329,6 +330,7 @@ func New(
dockertestutil.DockerRestartPolicy,
dockertestutil.DockerAllowLocalIPv6,
dockertestutil.DockerAllowNetworkAdministration,
dockertestutil.DockerMemoryLimit,
)
default:
tailscaleOptions.Repository = "tailscale/tailscale"
@@ -339,6 +341,7 @@ func New(
dockertestutil.DockerRestartPolicy,
dockertestutil.DockerAllowLocalIPv6,
dockertestutil.DockerAllowNetworkAdministration,
dockertestutil.DockerMemoryLimit,
)
}

View File

@@ -22,11 +22,11 @@ import (
const (
// derpPingTimeout defines the timeout for individual DERP ping operations
// Used in DERP connectivity tests to verify relay server communication
// Used in DERP connectivity tests to verify relay server communication.
derpPingTimeout = 2 * time.Second
// derpPingCount defines the number of ping attempts for DERP connectivity tests
// Higher count provides better reliability assessment of DERP connectivity
// Higher count provides better reliability assessment of DERP connectivity.
derpPingCount = 10
)
@@ -317,11 +317,11 @@ func assertValidNetcheck(t *testing.T, client TailscaleClient) {
// assertCommandOutputContains executes a command with exponential backoff retry until the output
// contains the expected string or timeout is reached (10 seconds).
// This implements eventual consistency patterns and should be used instead of time.Sleep
// before executing commands that depend on network state propagation.
//
// Timeout: 10 seconds with exponential backoff
// Use cases: DNS resolution, route propagation, policy updates
// Use cases: DNS resolution, route propagation, policy updates.
func assertCommandOutputContains(t *testing.T, c TailscaleClient, command []string, contains string) {
t.Helper()
@@ -361,10 +361,10 @@ func isSelfClient(client TailscaleClient, addr string) bool {
}
func dockertestMaxWait() time.Duration {
wait := 120 * time.Second //nolint
wait := 300 * time.Second //nolint
if util.IsCI() {
wait = 300 * time.Second //nolint
wait = 600 * time.Second //nolint
}
return wait
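
Values like dockertestMaxWait() are typically assigned to dockertest's Pool.MaxWait, which bounds how long pool.Retry keeps polling a container's readiness function. A hedged sketch of that wiring (the isCI flag stands in for util.IsCI()):

package example

import (
	"time"

	"github.com/ory/dockertest/v3"
)

// newPool builds a dockertest pool whose retry window matches the timeouts
// above: 300s locally and 600s in CI, where container startup is slower.
func newPool(isCI bool) (*dockertest.Pool, error) {
	wait := 300 * time.Second
	if isCI {
		wait = 600 * time.Second
	}

	pool, err := dockertest.NewPool("")
	if err != nil {
		return nil, err
	}
	pool.MaxWait = wait // pool.Retry gives up once this much time has passed

	return pool, nil
}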