From a50bd13930dd6c23fe9cb3e5b303fe894888b9b5 Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Mon, 15 Dec 2025 12:41:58 +0000 Subject: [PATCH] integration: prepare AutoApprove test for new tags Validates #2891 Signed-off-by: Kristoffer Dalby --- integration/route_test.go | 153 +++++++++++++++++++++++--------------- integration/scenario.go | 22 ++++-- 2 files changed, 112 insertions(+), 63 deletions(-) diff --git a/integration/route_test.go b/integration/route_test.go index 3a2be29a..0460b5ef 100644 --- a/integration/route_test.go +++ b/integration/route_test.go @@ -4,6 +4,7 @@ import ( "cmp" "encoding/json" "fmt" + "maps" "net/netip" "slices" "sort" @@ -25,7 +26,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" xmaps "golang.org/x/exp/maps" - "tailscale.com/envknob" "tailscale.com/ipn/ipnstate" "tailscale.com/net/tsaddr" "tailscale.com/tailcfg" @@ -1979,6 +1979,11 @@ func MustFindNode(hostname string, nodes []*v1.Node) *v1.Node { // - Verify that routes can now be seen by peers. func TestAutoApproveMultiNetwork(t *testing.T) { IntegrationSkip(t) + + // Timeout for EventuallyWithT assertions. + // Set generously to account for CI infrastructure variability. + assertTimeout := 60 * time.Second + bigRoute := netip.MustParsePrefix("10.42.0.0/16") subRoute := netip.MustParsePrefix("10.42.7.0/24") notApprovedRoute := netip.MustParsePrefix("192.168.0.0/24") @@ -2217,31 +2222,24 @@ func TestAutoApproveMultiNetwork(t *testing.T) { }, } - // Check if we should run the full matrix of tests - // By default, we only run a minimal subset to avoid overwhelming Docker/disk - // Set HEADSCALE_INTEGRATION_FULL_MATRIX=1 to run all combinations - fullMatrix := envknob.Bool("HEADSCALE_INTEGRATION_FULL_MATRIX") - - // Minimal test set: 3 tests covering all key dimensions - // - Both auth methods (authkey, webauth) - // - All 3 approver types (tag, user, group) - // - Both policy modes (database, file) - // - Both advertiseDuringUp values (true, false) - minimalTestSet := map[string]bool{ - "authkey-tag-advertiseduringup-false-pol-database": true, // authkey + database + tag + false - "webauth-user-advertiseduringup-true-pol-file": true, // webauth + file + user + true - "authkey-group-advertiseduringup-false-pol-file": true, // authkey + file + group + false - } - for _, tt := range tests { for _, polMode := range []types.PolicyMode{types.PolicyModeDB, types.PolicyModeFile} { for _, advertiseDuringUp := range []bool{false, true} { name := fmt.Sprintf("%s-advertiseduringup-%t-pol-%s", tt.name, advertiseDuringUp, polMode) t.Run(name, func(t *testing.T) { - // Skip tests not in minimal set unless full matrix is enabled - if !fullMatrix && !minimalTestSet[name] { - t.Skip("Skipping to reduce test matrix size. Set HEADSCALE_INTEGRATION_FULL_MATRIX=1 to run all tests.") + // Create a deep copy of the policy to avoid mutating the shared test case. + // Each subtest modifies AutoApprovers.Routes (add then delete), so we need + // an isolated copy to prevent state leakage between sequential test runs. + pol := &policyv2.Policy{ + ACLs: slices.Clone(tt.pol.ACLs), + Groups: maps.Clone(tt.pol.Groups), + TagOwners: maps.Clone(tt.pol.TagOwners), + AutoApprovers: policyv2.AutoApproverPolicy{ + ExitNode: slices.Clone(tt.pol.AutoApprovers.ExitNode), + Routes: maps.Clone(tt.pol.AutoApprovers.Routes), + }, } + scenario, err := NewScenario(tt.spec) require.NoErrorf(t, err, "failed to create scenario: %s", err) defer scenario.ShutdownAssertNoPanics(t) @@ -2251,7 +2249,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { hsic.WithTestName("autoapprovemulti"), hsic.WithEmbeddedDERPServerOnly(), hsic.WithTLS(), - hsic.WithACLPolicy(tt.pol), + hsic.WithACLPolicy(pol), hsic.WithPolicyMode(polMode), } @@ -2262,13 +2260,22 @@ func TestAutoApproveMultiNetwork(t *testing.T) { route, err := scenario.SubnetOfNetwork("usernet1") require.NoError(t, err) - // For authkey with tag approver, use tagged PreAuthKeys (tags-as-identity model) - var preAuthKeyTags []string - if !tt.withURL && strings.HasPrefix(tt.approver, "tag:") { + // For tag-based approvers, nodes must be tagged with that tag + // (tags-as-identity model: tagged nodes are identified by their tags) + var ( + preAuthKeyTags []string + webauthTagUser string + ) + + if strings.HasPrefix(tt.approver, "tag:") { preAuthKeyTags = []string{tt.approver} + if tt.withURL { + // For webauth, only user1 can request tags (per tagOwners policy) + webauthTagUser = "user1" + } } - err = scenario.createHeadscaleEnvWithTags(tt.withURL, tsOpts, preAuthKeyTags, + err = scenario.createHeadscaleEnvWithTags(tt.withURL, tsOpts, preAuthKeyTags, webauthTagUser, opts..., ) requireNoErrHeadscaleEnv(t, err) @@ -2301,12 +2308,10 @@ func TestAutoApproveMultiNetwork(t *testing.T) { default: approvers = append(approvers, usernameApprover(tt.approver)) } - if tt.pol.AutoApprovers.Routes == nil { - tt.pol.AutoApprovers.Routes = make(map[netip.Prefix]policyv2.AutoApprovers) - } + // pol.AutoApprovers.Routes is already initialized in the deep copy above prefix := *route - tt.pol.AutoApprovers.Routes[prefix] = approvers - err = headscale.SetPolicy(tt.pol) + pol.AutoApprovers.Routes[prefix] = approvers + err = headscale.SetPolicy(pol) require.NoError(t, err) if advertiseDuringUp { @@ -2375,6 +2380,21 @@ func TestAutoApproveMultiNetwork(t *testing.T) { err = routerUsernet1.WaitForRunning(30 * time.Second) require.NoError(t, err) + // Wait for bidirectional peer synchronization. + // Both the router and all existing clients must see each other. + // This is critical for connectivity - without this, the WireGuard + // tunnels may not be established despite peers appearing in netmaps. + + // Router waits for all existing clients + err = routerUsernet1.WaitForPeers(len(allClients), 60*time.Second, 1*time.Second) + require.NoError(t, err, "router failed to see all peers") + + // All clients wait for the router (they should see 6 peers including the router) + for _, existingClient := range allClients { + err = existingClient.WaitForPeers(len(allClients), 60*time.Second, 1*time.Second) + require.NoErrorf(t, err, "client %s failed to see all peers including router", existingClient.Hostname()) + } + routerUsernet1ID := routerUsernet1.MustID() web := services[0] @@ -2409,7 +2429,11 @@ func TestAutoApproveMultiNetwork(t *testing.T) { require.NoErrorf(t, err, "failed to advertise route: %s", err) } - // Wait for route state changes to propagate + // Wait for route state changes to propagate. + // Use a longer timeout (30s) to account for CI infrastructure variability - + // when advertiseDuringUp=true, routes are sent during registration and may + // take longer to propagate through the server's auto-approval logic in slow + // environments. assert.EventuallyWithT(t, func(c *assert.CollectT) { // These route should auto approve, so the node is expected to have a route // for all counts. @@ -2424,7 +2448,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { routerNode.GetSubnetRoutes()) requireNodeRouteCountWithCollect(c, routerNode, 1, 1, 1) - }, 10*time.Second, 500*time.Millisecond, "Initial route auto-approval: Route should be approved via policy") + }, assertTimeout, 500*time.Millisecond, "Initial route auto-approval: Route should be approved via policy") // Verify that the routes have been sent to the client. assert.EventuallyWithT(t, func(c *assert.CollectT) { @@ -2457,7 +2481,22 @@ func TestAutoApproveMultiNetwork(t *testing.T) { } assert.True(c, routerPeerFound, "Client should see the router peer") - }, 30*time.Second, 200*time.Millisecond, "Verifying routes sent to client after auto-approval") + }, assertTimeout, 200*time.Millisecond, "Verifying routes sent to client after auto-approval") + + // Verify WireGuard tunnel connectivity to the router before testing route. + // The client may have the route in its netmap but the actual tunnel may not + // be established yet, especially in CI environments with higher latency. + routerIPv4, err := routerUsernet1.IPv4() + require.NoError(t, err, "failed to get router IPv4") + assert.EventuallyWithT(t, func(c *assert.CollectT) { + err := client.Ping( + routerIPv4.String(), + tsic.WithPingUntilDirect(false), // DERP relay is fine + tsic.WithPingCount(1), + tsic.WithPingTimeout(5*time.Second), + ) + assert.NoError(c, err, "ping to router should succeed") + }, assertTimeout, 200*time.Millisecond, "Verifying WireGuard tunnel to router is established") url := fmt.Sprintf("http://%s/etc/hostname", webip) t.Logf("url from %s to %s", client.Hostname(), url) @@ -2466,7 +2505,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { result, err := client.Curl(url) assert.NoError(c, err) assert.Len(c, result, 13) - }, 60*time.Second, 200*time.Millisecond, "Verifying client can reach webservice through auto-approved route") + }, assertTimeout, 200*time.Millisecond, "Verifying client can reach webservice through auto-approved route") assert.EventuallyWithT(t, func(c *assert.CollectT) { tr, err := client.Traceroute(webip) @@ -2476,12 +2515,12 @@ func TestAutoApproveMultiNetwork(t *testing.T) { return } assertTracerouteViaIPWithCollect(c, tr, ip) - }, 60*time.Second, 200*time.Millisecond, "Verifying traceroute goes through auto-approved router") + }, assertTimeout, 200*time.Millisecond, "Verifying traceroute goes through auto-approved router") // Remove the auto approval from the policy, any routes already enabled should be allowed. prefix = *route - delete(tt.pol.AutoApprovers.Routes, prefix) - err = headscale.SetPolicy(tt.pol) + delete(pol.AutoApprovers.Routes, prefix) + err = headscale.SetPolicy(pol) require.NoError(t, err) t.Logf("Policy updated: removed auto-approver for route %s", prefix) @@ -2499,7 +2538,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { routerNode.GetSubnetRoutes()) requireNodeRouteCountWithCollect(c, routerNode, 1, 1, 1) - }, 10*time.Second, 500*time.Millisecond, "Routes should remain approved after auto-approver removal") + }, assertTimeout, 500*time.Millisecond, "Routes should remain approved after auto-approver removal") // Verify that the routes have been sent to the client. assert.EventuallyWithT(t, func(c *assert.CollectT) { @@ -2519,7 +2558,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { requirePeerSubnetRoutesWithCollect(c, peerStatus, nil) } } - }, 30*time.Second, 200*time.Millisecond, "Verifying routes remain after policy change") + }, assertTimeout, 200*time.Millisecond, "Verifying routes remain after policy change") url = fmt.Sprintf("http://%s/etc/hostname", webip) t.Logf("url from %s to %s", client.Hostname(), url) @@ -2528,7 +2567,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { result, err := client.Curl(url) assert.NoError(c, err) assert.Len(c, result, 13) - }, 60*time.Second, 200*time.Millisecond, "Verifying client can still reach webservice after policy change") + }, assertTimeout, 200*time.Millisecond, "Verifying client can still reach webservice after policy change") assert.EventuallyWithT(t, func(c *assert.CollectT) { tr, err := client.Traceroute(webip) @@ -2538,7 +2577,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { return } assertTracerouteViaIPWithCollect(c, tr, ip) - }, 60*time.Second, 200*time.Millisecond, "Verifying traceroute still goes through router after policy change") + }, assertTimeout, 200*time.Millisecond, "Verifying traceroute still goes through router after policy change") // Disable the route, making it unavailable since it is no longer auto-approved _, err = headscale.ApproveRoutes( @@ -2554,7 +2593,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { nodes, err = headscale.ListNodes() assert.NoError(c, err) requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 0, 0) - }, 15*time.Second, 500*time.Millisecond, "route state changes should propagate") + }, assertTimeout, 500*time.Millisecond, "route state changes should propagate") // Verify that the routes have been sent to the client. assert.EventuallyWithT(t, func(c *assert.CollectT) { @@ -2565,7 +2604,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { peerStatus := status.Peer[peerKey] requirePeerSubnetRoutesWithCollect(c, peerStatus, nil) } - }, 30*time.Second, 200*time.Millisecond, "Verifying routes disabled after route removal") + }, assertTimeout, 200*time.Millisecond, "Verifying routes disabled after route removal") // Add the route back to the auto approver in the policy, the route should // now become available again. @@ -2578,12 +2617,10 @@ func TestAutoApproveMultiNetwork(t *testing.T) { default: newApprovers = append(newApprovers, usernameApprover(tt.approver)) } - if tt.pol.AutoApprovers.Routes == nil { - tt.pol.AutoApprovers.Routes = make(map[netip.Prefix]policyv2.AutoApprovers) - } + // pol.AutoApprovers.Routes is already initialized in the deep copy above prefix = *route - tt.pol.AutoApprovers.Routes[prefix] = newApprovers - err = headscale.SetPolicy(tt.pol) + pol.AutoApprovers.Routes[prefix] = newApprovers + err = headscale.SetPolicy(pol) require.NoError(t, err) // Wait for route state changes to propagate @@ -2593,7 +2630,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { nodes, err = headscale.ListNodes() assert.NoError(c, err) requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1) - }, 15*time.Second, 500*time.Millisecond, "route state changes should propagate") + }, assertTimeout, 500*time.Millisecond, "route state changes should propagate") // Verify that the routes have been sent to the client. assert.EventuallyWithT(t, func(c *assert.CollectT) { @@ -2613,7 +2650,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { requirePeerSubnetRoutesWithCollect(c, peerStatus, nil) } } - }, 30*time.Second, 200*time.Millisecond, "Verifying routes re-enabled after policy re-approval") + }, assertTimeout, 200*time.Millisecond, "Verifying routes re-enabled after policy re-approval") url = fmt.Sprintf("http://%s/etc/hostname", webip) t.Logf("url from %s to %s", client.Hostname(), url) @@ -2622,7 +2659,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { result, err := client.Curl(url) assert.NoError(c, err) assert.Len(c, result, 13) - }, 60*time.Second, 200*time.Millisecond, "Verifying client can reach webservice after route re-approval") + }, assertTimeout, 200*time.Millisecond, "Verifying client can reach webservice after route re-approval") assert.EventuallyWithT(t, func(c *assert.CollectT) { tr, err := client.Traceroute(webip) @@ -2632,7 +2669,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { return } assertTracerouteViaIPWithCollect(c, tr, ip) - }, 60*time.Second, 200*time.Millisecond, "Verifying traceroute goes through router after re-approval") + }, assertTimeout, 200*time.Millisecond, "Verifying traceroute goes through router after re-approval") // Advertise and validate a subnet of an auto approved route, /24 inside the // auto approved /16. @@ -2652,7 +2689,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { assert.NoError(c, err) requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1) requireNodeRouteCountWithCollect(c, nodes[1], 1, 1, 1) - }, 15*time.Second, 500*time.Millisecond, "route state changes should propagate") + }, assertTimeout, 500*time.Millisecond, "route state changes should propagate") // Verify that the routes have been sent to the client. assert.EventuallyWithT(t, func(c *assert.CollectT) { @@ -2676,7 +2713,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { requirePeerSubnetRoutesWithCollect(c, peerStatus, nil) } } - }, 30*time.Second, 200*time.Millisecond, "Verifying sub-route propagated to client") + }, assertTimeout, 200*time.Millisecond, "Verifying sub-route propagated to client") // Advertise a not approved route will not end up anywhere command = []string{ @@ -2696,7 +2733,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1) requireNodeRouteCountWithCollect(c, nodes[1], 1, 1, 0) requireNodeRouteCountWithCollect(c, nodes[2], 0, 0, 0) - }, 15*time.Second, 500*time.Millisecond, "route state changes should propagate") + }, assertTimeout, 500*time.Millisecond, "route state changes should propagate") // Verify that the routes have been sent to the client. assert.EventuallyWithT(t, func(c *assert.CollectT) { @@ -2716,7 +2753,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { requirePeerSubnetRoutesWithCollect(c, peerStatus, nil) } } - }, 30*time.Second, 200*time.Millisecond, "Verifying unapproved route not propagated") + }, assertTimeout, 200*time.Millisecond, "Verifying unapproved route not propagated") // Exit routes are also automatically approved command = []string{ @@ -2734,7 +2771,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1) requireNodeRouteCountWithCollect(c, nodes[1], 1, 1, 0) requireNodeRouteCountWithCollect(c, nodes[2], 2, 2, 2) - }, 15*time.Second, 500*time.Millisecond, "route state changes should propagate") + }, assertTimeout, 500*time.Millisecond, "route state changes should propagate") // Verify that the routes have been sent to the client. assert.EventuallyWithT(t, func(c *assert.CollectT) { @@ -2755,7 +2792,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) { requirePeerSubnetRoutesWithCollect(c, peerStatus, nil) } } - }, 30*time.Second, 200*time.Millisecond, "Verifying exit node routes propagated to client") + }, assertTimeout, 200*time.Millisecond, "Verifying exit node routes propagated to client") }) } } diff --git a/integration/scenario.go b/integration/scenario.go index 2f93210e..041cc771 100644 --- a/integration/scenario.go +++ b/integration/scenario.go @@ -789,17 +789,23 @@ func (s *Scenario) createHeadscaleEnv( tsOpts []tsic.Option, opts ...hsic.Option, ) error { - return s.createHeadscaleEnvWithTags(withURL, tsOpts, nil, opts...) + return s.createHeadscaleEnvWithTags(withURL, tsOpts, nil, "", opts...) } // createHeadscaleEnvWithTags starts the headscale environment and the clients // according to the ScenarioSpec passed to the Scenario. If preAuthKeyTags is // non-empty and withURL is false, the tags will be applied to the PreAuthKey // (tags-as-identity model). +// +// For webauth (withURL=true), if webauthTagUser is non-empty and preAuthKeyTags +// is non-empty, only nodes belonging to that user will request tags via +// --advertise-tags. This is necessary because tagOwners ACL controls which +// users can request specific tags. func (s *Scenario) createHeadscaleEnvWithTags( withURL bool, tsOpts []tsic.Option, preAuthKeyTags []string, + webauthTagUser string, opts ...hsic.Option, ) error { headscale, err := s.Headscale(opts...) @@ -813,14 +819,20 @@ func (s *Scenario) createHeadscaleEnvWithTags( return err } - var opts []tsic.Option + var userOpts []tsic.Option if s.userToNetwork != nil { - opts = append(tsOpts, tsic.WithNetwork(s.userToNetwork[user])) + userOpts = append(tsOpts, tsic.WithNetwork(s.userToNetwork[user])) } else { - opts = append(tsOpts, tsic.WithNetwork(s.networks[s.testDefaultNetwork])) + userOpts = append(tsOpts, tsic.WithNetwork(s.networks[s.testDefaultNetwork])) } - err = s.CreateTailscaleNodesInUser(user, "all", s.spec.NodesPerUser, opts...) + // For webauth with tags, only apply tags to the specified webauthTagUser + // (other users may not be authorized via tagOwners) + if withURL && webauthTagUser != "" && len(preAuthKeyTags) > 0 && user == webauthTagUser { + userOpts = append(userOpts, tsic.WithTags(preAuthKeyTags)) + } + + err = s.CreateTailscaleNodesInUser(user, "all", s.spec.NodesPerUser, userOpts...) if err != nil { return err }