tags: process tags on registration, simplify policy (#2931)

This PR investigates, adds tests and aims to correctly implement Tailscale's model for how Tags should be accepted, assigned and used to identify nodes in the Tailscale access and ownership model.

When evaluating Headscale's policy, tags are now only checked against a node's "tags" list, which is the source of truth for all tags on a given node. This greatly simplifies the code for dealing with tags, and should help us have fewer access bugs related to nodes belonging to tags or users.

A node can be owned either by a user or by a tag.

Next, to ensure the tags list on the node is correctly implemented, we first add tests for every registration scenario and combination of user, pre auth key, and pre auth key with tags. Each test expects the same registration outcome that was observed by trying the scenario against the Tailscale control server. This should ensure that we implement the correct behaviour and that it does not change or break over time.

Lastly, the missing parts of the auth have been added, or changed in the cases where they were wrong. This has, in large part, allowed us to delete and simplify a lot of code.
Now, tags can only be changed when a node authenticates or if set via the CLI/API. Tags can only be fully overwritten/replaced and any use of either auth or CLI will replace the current set if different.

A user-owned device can be converted to a tagged device, but it cannot be converted back. A tagged device can also never remove its last tag; it must always have at least one.
This commit is contained in:
Kristoffer Dalby
2025-12-08 18:51:07 +01:00
committed by GitHub
parent 1f5df017a1
commit 22ee2bfc9c
24 changed files with 3414 additions and 1001 deletions

View File

@@ -2259,16 +2259,16 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
tsic.WithAcceptRoutes(),
}
if tt.approver == "tag:approve" {
tsOpts = append(tsOpts,
tsic.WithTags([]string{"tag:approve"}),
)
}
route, err := scenario.SubnetOfNetwork("usernet1")
require.NoError(t, err)
err = scenario.createHeadscaleEnv(tt.withURL, tsOpts,
// For authkey with tag approver, use tagged PreAuthKeys (tags-as-identity model)
var preAuthKeyTags []string
if !tt.withURL && strings.HasPrefix(tt.approver, "tag:") {
preAuthKeyTags = []string{tt.approver}
}
err = scenario.createHeadscaleEnvWithTags(tt.withURL, tsOpts, preAuthKeyTags,
opts...,
)
requireNoErrHeadscaleEnv(t, err)
@@ -2315,6 +2315,12 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
)
}
// For webauth with tag approver, the node needs to advertise the tag during registration
// (tags-as-identity model: webauth nodes can use --advertise-tags if authorized by tagOwners)
if tt.withURL && strings.HasPrefix(tt.approver, "tag:") {
tsOpts = append(tsOpts, tsic.WithTags([]string{tt.approver}))
}
tsOpts = append(tsOpts, tsic.WithNetwork(usernet1))
// This whole dance is to add a node _after_ all the other nodes
@@ -2349,7 +2355,14 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
userMap, err := headscale.MapUsers()
require.NoError(t, err)
pak, err := scenario.CreatePreAuthKey(userMap["user1"].GetId(), false, false)
// If the approver is a tag, create a tagged PreAuthKey
// (tags-as-identity model: tags come from PreAuthKey, not --advertise-tags)
var pak *v1.PreAuthKey
if strings.HasPrefix(tt.approver, "tag:") {
pak, err = scenario.CreatePreAuthKeyWithTags(userMap["user1"].GetId(), false, false, []string{tt.approver})
} else {
pak, err = scenario.CreatePreAuthKey(userMap["user1"].GetId(), false, false)
}
require.NoError(t, err)
err = routerUsernet1.Login(headscale.GetEndpoint(), pak.GetKey())
@@ -2444,7 +2457,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
}
assert.True(c, routerPeerFound, "Client should see the router peer")
}, 5*time.Second, 200*time.Millisecond, "Verifying routes sent to client after auto-approval")
}, 30*time.Second, 200*time.Millisecond, "Verifying routes sent to client after auto-approval")
url := fmt.Sprintf("http://%s/etc/hostname", webip)
t.Logf("url from %s to %s", client.Hostname(), url)
@@ -2453,7 +2466,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
result, err := client.Curl(url)
assert.NoError(c, err)
assert.Len(c, result, 13)
}, 20*time.Second, 200*time.Millisecond, "Verifying client can reach webservice through auto-approved route")
}, 60*time.Second, 200*time.Millisecond, "Verifying client can reach webservice through auto-approved route")
assert.EventuallyWithT(t, func(c *assert.CollectT) {
tr, err := client.Traceroute(webip)
@@ -2463,7 +2476,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
return
}
assertTracerouteViaIPWithCollect(c, tr, ip)
}, 20*time.Second, 200*time.Millisecond, "Verifying traceroute goes through auto-approved router")
}, 60*time.Second, 200*time.Millisecond, "Verifying traceroute goes through auto-approved router")
// Remove the auto approval from the policy, any routes already enabled should be allowed.
prefix = *route
@@ -2506,7 +2519,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
requirePeerSubnetRoutesWithCollect(c, peerStatus, nil)
}
}
}, 5*time.Second, 200*time.Millisecond, "Verifying routes remain after policy change")
}, 30*time.Second, 200*time.Millisecond, "Verifying routes remain after policy change")
url = fmt.Sprintf("http://%s/etc/hostname", webip)
t.Logf("url from %s to %s", client.Hostname(), url)
@@ -2515,7 +2528,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
result, err := client.Curl(url)
assert.NoError(c, err)
assert.Len(c, result, 13)
}, 20*time.Second, 200*time.Millisecond, "Verifying client can still reach webservice after policy change")
}, 60*time.Second, 200*time.Millisecond, "Verifying client can still reach webservice after policy change")
assert.EventuallyWithT(t, func(c *assert.CollectT) {
tr, err := client.Traceroute(webip)
@@ -2525,7 +2538,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
return
}
assertTracerouteViaIPWithCollect(c, tr, ip)
}, 20*time.Second, 200*time.Millisecond, "Verifying traceroute still goes through router after policy change")
}, 60*time.Second, 200*time.Millisecond, "Verifying traceroute still goes through router after policy change")
// Disable the route, making it unavailable since it is no longer auto-approved
_, err = headscale.ApproveRoutes(
@@ -2541,7 +2554,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
nodes, err = headscale.ListNodes()
assert.NoError(c, err)
requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 0, 0)
}, 10*time.Second, 500*time.Millisecond, "route state changes should propagate")
}, 15*time.Second, 500*time.Millisecond, "route state changes should propagate")
// Verify that the routes have been sent to the client.
assert.EventuallyWithT(t, func(c *assert.CollectT) {
@@ -2552,7 +2565,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
peerStatus := status.Peer[peerKey]
requirePeerSubnetRoutesWithCollect(c, peerStatus, nil)
}
}, 5*time.Second, 200*time.Millisecond, "Verifying routes disabled after route removal")
}, 30*time.Second, 200*time.Millisecond, "Verifying routes disabled after route removal")
// Add the route back to the auto approver in the policy, the route should
// now become available again.
@@ -2580,7 +2593,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
nodes, err = headscale.ListNodes()
assert.NoError(c, err)
requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1)
}, 10*time.Second, 500*time.Millisecond, "route state changes should propagate")
}, 15*time.Second, 500*time.Millisecond, "route state changes should propagate")
// Verify that the routes have been sent to the client.
assert.EventuallyWithT(t, func(c *assert.CollectT) {
@@ -2600,7 +2613,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
requirePeerSubnetRoutesWithCollect(c, peerStatus, nil)
}
}
}, 5*time.Second, 200*time.Millisecond, "Verifying routes re-enabled after policy re-approval")
}, 30*time.Second, 200*time.Millisecond, "Verifying routes re-enabled after policy re-approval")
url = fmt.Sprintf("http://%s/etc/hostname", webip)
t.Logf("url from %s to %s", client.Hostname(), url)
@@ -2609,7 +2622,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
result, err := client.Curl(url)
assert.NoError(c, err)
assert.Len(c, result, 13)
}, 20*time.Second, 200*time.Millisecond, "Verifying client can reach webservice after route re-approval")
}, 60*time.Second, 200*time.Millisecond, "Verifying client can reach webservice after route re-approval")
assert.EventuallyWithT(t, func(c *assert.CollectT) {
tr, err := client.Traceroute(webip)
@@ -2619,7 +2632,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
return
}
assertTracerouteViaIPWithCollect(c, tr, ip)
}, 20*time.Second, 200*time.Millisecond, "Verifying traceroute goes through router after re-approval")
}, 60*time.Second, 200*time.Millisecond, "Verifying traceroute goes through router after re-approval")
// Advertise and validate a subnet of an auto approved route, /24 inside the
// auto approved /16.
@@ -2639,7 +2652,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
assert.NoError(c, err)
requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1)
requireNodeRouteCountWithCollect(c, nodes[1], 1, 1, 1)
}, 10*time.Second, 500*time.Millisecond, "route state changes should propagate")
}, 15*time.Second, 500*time.Millisecond, "route state changes should propagate")
// Verify that the routes have been sent to the client.
assert.EventuallyWithT(t, func(c *assert.CollectT) {
@@ -2663,7 +2676,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
requirePeerSubnetRoutesWithCollect(c, peerStatus, nil)
}
}
}, 5*time.Second, 200*time.Millisecond, "Verifying sub-route propagated to client")
}, 30*time.Second, 200*time.Millisecond, "Verifying sub-route propagated to client")
// Advertise a not approved route will not end up anywhere
command = []string{
@@ -2683,7 +2696,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1)
requireNodeRouteCountWithCollect(c, nodes[1], 1, 1, 0)
requireNodeRouteCountWithCollect(c, nodes[2], 0, 0, 0)
}, 10*time.Second, 500*time.Millisecond, "route state changes should propagate")
}, 15*time.Second, 500*time.Millisecond, "route state changes should propagate")
// Verify that the routes have been sent to the client.
assert.EventuallyWithT(t, func(c *assert.CollectT) {
@@ -2703,7 +2716,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
requirePeerSubnetRoutesWithCollect(c, peerStatus, nil)
}
}
}, 5*time.Second, 200*time.Millisecond, "Verifying unapproved route not propagated")
}, 30*time.Second, 200*time.Millisecond, "Verifying unapproved route not propagated")
// Exit routes are also automatically approved
command = []string{
@@ -2721,7 +2734,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
requireNodeRouteCountWithCollect(c, MustFindNode(routerUsernet1.Hostname(), nodes), 1, 1, 1)
requireNodeRouteCountWithCollect(c, nodes[1], 1, 1, 0)
requireNodeRouteCountWithCollect(c, nodes[2], 2, 2, 2)
}, 10*time.Second, 500*time.Millisecond, "route state changes should propagate")
}, 15*time.Second, 500*time.Millisecond, "route state changes should propagate")
// Verify that the routes have been sent to the client.
assert.EventuallyWithT(t, func(c *assert.CollectT) {
@@ -2742,7 +2755,7 @@ func TestAutoApproveMultiNetwork(t *testing.T) {
requirePeerSubnetRoutesWithCollect(c, peerStatus, nil)
}
}
}, 5*time.Second, 200*time.Millisecond, "Verifying exit node routes propagated to client")
}, 30*time.Second, 200*time.Millisecond, "Verifying exit node routes propagated to client")
})
}
}
@@ -2985,7 +2998,7 @@ func TestSubnetRouteACLFiltering(t *testing.T) {
// Check that the router has 3 routes now approved and available
requireNodeRouteCountWithCollect(c, routerNode, 3, 3, 3)
}, 10*time.Second, 500*time.Millisecond, "route state changes should propagate")
}, 15*time.Second, 500*time.Millisecond, "route state changes should propagate")
// Now check the client node status
assert.EventuallyWithT(t, func(c *assert.CollectT) {
@@ -3006,7 +3019,7 @@ func TestSubnetRouteACLFiltering(t *testing.T) {
result, err := nodeClient.Curl(weburl)
assert.NoError(c, err)
assert.Len(c, result, 13)
}, 20*time.Second, 200*time.Millisecond, "Verifying node can reach webservice through allowed route")
}, 60*time.Second, 200*time.Millisecond, "Verifying node can reach webservice through allowed route")
assert.EventuallyWithT(t, func(c *assert.CollectT) {
tr, err := nodeClient.Traceroute(webip)
@@ -3016,5 +3029,5 @@ func TestSubnetRouteACLFiltering(t *testing.T) {
return
}
assertTracerouteViaIPWithCollect(c, tr, ip)
}, 20*time.Second, 200*time.Millisecond, "Verifying traceroute goes through router")
}, 60*time.Second, 200*time.Millisecond, "Verifying traceroute goes through router")
}