2023-12-09 18:09:24 +01:00
package integration
import (
2025-08-06 08:37:02 +02:00
"cmp"
2025-05-04 22:52:47 +03:00
"encoding/json"
2025-03-21 11:49:32 +01:00
"fmt"
2023-12-09 18:09:24 +01:00
"net/netip"
2025-07-10 23:38:55 +02:00
"slices"
2023-12-09 18:09:24 +01:00
"sort"
2025-08-06 08:37:02 +02:00
"strconv"
2025-05-20 13:57:26 +02:00
"strings"
2023-12-09 18:09:24 +01:00
"testing"
"time"
2025-05-04 22:52:47 +03:00
cmpdiff "github.com/google/go-cmp/cmp"
2025-03-21 11:49:32 +01:00
"github.com/google/go-cmp/cmp/cmpopts"
2023-12-09 18:09:24 +01:00
v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
2025-05-20 13:57:26 +02:00
policyv2 "github.com/juanfont/headscale/hscontrol/policy/v2"
2025-08-06 08:37:02 +02:00
"github.com/juanfont/headscale/hscontrol/routes"
2025-03-31 15:55:07 +02:00
"github.com/juanfont/headscale/hscontrol/types"
2024-01-18 17:30:25 +01:00
"github.com/juanfont/headscale/hscontrol/util"
2023-12-09 18:09:24 +01:00
"github.com/juanfont/headscale/integration/hsic"
"github.com/juanfont/headscale/integration/tsic"
"github.com/stretchr/testify/assert"
2025-02-23 14:10:25 -08:00
"github.com/stretchr/testify/require"
2025-08-06 08:37:02 +02:00
xmaps "golang.org/x/exp/maps"
2025-02-26 07:22:55 -08:00
"tailscale.com/ipn/ipnstate"
2025-02-23 14:10:25 -08:00
"tailscale.com/net/tsaddr"
2025-05-20 13:57:26 +02:00
"tailscale.com/tailcfg"
2024-01-18 17:30:25 +01:00
"tailscale.com/types/ipproto"
2024-08-23 15:28:54 +02:00
"tailscale.com/types/views"
2025-05-04 22:52:47 +03:00
"tailscale.com/util/must"
2025-03-21 11:49:32 +01:00
"tailscale.com/util/slicesx"
2024-01-18 17:30:25 +01:00
"tailscale.com/wgengine/filter"
2023-12-09 18:09:24 +01:00
)
2024-10-23 10:45:59 -05:00
// allPorts is the port range spanning every port number, 0 through 0xffff.
var allPorts = filter.PortRange{
	First: 0,
	Last:  0xffff,
}
2023-12-09 18:09:24 +01:00
// This test is both testing the routes command and the propagation of
// routes.
func TestEnablingRoutes ( t * testing . T ) {
IntegrationSkip ( t )
2025-03-21 11:49:32 +01:00
spec := ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" } ,
}
2023-12-09 18:09:24 +01:00
2025-03-21 11:49:32 +01:00
scenario , err := NewScenario ( spec )
2025-02-26 07:22:55 -08:00
require . NoErrorf ( t , err , "failed to create scenario: %s" , err )
2024-09-17 10:44:55 +01:00
defer scenario . ShutdownAssertNoPanics ( t )
2023-12-09 18:09:24 +01:00
2025-03-21 11:49:32 +01:00
err = scenario . CreateHeadscaleEnv (
[ ] tsic . Option { tsic . WithAcceptRoutes ( ) } ,
hsic . WithTestName ( "clienableroute" ) )
2023-12-09 18:09:24 +01:00
assertNoErrHeadscaleEnv ( t , err )
allClients , err := scenario . ListTailscaleClients ( )
assertNoErrListClients ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
expectedRoutes := map [ string ] string {
"1" : "10.0.0.0/24" ,
"2" : "10.0.1.0/24" ,
"3" : "10.0.2.0/24" ,
}
// advertise routes using the up command
for _ , client := range allClients {
2025-08-06 08:37:02 +02:00
status := client . MustStatus ( )
2023-12-09 18:09:24 +01:00
command := [ ] string {
"tailscale" ,
"set" ,
"--advertise-routes=" + expectedRoutes [ string ( status . Self . ID ) ] ,
}
_ , _ , err = client . Execute ( command )
2025-02-26 07:22:55 -08:00
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
2023-12-09 18:09:24 +01:00
}
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
2025-08-06 08:37:02 +02:00
var nodes [ ] * v1 . Node
// Wait for route advertisements to propagate to NodeStore
assert . EventuallyWithT ( t , func ( ct * assert . CollectT ) {
var err error
nodes , err = headscale . ListNodes ( )
assert . NoError ( ct , err )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
for _ , node := range nodes {
assert . Len ( ct , node . GetAvailableRoutes ( ) , 1 )
assert . Empty ( ct , node . GetApprovedRoutes ( ) )
assert . Empty ( ct , node . GetSubnetRoutes ( ) )
}
} , 10 * time . Second , 100 * time . Millisecond , "route advertisements should propagate to all nodes" )
2023-12-09 18:09:24 +01:00
// Verify that no routes has been sent to the client,
// they are not yet enabled.
for _ , client := range allClients {
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , peerStatus . PrimaryRoutes )
}
} , 5 * time . Second , 200 * time . Millisecond , "Verifying no routes are active before approval" )
2023-12-09 18:09:24 +01:00
}
2025-02-26 07:22:55 -08:00
for _ , node := range nodes {
_ , err := headscale . ApproveRoutes (
node . GetId ( ) ,
util . MustStringsToPrefixes ( node . GetAvailableRoutes ( ) ) ,
)
require . NoError ( t , err )
2023-12-09 18:09:24 +01:00
}
2025-08-06 08:37:02 +02:00
// Wait for route approvals to propagate to NodeStore
assert . EventuallyWithT ( t , func ( ct * assert . CollectT ) {
var err error
nodes , err = headscale . ListNodes ( )
assert . NoError ( ct , err )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
for _ , node := range nodes {
assert . Len ( ct , node . GetAvailableRoutes ( ) , 1 )
assert . Len ( ct , node . GetApprovedRoutes ( ) , 1 )
assert . Len ( ct , node . GetSubnetRoutes ( ) , 1 )
}
} , 10 * time . Second , 100 * time . Millisecond , "route approvals should propagate to all nodes" )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate to clients
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// Verify that the clients can see the new routes
for _ , client := range allClients {
status , err := client . Status ( )
assert . NoError ( c , err )
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
assert . NotNil ( c , peerStatus . PrimaryRoutes )
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , peerStatus . AllowedIPs )
if peerStatus . AllowedIPs != nil {
assert . Len ( c , peerStatus . AllowedIPs . AsSlice ( ) , 3 )
}
2025-07-05 23:30:47 +02:00
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { netip . MustParsePrefix ( expectedRoutes [ string ( peerStatus . ID ) ] ) } )
}
2023-12-09 18:09:24 +01:00
}
2025-07-05 23:30:47 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "clients should see new routes" )
2023-12-09 18:09:24 +01:00
2025-02-26 07:22:55 -08:00
_ , err = headscale . ApproveRoutes (
1 ,
[ ] netip . Prefix { netip . MustParsePrefix ( "10.0.1.0/24" ) } ,
2023-12-09 18:09:24 +01:00
)
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
2023-12-09 18:09:24 +01:00
2025-02-26 07:22:55 -08:00
_ , err = headscale . ApproveRoutes (
2 ,
[ ] netip . Prefix { } ,
)
require . NoError ( t , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate to nodes
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
2025-08-06 08:37:02 +02:00
var err error
2025-07-05 23:30:47 +02:00
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
for _ , node := range nodes {
if node . GetId ( ) == 1 {
assert . Len ( c , node . GetAvailableRoutes ( ) , 1 ) // 10.0.0.0/24
assert . Len ( c , node . GetApprovedRoutes ( ) , 1 ) // 10.0.1.0/24
assert . Empty ( c , node . GetSubnetRoutes ( ) )
} else if node . GetId ( ) == 2 {
assert . Len ( c , node . GetAvailableRoutes ( ) , 1 ) // 10.0.1.0/24
assert . Empty ( c , node . GetApprovedRoutes ( ) )
assert . Empty ( c , node . GetSubnetRoutes ( ) )
} else {
assert . Len ( c , node . GetAvailableRoutes ( ) , 1 ) // 10.0.2.0/24
assert . Len ( c , node . GetApprovedRoutes ( ) , 1 ) // 10.0.2.0/24
assert . Len ( c , node . GetSubnetRoutes ( ) , 1 ) // 10.0.2.0/24
}
2023-12-09 18:09:24 +01:00
}
2025-07-05 23:30:47 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate to nodes" )
2023-12-09 18:09:24 +01:00
// Verify that the clients can see the new routes
for _ , client := range allClients {
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
switch peerStatus . ID {
case "1" :
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
case "2" :
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
default :
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { netip . MustParsePrefix ( "10.0.2.0/24" ) } )
}
2023-12-09 18:09:24 +01:00
}
2025-08-06 08:37:02 +02:00
} , 5 * time . Second , 200 * time . Millisecond , "Verifying final route state visible to clients" )
2023-12-09 18:09:24 +01:00
}
}
func TestHASubnetRouterFailover ( t * testing . T ) {
IntegrationSkip ( t )
2025-08-06 08:37:02 +02:00
propagationTime := 60 * time . Second
// Helper function to validate primary routes table state
validatePrimaryRoutes := func ( t * testing . T , headscale ControlServer , expectedRoutes * routes . DebugRoutes , message string ) {
t . Helper ( )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
primaryRoutesState , err := headscale . PrimaryRoutes ( )
assert . NoError ( c , err )
if diff := cmpdiff . Diff ( expectedRoutes , primaryRoutesState , util . PrefixComparer ) ; diff != "" {
t . Log ( message )
t . Errorf ( "validatePrimaryRoutes mismatch (-want +got):\n%s" , diff )
}
} , propagationTime , 200 * time . Millisecond , "Validating primary routes table" )
}
2025-03-21 11:49:32 +01:00
spec := ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
}
2023-12-09 18:09:24 +01:00
2025-03-21 11:49:32 +01:00
scenario , err := NewScenario ( spec )
2025-02-26 07:22:55 -08:00
require . NoErrorf ( t , err , "failed to create scenario: %s" , err )
2025-08-06 08:37:02 +02:00
// defer scenario.ShutdownAssertNoPanics(t)
2023-12-09 18:09:24 +01:00
2025-03-21 11:49:32 +01:00
err = scenario . CreateHeadscaleEnv (
[ ] tsic . Option { tsic . WithAcceptRoutes ( ) } ,
2025-02-26 07:22:55 -08:00
hsic . WithTestName ( "clienableroute" ) ,
hsic . WithEmbeddedDERPServerOnly ( ) ,
hsic . WithTLS ( ) ,
)
2023-12-09 18:09:24 +01:00
assertNoErrHeadscaleEnv ( t , err )
allClients , err := scenario . ListTailscaleClients ( )
assertNoErrListClients ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
2025-03-21 11:49:32 +01:00
prefp , err := scenario . SubnetOfNetwork ( "usernet1" )
require . NoError ( t , err )
pref := * prefp
t . Logf ( "usernet1 prefix: %s" , pref . String ( ) )
usernet1 , err := scenario . Network ( "usernet1" )
require . NoError ( t , err )
services , err := scenario . Services ( "usernet1" )
require . NoError ( t , err )
require . Len ( t , services , 1 )
web := services [ 0 ]
webip := netip . MustParseAddr ( web . GetIPInNetwork ( usernet1 ) )
weburl := fmt . Sprintf ( "http://%s/etc/hostname" , webip )
t . Logf ( "webservice: %s, %s" , webip . String ( ) , weburl )
2023-12-09 18:09:24 +01:00
// Sort nodes by ID
sort . SliceStable ( allClients , func ( i , j int ) bool {
2025-02-26 07:22:55 -08:00
statusI := allClients [ i ] . MustStatus ( )
statusJ := allClients [ j ] . MustStatus ( )
2023-12-09 18:09:24 +01:00
return statusI . Self . ID < statusJ . Self . ID
} )
2025-03-21 11:49:32 +01:00
// This is ok because the scenario creates users in order, so the first three
// nodes, which are the subnet routers, will be created first, and the nodes
// of the last user will be created after them.
2023-12-09 18:09:24 +01:00
subRouter1 := allClients [ 0 ]
subRouter2 := allClients [ 1 ]
2025-02-26 07:22:55 -08:00
subRouter3 := allClients [ 2 ]
2023-12-09 18:09:24 +01:00
2025-02-26 07:22:55 -08:00
client := allClients [ 3 ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
t . Logf ( "%s (%s) picked as client" , client . Hostname ( ) , client . MustID ( ) )
t . Logf ( "=== Initial Route Advertisement - Setting up HA configuration with 3 routers ===" )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " - Router 1 (%s): Advertising route %s - will become PRIMARY when approved" , subRouter1 . Hostname ( ) , pref . String ( ) )
t . Logf ( " - Router 2 (%s): Advertising route %s - will be STANDBY when approved" , subRouter2 . Hostname ( ) , pref . String ( ) )
t . Logf ( " - Router 3 (%s): Advertising route %s - will be STANDBY when approved" , subRouter3 . Hostname ( ) , pref . String ( ) )
t . Logf ( " Expected: All 3 routers advertise the same route for redundancy, but only one will be primary at a time" )
2025-02-26 07:22:55 -08:00
for _ , client := range allClients [ : 3 ] {
2025-03-21 11:49:32 +01:00
command := [ ] string {
"tailscale" ,
"set" ,
"--advertise-routes=" + pref . String ( ) ,
2023-12-09 18:09:24 +01:00
}
2025-03-21 11:49:32 +01:00
_ , _ , err = client . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
2023-12-09 18:09:24 +01:00
}
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
2025-07-05 23:30:47 +02:00
// Wait for route configuration changes after advertising routes
var nodes [ ] * v1 . Node
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
2025-08-06 08:37:02 +02:00
require . GreaterOrEqual ( t , len ( nodes ) , 3 , "need at least 3 nodes to avoid panic" )
2025-07-05 23:30:47 +02:00
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 1 , 0 , 0 )
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 0 , 0 )
requireNodeRouteCountWithCollect ( c , nodes [ 2 ] , 1 , 0 , 0 )
2025-08-06 08:37:02 +02:00
} , propagationTime , 200 * time . Millisecond , "Waiting for route advertisements: All 3 routers should have advertised routes (available=1) but none approved yet (approved=0, subnet=0)" )
2023-12-09 18:09:24 +01:00
// Verify that no routes have been sent to the client,
// they are not yet enabled.
for _ , client := range allClients {
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , peerStatus . PrimaryRoutes )
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
} , propagationTime , 200 * time . Millisecond , "Verifying no routes are active before approval" )
}
// Declare variables that will be used across multiple EventuallyWithT blocks
var (
srs1 , srs2 , srs3 * ipnstate . Status
clientStatus * ipnstate . Status
srs1PeerStatus * ipnstate . PeerStatus
srs2PeerStatus * ipnstate . PeerStatus
srs3PeerStatus * ipnstate . PeerStatus
)
// Helper function to check test failure and print route map if needed
checkFailureAndPrintRoutes := func ( t * testing . T , client TailscaleClient ) {
if t . Failed ( ) {
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Test failed at this checkpoint" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
status , err := client . Status ( )
if err == nil {
printCurrentRouteMap ( t , xmaps . Values ( status . Peer ) ... )
}
t . FailNow ( )
2023-12-09 18:09:24 +01:00
}
}
2025-08-06 08:37:02 +02:00
// Validate primary routes table state - no routes approved yet
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix { } ,
PrimaryRoutes : map [ string ] types . NodeID { } , // No primary routes yet
} , "Primary routes table should be empty (no approved routes yet)" )
checkFailureAndPrintRoutes ( t , client )
2025-03-21 11:49:32 +01:00
// Enable route on node 1
2025-08-06 08:37:02 +02:00
t . Logf ( "=== Approving route on router 1 (%s) - Single router mode (no HA yet) ===" , subRouter1 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Expected: Router 1 becomes PRIMARY with route %s active" , pref . String ( ) )
t . Logf ( " Expected: Routers 2 & 3 remain with advertised but unapproved routes" )
t . Logf ( " Expected: Client can access webservice through router 1 only" )
2025-03-21 11:49:32 +01:00
_ , err = headscale . ApproveRoutes (
2025-07-28 11:15:53 +02:00
MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ,
2025-03-21 11:49:32 +01:00
[ ] netip . Prefix { pref } ,
)
require . NoError ( t , err )
2025-07-05 23:30:47 +02:00
// Wait for route approval on first subnet router
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
2025-08-06 08:37:02 +02:00
require . GreaterOrEqual ( t , len ( nodes ) , 3 , "need at least 3 nodes to avoid panic" )
2025-07-05 23:30:47 +02:00
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 0 , 0 )
requireNodeRouteCountWithCollect ( c , nodes [ 2 ] , 1 , 0 , 0 )
2025-08-06 08:37:02 +02:00
} , propagationTime , 200 * time . Millisecond , "Router 1 approval verification: Should be PRIMARY (available=1, approved=1, subnet=1), others still unapproved (available=1, approved=0, subnet=0)" )
2023-12-09 18:09:24 +01:00
2025-03-21 11:49:32 +01:00
// Verify that the client has routes from the primary machine and can access
// the webservice.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
srs1 = subRouter1 . MustStatus ( )
srs2 = subRouter2 . MustStatus ( )
srs3 = subRouter3 . MustStatus ( )
clientStatus = client . MustStatus ( )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
assert . True ( c , srs1PeerStatus . Online , "Router 1 should be online and serving as PRIMARY" )
assert . True ( c , srs2PeerStatus . Online , "Router 2 should be online but NOT serving routes (unapproved)" )
assert . True ( c , srs3PeerStatus . Online , "Router 3 should be online but NOT serving routes (unapproved)" )
assert . Nil ( c , srs2PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs3PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs1PeerStatus . PrimaryRoutes )
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , [ ] netip . Prefix { pref } )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , nil )
if srs1PeerStatus . PrimaryRoutes != nil {
t . Logf ( "got list: %v, want in: %v" , srs1PeerStatus . PrimaryRoutes . AsSlice ( ) , pref )
assert . Contains ( c ,
srs1PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Verifying Router 1 is PRIMARY with routes after approval" )
t . Logf ( "=== Validating connectivity through PRIMARY router 1 (%s) to webservice at %s ===" , must . Get ( subRouter1 . IPv4 ( ) ) . String ( ) , webip . String ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Expected: Traffic flows through router 1 as it's the only approved route" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 1" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter1 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter1" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute goes through router 1" )
// Validate primary routes table state - router 1 is primary
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
// Note: Router 2 and 3 are available but not approved
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 1 should be primary for route " + pref . String ( ) )
checkFailureAndPrintRoutes ( t , client )
2025-03-21 11:49:32 +01:00
// Enable route on node 2, now we will have a HA subnet router
2025-08-06 08:37:02 +02:00
t . Logf ( "=== Enabling High Availability by approving route on router 2 (%s) ===" , subRouter2 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 is PRIMARY and actively serving traffic" )
t . Logf ( " Expected: Router 2 becomes STANDBY (approved but not primary)" )
t . Logf ( " Expected: Router 1 remains PRIMARY (no flapping - stability preferred)" )
t . Logf ( " Expected: HA is now active - if router 1 fails, router 2 can take over" )
2025-03-21 11:49:32 +01:00
_ , err = headscale . ApproveRoutes (
2025-07-28 11:15:53 +02:00
MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ,
2025-03-21 11:49:32 +01:00
[ ] netip . Prefix { pref } ,
2023-12-09 18:09:24 +01:00
)
2025-03-21 11:49:32 +01:00
require . NoError ( t , err )
2025-07-05 23:30:47 +02:00
// Wait for route approval on second subnet router
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
2025-08-06 08:37:02 +02:00
if len ( nodes ) >= 3 {
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 1 , 0 )
requireNodeRouteCountWithCollect ( c , nodes [ 2 ] , 1 , 0 , 0 )
}
} , 3 * time . Second , 200 * time . Millisecond , "HA setup verification: Router 2 approved as STANDBY (available=1, approved=1, subnet=0), Router 1 stays PRIMARY (subnet=1)" )
2025-03-21 11:49:32 +01:00
// Verify that the client has routes from the primary machine
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
srs1 = subRouter1 . MustStatus ( )
srs2 = subRouter2 . MustStatus ( )
srs3 = subRouter3 . MustStatus ( )
clientStatus = client . MustStatus ( )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
assert . True ( c , srs1PeerStatus . Online , "Router 1 should be online and remain PRIMARY" )
assert . True ( c , srs2PeerStatus . Online , "Router 2 should be online and now approved as STANDBY" )
assert . True ( c , srs3PeerStatus . Online , "Router 3 should be online but still unapproved" )
assert . Nil ( c , srs2PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs3PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs1PeerStatus . PrimaryRoutes )
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , [ ] netip . Prefix { pref } )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , nil )
if srs1PeerStatus . PrimaryRoutes != nil {
t . Logf ( "got list: %v, want in: %v" , srs1PeerStatus . PrimaryRoutes . AsSlice ( ) , pref )
assert . Contains ( c ,
srs1PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Verifying Router 1 remains PRIMARY after Router 2 approval" )
// Validate primary routes table state - router 1 still primary, router 2 approved but standby
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
// Note: Router 3 is available but not approved
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 1 should remain primary after router 2 approval" )
checkFailureAndPrintRoutes ( t , client )
t . Logf ( "=== Validating HA configuration - Router 1 PRIMARY, Router 2 STANDBY ===" )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current routing: Traffic through router 1 (%s) to %s" , must . Get ( subRouter1 . IPv4 ( ) ) , webip . String ( ) )
t . Logf ( " Expected: Router 1 continues to handle all traffic (no change from before)" )
t . Logf ( " Expected: Router 2 is ready to take over if router 1 fails" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 1 in HA mode" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter1 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter1" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute still goes through router 1 in HA mode" )
// Validate primary routes table state - router 1 primary, router 2 approved (standby)
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
// Note: Router 3 is available but not approved
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 1 primary with router 2 as standby" )
checkFailureAndPrintRoutes ( t , client )
2025-03-21 11:49:32 +01:00
// Enable route on node 3, now we will have a second standby and all will
// be enabled.
2025-08-06 08:37:02 +02:00
t . Logf ( "=== Adding second STANDBY router by approving route on router 3 (%s) ===" , subRouter3 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 PRIMARY, Router 2 STANDBY" )
t . Logf ( " Expected: Router 3 becomes second STANDBY (approved but not primary)" )
t . Logf ( " Expected: Router 1 remains PRIMARY, Router 2 remains first STANDBY" )
t . Logf ( " Expected: Full HA configuration with 1 PRIMARY + 2 STANDBY routers" )
2025-03-21 11:49:32 +01:00
_ , err = headscale . ApproveRoutes (
2025-07-28 11:15:53 +02:00
MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ,
2025-03-21 11:49:32 +01:00
[ ] netip . Prefix { pref } ,
)
require . NoError ( t , err )
2025-07-05 23:30:47 +02:00
// Wait for route approval on third subnet router
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
2025-08-06 08:37:02 +02:00
require . GreaterOrEqual ( t , len ( nodes ) , 3 , "need at least 3 nodes to avoid panic" )
2025-07-05 23:30:47 +02:00
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 1 , 0 )
requireNodeRouteCountWithCollect ( c , nodes [ 2 ] , 1 , 1 , 0 )
2025-08-06 08:37:02 +02:00
} , 3 * time . Second , 200 * time . Millisecond , "Full HA verification: Router 3 approved as second STANDBY (available=1, approved=1, subnet=0), Router 1 PRIMARY, Router 2 first STANDBY" )
2025-03-21 11:49:32 +01:00
// Verify that the client has routes from the primary machine
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
srs1 = subRouter1 . MustStatus ( )
srs2 = subRouter2 . MustStatus ( )
srs3 = subRouter3 . MustStatus ( )
clientStatus = client . MustStatus ( )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
assert . True ( c , srs1PeerStatus . Online , "Router 1 should be online and remain PRIMARY" )
assert . True ( c , srs2PeerStatus . Online , "Router 2 should be online as first STANDBY" )
assert . True ( c , srs3PeerStatus . Online , "Router 3 should be online as second STANDBY" )
assert . Nil ( c , srs2PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs3PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs1PeerStatus . PrimaryRoutes )
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , [ ] netip . Prefix { pref } )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , nil )
if srs1PeerStatus . PrimaryRoutes != nil {
t . Logf ( "got list: %v, want in: %v" , srs1PeerStatus . PrimaryRoutes . AsSlice ( ) , pref )
assert . Contains ( c ,
srs1PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Verifying full HA with 3 routers: Router 1 PRIMARY, Routers 2 & 3 STANDBY" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 1 with full HA" )
2025-03-21 11:49:32 +01:00
2025-07-05 23:30:47 +02:00
// Wait for traceroute to work correctly through the expected router
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
// Get the expected router IP - use a more robust approach to handle temporary disconnections
ips , err := subRouter1 . IPs ( )
assert . NoError ( c , err )
assert . NotEmpty ( c , ips , "subRouter1 should have IP addresses" )
var expectedIP netip . Addr
for _ , ip := range ips {
if ip . Is4 ( ) {
expectedIP = ip
break
}
}
assert . True ( c , expectedIP . IsValid ( ) , "subRouter1 should have a valid IPv4 address" )
assertTracerouteViaIPWithCollect ( c , tr , expectedIP )
2025-08-06 08:37:02 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "Verifying traffic still flows through PRIMARY router 1 with full HA setup active" )
// Validate primary routes table state - all 3 routers approved, router 1 still primary
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 1 primary with all 3 routers approved" )
checkFailureAndPrintRoutes ( t , client )
2023-12-09 18:09:24 +01:00
// Take down the current primary
2025-08-06 08:37:02 +02:00
t . Logf ( "=== FAILOVER TEST: Taking down PRIMARY router 1 (%s) ===" , subRouter1 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 PRIMARY (serving traffic), Router 2 & 3 STANDBY" )
t . Logf ( " Action: Shutting down router 1 to simulate failure" )
t . Logf ( " Expected: Router 2 (%s) should automatically become new PRIMARY" , subRouter2 . Hostname ( ) )
t . Logf ( " Expected: Router 3 remains STANDBY" )
t . Logf ( " Expected: Traffic seamlessly fails over to router 2" )
2023-12-09 18:09:24 +01:00
err = subRouter1 . Down ( )
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for router status changes after r1 goes down
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
srs2 = subRouter2 . MustStatus ( )
clientStatus = client . MustStatus ( )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
2025-07-05 23:30:47 +02:00
assert . False ( c , srs1PeerStatus . Online , "r1 should be offline" )
assert . True ( c , srs2PeerStatus . Online , "r2 should be online" )
assert . True ( c , srs3PeerStatus . Online , "r3 should be online" )
2024-02-23 10:59:24 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , srs1PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs2PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs3PeerStatus . PrimaryRoutes )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , [ ] netip . Prefix { pref } )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , nil )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
if srs2PeerStatus . PrimaryRoutes != nil {
assert . Contains ( c ,
srs2PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Failover verification: Router 1 offline, Router 2 should be new PRIMARY with routes, Router 3 still STANDBY" )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 2 after failover" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter2 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter2" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute goes through router 2 after failover" )
// Validate primary routes table state - router 2 is now primary after router 1 failure
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
// Router 1 is disconnected, so not in AvailableRoutes
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 2 should be primary after router 1 failure" )
checkFailureAndPrintRoutes ( t , client )
2025-03-21 11:49:32 +01:00
2023-12-09 18:09:24 +01:00
// Take down subnet router 2, leaving only subnet router 3 available
2025-08-06 08:37:02 +02:00
t . Logf ( "=== FAILOVER TEST: Taking down NEW PRIMARY router 2 (%s) ===" , subRouter2 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 OFFLINE, Router 2 PRIMARY (serving traffic), Router 3 STANDBY" )
t . Logf ( " Action: Shutting down router 2 to simulate cascading failure" )
t . Logf ( " Expected: Router 3 (%s) should become new PRIMARY (last remaining router)" , subRouter3 . Hostname ( ) )
t . Logf ( " Expected: With only 1 router left, HA is effectively disabled" )
t . Logf ( " Expected: Traffic continues through router 3" )
2023-12-09 18:09:24 +01:00
err = subRouter2 . Down ( )
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for router status changes after r2 goes down
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
clientStatus , err = client . Status ( )
assert . NoError ( c , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
2024-02-23 10:59:24 +01:00
2025-08-06 08:37:02 +02:00
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . False ( c , srs1PeerStatus . Online , "Router 1 should still be offline" )
assert . False ( c , srs2PeerStatus . Online , "Router 2 should now be offline after failure" )
assert . True ( c , srs3PeerStatus . Online , "Router 3 should be online and taking over as PRIMARY" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , srs1PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs2PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs3PeerStatus . PrimaryRoutes )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , [ ] netip . Prefix { pref } )
} , propagationTime , 200 * time . Millisecond , "Second failover verification: Router 1 & 2 offline, Router 3 should be new PRIMARY (last router standing) with routes" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 3 after second failover" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter3 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter3" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute goes through router 3 after second failover" )
// Validate primary routes table state - router 3 is now primary after router 2 failure
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
// Routers 1 and 2 are disconnected, so not in AvailableRoutes
types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 3 should be primary after router 2 failure" )
checkFailureAndPrintRoutes ( t , client )
2023-12-09 18:09:24 +01:00
// Bring up subnet router 1, making the route available from there.
2025-08-06 08:37:02 +02:00
t . Logf ( "=== RECOVERY TEST: Bringing router 1 (%s) back online ===" , subRouter1 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 OFFLINE, Router 2 OFFLINE, Router 3 PRIMARY (only router)" )
t . Logf ( " Action: Starting router 1 to restore HA capability" )
t . Logf ( " Expected: Router 3 remains PRIMARY (stability - no unnecessary failover)" )
t . Logf ( " Expected: Router 1 becomes STANDBY (ready for HA)" )
t . Logf ( " Expected: HA is restored with 2 routers available" )
2023-12-09 18:09:24 +01:00
err = subRouter1 . Up ( )
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for router status changes after r1 comes back up
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
clientStatus , err = client . Status ( )
assert . NoError ( c , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
2024-02-23 10:59:24 +01:00
2025-08-06 08:37:02 +02:00
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . True ( c , srs1PeerStatus . Online , "Router 1 should be back online as STANDBY" )
assert . False ( c , srs2PeerStatus . Online , "Router 2 should still be offline" )
assert . True ( c , srs3PeerStatus . Online , "Router 3 should remain online as PRIMARY" )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , srs1PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs2PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs3PeerStatus . PrimaryRoutes )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , [ ] netip . Prefix { pref } )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
if srs3PeerStatus . PrimaryRoutes != nil {
assert . Contains ( c ,
srs3PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Recovery verification: Router 1 back online as STANDBY, Router 3 remains PRIMARY (no flapping) with routes" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can still reach webservice through router 3 after router 1 recovery" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter3 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter3" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute still goes through router 3 after router 1 recovery" )
// Validate primary routes table state - router 3 remains primary after router 1 comes back
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
// Router 2 is still disconnected
types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 3 should remain primary after router 1 recovery" )
checkFailureAndPrintRoutes ( t , client )
2025-03-21 11:49:32 +01:00
2023-12-09 18:09:24 +01:00
// Bring up subnet router 2, should result in no change.
2025-08-06 08:37:02 +02:00
t . Logf ( "=== FULL RECOVERY TEST: Bringing router 2 (%s) back online ===" , subRouter2 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 STANDBY, Router 2 OFFLINE, Router 3 PRIMARY" )
t . Logf ( " Action: Starting router 2 to restore full HA (3 routers)" )
t . Logf ( " Expected: Router 3 (%s) remains PRIMARY (stability - avoid unnecessary failovers)" , subRouter3 . Hostname ( ) )
t . Logf ( " Expected: Router 1 (%s) remains first STANDBY" , subRouter1 . Hostname ( ) )
t . Logf ( " Expected: Router 2 (%s) becomes second STANDBY" , subRouter2 . Hostname ( ) )
t . Logf ( " Expected: Full HA restored with all 3 routers online" )
2023-12-09 18:09:24 +01:00
err = subRouter2 . Up ( )
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for nodestore batch processing to complete and online status to be updated
// NodeStore batching timeout is 500ms, so we wait up to 10 seconds for all routers to be online
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
clientStatus , err = client . Status ( )
assert . NoError ( c , err )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
2024-02-23 10:59:24 +01:00
2025-08-06 08:37:02 +02:00
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . True ( c , srs1PeerStatus . Online , "Router 1 should be online as STANDBY" )
assert . True ( c , srs2PeerStatus . Online , "Router 2 should be back online as STANDBY" )
assert . True ( c , srs3PeerStatus . Online , "Router 3 should remain online as PRIMARY" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , srs1PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs2PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs3PeerStatus . PrimaryRoutes )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , [ ] netip . Prefix { pref } )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
if srs3PeerStatus . PrimaryRoutes != nil {
assert . Contains ( c ,
srs3PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , 10 * time . Second , 500 * time . Millisecond , "Full recovery verification: All 3 routers online, Router 3 remains PRIMARY (no flapping) with routes" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 3 after full recovery" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter3 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter3" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute goes through router 3 after full recovery" )
// Validate primary routes table state - router 3 remains primary after all routers back online
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 3 should remain primary after full recovery" )
checkFailureAndPrintRoutes ( t , client )
t . Logf ( "=== ROUTE DISABLE TEST: Removing approved route from PRIMARY router 3 (%s) ===" , subRouter3 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 STANDBY, Router 2 STANDBY, Router 3 PRIMARY" )
t . Logf ( " Action: Disabling route approval on router 3 (route still advertised but not approved)" )
t . Logf ( " Expected: Router 1 (%s) should become new PRIMARY (lowest ID with approved route)" , subRouter1 . Hostname ( ) )
t . Logf ( " Expected: Router 2 (%s) remains STANDBY" , subRouter2 . Hostname ( ) )
t . Logf ( " Expected: Router 3 (%s) goes to advertised-only state (no longer serving)" , subRouter3 . Hostname ( ) )
2025-07-28 11:15:53 +02:00
_ , err = headscale . ApproveRoutes ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) , [ ] netip . Prefix { } )
2025-03-21 11:49:32 +01:00
2025-07-05 23:30:47 +02:00
// Wait for nodestore batch processing and route state changes to complete
// NodeStore batching timeout is 500ms, so we wait up to 10 seconds for route failover
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
2025-03-21 11:49:32 +01:00
2025-07-05 23:30:47 +02:00
// After disabling route on r3, r1 should become primary with 1 subnet route
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter1 . Hostname ( ) , nodes ) , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter2 . Hostname ( ) , nodes ) , 1 , 1 , 0 )
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter3 . Hostname ( ) , nodes ) , 1 , 0 , 0 )
2025-08-06 08:37:02 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "Route disable verification: Router 3 route disabled, Router 1 should be new PRIMARY, Router 2 STANDBY" )
2025-03-21 11:49:32 +01:00
// Verify that the route is announced from subnet router 1
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
clientStatus , err = client . Status ( )
assert . NoError ( c , err )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs2PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs3PeerStatus . PrimaryRoutes )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , [ ] netip . Prefix { pref } )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , nil )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
if srs1PeerStatus . PrimaryRoutes != nil {
assert . Contains ( c ,
srs1PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Verifying Router 1 becomes PRIMARY after Router 3 route disabled" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 1 after route disable" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter1 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter1" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute goes through router 1 after route disable" )
// Validate primary routes table state - router 1 is primary after router 3 route disabled
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
// Router 3's route is no longer approved, so not in AvailableRoutes
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 1 should be primary after router 3 route disabled" )
checkFailureAndPrintRoutes ( t , client )
2025-03-21 11:49:32 +01:00
2023-12-09 18:09:24 +01:00
// Disable the route of subnet router 1, making it failover to 2
2025-08-06 08:37:02 +02:00
t . Logf ( "=== ROUTE DISABLE TEST: Removing approved route from NEW PRIMARY router 1 (%s) ===" , subRouter1 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 PRIMARY, Router 2 STANDBY, Router 3 advertised-only" )
t . Logf ( " Action: Disabling route approval on router 1" )
t . Logf ( " Expected: Router 2 (%s) should become new PRIMARY (only remaining approved route)" , subRouter2 . Hostname ( ) )
t . Logf ( " Expected: Router 1 (%s) goes to advertised-only state" , subRouter1 . Hostname ( ) )
t . Logf ( " Expected: Router 3 (%s) remains advertised-only" , subRouter3 . Hostname ( ) )
2025-07-28 11:15:53 +02:00
_ , err = headscale . ApproveRoutes ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) , [ ] netip . Prefix { } )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for nodestore batch processing and route state changes to complete
// NodeStore batching timeout is 500ms, so we wait up to 10 seconds for route failover
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// After disabling route on r1, r2 should become primary with 1 subnet route
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter1 . Hostname ( ) , nodes ) , 1 , 0 , 0 )
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter2 . Hostname ( ) , nodes ) , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter3 . Hostname ( ) , nodes ) , 1 , 0 , 0 )
2025-08-06 08:37:02 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "Second route disable verification: Router 1 route disabled, Router 2 should be new PRIMARY" )
2023-12-09 18:09:24 +01:00
// Verify that the route is now announced from subnet router 2
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
clientStatus , err = client . Status ( )
assert . NoError ( c , err )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , srs1PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs2PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs3PeerStatus . PrimaryRoutes )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , nil )
requirePeerSubnetRoutesWithCollect ( c , srs2PeerStatus , [ ] netip . Prefix { pref } )
requirePeerSubnetRoutesWithCollect ( c , srs3PeerStatus , nil )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
if srs2PeerStatus . PrimaryRoutes != nil {
assert . Contains ( c ,
srs2PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Verifying Router 2 becomes PRIMARY after Router 1 route disabled" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 2 after second route disable" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter2 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter2" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute goes through router 2 after second route disable" )
// Validate primary routes table state - router 2 is primary after router 1 route disabled
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
// Router 1's route is no longer approved, so not in AvailableRoutes
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
// Router 3's route is still not approved
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 2 should be primary after router 1 route disabled" )
checkFailureAndPrintRoutes ( t , client )
2025-03-21 11:49:32 +01:00
2023-12-09 18:09:24 +01:00
// enable the route of subnet router 1, no change expected
2025-08-06 08:37:02 +02:00
t . Logf ( "=== ROUTE RE-ENABLE TEST: Re-approving route on router 1 (%s) ===" , subRouter1 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 advertised-only, Router 2 PRIMARY, Router 3 advertised-only" )
t . Logf ( " Action: Re-enabling route approval on router 1" )
t . Logf ( " Expected: Router 2 (%s) remains PRIMARY (stability - no unnecessary flapping)" , subRouter2 . Hostname ( ) )
t . Logf ( " Expected: Router 1 (%s) becomes STANDBY (approved but not primary)" , subRouter1 . Hostname ( ) )
t . Logf ( " Expected: HA fully restored with Router 2 PRIMARY and Router 1 STANDBY" )
2025-07-28 11:15:53 +02:00
r1Node := MustFindNode ( subRouter1 . Hostname ( ) , nodes )
2025-02-26 07:22:55 -08:00
_ , err = headscale . ApproveRoutes (
2025-07-28 11:15:53 +02:00
r1Node . GetId ( ) ,
util . MustStringsToPrefixes ( r1Node . GetAvailableRoutes ( ) ) ,
2025-02-26 07:22:55 -08:00
)
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
// Wait for route state changes after re-enabling r1
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
2023-12-09 18:09:24 +01:00
2025-07-05 23:30:47 +02:00
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter1 . Hostname ( ) , nodes ) , 1 , 1 , 0 )
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter2 . Hostname ( ) , nodes ) , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , MustFindNode ( subRouter3 . Hostname ( ) , nodes ) , 1 , 0 , 0 )
2025-08-06 08:37:02 +02:00
} , propagationTime , 200 * time . Millisecond , "Re-enable verification: Router 1 approved as STANDBY, Router 2 remains PRIMARY (no flapping), full HA restored" )
2023-12-09 18:09:24 +01:00
// Verify that the route is announced from subnet router 1
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
clientStatus , err = client . Status ( )
assert . NoError ( c , err )
srs1PeerStatus = clientStatus . Peer [ srs1 . Self . PublicKey ]
srs2PeerStatus = clientStatus . Peer [ srs2 . Self . PublicKey ]
srs3PeerStatus = clientStatus . Peer [ srs3 . Self . PublicKey ]
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
assert . NotNil ( c , srs2PeerStatus , "Router 2 peer should exist" )
assert . NotNil ( c , srs3PeerStatus , "Router 3 peer should exist" )
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
if srs1PeerStatus == nil || srs2PeerStatus == nil || srs3PeerStatus == nil {
return
}
2023-12-09 18:09:24 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , srs1PeerStatus . PrimaryRoutes )
assert . NotNil ( c , srs2PeerStatus . PrimaryRoutes )
assert . Nil ( c , srs3PeerStatus . PrimaryRoutes )
if srs2PeerStatus . PrimaryRoutes != nil {
assert . Contains ( c ,
srs2PeerStatus . PrimaryRoutes . AsSlice ( ) ,
pref ,
)
}
} , propagationTime , 200 * time . Millisecond , "Verifying Router 2 remains PRIMARY after Router 1 route re-enabled" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , propagationTime , 200 * time . Millisecond , "Verifying client can reach webservice through router 2 after route re-enable" )
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := subRouter2 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for subRouter2" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , propagationTime , 200 * time . Millisecond , "Verifying traceroute still goes through router 2 after route re-enable" )
// Validate primary routes table state after router 1 re-approval
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
// Router 3 route is still not approved
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 2 should remain primary after router 1 re-approval" )
checkFailureAndPrintRoutes ( t , client )
// Enable route on node 3, we now have all routes re-enabled
t . Logf ( "=== ROUTE RE-ENABLE TEST: Re-approving route on router 3 (%s) - Full HA Restoration ===" , subRouter3 . Hostname ( ) )
2025-09-08 11:18:42 +02:00
t . Logf ( "[%s] Starting test section" , time . Now ( ) . Format ( TimestampFormat ) )
2025-08-06 08:37:02 +02:00
t . Logf ( " Current state: Router 1 STANDBY, Router 2 PRIMARY, Router 3 advertised-only" )
t . Logf ( " Action: Re-enabling route approval on router 3" )
t . Logf ( " Expected: Router 2 (%s) remains PRIMARY (stability preferred)" , subRouter2 . Hostname ( ) )
t . Logf ( " Expected: Routers 1 & 3 are both STANDBY" )
t . Logf ( " Expected: Full HA restored with all 3 routers available" )
r3Node := MustFindNode ( subRouter3 . Hostname ( ) , nodes )
_ , err = headscale . ApproveRoutes (
r3Node . GetId ( ) ,
util . MustStringsToPrefixes ( r3Node . GetAvailableRoutes ( ) ) ,
2023-12-09 18:09:24 +01:00
)
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
// Wait for route state changes after re-enabling r3
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 6 )
require . GreaterOrEqual ( t , len ( nodes ) , 3 , "need at least 3 nodes to avoid panic" )
// After router 3 re-approval: Router 2 remains PRIMARY, Routers 1&3 are STANDBY
// SubnetRoutes should only show routes for PRIMARY node (actively serving)
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 1 , 1 , 0 ) // Router 1: STANDBY (available, approved, but not serving)
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 1 , 1 ) // Router 2: PRIMARY (available, approved, and serving)
requireNodeRouteCountWithCollect ( c , nodes [ 2 ] , 1 , 1 , 0 ) // Router 3: STANDBY (available, approved, but not serving)
} , propagationTime , 200 * time . Millisecond , "Waiting for route state after router 3 re-approval" )
// Validate primary routes table state after router 3 re-approval
validatePrimaryRoutes ( t , headscale , & routes . DebugRoutes {
AvailableRoutes : map [ types . NodeID ] [ ] netip . Prefix {
types . NodeID ( MustFindNode ( subRouter1 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
types . NodeID ( MustFindNode ( subRouter3 . Hostname ( ) , nodes ) . GetId ( ) ) : { pref } ,
} ,
PrimaryRoutes : map [ string ] types . NodeID {
pref . String ( ) : types . NodeID ( MustFindNode ( subRouter2 . Hostname ( ) , nodes ) . GetId ( ) ) ,
} ,
} , "Router 2 should remain primary after router 3 re-approval" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
checkFailureAndPrintRoutes ( t , client )
2023-12-09 18:09:24 +01:00
}
2024-01-18 16:36:47 +01:00
2024-01-18 17:30:25 +01:00
// TestSubnetRouteACL verifies that Subnet routes are distributed
// as expected when ACLs are activated.
// It implements the issue from
// https://github.com/juanfont/headscale/issues/1604
func TestSubnetRouteACL ( t * testing . T ) {
IntegrationSkip ( t )
2025-03-10 16:20:29 +01:00
user := "user4"
2024-01-18 17:30:25 +01:00
2025-03-21 11:49:32 +01:00
spec := ScenarioSpec {
NodesPerUser : 2 ,
Users : [ ] string { user } ,
}
scenario , err := NewScenario ( spec )
2025-02-26 07:22:55 -08:00
require . NoErrorf ( t , err , "failed to create scenario: %s" , err )
2024-09-17 10:44:55 +01:00
defer scenario . ShutdownAssertNoPanics ( t )
2024-01-18 17:30:25 +01:00
2025-03-21 11:49:32 +01:00
err = scenario . CreateHeadscaleEnv ( [ ] tsic . Option {
tsic . WithAcceptRoutes ( ) ,
} , hsic . WithTestName ( "clienableroute" ) , hsic . WithACLPolicy (
2025-05-20 13:57:26 +02:00
& policyv2 . Policy {
Groups : policyv2 . Groups {
policyv2 . Group ( "group:admins" ) : [ ] policyv2 . Username { policyv2 . Username ( user + "@" ) } ,
2024-01-18 17:30:25 +01:00
} ,
2025-05-20 13:57:26 +02:00
ACLs : [ ] policyv2 . ACL {
2024-01-18 17:30:25 +01:00
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { groupp ( "group:admins" ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( groupp ( "group:admins" ) , tailcfg . PortRangeAny ) ,
} ,
2024-01-18 17:30:25 +01:00
} ,
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { groupp ( "group:admins" ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( prefixp ( "10.33.0.0/16" ) , tailcfg . PortRangeAny ) ,
} ,
2024-01-18 17:30:25 +01:00
} ,
} ,
} ,
) )
assertNoErrHeadscaleEnv ( t , err )
allClients , err := scenario . ListTailscaleClients ( )
assertNoErrListClients ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
expectedRoutes := map [ string ] string {
"1" : "10.33.0.0/16" ,
}
// Sort nodes by ID
sort . SliceStable ( allClients , func ( i , j int ) bool {
statusI , err := allClients [ i ] . Status ( )
if err != nil {
return false
}
statusJ , err := allClients [ j ] . Status ( )
if err != nil {
return false
}
return statusI . Self . ID < statusJ . Self . ID
} )
subRouter1 := allClients [ 0 ]
client := allClients [ 1 ]
for _ , client := range allClients {
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2024-01-18 17:30:25 +01:00
2025-08-06 08:37:02 +02:00
if route , ok := expectedRoutes [ string ( status . Self . ID ) ] ; ok {
command := [ ] string {
"tailscale" ,
"set" ,
"--advertise-routes=" + route ,
}
_ , _ , err = client . Execute ( command )
assert . NoErrorf ( c , err , "failed to advertise route: %s" , err )
2024-01-18 17:30:25 +01:00
}
2025-08-06 08:37:02 +02:00
} , 5 * time . Second , 200 * time . Millisecond , "Configuring route advertisements" )
2024-01-18 17:30:25 +01:00
}
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
2025-08-06 08:37:02 +02:00
// Wait for route advertisements to propagate to the server
var nodes [ ] * v1 . Node
require . EventuallyWithT ( t , func ( c * assert . CollectT ) {
var err error
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 2 )
// Find the node that should have the route by checking node IDs
var routeNode * v1 . Node
var otherNode * v1 . Node
for _ , node := range nodes {
nodeIDStr := strconv . FormatUint ( node . GetId ( ) , 10 )
if _ , shouldHaveRoute := expectedRoutes [ nodeIDStr ] ; shouldHaveRoute {
routeNode = node
} else {
otherNode = node
}
}
2024-01-18 17:30:25 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , routeNode , "could not find node that should have route" )
assert . NotNil ( c , otherNode , "could not find node that should not have route" )
// After NodeStore fix: routes are properly tracked in route manager
// This test uses a policy with NO auto-approvers, so routes should be:
// announced=1, approved=0, subnet=0 (routes announced but not approved)
requireNodeRouteCountWithCollect ( c , routeNode , 1 , 0 , 0 )
requireNodeRouteCountWithCollect ( c , otherNode , 0 , 0 , 0 )
} , 10 * time . Second , 100 * time . Millisecond , "route advertisements should propagate to server" )
2024-01-18 17:30:25 +01:00
// Verify that no routes has been sent to the client,
// they are not yet enabled.
for _ , client := range allClients {
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2024-01-18 17:30:25 +01:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2024-01-18 17:30:25 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , peerStatus . PrimaryRoutes )
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
} , 5 * time . Second , 200 * time . Millisecond , "Verifying no routes are active before approval" )
2024-01-18 17:30:25 +01:00
}
2025-02-26 07:22:55 -08:00
_ , err = headscale . ApproveRoutes (
1 ,
[ ] netip . Prefix { netip . MustParsePrefix ( expectedRoutes [ "1" ] ) } ,
)
require . NoError ( t , err )
2024-01-18 17:30:25 +01:00
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate to nodes
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 2 )
2024-01-18 17:30:25 +01:00
2025-07-05 23:30:47 +02:00
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 0 , 0 , 0 )
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate to nodes" )
2024-01-18 17:30:25 +01:00
// Verify that the client has routes from the primary machine
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
srs1 , err := subRouter1 . Status ( )
assert . NoError ( c , err )
2024-01-18 17:30:25 +01:00
2025-08-06 08:37:02 +02:00
clientStatus , err := client . Status ( )
assert . NoError ( c , err )
srs1PeerStatus := clientStatus . Peer [ srs1 . Self . PublicKey ]
2024-01-18 17:30:25 +01:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( c , srs1PeerStatus , "Router 1 peer should exist" )
if srs1PeerStatus == nil {
return
}
2024-01-18 17:30:25 +01:00
2025-08-06 08:37:02 +02:00
requirePeerSubnetRoutesWithCollect ( c , srs1PeerStatus , [ ] netip . Prefix { netip . MustParsePrefix ( expectedRoutes [ "1" ] ) } )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying client can see subnet routes from router" )
2024-01-18 17:30:25 +01:00
clientNm , err := client . Netmap ( )
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
2024-01-18 17:30:25 +01:00
wantClientFilter := [ ] filter . Match {
{
2024-08-23 15:28:54 +02:00
IPProto : views . SliceOf ( [ ] ipproto . Proto {
2025-09-12 14:47:56 +02:00
ipproto . TCP , ipproto . UDP ,
2024-08-23 15:28:54 +02:00
} ) ,
2024-01-18 17:30:25 +01:00
Srcs : [ ] netip . Prefix {
netip . MustParsePrefix ( "100.64.0.1/32" ) ,
netip . MustParsePrefix ( "100.64.0.2/32" ) ,
netip . MustParsePrefix ( "fd7a:115c:a1e0::1/128" ) ,
netip . MustParsePrefix ( "fd7a:115c:a1e0::2/128" ) ,
} ,
Dsts : [ ] filter . NetPortRange {
{
Net : netip . MustParsePrefix ( "100.64.0.2/32" ) ,
2024-10-23 10:45:59 -05:00
Ports : allPorts ,
2024-01-18 17:30:25 +01:00
} ,
{
Net : netip . MustParsePrefix ( "fd7a:115c:a1e0::2/128" ) ,
2024-10-23 10:45:59 -05:00
Ports : allPorts ,
2024-01-18 17:30:25 +01:00
} ,
} ,
Caps : [ ] filter . CapMatch { } ,
} ,
}
2025-05-04 22:52:47 +03:00
if diff := cmpdiff . Diff ( wantClientFilter , clientNm . PacketFilter , util . ViewSliceIPProtoComparer , util . PrefixComparer ) ; diff != "" {
2024-01-18 17:30:25 +01:00
t . Errorf ( "Client (%s) filter, unexpected result (-want +got):\n%s" , client . Hostname ( ) , diff )
}
subnetNm , err := subRouter1 . Netmap ( )
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
2024-01-18 17:30:25 +01:00
wantSubnetFilter := [ ] filter . Match {
{
2024-08-23 15:28:54 +02:00
IPProto : views . SliceOf ( [ ] ipproto . Proto {
2025-09-12 14:47:56 +02:00
ipproto . TCP , ipproto . UDP ,
2024-08-23 15:28:54 +02:00
} ) ,
2024-01-18 17:30:25 +01:00
Srcs : [ ] netip . Prefix {
netip . MustParsePrefix ( "100.64.0.1/32" ) ,
netip . MustParsePrefix ( "100.64.0.2/32" ) ,
netip . MustParsePrefix ( "fd7a:115c:a1e0::1/128" ) ,
netip . MustParsePrefix ( "fd7a:115c:a1e0::2/128" ) ,
} ,
Dsts : [ ] filter . NetPortRange {
{
Net : netip . MustParsePrefix ( "100.64.0.1/32" ) ,
2024-10-23 10:45:59 -05:00
Ports : allPorts ,
2024-01-18 17:30:25 +01:00
} ,
{
Net : netip . MustParsePrefix ( "fd7a:115c:a1e0::1/128" ) ,
2024-10-23 10:45:59 -05:00
Ports : allPorts ,
2024-01-18 17:30:25 +01:00
} ,
} ,
Caps : [ ] filter . CapMatch { } ,
} ,
{
2024-08-23 15:28:54 +02:00
IPProto : views . SliceOf ( [ ] ipproto . Proto {
2025-09-12 14:47:56 +02:00
ipproto . TCP , ipproto . UDP ,
2024-08-23 15:28:54 +02:00
} ) ,
2024-01-18 17:30:25 +01:00
Srcs : [ ] netip . Prefix {
netip . MustParsePrefix ( "100.64.0.1/32" ) ,
netip . MustParsePrefix ( "100.64.0.2/32" ) ,
netip . MustParsePrefix ( "fd7a:115c:a1e0::1/128" ) ,
netip . MustParsePrefix ( "fd7a:115c:a1e0::2/128" ) ,
} ,
Dsts : [ ] filter . NetPortRange {
{
Net : netip . MustParsePrefix ( "10.33.0.0/16" ) ,
2024-10-23 10:45:59 -05:00
Ports : allPorts ,
2024-01-18 17:30:25 +01:00
} ,
} ,
Caps : [ ] filter . CapMatch { } ,
} ,
}
2025-05-04 22:52:47 +03:00
if diff := cmpdiff . Diff ( wantSubnetFilter , subnetNm . PacketFilter , util . ViewSliceIPProtoComparer , util . PrefixComparer ) ; diff != "" {
2024-01-18 17:30:25 +01:00
t . Errorf ( "Subnet (%s) filter, unexpected result (-want +got):\n%s" , subRouter1 . Hostname ( ) , diff )
}
}
2025-02-23 14:10:25 -08:00
// TestEnablingExitRoutes tests enabling exit routes for clients.
// Its more or less the same as TestEnablingRoutes, but with the --advertise-exit-node flag
// set during login instead of set.
func TestEnablingExitRoutes ( t * testing . T ) {
IntegrationSkip ( t )
user := "user2"
2025-03-21 11:49:32 +01:00
spec := ScenarioSpec {
NodesPerUser : 2 ,
Users : [ ] string { user } ,
}
scenario , err := NewScenario ( spec )
2025-02-23 14:10:25 -08:00
assertNoErrf ( t , "failed to create scenario: %s" , err )
defer scenario . ShutdownAssertNoPanics ( t )
2025-03-21 11:49:32 +01:00
err = scenario . CreateHeadscaleEnv ( [ ] tsic . Option {
2025-02-23 14:10:25 -08:00
tsic . WithExtraLoginArgs ( [ ] string { "--advertise-exit-node" } ) ,
} , hsic . WithTestName ( "clienableroute" ) )
assertNoErrHeadscaleEnv ( t , err )
allClients , err := scenario . ListTailscaleClients ( )
assertNoErrListClients ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
2025-02-26 07:22:55 -08:00
nodes , err := headscale . ListNodes ( )
require . NoError ( t , err )
require . Len ( t , nodes , 2 )
2025-02-23 14:10:25 -08:00
2025-04-30 08:54:04 +03:00
requireNodeRouteCount ( t , nodes [ 0 ] , 2 , 0 , 0 )
requireNodeRouteCount ( t , nodes [ 1 ] , 2 , 0 , 0 )
2025-02-23 14:10:25 -08:00
// Verify that no routes has been sent to the client,
// they are not yet enabled.
for _ , client := range allClients {
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-02-23 14:10:25 -08:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2025-02-23 14:10:25 -08:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , peerStatus . PrimaryRoutes )
}
} , 5 * time . Second , 200 * time . Millisecond , "Verifying no exit routes are active before approval" )
2025-02-23 14:10:25 -08:00
}
2025-02-26 07:22:55 -08:00
// Enable all routes, but do v4 on one and v6 on other to ensure they
// are both added since they are exit routes.
_ , err = headscale . ApproveRoutes (
nodes [ 0 ] . GetId ( ) ,
[ ] netip . Prefix { tsaddr . AllIPv4 ( ) } ,
2025-02-23 14:10:25 -08:00
)
2025-02-26 07:22:55 -08:00
require . NoError ( t , err )
_ , err = headscale . ApproveRoutes (
nodes [ 1 ] . GetId ( ) ,
[ ] netip . Prefix { tsaddr . AllIPv6 ( ) } ,
)
require . NoError ( t , err )
2025-02-23 14:10:25 -08:00
2025-08-06 08:37:02 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 2 )
2025-02-26 07:22:55 -08:00
2025-08-06 08:37:02 +02:00
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 2 , 2 , 2 )
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 2 , 2 , 2 )
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate to both nodes" )
2025-02-23 14:10:25 -08:00
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate to clients
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// Verify that the clients can see the new routes
for _ , client := range allClients {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-02-23 14:10:25 -08:00
2025-07-05 23:30:47 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2025-02-23 14:10:25 -08:00
2025-07-05 23:30:47 +02:00
assert . NotNil ( c , peerStatus . AllowedIPs )
2025-08-06 08:37:02 +02:00
if peerStatus . AllowedIPs != nil {
assert . Len ( c , peerStatus . AllowedIPs . AsSlice ( ) , 4 )
assert . Contains ( c , peerStatus . AllowedIPs . AsSlice ( ) , tsaddr . AllIPv4 ( ) )
assert . Contains ( c , peerStatus . AllowedIPs . AsSlice ( ) , tsaddr . AllIPv6 ( ) )
}
2025-07-05 23:30:47 +02:00
}
2025-02-23 14:10:25 -08:00
}
2025-07-05 23:30:47 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "clients should see new routes" )
2025-02-23 14:10:25 -08:00
}
2025-02-26 07:22:55 -08:00
2025-03-21 11:49:32 +01:00
// TestSubnetRouterMultiNetwork is an evolution of the subnet router test.
// This test will set up multiple docker networks and use two isolated tailscale
// clients and a service available in one of the networks to validate that a
// subnet router is working as expected.
func TestSubnetRouterMultiNetwork ( t * testing . T ) {
IntegrationSkip ( t )
spec := ScenarioSpec {
NodesPerUser : 1 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
}
scenario , err := NewScenario ( spec )
require . NoErrorf ( t , err , "failed to create scenario: %s" , err )
defer scenario . ShutdownAssertNoPanics ( t )
err = scenario . CreateHeadscaleEnv ( [ ] tsic . Option { tsic . WithAcceptRoutes ( ) } ,
hsic . WithTestName ( "clienableroute" ) ,
hsic . WithEmbeddedDERPServerOnly ( ) ,
hsic . WithTLS ( ) ,
)
assertNoErrHeadscaleEnv ( t , err )
allClients , err := scenario . ListTailscaleClients ( )
assertNoErrListClients ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
assert . NotNil ( t , headscale )
pref , err := scenario . SubnetOfNetwork ( "usernet1" )
require . NoError ( t , err )
var user1c , user2c TailscaleClient
for _ , c := range allClients {
s := c . MustStatus ( )
if s . User [ s . Self . UserID ] . LoginName == "user1@test.no" {
user1c = c
}
if s . User [ s . Self . UserID ] . LoginName == "user2@test.no" {
user2c = c
}
}
require . NotNil ( t , user1c )
require . NotNil ( t , user2c )
// Advertise the route for the dockersubnet of user1
command := [ ] string {
"tailscale" ,
"set" ,
"--advertise-routes=" + pref . String ( ) ,
}
_ , _ , err = user1c . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
2025-08-06 08:37:02 +02:00
var nodes [ ] * v1 . Node
// Wait for route advertisements to propagate to NodeStore
assert . EventuallyWithT ( t , func ( ct * assert . CollectT ) {
var err error
nodes , err = headscale . ListNodes ( )
assert . NoError ( ct , err )
assert . Len ( ct , nodes , 2 )
requireNodeRouteCountWithCollect ( ct , nodes [ 0 ] , 1 , 0 , 0 )
} , 10 * time . Second , 100 * time . Millisecond , "route advertisements should propagate" )
2025-03-21 11:49:32 +01:00
// Verify that no routes has been sent to the client,
// they are not yet enabled.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := user1c . Status ( )
assert . NoError ( c , err )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , peerStatus . PrimaryRoutes )
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
} , 5 * time . Second , 200 * time . Millisecond , "Verifying no routes are active before approval" )
2025-03-21 11:49:32 +01:00
// Enable route
_ , err = headscale . ApproveRoutes (
2025-07-10 23:38:55 +02:00
nodes [ 0 ] . GetId ( ) ,
2025-03-21 11:49:32 +01:00
[ ] netip . Prefix { * pref } ,
)
require . NoError ( t , err )
2025-08-06 08:37:02 +02:00
// Wait for route state changes to propagate to nodes
2025-07-05 23:30:47 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
2025-08-06 08:37:02 +02:00
var err error
2025-07-05 23:30:47 +02:00
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 2 )
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 1 , 1 , 1 )
2025-08-06 08:37:02 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate to nodes" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
// Verify that the routes have been sent to the client
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := user2c . Status ( )
2025-07-05 23:30:47 +02:00
assert . NoError ( c , err )
2025-03-21 11:49:32 +01:00
2025-07-05 23:30:47 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , * pref )
}
2025-07-05 23:30:47 +02:00
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { * pref } )
}
2025-08-06 08:37:02 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "routes should be visible to client" )
2025-03-21 11:49:32 +01:00
usernet1 , err := scenario . Network ( "usernet1" )
require . NoError ( t , err )
services , err := scenario . Services ( "usernet1" )
require . NoError ( t , err )
require . Len ( t , services , 1 )
web := services [ 0 ]
webip := netip . MustParseAddr ( web . GetIPInNetwork ( usernet1 ) )
url := fmt . Sprintf ( "http://%s/etc/hostname" , webip )
t . Logf ( "url from %s to %s" , user2c . Hostname ( ) , url )
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := user2c . Curl ( url )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying client can reach webservice through subnet route" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := user2c . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := user1c . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for user1c" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying traceroute goes through subnet router" )
2025-03-21 11:49:32 +01:00
}
func TestSubnetRouterMultiNetworkExitNode ( t * testing . T ) {
IntegrationSkip ( t )
spec := ScenarioSpec {
NodesPerUser : 1 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
}
scenario , err := NewScenario ( spec )
require . NoErrorf ( t , err , "failed to create scenario: %s" , err )
defer scenario . ShutdownAssertNoPanics ( t )
err = scenario . CreateHeadscaleEnv ( [ ] tsic . Option { } ,
hsic . WithTestName ( "clienableroute" ) ,
hsic . WithEmbeddedDERPServerOnly ( ) ,
hsic . WithTLS ( ) ,
)
assertNoErrHeadscaleEnv ( t , err )
allClients , err := scenario . ListTailscaleClients ( )
assertNoErrListClients ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
assert . NotNil ( t , headscale )
var user1c , user2c TailscaleClient
for _ , c := range allClients {
s := c . MustStatus ( )
if s . User [ s . Self . UserID ] . LoginName == "user1@test.no" {
user1c = c
}
if s . User [ s . Self . UserID ] . LoginName == "user2@test.no" {
user2c = c
}
}
require . NotNil ( t , user1c )
require . NotNil ( t , user2c )
// Advertise the exit nodes for the dockersubnet of user1
command := [ ] string {
"tailscale" ,
"set" ,
"--advertise-exit-node" ,
}
_ , _ , err = user1c . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
2025-08-06 08:37:02 +02:00
var nodes [ ] * v1 . Node
// Wait for route advertisements to propagate to NodeStore
assert . EventuallyWithT ( t , func ( ct * assert . CollectT ) {
var err error
nodes , err = headscale . ListNodes ( )
assert . NoError ( ct , err )
assert . Len ( ct , nodes , 2 )
requireNodeRouteCountWithCollect ( ct , nodes [ 0 ] , 2 , 0 , 0 )
} , 10 * time . Second , 100 * time . Millisecond , "route advertisements should propagate" )
2025-03-21 11:49:32 +01:00
// Verify that no routes has been sent to the client,
// they are not yet enabled.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := user1c . Status ( )
assert . NoError ( c , err )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
assert . Nil ( c , peerStatus . PrimaryRoutes )
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
} , 5 * time . Second , 200 * time . Millisecond , "Verifying no routes sent to client before approval" )
2025-03-21 11:49:32 +01:00
// Enable route
2025-07-10 23:38:55 +02:00
_ , err = headscale . ApproveRoutes ( nodes [ 0 ] . GetId ( ) , [ ] netip . Prefix { tsaddr . AllIPv4 ( ) } )
2025-03-21 11:49:32 +01:00
require . NoError ( t , err )
2025-08-06 08:37:02 +02:00
// Wait for route state changes to propagate to nodes
2025-07-05 23:30:47 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
assert . Len ( c , nodes , 2 )
requireNodeRouteCountWithCollect ( c , nodes [ 0 ] , 2 , 2 , 2 )
2025-08-06 08:37:02 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate to nodes" )
2025-03-21 11:49:32 +01:00
2025-08-06 08:37:02 +02:00
// Verify that the routes have been sent to the client
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := user2c . Status ( )
2025-07-05 23:30:47 +02:00
assert . NoError ( c , err )
2025-03-21 11:49:32 +01:00
2025-07-05 23:30:47 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
2025-03-21 11:49:32 +01:00
2025-07-05 23:30:47 +02:00
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { tsaddr . AllIPv4 ( ) , tsaddr . AllIPv6 ( ) } )
}
2025-08-06 08:37:02 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "routes should be visible to client" )
2025-03-21 11:49:32 +01:00
// Tell user2c to use user1c as an exit node.
command = [ ] string {
"tailscale" ,
"set" ,
"--exit-node" ,
user1c . Hostname ( ) ,
}
_ , _ , err = user2c . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
usernet1 , err := scenario . Network ( "usernet1" )
require . NoError ( t , err )
services , err := scenario . Services ( "usernet1" )
require . NoError ( t , err )
require . Len ( t , services , 1 )
web := services [ 0 ]
webip := netip . MustParseAddr ( web . GetIPInNetwork ( usernet1 ) )
2025-07-21 03:51:57 +08:00
// We can't mess to much with ip forwarding in containers so
2025-03-21 11:49:32 +01:00
// we settle for a simple ping here.
// Direct is false since we use internal DERP which means we
2025-07-21 03:51:57 +08:00
// can't discover a direct path between docker networks.
2025-03-21 11:49:32 +01:00
err = user2c . Ping ( webip . String ( ) ,
tsic . WithPingUntilDirect ( false ) ,
tsic . WithPingCount ( 1 ) ,
tsic . WithPingTimeout ( 7 * time . Second ) ,
)
require . NoError ( t , err )
}
2025-04-30 08:54:04 +03:00
// MustFindNode returns the first node in nodes whose name equals hostname.
// It panics when no node matches; the panic message names the hostname that
// was looked up and how many nodes were searched.
func MustFindNode(hostname string, nodes []*v1.Node) *v1.Node {
	for _, node := range nodes {
		if node.GetName() == hostname {
			return node
		}
	}

	panic(fmt.Sprintf("node %q not found among %d nodes", hostname, len(nodes)))
}
2025-03-31 15:55:07 +02:00
// TestAutoApproveMultiNetwork tests auto approving of routes
// by setting up two networks where network1 has three subnet
// routers:
// - routerUsernet1: advertising the docker network
// - routerSubRoute: advertising a subroute, a /24 inside an auto approved /16
// - routeExitNode: advertising an exit node
//
// Each router is tested step by step through the following scenarios
// - Policy is set to auto approve the nodes route
// - Node advertises route and it is verified that it is auto approved and sent to nodes
// - Policy is changed to _not_ auto approve the route
// - Verify that peers can still see the node
// - Disable route, making it unavailable
// - Verify that peers can no longer use node
// - Policy is changed back to auto approve route, check that routes already existing are approved.
// - Verify that routes can now be seen by peers.
func TestAutoApproveMultiNetwork ( t * testing . T ) {
IntegrationSkip ( t )
2025-04-30 08:54:04 +03:00
bigRoute := netip . MustParsePrefix ( "10.42.0.0/16" )
2025-03-31 15:55:07 +02:00
subRoute := netip . MustParsePrefix ( "10.42.7.0/24" )
notApprovedRoute := netip . MustParsePrefix ( "192.168.0.0/24" )
2025-04-30 08:54:04 +03:00
tests := [ ] struct {
name string
2025-05-20 13:57:26 +02:00
pol * policyv2 . Policy
2025-04-30 08:54:04 +03:00
approver string
spec ScenarioSpec
withURL bool
} {
{
name : "authkey-tag" ,
2025-05-20 13:57:26 +02:00
pol : & policyv2 . Policy {
ACLs : [ ] policyv2 . ACL {
2025-04-30 08:54:04 +03:00
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { wildcard ( ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( wildcard ( ) , tailcfg . PortRangeAny ) ,
} ,
2025-04-30 08:54:04 +03:00
} ,
} ,
2025-05-20 13:57:26 +02:00
TagOwners : policyv2 . TagOwners {
policyv2 . Tag ( "tag:approve" ) : policyv2 . Owners { usernameOwner ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
AutoApprovers : policyv2 . AutoApproverPolicy {
Routes : map [ netip . Prefix ] policyv2 . AutoApprovers {
bigRoute : { tagApprover ( "tag:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
ExitNode : policyv2 . AutoApprovers { tagApprover ( "tag:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
} ,
approver : "tag:approve" ,
spec : ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
2025-03-31 15:55:07 +02:00
} ,
} ,
2025-04-30 08:54:04 +03:00
{
name : "authkey-user" ,
2025-05-20 13:57:26 +02:00
pol : & policyv2 . Policy {
ACLs : [ ] policyv2 . ACL {
2025-04-30 08:54:04 +03:00
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { wildcard ( ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( wildcard ( ) , tailcfg . PortRangeAny ) ,
} ,
2025-04-30 08:54:04 +03:00
} ,
} ,
2025-05-20 13:57:26 +02:00
AutoApprovers : policyv2 . AutoApproverPolicy {
Routes : map [ netip . Prefix ] policyv2 . AutoApprovers {
bigRoute : { usernameApprover ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
ExitNode : policyv2 . AutoApprovers { usernameApprover ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
} ,
approver : "user1@" ,
spec : ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
} ,
2025-03-31 15:55:07 +02:00
} ,
2025-04-30 08:54:04 +03:00
{
name : "authkey-group" ,
2025-05-20 13:57:26 +02:00
pol : & policyv2 . Policy {
ACLs : [ ] policyv2 . ACL {
2025-04-30 08:54:04 +03:00
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { wildcard ( ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( wildcard ( ) , tailcfg . PortRangeAny ) ,
} ,
2025-04-30 08:54:04 +03:00
} ,
} ,
2025-05-20 13:57:26 +02:00
Groups : policyv2 . Groups {
policyv2 . Group ( "group:approve" ) : [ ] policyv2 . Username { policyv2 . Username ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
AutoApprovers : policyv2 . AutoApproverPolicy {
Routes : map [ netip . Prefix ] policyv2 . AutoApprovers {
bigRoute : { groupApprover ( "group:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
ExitNode : policyv2 . AutoApprovers { groupApprover ( "group:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
} ,
approver : "group:approve" ,
spec : ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
2025-03-31 15:55:07 +02:00
} ,
2025-04-30 08:54:04 +03:00
} ,
{
name : "webauth-user" ,
2025-05-20 13:57:26 +02:00
pol : & policyv2 . Policy {
ACLs : [ ] policyv2 . ACL {
2025-04-30 08:54:04 +03:00
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { wildcard ( ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( wildcard ( ) , tailcfg . PortRangeAny ) ,
} ,
2025-04-30 08:54:04 +03:00
} ,
} ,
2025-05-20 13:57:26 +02:00
AutoApprovers : policyv2 . AutoApproverPolicy {
Routes : map [ netip . Prefix ] policyv2 . AutoApprovers {
bigRoute : { usernameApprover ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
ExitNode : policyv2 . AutoApprovers { usernameApprover ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
} ,
approver : "user1@" ,
spec : ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
} ,
withURL : true ,
} ,
{
name : "webauth-tag" ,
2025-05-20 13:57:26 +02:00
pol : & policyv2 . Policy {
ACLs : [ ] policyv2 . ACL {
2025-04-30 08:54:04 +03:00
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { wildcard ( ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( wildcard ( ) , tailcfg . PortRangeAny ) ,
} ,
2025-04-30 08:54:04 +03:00
} ,
} ,
2025-05-20 13:57:26 +02:00
TagOwners : policyv2 . TagOwners {
policyv2 . Tag ( "tag:approve" ) : policyv2 . Owners { usernameOwner ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
AutoApprovers : policyv2 . AutoApproverPolicy {
Routes : map [ netip . Prefix ] policyv2 . AutoApprovers {
bigRoute : { tagApprover ( "tag:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
ExitNode : policyv2 . AutoApprovers { tagApprover ( "tag:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
} ,
approver : "tag:approve" ,
spec : ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
} ,
withURL : true ,
} ,
{
name : "webauth-group" ,
2025-05-20 13:57:26 +02:00
pol : & policyv2 . Policy {
ACLs : [ ] policyv2 . ACL {
2025-04-30 08:54:04 +03:00
{
2025-05-20 13:57:26 +02:00
Action : "accept" ,
Sources : [ ] policyv2 . Alias { wildcard ( ) } ,
Destinations : [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( wildcard ( ) , tailcfg . PortRangeAny ) ,
} ,
2025-04-30 08:54:04 +03:00
} ,
} ,
2025-05-20 13:57:26 +02:00
Groups : policyv2 . Groups {
policyv2 . Group ( "group:approve" ) : [ ] policyv2 . Username { policyv2 . Username ( "user1@" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
AutoApprovers : policyv2 . AutoApproverPolicy {
Routes : map [ netip . Prefix ] policyv2 . AutoApprovers {
bigRoute : { groupApprover ( "group:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
2025-05-20 13:57:26 +02:00
ExitNode : policyv2 . AutoApprovers { groupApprover ( "group:approve" ) } ,
2025-04-30 08:54:04 +03:00
} ,
} ,
approver : "group:approve" ,
spec : ScenarioSpec {
NodesPerUser : 3 ,
Users : [ ] string { "user1" , "user2" } ,
Networks : map [ string ] [ ] string {
"usernet1" : { "user1" } ,
"usernet2" : { "user2" } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
} ,
withURL : true ,
2025-03-31 15:55:07 +02:00
} ,
}
2025-04-30 08:54:04 +03:00
for _ , tt := range tests {
2025-05-04 22:52:47 +03:00
for _ , polMode := range [ ] types . PolicyMode { types . PolicyModeDB , types . PolicyModeFile } {
2025-04-30 08:54:04 +03:00
for _ , advertiseDuringUp := range [ ] bool { false , true } {
2025-05-04 22:52:47 +03:00
name := fmt . Sprintf ( "%s-advertiseduringup-%t-pol-%s" , tt . name , advertiseDuringUp , polMode )
2025-04-30 08:54:04 +03:00
t . Run ( name , func ( t * testing . T ) {
scenario , err := NewScenario ( tt . spec )
require . NoErrorf ( t , err , "failed to create scenario: %s" , err )
defer scenario . ShutdownAssertNoPanics ( t )
2025-07-05 23:30:47 +02:00
var nodes [ ] * v1 . Node
2025-04-30 08:54:04 +03:00
opts := [ ] hsic . Option {
hsic . WithTestName ( "autoapprovemulti" ) ,
hsic . WithEmbeddedDERPServerOnly ( ) ,
hsic . WithTLS ( ) ,
hsic . WithACLPolicy ( tt . pol ) ,
2025-05-04 22:52:47 +03:00
hsic . WithPolicyMode ( polMode ) ,
2025-04-30 08:54:04 +03:00
}
tsOpts := [ ] tsic . Option {
tsic . WithAcceptRoutes ( ) ,
}
if tt . approver == "tag:approve" {
tsOpts = append ( tsOpts ,
tsic . WithTags ( [ ] string { "tag:approve" } ) ,
)
}
route , err := scenario . SubnetOfNetwork ( "usernet1" )
require . NoError ( t , err )
err = scenario . createHeadscaleEnv ( tt . withURL , tsOpts ,
opts ... ,
)
assertNoErrHeadscaleEnv ( t , err )
allClients , err := scenario . ListTailscaleClients ( )
assertNoErrListClients ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
services , err := scenario . Services ( "usernet1" )
require . NoError ( t , err )
require . Len ( t , services , 1 )
usernet1 , err := scenario . Network ( "usernet1" )
require . NoError ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
assert . NotNil ( t , headscale )
2025-08-06 08:37:02 +02:00
// Add the Docker network route to the auto-approvers
// Keep existing auto-approvers (like bigRoute) in place
2025-05-20 13:57:26 +02:00
var approvers policyv2 . AutoApprovers
switch {
case strings . HasPrefix ( tt . approver , "tag:" ) :
approvers = append ( approvers , tagApprover ( tt . approver ) )
case strings . HasPrefix ( tt . approver , "group:" ) :
approvers = append ( approvers , groupApprover ( tt . approver ) )
default :
approvers = append ( approvers , usernameApprover ( tt . approver ) )
}
if tt . pol . AutoApprovers . Routes == nil {
tt . pol . AutoApprovers . Routes = make ( map [ netip . Prefix ] policyv2 . AutoApprovers )
}
prefix := * route
tt . pol . AutoApprovers . Routes [ prefix ] = approvers
2025-05-01 08:05:42 +03:00
err = headscale . SetPolicy ( tt . pol )
require . NoError ( t , err )
2025-04-30 08:54:04 +03:00
if advertiseDuringUp {
tsOpts = append ( tsOpts ,
tsic . WithExtraLoginArgs ( [ ] string { "--advertise-routes=" + route . String ( ) } ) ,
)
}
tsOpts = append ( tsOpts , tsic . WithNetwork ( usernet1 ) )
// This whole dance is to add a node _after_ all the other nodes
// with an additional tsOpt which advertises the route as part
// of the `tailscale up` command. If we do this as part of the
// scenario creation, it will be added to all nodes and turn
2025-07-21 03:51:57 +08:00
// into a HA node, which isn't something we are testing here.
2025-04-30 08:54:04 +03:00
routerUsernet1 , err := scenario . CreateTailscaleNode ( "head" , tsOpts ... )
require . NoError ( t , err )
defer routerUsernet1 . Shutdown ( )
if tt . withURL {
u , err := routerUsernet1 . LoginWithURL ( headscale . GetEndpoint ( ) )
assertNoErr ( t , err )
body , err := doLoginURL ( routerUsernet1 . Hostname ( ) , u )
assertNoErr ( t , err )
scenario . runHeadscaleRegister ( "user1" , body )
} else {
2025-04-30 12:45:08 +03:00
userMap , err := headscale . MapUsers ( )
assertNoErr ( t , err )
pak , err := scenario . CreatePreAuthKey ( userMap [ "user1" ] . GetId ( ) , false , false )
2025-04-30 08:54:04 +03:00
assertNoErr ( t , err )
2025-07-10 23:38:55 +02:00
err = routerUsernet1 . Login ( headscale . GetEndpoint ( ) , pak . GetKey ( ) )
2025-04-30 08:54:04 +03:00
assertNoErr ( t , err )
}
// extra creation end.
routerUsernet1ID := routerUsernet1 . MustID ( )
web := services [ 0 ]
webip := netip . MustParseAddr ( web . GetIPInNetwork ( usernet1 ) )
weburl := fmt . Sprintf ( "http://%s/etc/hostname" , webip )
t . Logf ( "webservice: %s, %s" , webip . String ( ) , weburl )
// Sort nodes by ID
sort . SliceStable ( allClients , func ( i , j int ) bool {
statusI := allClients [ i ] . MustStatus ( )
statusJ := allClients [ j ] . MustStatus ( )
return statusI . Self . ID < statusJ . Self . ID
} )
// This is ok because the scenario makes users in order, so the three first
// nodes, which are subnet routes, will be created first, and the last user
// will be created with the second.
routerSubRoute := allClients [ 1 ]
routerExitNode := allClients [ 2 ]
client := allClients [ 3 ]
if ! advertiseDuringUp {
// Advertise the route for the dockersubnet of user1
command := [ ] string {
"tailscale" ,
"set" ,
"--advertise-routes=" + route . String ( ) ,
}
_ , _ , err = routerUsernet1 . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
}
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// These route should auto approve, so the node is expected to have a route
// for all counts.
nodes , err := headscale . ListNodes ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
routerNode := MustFindNode ( routerUsernet1 . Hostname ( ) , nodes )
t . Logf ( "Initial auto-approval check - Router node %s: announced=%v, approved=%v, subnet=%v" ,
routerNode . GetName ( ) ,
routerNode . GetAvailableRoutes ( ) ,
routerNode . GetApprovedRoutes ( ) ,
routerNode . GetSubnetRoutes ( ) )
requireNodeRouteCountWithCollect ( c , routerNode , 1 , 1 , 1 )
} , 10 * time . Second , 500 * time . Millisecond , "Initial route auto-approval: Route should be approved via policy" )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
// Verify that the routes have been sent to the client.
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
// Debug output to understand peer visibility
t . Logf ( "Client %s sees %d peers" , client . Hostname ( ) , len ( status . Peers ( ) ) )
routerPeerFound := false
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
if peerStatus . ID == routerUsernet1ID . StableID ( ) {
routerPeerFound = true
t . Logf ( "Client sees router peer %s (ID=%s): AllowedIPs=%v, PrimaryRoutes=%v" ,
peerStatus . HostName ,
peerStatus . ID ,
peerStatus . AllowedIPs ,
peerStatus . PrimaryRoutes )
assert . NotNil ( c , peerStatus . PrimaryRoutes )
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , * route )
}
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { * route } )
} else {
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
2025-04-30 08:54:04 +03:00
}
2025-08-06 08:37:02 +02:00
assert . True ( c , routerPeerFound , "Client should see the router peer" )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying routes sent to client after auto-approval" )
2025-04-30 08:54:04 +03:00
url := fmt . Sprintf ( "http://%s/etc/hostname" , webip )
t . Logf ( "url from %s to %s" , client . Hostname ( ) , url )
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( url )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying client can reach webservice through auto-approved route" )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := routerUsernet1 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for routerUsernet1" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying traceroute goes through auto-approved router" )
2025-04-30 08:54:04 +03:00
// Remove the auto approval from the policy, any routes already enabled should be allowed.
2025-05-20 13:57:26 +02:00
prefix = * route
delete ( tt . pol . AutoApprovers . Routes , prefix )
2025-04-30 08:54:04 +03:00
err = headscale . SetPolicy ( tt . pol )
require . NoError ( t , err )
2025-08-06 08:37:02 +02:00
t . Logf ( "Policy updated: removed auto-approver for route %s" , prefix )
2025-04-30 08:54:04 +03:00
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
2025-08-06 08:37:02 +02:00
// Routes already approved should remain approved even after policy change
2025-07-05 23:30:47 +02:00
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
routerNode := MustFindNode ( routerUsernet1 . Hostname ( ) , nodes )
t . Logf ( "After policy removal - Router node %s: announced=%v, approved=%v, subnet=%v" ,
routerNode . GetName ( ) ,
routerNode . GetAvailableRoutes ( ) ,
routerNode . GetApprovedRoutes ( ) ,
routerNode . GetSubnetRoutes ( ) )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
requireNodeRouteCountWithCollect ( c , routerNode , 1 , 1 , 1 )
} , 10 * time . Second , 500 * time . Millisecond , "Routes should remain approved after auto-approver removal" )
// Verify that the routes have been sent to the client.
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
if peerStatus . ID == routerUsernet1ID . StableID ( ) {
assert . NotNil ( c , peerStatus . PrimaryRoutes )
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , * route )
}
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { * route } )
} else {
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
2025-04-30 08:54:04 +03:00
}
2025-08-06 08:37:02 +02:00
} , 5 * time . Second , 200 * time . Millisecond , "Verifying routes remain after policy change" )
2025-04-30 08:54:04 +03:00
url = fmt . Sprintf ( "http://%s/etc/hostname" , webip )
t . Logf ( "url from %s to %s" , client . Hostname ( ) , url )
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( url )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying client can still reach webservice after policy change" )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := routerUsernet1 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for routerUsernet1" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying traceroute still goes through router after policy change" )
2025-04-30 08:54:04 +03:00
// Disable the route, making it unavailable since it is no longer auto-approved
_ , err = headscale . ApproveRoutes (
MustFindNode ( routerUsernet1 . Hostname ( ) , nodes ) . GetId ( ) ,
[ ] netip . Prefix { } ,
)
require . NoError ( t , err )
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// These route should auto approve, so the node is expected to have a route
// for all counts.
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
requireNodeRouteCountWithCollect ( c , MustFindNode ( routerUsernet1 . Hostname ( ) , nodes ) , 1 , 0 , 0 )
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate" )
2025-04-30 08:54:04 +03:00
// Verify that the routes have been sent to the client.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
} , 5 * time . Second , 200 * time . Millisecond , "Verifying routes disabled after route removal" )
2025-04-30 08:54:04 +03:00
// Add the route back to the auto approver in the policy, the route should
// now become available again.
2025-05-20 13:57:26 +02:00
var newApprovers policyv2 . AutoApprovers
switch {
case strings . HasPrefix ( tt . approver , "tag:" ) :
newApprovers = append ( newApprovers , tagApprover ( tt . approver ) )
case strings . HasPrefix ( tt . approver , "group:" ) :
newApprovers = append ( newApprovers , groupApprover ( tt . approver ) )
default :
newApprovers = append ( newApprovers , usernameApprover ( tt . approver ) )
}
if tt . pol . AutoApprovers . Routes == nil {
tt . pol . AutoApprovers . Routes = make ( map [ netip . Prefix ] policyv2 . AutoApprovers )
}
prefix = * route
tt . pol . AutoApprovers . Routes [ prefix ] = newApprovers
2025-04-30 08:54:04 +03:00
err = headscale . SetPolicy ( tt . pol )
require . NoError ( t , err )
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// These route should auto approve, so the node is expected to have a route
// for all counts.
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
requireNodeRouteCountWithCollect ( c , MustFindNode ( routerUsernet1 . Hostname ( ) , nodes ) , 1 , 1 , 1 )
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate" )
2025-04-30 08:54:04 +03:00
// Verify that the routes have been sent to the client.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
if peerStatus . ID == routerUsernet1ID . StableID ( ) {
assert . NotNil ( c , peerStatus . PrimaryRoutes )
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , * route )
}
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { * route } )
} else {
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
2025-04-30 08:54:04 +03:00
}
2025-08-06 08:37:02 +02:00
} , 5 * time . Second , 200 * time . Millisecond , "Verifying routes re-enabled after policy re-approval" )
2025-04-30 08:54:04 +03:00
url = fmt . Sprintf ( "http://%s/etc/hostname" , webip )
t . Logf ( "url from %s to %s" , client . Hostname ( ) , url )
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := client . Curl ( url )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying client can reach webservice after route re-approval" )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := client . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := routerUsernet1 . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for routerUsernet1" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying traceroute goes through router after re-approval" )
2025-04-30 08:54:04 +03:00
// Advertise and validate a subnet of an auto approved route, /24 inside the
// auto approved /16.
command := [ ] string {
"tailscale" ,
"set" ,
"--advertise-routes=" + subRoute . String ( ) ,
}
_ , _ , err = routerSubRoute . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// These route should auto approve, so the node is expected to have a route
// for all counts.
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
requireNodeRouteCountWithCollect ( c , MustFindNode ( routerUsernet1 . Hostname ( ) , nodes ) , 1 , 1 , 1 )
2025-08-06 08:37:02 +02:00
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 1 , 1 )
2025-07-05 23:30:47 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate" )
2025-04-30 08:54:04 +03:00
// Verify that the routes have been sent to the client.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
if peerStatus . ID == routerUsernet1ID . StableID ( ) {
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , * route )
}
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { * route } )
} else if peerStatus . ID == "2" {
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , subRoute )
}
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { subRoute } )
} else {
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
2025-04-30 08:54:04 +03:00
}
2025-08-06 08:37:02 +02:00
} , 5 * time . Second , 200 * time . Millisecond , "Verifying sub-route propagated to client" )
2025-04-30 08:54:04 +03:00
// Advertise a not approved route will not end up anywhere
command = [ ] string {
"tailscale" ,
"set" ,
"--advertise-routes=" + notApprovedRoute . String ( ) ,
}
_ , _ , err = routerSubRoute . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// These route should auto approve, so the node is expected to have a route
// for all counts.
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
requireNodeRouteCountWithCollect ( c , MustFindNode ( routerUsernet1 . Hostname ( ) , nodes ) , 1 , 1 , 1 )
2025-08-06 08:37:02 +02:00
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 1 , 0 )
requireNodeRouteCountWithCollect ( c , nodes [ 2 ] , 0 , 0 , 0 )
2025-07-05 23:30:47 +02:00
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate" )
2025-04-30 08:54:04 +03:00
// Verify that the routes have been sent to the client.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
if peerStatus . ID == routerUsernet1ID . StableID ( ) {
assert . NotNil ( c , peerStatus . PrimaryRoutes )
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , * route )
}
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { * route } )
} else {
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
2025-04-30 08:54:04 +03:00
}
2025-08-06 08:37:02 +02:00
} , 5 * time . Second , 200 * time . Millisecond , "Verifying unapproved route not propagated" )
2025-04-30 08:54:04 +03:00
// Exit routes are also automatically approved
command = [ ] string {
"tailscale" ,
"set" ,
"--advertise-exit-node" ,
}
_ , _ , err = routerExitNode . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise route: %s" , err )
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodes , err = headscale . ListNodes ( )
assert . NoError ( c , err )
requireNodeRouteCountWithCollect ( c , MustFindNode ( routerUsernet1 . Hostname ( ) , nodes ) , 1 , 1 , 1 )
requireNodeRouteCountWithCollect ( c , nodes [ 1 ] , 1 , 1 , 0 )
requireNodeRouteCountWithCollect ( c , nodes [ 2 ] , 2 , 2 , 2 )
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate" )
2025-04-30 08:54:04 +03:00
// Verify that the routes have been sent to the client.
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
status , err := client . Status ( )
assert . NoError ( c , err )
2025-04-30 08:54:04 +03:00
2025-08-06 08:37:02 +02:00
for _ , peerKey := range status . Peers ( ) {
peerStatus := status . Peer [ peerKey ]
if peerStatus . ID == routerUsernet1ID . StableID ( ) {
if peerStatus . PrimaryRoutes != nil {
assert . Contains ( c , peerStatus . PrimaryRoutes . AsSlice ( ) , * route )
}
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { * route } )
} else if peerStatus . ID == "3" {
requirePeerSubnetRoutesWithCollect ( c , peerStatus , [ ] netip . Prefix { tsaddr . AllIPv4 ( ) , tsaddr . AllIPv6 ( ) } )
} else {
requirePeerSubnetRoutesWithCollect ( c , peerStatus , nil )
}
2025-04-30 08:54:04 +03:00
}
2025-08-06 08:37:02 +02:00
} , 5 * time . Second , 200 * time . Millisecond , "Verifying exit node routes propagated to client" )
2025-04-30 08:54:04 +03:00
} )
}
2025-03-31 15:55:07 +02:00
}
}
}
2025-03-21 11:49:32 +01:00
// assertTracerouteViaIP fails the test immediately unless the traceroute
// reports success, carries no error, has at least one hop, and its first hop
// is ip.
func assertTracerouteViaIP(t *testing.T, tr util.Traceroute, ip netip.Addr) {
	t.Helper()

	require.NotNil(t, tr)
	require.True(t, tr.Success)
	require.NoError(t, tr.Err)
	require.NotEmpty(t, tr.Route)
	// testify takes (expected, actual); ip is the hop we expect first, so it
	// goes first to keep the failure output labelled correctly.
	require.Equal(t, ip, tr.Route[0].IP)
}
2025-08-06 08:37:02 +02:00
// assertTracerouteViaIPWithCollect is a version of assertTracerouteViaIP that works with assert.CollectT.
2025-07-05 23:30:47 +02:00
func assertTracerouteViaIPWithCollect ( c * assert . CollectT , tr util . Traceroute , ip netip . Addr ) {
assert . NotNil ( c , tr )
assert . True ( c , tr . Success )
assert . NoError ( c , tr . Err )
assert . NotEmpty ( c , tr . Route )
2025-08-06 08:37:02 +02:00
// Since we're inside EventuallyWithT, we can't use require.Greater with t
// but assert.NotEmpty above ensures len(tr.Route) > 0
if len ( tr . Route ) > 0 {
assert . Equal ( c , tr . Route [ 0 ] . IP . String ( ) , ip . String ( ) )
}
2025-07-05 23:30:47 +02:00
}
2025-03-21 11:49:32 +01:00
// requirePeerSubnetRoutes asserts that the peer has the expected subnet routes.
func requirePeerSubnetRoutes ( t * testing . T , status * ipnstate . PeerStatus , expected [ ] netip . Prefix ) {
2025-02-26 07:22:55 -08:00
t . Helper ( )
if status . AllowedIPs . Len ( ) <= 2 && len ( expected ) != 0 {
2025-03-21 11:49:32 +01:00
t . Fatalf ( "peer %s (%s) has no subnet routes, expected %v" , status . HostName , status . ID , expected )
2025-02-26 07:22:55 -08:00
return
}
if len ( expected ) == 0 {
expected = [ ] netip . Prefix { }
}
2025-03-21 11:49:32 +01:00
got := slicesx . Filter ( nil , status . AllowedIPs . AsSlice ( ) , func ( p netip . Prefix ) bool {
if tsaddr . IsExitRoute ( p ) {
return true
}
return ! slices . ContainsFunc ( status . TailscaleIPs , p . Contains )
} )
2025-02-26 07:22:55 -08:00
2025-05-04 22:52:47 +03:00
if diff := cmpdiff . Diff ( expected , got , util . PrefixComparer , cmpopts . EquateEmpty ( ) ) ; diff != "" {
2025-03-21 11:49:32 +01:00
t . Fatalf ( "peer %s (%s) subnet routes, unexpected result (-want +got):\n%s" , status . HostName , status . ID , diff )
2025-02-26 07:22:55 -08:00
}
}
2025-08-06 08:37:02 +02:00
// SortPeerStatus is a comparison function for slices.SortFunc that orders
// peer statuses by ascending ID. It returns a negative number when a sorts
// before b, zero when their IDs are equal, and a positive number otherwise.
func SortPeerStatus(a, b *ipnstate.PeerStatus) int {
	left, right := a.ID, b.ID

	return cmp.Compare(left, right)
}
// printCurrentRouteMap logs, for every given router in ascending ID order,
// the routes it is serving (its AllowedIPs as reduced by filterNonRoutes)
// and, when set, its PrimaryRoutes. It is a debugging aid and asserts nothing.
func printCurrentRouteMap(t *testing.T, routers ...*ipnstate.PeerStatus) {
	t.Helper()

	// Sort a copy: when invoked as printCurrentRouteMap(t, peers...) the
	// variadic parameter aliases the caller's slice, and a logging helper
	// should not reorder it.
	ordered := slices.Clone(routers)
	slices.SortFunc(ordered, SortPeerStatus)

	t.Logf("== Current routing map ==")

	for _, router := range ordered {
		t.Logf(" Router %s (%s) is serving:", router.HostName, router.ID)
		t.Logf(" AllowedIPs: %v", filterNonRoutes(router))

		if router.PrimaryRoutes != nil {
			t.Logf(" PrimaryRoutes: %v", router.PrimaryRoutes.AsSlice())
		}
	}
}
// filterNonRoutes returns the list of routes that a [ipnstate.PeerStatus] is serving.
//
// It walks the peer's AllowedIPs and keeps every exit route, plus every
// prefix that does not contain any of the peer's own TailscaleIPs.
func filterNonRoutes(status *ipnstate.PeerStatus) []netip.Prefix {
	// isRoute reports whether a prefix should be counted as a served route.
	isRoute := func(prefix netip.Prefix) bool {
		// Exit routes are always kept, even though they contain the peer's own addresses.
		return tsaddr.IsExitRoute(prefix) ||
			!slices.ContainsFunc(status.TailscaleIPs, prefix.Contains)
	}

	allowed := status.AllowedIPs.AsSlice()

	return slicesx.Filter(nil, allowed, isRoute)
}
2025-07-05 23:30:47 +02:00
func requirePeerSubnetRoutesWithCollect ( c * assert . CollectT , status * ipnstate . PeerStatus , expected [ ] netip . Prefix ) {
if status . AllowedIPs . Len ( ) <= 2 && len ( expected ) != 0 {
assert . Fail ( c , fmt . Sprintf ( "peer %s (%s) has no subnet routes, expected %v" , status . HostName , status . ID , expected ) )
return
}
if len ( expected ) == 0 {
expected = [ ] netip . Prefix { }
}
2025-08-06 08:37:02 +02:00
got := filterNonRoutes ( status )
2025-07-05 23:30:47 +02:00
if diff := cmpdiff . Diff ( expected , got , util . PrefixComparer , cmpopts . EquateEmpty ( ) ) ; diff != "" {
assert . Fail ( c , fmt . Sprintf ( "peer %s (%s) subnet routes, unexpected result (-want +got):\n%s" , status . HostName , status . ID , diff ) )
}
}
2025-04-30 08:54:04 +03:00
func requireNodeRouteCount ( t * testing . T , node * v1 . Node , announced , approved , subnet int ) {
2025-02-26 07:22:55 -08:00
t . Helper ( )
2025-04-30 08:54:04 +03:00
require . Lenf ( t , node . GetAvailableRoutes ( ) , announced , "expected %q announced routes(%v) to have %d route, had %d" , node . GetName ( ) , node . GetAvailableRoutes ( ) , announced , len ( node . GetAvailableRoutes ( ) ) )
require . Lenf ( t , node . GetApprovedRoutes ( ) , approved , "expected %q approved routes(%v) to have %d route, had %d" , node . GetName ( ) , node . GetApprovedRoutes ( ) , approved , len ( node . GetApprovedRoutes ( ) ) )
require . Lenf ( t , node . GetSubnetRoutes ( ) , subnet , "expected %q subnet routes(%v) to have %d route, had %d" , node . GetName ( ) , node . GetSubnetRoutes ( ) , subnet , len ( node . GetSubnetRoutes ( ) ) )
2025-02-26 07:22:55 -08:00
}
2025-05-04 22:52:47 +03:00
2025-07-05 23:30:47 +02:00
// requireNodeRouteCountWithCollect is the assert.CollectT counterpart of
// requireNodeRouteCount, for use inside EventuallyWithT. It records a failure
// on c for each of the available, approved and subnet route lists whose
// length differs from the expected count, without aborting.
func requireNodeRouteCountWithCollect(c *assert.CollectT, node *v1.Node, announced, approved, subnet int) {
	name := node.GetName()
	availableRoutes := node.GetAvailableRoutes()
	approvedRoutes := node.GetApprovedRoutes()
	subnetRoutes := node.GetSubnetRoutes()

	assert.Lenf(c, availableRoutes, announced,
		"expected %q announced routes(%v) to have %d route, had %d",
		name, availableRoutes, announced, len(availableRoutes))
	assert.Lenf(c, approvedRoutes, approved,
		"expected %q approved routes(%v) to have %d route, had %d",
		name, approvedRoutes, approved, len(approvedRoutes))
	assert.Lenf(c, subnetRoutes, subnet,
		"expected %q subnet routes(%v) to have %d route, had %d",
		name, subnetRoutes, subnet, len(subnetRoutes))
}
2025-05-04 22:52:47 +03:00
// TestSubnetRouteACLFiltering tests that a node can only access subnet routes
// that are explicitly allowed in the ACL.
func TestSubnetRouteACLFiltering ( t * testing . T ) {
IntegrationSkip ( t )
// Use router and node users for better clarity
routerUser := "router"
nodeUser := "node"
spec := ScenarioSpec {
NodesPerUser : 1 ,
Users : [ ] string { routerUser , nodeUser } ,
Networks : map [ string ] [ ] string {
"usernet1" : { routerUser , nodeUser } ,
} ,
ExtraService : map [ string ] [ ] extraServiceFunc {
"usernet1" : { Webservice } ,
} ,
// We build the head image with curl and traceroute, so only use
// that for this test.
Versions : [ ] string { "head" } ,
}
scenario , err := NewScenario ( spec )
require . NoErrorf ( t , err , "failed to create scenario: %s" , err )
defer scenario . ShutdownAssertNoPanics ( t )
// Set up the ACL policy that allows the node to access only one of the subnet routes (10.10.10.0/24)
2025-07-10 23:38:55 +02:00
aclPolicyStr := ` {
2025-05-04 22:52:47 +03:00
"hosts" : {
"router" : "100.64.0.1/32" ,
"node" : "100.64.0.2/32"
} ,
"acls" : [
{
"action" : "accept" ,
"src" : [
"*"
] ,
"dst" : [
"router:8000"
]
} ,
{
"action" : "accept" ,
"src" : [
"node"
] ,
2025-05-20 13:57:26 +02:00
"dst" : [
"*:*"
]
2025-05-04 22:52:47 +03:00
}
]
2025-07-10 23:38:55 +02:00
} `
2025-05-04 22:52:47 +03:00
route , err := scenario . SubnetOfNetwork ( "usernet1" )
require . NoError ( t , err )
services , err := scenario . Services ( "usernet1" )
require . NoError ( t , err )
require . Len ( t , services , 1 )
usernet1 , err := scenario . Network ( "usernet1" )
require . NoError ( t , err )
web := services [ 0 ]
webip := netip . MustParseAddr ( web . GetIPInNetwork ( usernet1 ) )
weburl := fmt . Sprintf ( "http://%s/etc/hostname" , webip )
t . Logf ( "webservice: %s, %s" , webip . String ( ) , weburl )
2025-05-20 13:57:26 +02:00
aclPolicy := & policyv2 . Policy { }
2025-05-04 22:52:47 +03:00
err = json . Unmarshal ( [ ] byte ( aclPolicyStr ) , aclPolicy )
require . NoError ( t , err )
err = scenario . CreateHeadscaleEnv ( [ ] tsic . Option {
tsic . WithAcceptRoutes ( ) ,
} , hsic . WithTestName ( "routeaclfilter" ) ,
hsic . WithACLPolicy ( aclPolicy ) ,
hsic . WithPolicyMode ( types . PolicyModeDB ) ,
)
assertNoErrHeadscaleEnv ( t , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
headscale , err := scenario . Headscale ( )
assertNoErrGetHeadscale ( t , err )
2025-08-06 08:37:02 +02:00
// Get the router and node clients by user
routerClients , err := scenario . ListTailscaleClients ( routerUser )
require . NoError ( t , err )
require . Len ( t , routerClients , 1 )
routerClient := routerClients [ 0 ]
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
nodeClients , err := scenario . ListTailscaleClients ( nodeUser )
require . NoError ( t , err )
require . Len ( t , nodeClients , 1 )
nodeClient := nodeClients [ 0 ]
routerIP , err := routerClient . IPv4 ( )
require . NoError ( t , err , "failed to get router IPv4" )
nodeIP , err := nodeClient . IPv4 ( )
require . NoError ( t , err , "failed to get node IPv4" )
2025-05-04 22:52:47 +03:00
2025-05-20 13:57:26 +02:00
aclPolicy . Hosts = policyv2 . Hosts {
2025-08-06 08:37:02 +02:00
policyv2 . Host ( routerUser ) : policyv2 . Prefix ( must . Get ( routerIP . Prefix ( 32 ) ) ) ,
policyv2 . Host ( nodeUser ) : policyv2 . Prefix ( must . Get ( nodeIP . Prefix ( 32 ) ) ) ,
2025-05-04 22:52:47 +03:00
}
2025-05-20 13:57:26 +02:00
aclPolicy . ACLs [ 1 ] . Destinations = [ ] policyv2 . AliasWithPorts {
aliasWithPorts ( prefixp ( route . String ( ) ) , tailcfg . PortRangeAny ) ,
2025-05-04 22:52:47 +03:00
}
require . NoError ( t , headscale . SetPolicy ( aclPolicy ) )
// Set up the subnet routes for the router
2025-05-20 13:57:26 +02:00
routes := [ ] netip . Prefix {
* route , // This should be accessible by the client
netip . MustParsePrefix ( "10.10.11.0/24" ) , // These should NOT be accessible
netip . MustParsePrefix ( "10.10.12.0/24" ) ,
2025-05-04 22:52:47 +03:00
}
2025-05-20 13:57:26 +02:00
routeArg := "--advertise-routes=" + routes [ 0 ] . String ( ) + "," + routes [ 1 ] . String ( ) + "," + routes [ 2 ] . String ( )
2025-05-04 22:52:47 +03:00
command := [ ] string {
"tailscale" ,
"set" ,
routeArg ,
}
_ , _ , err = routerClient . Execute ( command )
require . NoErrorf ( t , err , "failed to advertise routes: %s" , err )
err = scenario . WaitForTailscaleSync ( )
assertNoErrSync ( t , err )
2025-08-06 08:37:02 +02:00
var routerNode , nodeNode * v1 . Node
// Wait for route advertisements to propagate to NodeStore
assert . EventuallyWithT ( t , func ( ct * assert . CollectT ) {
// List nodes and verify the router has 3 available routes
nodes , err := headscale . NodesByUser ( )
assert . NoError ( ct , err )
assert . Len ( ct , nodes , 2 )
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
// Find the router node
routerNode = nodes [ routerUser ] [ 0 ]
nodeNode = nodes [ nodeUser ] [ 0 ]
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
assert . NotNil ( ct , routerNode , "Router node not found" )
assert . NotNil ( ct , nodeNode , "Client node not found" )
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
// Check that the router has 3 routes available but not approved yet
requireNodeRouteCountWithCollect ( ct , routerNode , 3 , 0 , 0 )
requireNodeRouteCountWithCollect ( ct , nodeNode , 0 , 0 , 0 )
} , 10 * time . Second , 100 * time . Millisecond , "route advertisements should propagate to router node" )
2025-05-04 22:52:47 +03:00
// Approve all routes for the router
_ , err = headscale . ApproveRoutes (
routerNode . GetId ( ) ,
util . MustStringsToPrefixes ( routerNode . GetAvailableRoutes ( ) ) ,
)
require . NoError ( t , err )
2025-07-05 23:30:47 +02:00
// Wait for route state changes to propagate
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
// List nodes and verify the router has 3 available routes
2025-08-06 08:37:02 +02:00
var err error
nodes , err := headscale . NodesByUser ( )
2025-07-05 23:30:47 +02:00
assert . NoError ( c , err )
assert . Len ( c , nodes , 2 )
2025-05-04 22:52:47 +03:00
2025-07-05 23:30:47 +02:00
// Find the router node
routerNode = nodes [ routerUser ] [ 0 ]
2025-05-04 22:52:47 +03:00
2025-07-05 23:30:47 +02:00
// Check that the router has 3 routes now approved and available
requireNodeRouteCountWithCollect ( c , routerNode , 3 , 3 , 3 )
} , 10 * time . Second , 500 * time . Millisecond , "route state changes should propagate" )
2025-05-04 22:52:47 +03:00
// Now check the client node status
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
nodeStatus , err := nodeClient . Status ( )
assert . NoError ( c , err )
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
routerStatus , err := routerClient . Status ( )
assert . NoError ( c , err )
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
// Check that the node can see the subnet routes from the router
routerPeerStatus := nodeStatus . Peer [ routerStatus . Self . PublicKey ]
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
// The node should only have 1 subnet route
requirePeerSubnetRoutesWithCollect ( c , routerPeerStatus , [ ] netip . Prefix { * route } )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying node sees filtered subnet routes" )
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
result , err := nodeClient . Curl ( weburl )
assert . NoError ( c , err )
assert . Len ( c , result , 13 )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying node can reach webservice through allowed route" )
2025-05-04 22:52:47 +03:00
2025-08-06 08:37:02 +02:00
assert . EventuallyWithT ( t , func ( c * assert . CollectT ) {
tr , err := nodeClient . Traceroute ( webip )
assert . NoError ( c , err )
ip , err := routerClient . IPv4 ( )
if ! assert . NoError ( c , err , "failed to get IPv4 for routerClient" ) {
return
}
assertTracerouteViaIPWithCollect ( c , tr , ip )
} , 5 * time . Second , 200 * time . Millisecond , "Verifying traceroute goes through router" )
2025-05-04 22:52:47 +03:00
}