mirror of
https://github.com/tailscale/tailscale.git
synced 2025-08-11 13:18:53 +00:00
various: implement stateful firewalling on Linux (#12025)
Updates https://github.com/tailscale/corp/issues/19623 Change-Id: I7980e1fb736e234e66fa000d488066466c96ec85 Signed-off-by: Andrew Dunham <andrew@du.nham.ca> Co-authored-by: Andrew Dunham <andrew@du.nham.ca>
This commit is contained in:
@@ -85,6 +85,15 @@ func (n *fakeIPTables) Delete(table, chain string, args ...string) error {
|
||||
}
|
||||
}
|
||||
|
||||
func (n *fakeIPTables) List(table, chain string) ([]string, error) {
|
||||
k := table + "/" + chain
|
||||
if rules, ok := n.n[k]; ok {
|
||||
return rules, nil
|
||||
} else {
|
||||
return nil, fmt.Errorf("unknown table/chain %s", k)
|
||||
}
|
||||
}
|
||||
|
||||
func (n *fakeIPTables) ClearChain(table, chain string) error {
|
||||
k := table + "/" + chain
|
||||
if _, ok := n.n[k]; ok {
|
||||
|
@@ -12,6 +12,7 @@ import (
|
||||
"net/netip"
|
||||
"os"
|
||||
"os/exec"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -36,6 +37,7 @@ type iptablesInterface interface {
|
||||
Append(table, chain string, args ...string) error
|
||||
Exists(table, chain string, args ...string) (bool, error)
|
||||
Delete(table, chain string, args ...string) error
|
||||
List(table, chain string) ([]string, error)
|
||||
ClearChain(table, chain string) error
|
||||
NewChain(table, chain string) error
|
||||
DeleteChain(table, chain string) error
|
||||
@@ -530,6 +532,67 @@ func (i *iptablesRunner) DelSNATRule() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func statefulRuleArgs(tunname string) []string {
|
||||
return []string{"-o", tunname, "-m", "conntrack", "!", "--ctstate", "ESTABLISHED,RELATED", "-j", "DROP"}
|
||||
}
|
||||
|
||||
// AddStatefulRule adds a netfilter rule for stateful packet filtering using
|
||||
// conntrack.
|
||||
func (i *iptablesRunner) AddStatefulRule(tunname string) error {
|
||||
// Drop packets that are destined for the tailscale interface if
|
||||
// they're a new connection, per conntrack, to prevent hosts on the
|
||||
// same subnet from being able to use this device as a way to forward
|
||||
// packets on to the Tailscale network.
|
||||
//
|
||||
// The conntrack states are:
|
||||
// NEW A packet which creates a new connection.
|
||||
// ESTABLISHED A packet which belongs to an existing connection
|
||||
// (i.e., a reply packet, or outgoing packet on a
|
||||
// connection which has seen replies).
|
||||
// RELATED A packet which is related to, but not part of, an
|
||||
// existing connection, such as an ICMP error.
|
||||
// INVALID A packet which could not be identified for some
|
||||
// reason: this includes running out of memory and ICMP
|
||||
// errors which don't correspond to any known
|
||||
// connection. Generally these packets should be
|
||||
// dropped.
|
||||
//
|
||||
// We drop NEW packets to prevent connections from coming "into"
|
||||
// Tailscale from other hosts on the same network segment; we drop
|
||||
// INVALID packets as well.
|
||||
args := statefulRuleArgs(tunname)
|
||||
for _, ipt := range i.getTables() {
|
||||
// First, find the final "accept" rule.
|
||||
rules, err := ipt.List("filter", "ts-forward")
|
||||
if err != nil {
|
||||
return fmt.Errorf("listing rules in filter/ts-forward: %w", err)
|
||||
}
|
||||
want := fmt.Sprintf("-A %s -o %s -j ACCEPT", "ts-forward", tunname)
|
||||
|
||||
pos := slices.Index(rules, want)
|
||||
if pos < 0 {
|
||||
return fmt.Errorf("couldn't find final ACCEPT rule in filter/ts-forward")
|
||||
}
|
||||
|
||||
if err := ipt.Insert("filter", "ts-forward", pos, args...); err != nil {
|
||||
return fmt.Errorf("adding %v in filter/ts-forward: %w", args, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DelStatefulRule removes the netfilter rule for stateful packet filtering
|
||||
// using conntrack.
|
||||
func (i *iptablesRunner) DelStatefulRule(tunname string) error {
|
||||
args := statefulRuleArgs(tunname)
|
||||
for _, ipt := range i.getTables() {
|
||||
if err := ipt.Delete("filter", "ts-forward", args...); err != nil {
|
||||
return fmt.Errorf("deleting %v in filter/ts-forward: %w", args, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// buildMagicsockPortRule generates the string slice containing the arguments
|
||||
// to describe a rule accepting traffic on a particular port to iptables. It is
|
||||
// separated out here to avoid repetition in AddMagicsockPortRule and
|
||||
|
@@ -514,6 +514,14 @@ type NetfilterRunner interface {
|
||||
// DelSNATRule removes the rule added by AddSNATRule.
|
||||
DelSNATRule() error
|
||||
|
||||
// AddStatefulRule adds a netfilter rule for stateful packet filtering
|
||||
// using conntrack.
|
||||
AddStatefulRule(tunname string) error
|
||||
|
||||
// DelStatefulRule removes a netfilter rule for stateful packet filtering
|
||||
// using conntrack.
|
||||
DelStatefulRule(tunname string) error
|
||||
|
||||
// HasIPV6 reports true if the system supports IPv6.
|
||||
HasIPV6() bool
|
||||
|
||||
@@ -1748,6 +1756,194 @@ func (n *nftablesRunner) DelSNATRule() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func nativeUint32(v uint32) []byte {
|
||||
b := make([]byte, 4)
|
||||
binary.NativeEndian.PutUint32(b, v)
|
||||
return b
|
||||
}
|
||||
|
||||
func makeStatefulRuleExprs(tunname string) []expr.Any {
|
||||
return []expr.Any{
|
||||
// Check if the output interface is the Tailscale interface by
|
||||
// first loding the OIFNAME into register 1 and comparing it
|
||||
// against our tunname.
|
||||
//
|
||||
// 'cmp' implicitly breaks from a rule if a comparison fails,
|
||||
// so if we continue past this rule we know that the packet is
|
||||
// going to our TUN.
|
||||
&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
|
||||
&expr.Cmp{
|
||||
Op: expr.CmpOpNeq,
|
||||
Register: 1,
|
||||
Data: []byte(tunname),
|
||||
},
|
||||
|
||||
// Store the conntrack state in register 1
|
||||
&expr.Ct{
|
||||
Register: 1,
|
||||
Key: expr.CtKeySTATE,
|
||||
},
|
||||
// Mask the state in register 1 to "hide" the ESTABLISHED and
|
||||
// RELATED bits (which are expected and fine); if there are any
|
||||
// other bits, we want them to remain.
|
||||
//
|
||||
// This operation is, in the kernel:
|
||||
// dst[i] = (src[i] & mask[i]) ^ xor[i]
|
||||
//
|
||||
// So, we can mask by setting the inverse of the bits we want
|
||||
// to remove; i.e. ESTABLISHED = 0b00000010, RELATED =
|
||||
// 0b00000100, so, if we assume an 8-bit state (in reality,
|
||||
// it's 32-bit), we can mask with 0b11111001 to clear those
|
||||
// bits and keep everything else (e.g. the INVALID bit which is
|
||||
// 0b00000001).
|
||||
//
|
||||
// TODO(andrew-d): for now, let's also allow
|
||||
// CtStateBitUNTRACKED, which is a state for packets that are not
|
||||
// tracked (marked so explicitly with an iptables rule using
|
||||
// --notrack); we should figure out if we want to allow this or not.
|
||||
&expr.Bitwise{
|
||||
SourceRegister: 1,
|
||||
DestRegister: 1,
|
||||
Len: 4,
|
||||
Mask: nativeUint32(^(0 |
|
||||
expr.CtStateBitESTABLISHED |
|
||||
expr.CtStateBitRELATED |
|
||||
expr.CtStateBitUNTRACKED)),
|
||||
|
||||
// Xor is unused but must be specified
|
||||
Xor: nativeUint32(0),
|
||||
},
|
||||
// Compare against the expected state (0, i.e. no bits set
|
||||
// other than maybe ESTABLISHED and RELATED). We want this
|
||||
// comparison to fail if there are no bits set, so that this
|
||||
// rule's evaluation stops and we don't fall through to the
|
||||
// "Drop" verdict.
|
||||
//
|
||||
// For example, if the state is ESTABLISHED (and we want to
|
||||
// break from this rule/accept this packet):
|
||||
// state = ESTABLISHED
|
||||
// register1 = 0b0 (since the bitwise operation cleared the ESTABLISHED bit)
|
||||
//
|
||||
// compare register1 (0b0) != 0: false
|
||||
// -> comparison implicitly breaks
|
||||
// -> continue to the next rule
|
||||
//
|
||||
// For example, if the state is NEW (and we want to continue to
|
||||
// the next expression and thus drop this packet):
|
||||
// state = NEW
|
||||
// register1 = 0b1000
|
||||
//
|
||||
// compare register1 (0b1000) != 0: true
|
||||
// -> comparison continues to next expr
|
||||
&expr.Cmp{
|
||||
Op: expr.CmpOpNeq,
|
||||
Register: 1,
|
||||
Data: []byte{0, 0, 0, 0},
|
||||
},
|
||||
// If we get here, we know that this packet is going to our TUN
|
||||
// device, and has a conntrack state set other than ESTABLISHED
|
||||
// or RELATED. We thus count and drop the packet.
|
||||
&expr.Counter{},
|
||||
&expr.Verdict{Kind: expr.VerdictDrop},
|
||||
}
|
||||
|
||||
// TODO(andrew-d): iptables-nft writes a rule that dumps as:
|
||||
//
|
||||
// match name conntrack rev 3
|
||||
//
|
||||
// I think this is using expr.Match against the following struct
|
||||
// (xt_conntrack_mtinfo3):
|
||||
//
|
||||
// https://github.com/torvalds/linux/blob/master/include/uapi/linux/netfilter/xt_conntrack.h#L64-L77
|
||||
//
|
||||
// We could probably do something similar here, but I'm not sure if
|
||||
// there's any advantage. Below is an example Match statement if we
|
||||
// decide to do that, based on dumping the rule that iptables-nft
|
||||
// generates:
|
||||
//
|
||||
// _ = expr.Match{
|
||||
// Name: "conntrack",
|
||||
// Rev: 3,
|
||||
// Info: &xt.ConntrackMtinfo3{
|
||||
// ConntrackMtinfo2: xt.ConntrackMtinfo2{
|
||||
// ConntrackMtinfoBase: xt.ConntrackMtinfoBase{
|
||||
// MatchFlags: xt.ConntrackState,
|
||||
// InvertFlags: xt.ConntrackState,
|
||||
// },
|
||||
// // Mask the state to remove ESTABLISHED and
|
||||
// // RELATED before comparing.
|
||||
// StateMask: expr.CtStateBitESTABLISHED | expr.CtStateBitRELATED,
|
||||
// },
|
||||
// },
|
||||
// }
|
||||
}
|
||||
|
||||
// AddStatefulRule adds a netfilter rule for stateful packet filtering using
|
||||
// conntrack.
|
||||
func (n *nftablesRunner) AddStatefulRule(tunname string) error {
|
||||
conn := n.conn
|
||||
|
||||
exprs := makeStatefulRuleExprs(tunname)
|
||||
for _, table := range n.getTables() {
|
||||
chain, err := getChainFromTable(conn, table.Filter, chainNameForward)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get forward chain: %w", err)
|
||||
}
|
||||
|
||||
// First, find the 'accept' rule that we want to insert our rule before.
|
||||
acceptRule := createAcceptOutgoingPacketRule(table.Filter, chain, tunname)
|
||||
rule, err := findRule(conn, acceptRule)
|
||||
if err != nil {
|
||||
return fmt.Errorf("find accept rule: %w", err)
|
||||
}
|
||||
|
||||
conn.InsertRule(&nftables.Rule{
|
||||
Table: table.Filter,
|
||||
Chain: chain,
|
||||
Exprs: exprs,
|
||||
|
||||
// Specifying Position in an Insert operation means to
|
||||
// insert this rule before the specified rule.
|
||||
Position: rule.Handle,
|
||||
})
|
||||
}
|
||||
|
||||
if err := conn.Flush(); err != nil {
|
||||
return fmt.Errorf("flush add stateful rule: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DelStatefulRule removes the netfilter rule for stateful packet filtering
|
||||
// using conntrack.
|
||||
func (n *nftablesRunner) DelStatefulRule(tunname string) error {
|
||||
conn := n.conn
|
||||
|
||||
exprs := makeStatefulRuleExprs(tunname)
|
||||
for _, table := range n.getTables() {
|
||||
chain, err := getChainFromTable(conn, table.Filter, chainNameForward)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get forward chain: %w", err)
|
||||
}
|
||||
rule, err := findRule(conn, &nftables.Rule{
|
||||
Table: table.Nat,
|
||||
Chain: chain,
|
||||
Exprs: exprs,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("find stateful rule: %w", err)
|
||||
}
|
||||
|
||||
if rule != nil {
|
||||
conn.DelRule(rule)
|
||||
}
|
||||
}
|
||||
if err := conn.Flush(); err != nil {
|
||||
return fmt.Errorf("flush del stateful rule: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupChain removes a jump rule from hookChainName to tsChainName, and then
|
||||
// the entire chain tsChainName. Errors are logged, but attempts to remove both
|
||||
// the jump rule and chain continue even if one errors.
|
||||
|
Reference in New Issue
Block a user