mirror of
https://github.com/tailscale/tailscale.git
synced 2024-11-29 21:15:39 +00:00
65888d95c9
This commit introduces a userspace program for managing an experimental eBPF XDP STUN server program. derp/xdp contains the eBPF pseudo-C along with a Go pkg for loading it and exporting its metrics. cmd/xdpderper is a package main user of derp/xdp. Updates tailscale/corp#20689 Signed-off-by: Jordan Whited <jordan@tailscale.com>
411 lines
15 KiB
C
411 lines
15 KiB
C
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
|
#ifndef __BPF_HELPERS__
|
|
#define __BPF_HELPERS__
|
|
|
|
/*
|
|
* Note that bpf programs need to include either
|
|
* vmlinux.h (auto-generated from BTF) or linux/types.h
|
|
* in advance since bpf_helper_defs.h uses such types
|
|
* as __u64.
|
|
*/
|
|
#include "bpf_helper_defs.h"
|
|
|
|
/*
 * Member-declaration helpers for map definition structs. Each macro declares
 * one struct member whose *type* carries the information:
 *   __uint(name, val)  - pointer to an array of val ints
 *   __type(name, val)  - pointer to the type of val
 *   __array(name, val) - array of pointers to the type of val
 *   __ulong(name, val) - anonymous enum with a single enumerator equal to val
 *
 * __typeof__ is spelled with underscores so the macros also work when the
 * compiler runs in a strict ISO mode where plain typeof is not a keyword.
 */
#define __uint(name, val) int (*name)[val]
#define __type(name, val) __typeof__(val) *name
#define __array(name, val) __typeof__(val) *name[]
/*
 * Enumerators live in the enclosing scope, so every __ulong() use needs its
 * own enumerator name. ___bpf_apply() (not ___bpf_concat()) is required here:
 * it expands __COUNTER__ *before* pasting, whereas a direct a ## b paste would
 * always produce the literal identifier __unique_value__COUNTER__ and make a
 * second __ulong() in the same file a redefinition error.
 */
#define __ulong(name, val) enum { ___bpf_apply(__unique_value, __COUNTER__) = val } name
|
|
|
|
/*
|
|
* Helper macro to place programs, maps, license in
|
|
* different sections in elf_bpf file. Section names
|
|
* are interpreted by libbpf depending on the context (BPF programs, BPF maps,
|
|
* extern variables, etc).
|
|
* To allow use of SEC() with externs (e.g., for extern .maps declarations),
|
|
* make sure __attribute__((unused)) doesn't trigger compilation warning.
|
|
*/
|
|
#if __GNUC__ && !__clang__

/*
 * Pragma macros are broken on GCC
 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55578
 * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90400
 */
#define SEC(name) __attribute__((section(name), used))

#else

/*
 * Same attribute as the GCC variant, wrapped in diagnostic pragmas so that
 * -Wignored-attributes is silenced for the duration of the attribute only.
 */
#define SEC(name)							\
	_Pragma("GCC diagnostic push")					\
	_Pragma("GCC diagnostic ignored \"-Wignored-attributes\"")	\
	__attribute__((section(name), used))				\
	_Pragma("GCC diagnostic pop")

#endif
|
|
|
|
/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
#undef __always_inline
#define __always_inline inline __attribute__((always_inline))

/* Only provided when an earlier header has not already defined them. */
#ifndef __noinline
#define __noinline __attribute__((noinline))
#endif
#ifndef __weak
#define __weak __attribute__((weak))
#endif

/*
 * Use __hidden attribute to mark a non-static BPF subprogram effectively
 * static for BPF verifier's verification algorithm purposes, allowing more
 * extensive and permissive BPF verification process, taking into account
 * subprogram's caller context.
 */
#define __hidden __attribute__((visibility("hidden")))
|
|
|
|
/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
|
|
* any system-level headers (such as stddef.h, linux/version.h, etc), and
|
|
* commonly-used macros like NULL and KERNEL_VERSION aren't available through
|
|
* vmlinux.h. This just adds unnecessary hurdles and forces users to re-define
|
|
* them on their own. So as a convenience, provide such definitions here.
|
|
*/
|
|
/* Null pointer constant, provided only if no other header defined it. */
#ifndef NULL
#define NULL ((void *)0)
#endif

/*
 * Pack a (major, minor, patch) triple into one integer:
 * major in bits 16 and up, minor in bits 8-15, patch in the low byte.
 * The patch component is clamped to 255 so it cannot spill into the minor
 * field.
 */
#ifndef KERNEL_VERSION
#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
#endif
|
|
|
|
/*
|
|
* Helper macros to manipulate data structures
|
|
*/
|
|
|
|
/* offsetof() definition that uses __builtin_offsetof() might not preserve field
|
|
* offset CO-RE relocation properly, so force-redefine offsetof() using
|
|
* old-school approach which works with CO-RE correctly
|
|
*/
|
|
/* Byte offset of 'member' inside 'type', computed from a null base pointer. */
#undef offsetof
#define offsetof(type, member) ((unsigned long)&((type *)0)->member)

/*
 * redefined container_of() to ensure we use the above offsetof() macro
 *
 * Given a pointer to 'member' embedded in a 'type' object, yield a pointer to
 * the enclosing object. 'ptr' is evaluated exactly once. Relies on the GNU
 * extensions for statement expressions and arithmetic on void pointers.
 */
#undef container_of
#define container_of(ptr, type, member)				\
	({							\
		void *__mptr = (void *)(ptr);			\
		((type *)(__mptr - offsetof(type, member)));	\
	})
|
|
|
|
/*
|
|
* Compiler (optimization) barrier.
|
|
*/
|
|
/*
 * Empty asm statement with a "memory" clobber. __asm__ is used instead of
 * asm so the macro also compiles in strict ISO modes, where asm is not a
 * keyword.
 */
#ifndef barrier
#define barrier() __asm__ volatile("" ::: "memory")
#endif
|
|
|
|
/* Variable-specific compiler (optimization) barrier. It's a no-op which makes
|
|
* compiler believe that there is some black box modification of a given
|
|
* variable and thus prevents compiler from making extra assumption about its
|
|
* value and potential simplifications and optimizations on this variable.
|
|
*
|
|
* E.g., compiler might often delay or even omit 32-bit to 64-bit casting of
|
|
* a variable, making some code patterns unverifiable. Putting barrier_var()
|
|
* in place will ensure that cast is performed before the barrier_var()
|
|
* invocation, because compiler has to pessimistically assume that embedded
|
|
* asm section might perform some extra operations on that variable.
|
|
*
|
|
* This is a variable-specific variant of more global barrier().
|
|
*/
|
|
/*
 * Empty asm statement that names 'var' as a read-write register operand
 * ("+r"); the value itself is left untouched. __asm__ is used instead of asm
 * so the macro also compiles in strict ISO modes, where asm is not a keyword.
 */
#ifndef barrier_var
#define barrier_var(var) __asm__ volatile("" : "+r"(var))
#endif
|
|
|
|
/*
|
|
* Helper macro to throw a compilation error if __bpf_unreachable() gets
|
|
* built into the resulting code. This works given BPF back end does not
|
|
* implement __builtin_trap(). This is useful to assert that certain paths
|
|
* of the program code are never used and hence eliminated by the compiler.
|
|
*
|
|
* For example, consider a switch statement that covers known cases used by
|
|
* the program. __bpf_unreachable() can then reside in the default case. If
|
|
* the program gets extended such that a case is not covered in the switch
|
|
* statement, then it will throw a build error due to the default case not
|
|
* being compiled out.
|
|
*/
|
|
/* Expands to __builtin_trap() unless the includer supplied its own version. */
#ifndef __bpf_unreachable
# define __bpf_unreachable()	__builtin_trap()
#endif
|
|
|
|
/*
|
|
* Helper function to perform a tail call with a constant/immediate map slot.
|
|
*/
|
|
#if __clang_major__ >= 8 && defined(__bpf__)
/*
 * Tail call through 'map' at index 'slot', where 'slot' must be a
 * compile-time constant: a non-constant slot leaves __bpf_unreachable() in
 * the generated code. Only compiled for Clang >= 8 targeting BPF.
 */
static __always_inline void
bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
{
	if (!__builtin_constant_p(slot))
		__bpf_unreachable();

	/*
	 * Provide a hard guarantee that LLVM won't optimize setting r2 (map
	 * pointer) and r3 (constant map index) from _different paths_ ending
	 * up at the _same_ call insn as otherwise we won't be able to use the
	 * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
	 * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
	 * tracking for prog array pokes") for details on verifier tracking.
	 *
	 * Note on clobber list: we need to stay in-line with BPF calling
	 * convention, so even if we don't end up using r0, r4, r5, we need
	 * to mark them as clobber so that LLVM doesn't end up using them
	 * before / after the call.
	 */
	__asm__ volatile("r1 = %[ctx]\n\t"
			 "r2 = %[map]\n\t"
			 "r3 = %[slot]\n\t"
			 "call 12"
			 :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
			 : "r0", "r1", "r2", "r3", "r4", "r5");
}
#endif
|
|
|
|
/* Map pinning modes. */
enum libbpf_pin_type {
	LIBBPF_PIN_NONE,
	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
	LIBBPF_PIN_BY_NAME,
};
|
|
|
|
/* Three-valued setting: no, yes, or module. */
enum libbpf_tristate {
	TRI_NO = 0,
	TRI_YES = 1,
	TRI_MODULE = 2,
};
|
|
|
|
/* Place an extern declaration in the ".kconfig" / ".ksyms" section. */
#define __kconfig __attribute__((section(".kconfig")))
#define __ksym __attribute__((section(".ksyms")))
/* BTF type tags attached to pointer members. */
#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#define __kptr __attribute__((btf_type_tag("kptr")))
#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr")))
|
|
|
|
/*
 * Evaluates to 1 when 'sym' is non-null and 0 otherwise. The static assertion
 * rejects any 'sym' whose truth value the compiler can already decide at
 * compile time; as the message says, such a symbol should be declared __weak.
 * The argument is parenthesized so compound expressions are negated as a
 * whole.
 */
#define bpf_ksym_exists(sym) ({								\
	_Static_assert(!__builtin_constant_p(!!(sym)), #sym " should be marked as __weak");	\
	!!(sym);									\
})
|
|
|
|
/*
 * BTF declaration tags for annotating function arguments. All of them use the
 * same __attribute__ spelling as the rest of this header.
 */
#define __arg_ctx __attribute__((btf_decl_tag("arg:ctx")))
#define __arg_nonnull __attribute__((btf_decl_tag("arg:nonnull")))
#define __arg_nullable __attribute__((btf_decl_tag("arg:nullable")))
#define __arg_trusted __attribute__((btf_decl_tag("arg:trusted")))
#define __arg_arena __attribute__((btf_decl_tag("arg:arena")))
|
|
|
|
/* Paste two tokens as written (arguments are NOT macro-expanded first). */
#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
#endif
/* Paste two tokens after macro-expanding both arguments. */
#ifndef ___bpf_apply
#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
#endif
/* Select the 14th argument; building block for argument counting. */
#ifndef ___bpf_nth
#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
#endif
/* Number of arguments passed, for up to 12 arguments. */
#ifndef ___bpf_narg
#define ___bpf_narg(...) \
	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
#endif

/*
 * ___bpf_fillN(arr, p, x1, ..., xN) stores the N values into arr[p],
 * arr[p + 1], ... Each variant handles one value and delegates the rest to
 * the next smaller variant. The expansions are bare statement sequences, so
 * they must be used where several statements are acceptable.
 */
#define ___bpf_fill0(arr, p, x) do {} while (0)
#define ___bpf_fill1(arr, p, x) arr[p] = x
#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
/* Store all given values into arr starting at index 0. */
#define ___bpf_fill(arr, args...) \
	___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
|
|
|
|
/*
|
|
* BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
|
|
* in a structure.
|
|
*/
|
|
/*
 * Copies the variadic arguments into a local unsigned long long array (one
 * slot per argument) and passes that array, together with the format string
 * and both sizes, to bpf_seq_printf(). -Wint-conversion is silenced while the
 * array is filled. The macro evaluates to bpf_seq_printf()'s return value.
 */
#define BPF_SEQ_PRINTF(seq, fmt, args...)			\
({								\
	static const char ___fmt[] = fmt;			\
	unsigned long long ___param[___bpf_narg(args)];		\
								\
	_Pragma("GCC diagnostic push")				\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
	___bpf_fill(___param, args);				\
	_Pragma("GCC diagnostic pop")				\
								\
	bpf_seq_printf(seq, ___fmt, sizeof(___fmt),		\
		       ___param, sizeof(___param));		\
})
|
|
|
|
/*
|
|
* BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
|
|
* an array of u64.
|
|
*/
|
|
/*
 * Copies the variadic arguments into a local unsigned long long array (one
 * slot per argument) and passes that array to bpf_snprintf() along with the
 * output buffer, its size and the format string. -Wint-conversion is silenced
 * while the array is filled. The macro evaluates to bpf_snprintf()'s return
 * value.
 */
#define BPF_SNPRINTF(out, out_size, fmt, args...)		\
({								\
	static const char ___fmt[] = fmt;			\
	unsigned long long ___param[___bpf_narg(args)];		\
								\
	_Pragma("GCC diagnostic push")				\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
	___bpf_fill(___param, args);				\
	_Pragma("GCC diagnostic pop")				\
								\
	bpf_snprintf(out, out_size, ___fmt,			\
		     ___param, sizeof(___param));		\
})
|
|
|
|
/*
 * Storage qualifiers for the format string used by __bpf_printk(): empty
 * (a plain local array) when BPF_NO_GLOBAL_DATA is defined, otherwise
 * "static const".
 */
#ifdef BPF_NO_GLOBAL_DATA
#define BPF_PRINTK_FMT_MOD
#else
#define BPF_PRINTK_FMT_MOD static const
#endif

/*
 * Copies 'fmt' into a char array and calls bpf_trace_printk() with that
 * array, its size (terminating NUL included) and the remaining arguments
 * passed through unchanged. Evaluates to bpf_trace_printk()'s return value.
 */
#define __bpf_printk(fmt, ...)				\
({							\
	BPF_PRINTK_FMT_MOD char ____fmt[] = fmt;	\
	bpf_trace_printk(____fmt, sizeof(____fmt),	\
			 ##__VA_ARGS__);		\
})
|
|
|
|
/*
|
|
* __bpf_vprintk wraps the bpf_trace_vprintk helper with variadic arguments
|
|
* instead of an array of u64.
|
|
*/
|
|
/*
 * Copies the variadic arguments into a local unsigned long long array (one
 * slot per argument) and passes that array, the format string and both sizes
 * to bpf_trace_vprintk(). -Wint-conversion is silenced while the array is
 * filled. The macro evaluates to bpf_trace_vprintk()'s return value.
 */
#define __bpf_vprintk(fmt, args...)				\
({								\
	static const char ___fmt[] = fmt;			\
	unsigned long long ___param[___bpf_narg(args)];		\
								\
	_Pragma("GCC diagnostic push")				\
	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	\
	___bpf_fill(___param, args);				\
	_Pragma("GCC diagnostic pop")				\
								\
	bpf_trace_vprintk(___fmt, sizeof(___fmt),		\
			  ___param, sizeof(___param));		\
})
|
|
|
|
/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
|
|
* Otherwise use __bpf_vprintk
|
|
*/
|
|
/*
 * Expands to the *name* of the macro that should handle the call:
 * __bpf_printk for 0-3 arguments, __bpf_vprintk for 4-12. Selection works by
 * shifting the candidate list with the caller's arguments and taking the
 * entry that lands in ___bpf_nth()'s fixed slot.
 */
#define ___bpf_pick_printk(...) \
	___bpf_nth(_, ##__VA_ARGS__, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk,	\
		   __bpf_vprintk, __bpf_vprintk, __bpf_vprintk, __bpf_vprintk,		\
		   __bpf_vprintk, __bpf_vprintk, __bpf_printk /*3*/, __bpf_printk /*2*/,\
		   __bpf_printk /*1*/, __bpf_printk /*0*/)

/* Helper macro to print out debug messages */
#define bpf_printk(fmt, args...) ___bpf_pick_printk(args)(fmt, ##args)
|
|
|
|
struct bpf_iter_num;
|
|
|
|
extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak __ksym;
|
|
extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym;
|
|
extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym;
|
|
|
|
#ifndef bpf_for_each
/* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for
 * using BPF open-coded iterators without having to write mundane explicit
 * low-level loop logic. Instead, it provides for()-like generic construct
 * that can be used pretty naturally. E.g., for some hypothetical cgroup
 * iterator, you'd write:
 *
 * struct cgroup *cg, *parent_cg = <...>;
 *
 * bpf_for_each(cgroup, cg, parent_cg, CG_ITER_CHILDREN) {
 *     bpf_printk("Child cgroup id = %d", cg->cgroup_id);
 *     if (cg->cgroup_id == 123)
 *         break;
 * }
 *
 * I.e., it looks almost like high-level for each loop in other languages,
 * supports continue/break, and is verifiable by BPF verifier.
 *
 * For iterating integers, the difference between bpf_for_each(num, i, N, M)
 * and bpf_for(i, N, M) is in that bpf_for() provides additional proof to
 * verifier that i is in [N, M) range, and in bpf_for_each() case i is `int
 * *`, not just `int`. So for integers bpf_for() is more convenient.
 *
 * Note: this macro relies on C99 feature of allowing to declare variables
 * inside for() loop, bound to for() loop lifetime. It also utilizes GCC
 * extension: __attribute__((cleanup(<func>))), supported by both GCC and
 * Clang.
 */
#define bpf_for_each(type, cur, args...) for (							\
	/* initialize and define destructor */							\
	struct bpf_iter_##type ___it __attribute__((aligned(8), /* enforce, just in case */	\
						    cleanup(bpf_iter_##type##_destroy))),	\
	/* ___p pointer is just to call bpf_iter_##type##_new() *once* to init ___it */		\
			       *___p __attribute__((unused)) = (				\
					bpf_iter_##type##_new(&___it, ##args),			\
	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
	/* for bpf_iter_##type##_destroy() when used from cleanup() attribute */		\
					(void)bpf_iter_##type##_destroy, (void *)0);		\
	/* iteration and termination check */							\
	(((cur) = bpf_iter_##type##_next(&___it)));						\
)
#endif /* bpf_for_each */
|
|
|
|
#ifndef bpf_for
/* bpf_for(i, start, end) implements a for()-like looping construct that sets
 * provided integer variable *i* to values starting from *start* through,
 * but not including, *end*. It also proves to BPF verifier that *i* belongs
 * to range [start, end), so this can be used for accessing arrays without
 * extra checks.
 *
 * Note: *start* and *end* are assumed to be expressions with no side effects
 * and whose values do not change throughout bpf_for() loop execution. They do
 * not have to be statically known or constant, though.
 *
 * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
 * loop bound variables and cleanup attribute, supported by GCC and Clang.
 */
#define bpf_for(i, start, end) for (								\
	/* initialize and define destructor */							\
	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
						 cleanup(bpf_iter_num_destroy))),		\
	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */		\
			    *___p __attribute__((unused)) = (					\
				bpf_iter_num_new(&___it, (start), (end)),			\
	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
	/* for bpf_iter_num_destroy() when used from cleanup() attribute */			\
				(void)bpf_iter_num_destroy, (void *)0);				\
	({											\
		/* iteration step */								\
		int *___t = bpf_iter_num_next(&___it);						\
		/* termination and bounds check */						\
		(___t && ((i) = *___t, (i) >= (start) && (i) < (end)));				\
	});											\
)
#endif /* bpf_for */
|
|
|
|
#ifndef bpf_repeat
/* bpf_repeat(N) performs N iterations without exposing iteration number
 *
 * Note: similarly to bpf_for_each(), it relies on C99 feature of declaring for()
 * loop bound variables and cleanup attribute, supported by GCC and Clang.
 */
#define bpf_repeat(N) for (									\
	/* initialize and define destructor */							\
	struct bpf_iter_num ___it __attribute__((aligned(8), /* enforce, just in case */	\
						 cleanup(bpf_iter_num_destroy))),		\
	/* ___p pointer is necessary to call bpf_iter_num_new() *once* to init ___it */		\
			    *___p __attribute__((unused)) = (					\
				bpf_iter_num_new(&___it, 0, (N)),				\
	/* this is a workaround for Clang bug: it currently doesn't emit BTF */			\
	/* for bpf_iter_num_destroy() when used from cleanup() attribute */			\
				(void)bpf_iter_num_destroy, (void *)0);				\
	bpf_iter_num_next(&___it);								\
	/* nothing here  */									\
)
#endif /* bpf_repeat */
|
|
|
|
#endif
|