mirror of
https://github.com/juanfont/headscale.git
synced 2025-08-22 14:07:38 +00:00
Compare commits
3 Commits
topic/dock
...
v0.23.0-al
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c8ebbede54 | ||
![]() |
8185a70dc7 | ||
![]() |
2dc62e981e |
14
CHANGELOG.md
14
CHANGELOG.md
@@ -26,7 +26,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
|
||||
- Code reorganisation, a lot of code has moved, please review the following PRs accordingly [#1473](https://github.com/juanfont/headscale/pull/1473)
|
||||
- Change the structure of database configuration, see [config-example.yaml](./config-example.yaml) for the new structure. [#1700](https://github.com/juanfont/headscale/pull/1700)
|
||||
- Old structure has been remove and the configuration _must_ be converted.
|
||||
- Adds additional configuration for PostgreSQL for setting max open, idle conection and idle connection lifetime.
|
||||
- Adds additional configuration for PostgreSQL for setting max open, idle connection and idle connection lifetime.
|
||||
- API: Machine is now Node [#1553](https://github.com/juanfont/headscale/pull/1553)
|
||||
- Remove support for older Tailscale clients [#1611](https://github.com/juanfont/headscale/pull/1611)
|
||||
- The latest supported client is 1.38
|
||||
@@ -70,7 +70,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
|
||||
### Changes
|
||||
|
||||
- Add environment flags to enable pprof (profiling) [#1382](https://github.com/juanfont/headscale/pull/1382)
|
||||
- Profiles are continously generated in our integration tests.
|
||||
- Profiles are continuously generated in our integration tests.
|
||||
- Fix systemd service file location in `.deb` packages [#1391](https://github.com/juanfont/headscale/pull/1391)
|
||||
- Improvements on Noise implementation [#1379](https://github.com/juanfont/headscale/pull/1379)
|
||||
- Replace node filter logic, ensuring nodes with access can see eachother [#1381](https://github.com/juanfont/headscale/pull/1381)
|
||||
@@ -161,7 +161,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
|
||||
- SSH ACLs status:
|
||||
- Support `accept` and `check` (SSH can be enabled and used for connecting and authentication)
|
||||
- Rejecting connections **are not supported**, meaning that if you enable SSH, then assume that _all_ `ssh` connections **will be allowed**.
|
||||
- If you decied to try this feature, please carefully managed permissions by blocking port `22` with regular ACLs or do _not_ set `--ssh` on your clients.
|
||||
- If you decided to try this feature, please carefully managed permissions by blocking port `22` with regular ACLs or do _not_ set `--ssh` on your clients.
|
||||
- We are currently improving our testing of the SSH ACLs, help us get an overview by testing and giving feedback.
|
||||
- This feature should be considered dangerous and it is disabled by default. Enable by setting `HEADSCALE_EXPERIMENTAL_FEATURE_SSH=1`.
|
||||
|
||||
@@ -211,7 +211,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
|
||||
### Changes
|
||||
|
||||
- Updated dependencies (including the library that lacked armhf support) [#722](https://github.com/juanfont/headscale/pull/722)
|
||||
- Fix missing group expansion in function `excludeCorretlyTaggedNodes` [#563](https://github.com/juanfont/headscale/issues/563)
|
||||
- Fix missing group expansion in function `excludeCorrectlyTaggedNodes` [#563](https://github.com/juanfont/headscale/issues/563)
|
||||
- Improve registration protocol implementation and switch to NodeKey as main identifier [#725](https://github.com/juanfont/headscale/pull/725)
|
||||
- Add ability to connect to PostgreSQL via unix socket [#734](https://github.com/juanfont/headscale/pull/734)
|
||||
|
||||
@@ -231,7 +231,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
|
||||
- Fix send on closed channel crash in polling [#542](https://github.com/juanfont/headscale/pull/542)
|
||||
- Fixed spurious calls to setLastStateChangeToNow from ephemeral nodes [#566](https://github.com/juanfont/headscale/pull/566)
|
||||
- Add command for moving nodes between namespaces [#362](https://github.com/juanfont/headscale/issues/362)
|
||||
- Added more configuration parameters for OpenID Connect (scopes, free-form paramters, domain and user allowlist)
|
||||
- Added more configuration parameters for OpenID Connect (scopes, free-form parameters, domain and user allowlist)
|
||||
- Add command to set tags on a node [#525](https://github.com/juanfont/headscale/issues/525)
|
||||
- Add command to view tags of nodes [#356](https://github.com/juanfont/headscale/issues/356)
|
||||
- Add --all (-a) flag to enable routes command [#360](https://github.com/juanfont/headscale/issues/360)
|
||||
@@ -279,10 +279,10 @@ after improving the test harness as part of adopting [#1460](https://github.com/
|
||||
|
||||
- Fix a bug were the same IP could be assigned to multiple hosts if joined in quick succession [#346](https://github.com/juanfont/headscale/pull/346)
|
||||
- Simplify the code behind registration of machines [#366](https://github.com/juanfont/headscale/pull/366)
|
||||
- Nodes are now only written to database if they are registrated successfully
|
||||
- Nodes are now only written to database if they are registered successfully
|
||||
- Fix a limitation in the ACLs that prevented users to write rules with `*` as source [#374](https://github.com/juanfont/headscale/issues/374)
|
||||
- Reduce the overhead of marshal/unmarshal for Hostinfo, routes and endpoints by using specific types in Machine [#371](https://github.com/juanfont/headscale/pull/371)
|
||||
- Apply normalization function to FQDN on hostnames when hosts registers and retrieve informations [#363](https://github.com/juanfont/headscale/issues/363)
|
||||
- Apply normalization function to FQDN on hostnames when hosts registers and retrieve information [#363](https://github.com/juanfont/headscale/issues/363)
|
||||
- Fix a bug that prevented the use of `tailscale logout` with OIDC [#508](https://github.com/juanfont/headscale/issues/508)
|
||||
- Added Tailscale repo HEAD and unstable releases channel to the integration tests targets [#513](https://github.com/juanfont/headscale/pull/513)
|
||||
|
||||
|
@@ -1,21 +1,43 @@
|
||||
# This Dockerfile and the images produced are for testing headscale,
|
||||
# and are in no way endorsed by Headscale's maintainers as an
|
||||
# official nor supported release or distribution.
|
||||
# Copyright (c) Tailscale Inc & AUTHORS
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
FROM golang:latest
|
||||
# This Dockerfile is more or less lifted from tailscale/tailscale
|
||||
# to ensure a similar build process when testing the HEAD of tailscale.
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y dnsutils git iptables ssh ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
FROM golang:1.22-alpine AS build-env
|
||||
|
||||
RUN useradd --shell=/bin/bash --create-home ssh-it-user
|
||||
WORKDIR /go/src
|
||||
|
||||
RUN apk add --no-cache git
|
||||
|
||||
# Replace `RUN git...` with `COPY` and a local checked out version of Tailscale in `./tailscale`
|
||||
# to test specific commits of the Tailscale client. This is useful when trying to find out why
|
||||
# something specific broke between two versions of Tailscale with for example `git bisect`.
|
||||
# COPY ./tailscale .
|
||||
RUN git clone https://github.com/tailscale/tailscale.git
|
||||
|
||||
WORKDIR /go/tailscale
|
||||
WORKDIR /go/src/tailscale
|
||||
|
||||
RUN git checkout main \
|
||||
&& sh build_dist.sh tailscale.com/cmd/tailscale \
|
||||
&& sh build_dist.sh tailscale.com/cmd/tailscaled \
|
||||
&& cp tailscale /usr/local/bin/ \
|
||||
&& cp tailscaled /usr/local/bin/
|
||||
|
||||
# see build_docker.sh
|
||||
ARG VERSION_LONG=""
|
||||
ENV VERSION_LONG=$VERSION_LONG
|
||||
ARG VERSION_SHORT=""
|
||||
ENV VERSION_SHORT=$VERSION_SHORT
|
||||
ARG VERSION_GIT_HASH=""
|
||||
ENV VERSION_GIT_HASH=$VERSION_GIT_HASH
|
||||
ARG TARGETARCH
|
||||
|
||||
RUN GOARCH=$TARGETARCH go install -ldflags="\
|
||||
-X tailscale.com/version.longStamp=$VERSION_LONG \
|
||||
-X tailscale.com/version.shortStamp=$VERSION_SHORT \
|
||||
-X tailscale.com/version.gitCommitStamp=$VERSION_GIT_HASH" \
|
||||
-v ./cmd/tailscale ./cmd/tailscaled ./cmd/containerboot
|
||||
|
||||
FROM alpine:3.18
|
||||
RUN apk add --no-cache ca-certificates iptables iproute2 ip6tables curl
|
||||
|
||||
COPY --from=build-env /go/bin/* /usr/local/bin/
|
||||
# For compat with the previous run.sh, although ideally you should be
|
||||
# using build_docker.sh which sets an entrypoint for the image.
|
||||
RUN mkdir /tailscale && ln -s /usr/local/bin/containerboot /tailscale/run.sh
|
||||
|
@@ -99,7 +99,7 @@ Please read the [CONTRIBUTING.md](./CONTRIBUTING.md) file.
|
||||
|
||||
### Requirements
|
||||
|
||||
To contribute to headscale you would need the lastest version of [Go](https://golang.org)
|
||||
To contribute to headscale you would need the latest version of [Go](https://golang.org)
|
||||
and [Buf](https://buf.build)(Protobuf generator).
|
||||
|
||||
We recommend using [Nix](https://nixos.org/) to setup a development environment. This can
|
||||
|
@@ -105,7 +105,7 @@ derp:
|
||||
automatically_add_embedded_derp_region: true
|
||||
|
||||
# For better connection stability (especially when using an Exit-Node and DNS is not working),
|
||||
# it is possible to optionall add the public IPv4 and IPv6 address to the Derp-Map using:
|
||||
# it is possible to optionally add the public IPv4 and IPv6 address to the Derp-Map using:
|
||||
ipv4: 1.2.3.4
|
||||
ipv6: 2001:db8::1
|
||||
|
||||
@@ -199,7 +199,7 @@ log:
|
||||
format: text
|
||||
level: info
|
||||
|
||||
# Path to a file containg ACL policies.
|
||||
# Path to a file containing ACL policies.
|
||||
# ACLs can be defined as YAML or HUJSON.
|
||||
# https://tailscale.com/kb/1018/acls/
|
||||
acl_policy_path: ""
|
||||
|
@@ -14,7 +14,7 @@ If the node is already registered, it can advertise exit capabilities like this:
|
||||
$ sudo tailscale set --advertise-exit-node
|
||||
```
|
||||
|
||||
To use a node as an exit node, IP forwarding must be enabled on the node. Check the official [Tailscale documentation](https://tailscale.com/kb/1019/subnets/?tab=linux#enable-ip-forwarding) for how to enable IP fowarding.
|
||||
To use a node as an exit node, IP forwarding must be enabled on the node. Check the official [Tailscale documentation](https://tailscale.com/kb/1019/subnets/?tab=linux#enable-ip-forwarding) for how to enable IP forwarding.
|
||||
|
||||
## On the control server
|
||||
|
||||
|
@@ -36,7 +36,7 @@ We don't know. We might be working on it. If you want to help, please send us a
|
||||
Please be aware that there are a number of reasons why we might not accept specific contributions:
|
||||
|
||||
- It is not possible to implement the feature in a way that makes sense in a self-hosted environment.
|
||||
- Given that we are reverse-engineering Tailscale to satify our own curiosity, we might be interested in implementing the feature ourselves.
|
||||
- Given that we are reverse-engineering Tailscale to satisfy our own curiosity, we might be interested in implementing the feature ourselves.
|
||||
- You are not sending unit and integration tests with it.
|
||||
|
||||
## Do you support Y method of deploying Headscale?
|
||||
|
@@ -58,12 +58,12 @@ A solution could be to consider a headscale server (in it's entirety) as a
|
||||
tailnet.
|
||||
|
||||
For personal users the default behavior could either allow all communications
|
||||
between all namespaces (like tailscale) or dissallow all communications between
|
||||
between all namespaces (like tailscale) or disallow all communications between
|
||||
namespaces (current behavior).
|
||||
|
||||
For businesses and organisations, viewing a headscale instance a single tailnet
|
||||
would allow users (namespace) to talk to each other with the ACLs. As described
|
||||
in tailscale's documentation [[1]], a server should be tagged and personnal
|
||||
in tailscale's documentation [[1]], a server should be tagged and personal
|
||||
devices should be tied to a user. Translated in headscale's terms each user can
|
||||
have multiple devices and all those devices should be in the same namespace.
|
||||
The servers should be tagged and used as such.
|
||||
@@ -88,7 +88,7 @@ the ability to rules in either format (HuJSON or YAML).
|
||||
Let's build an example use case for a small business (It may be the place where
|
||||
ACL's are the most useful).
|
||||
|
||||
We have a small company with a boss, an admin, two developper and an intern.
|
||||
We have a small company with a boss, an admin, two developer and an intern.
|
||||
|
||||
The boss should have access to all servers but not to the users hosts. Admin
|
||||
should also have access to all hosts except that their permissions should be
|
||||
@@ -173,7 +173,7 @@ need to add the following ACLs
|
||||
"ports": ["prod:*", "dev:*", "internal:*"]
|
||||
},
|
||||
|
||||
// admin have access to adminstration port (lets only consider port 22 here)
|
||||
// admin have access to administration port (lets only consider port 22 here)
|
||||
{
|
||||
"action": "accept",
|
||||
"users": ["group:admin"],
|
||||
|
@@ -1,13 +1,13 @@
|
||||
# Controlling `headscale` with remote CLI
|
||||
|
||||
## Prerequisit
|
||||
## Prerequisite
|
||||
|
||||
- A workstation to run `headscale` (could be Linux, macOS, other supported platforms)
|
||||
- A `headscale` server (version `0.13.0` or newer)
|
||||
- Access to create API keys (local access to the `headscale` server)
|
||||
- `headscale` _must_ be served over TLS/HTTPS
|
||||
- Remote access does _not_ support unencrypted traffic.
|
||||
- Port `50443` must be open in the firewall (or port overriden by `grpc_listen_addr` option)
|
||||
- Port `50443` must be open in the firewall (or port overridden by `grpc_listen_addr` option)
|
||||
|
||||
## Goal
|
||||
|
||||
@@ -97,4 +97,4 @@ Checklist:
|
||||
- Make sure you use version `0.13.0` or newer.
|
||||
- Verify that your TLS certificate is valid and trusted
|
||||
- If you do not have access to a trusted certificate (e.g. from Let's Encrypt), add your self signed certificate to the trust store of your OS or
|
||||
- Set `HEADSCALE_CLI_INSECURE` to 0 in your environement
|
||||
- Set `HEADSCALE_CLI_INSECURE` to 0 in your environment
|
||||
|
@@ -115,7 +115,7 @@ The following Caddyfile is all that is necessary to use Caddy as a reverse proxy
|
||||
}
|
||||
```
|
||||
|
||||
Caddy v2 will [automatically](https://caddyserver.com/docs/automatic-https) provision a certficate for your domain/subdomain, force HTTPS, and proxy websockets - no further configuration is necessary.
|
||||
Caddy v2 will [automatically](https://caddyserver.com/docs/automatic-https) provision a certificate for your domain/subdomain, force HTTPS, and proxy websockets - no further configuration is necessary.
|
||||
|
||||
For a slightly more complex configuration which utilizes Docker containers to manage Caddy, Headscale, and Headscale-UI, [Guru Computing's guide](https://blog.gurucomputing.com.au/smart-vpns-with-headscale/) is an excellent reference.
|
||||
|
||||
|
@@ -30,7 +30,7 @@ describing how to make `headscale` run properly in a server environment.
|
||||
cd headscale
|
||||
|
||||
# optionally checkout a release
|
||||
# option a. you can find offical relase at https://github.com/juanfont/headscale/releases/latest
|
||||
# option a. you can find official release at https://github.com/juanfont/headscale/releases/latest
|
||||
# option b. get latest tag, this may be a beta release
|
||||
latestTag=$(git describe --tags `git rev-list --tags --max-count=1`)
|
||||
|
||||
@@ -57,7 +57,7 @@ describing how to make `headscale` run properly in a server environment.
|
||||
cd headscale
|
||||
|
||||
# optionally checkout a release
|
||||
# option a. you can find offical relase at https://github.com/juanfont/headscale/releases/latest
|
||||
# option a. you can find official release at https://github.com/juanfont/headscale/releases/latest
|
||||
# option b. get latest tag, this may be a beta release
|
||||
latestTag=$(git describe --tags `git rev-list --tags --max-count=1`)
|
||||
|
||||
|
@@ -31,7 +31,7 @@
|
||||
|
||||
# When updating go.mod or go.sum, a new sha will need to be calculated,
|
||||
# update this if you have a mismatch after doing a change to thos files.
|
||||
vendorHash = "sha256-wXfKeiJaGe6ahOsONrQhvbuMN8flQ13b0ZjxdbFs1e8=";
|
||||
vendorHash = "sha256-EorT2AVwA3usly/LcNor6r5UIhLCdj3L4O4ilgTIC2o=";
|
||||
|
||||
subPackages = ["cmd/headscale"];
|
||||
|
||||
|
2
go.mod
2
go.mod
@@ -29,6 +29,7 @@ require (
|
||||
github.com/puzpuzpuz/xsync/v3 v3.1.0
|
||||
github.com/rs/zerolog v1.32.0
|
||||
github.com/samber/lo v1.39.0
|
||||
github.com/sasha-s/go-deadlock v0.3.1
|
||||
github.com/spf13/cobra v1.8.0
|
||||
github.com/spf13/viper v1.18.2
|
||||
github.com/stretchr/testify v1.9.0
|
||||
@@ -155,6 +156,7 @@ require (
|
||||
github.com/opencontainers/image-spec v1.1.0 // indirect
|
||||
github.com/opencontainers/runc v1.1.12 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
|
||||
github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.21 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||
|
4
go.sum
4
go.sum
@@ -367,6 +367,8 @@ github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaR
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
|
||||
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
|
||||
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
|
||||
github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 h1:q2e307iGHPdTGp0hoxKjt1H5pDo6utceo3dQVK3I5XQ=
|
||||
github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5/go.mod h1:jvVRKCrJTQWu0XVbaOlby/2lO20uSCHEMzzplHXte1o=
|
||||
github.com/philip-bui/grpc-zerolog v1.0.1 h1:EMacvLRUd2O1K0eWod27ZP5CY1iTNkhBDLSN+Q4JEvA=
|
||||
github.com/philip-bui/grpc-zerolog v1.0.1/go.mod h1:qXbiq/2X4ZUMMshsqlWyTHOcw7ns+GZmlqZZN05ZHcQ=
|
||||
github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
|
||||
@@ -423,6 +425,8 @@ github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6g
|
||||
github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
|
||||
github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
|
||||
github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
|
||||
github.com/sasha-s/go-deadlock v0.3.1 h1:sqv7fDNShgjcaxkO0JNcOAlr8B9+cV5Ey/OB71efZx0=
|
||||
github.com/sasha-s/go-deadlock v0.3.1/go.mod h1:F73l+cr82YSh10GxyRI6qZiCgK64VaZjwesgfQ1/iLM=
|
||||
github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
|
||||
github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
|
||||
github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
|
||||
|
@@ -19,6 +19,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/coreos/go-oidc/v3/oidc"
|
||||
"github.com/davecgh/go-spew/spew"
|
||||
"github.com/gorilla/mux"
|
||||
grpcMiddleware "github.com/grpc-ecosystem/go-grpc-middleware"
|
||||
grpcRuntime "github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
|
||||
@@ -104,16 +105,15 @@ type Headscale struct {
|
||||
registrationCache *cache.Cache
|
||||
|
||||
pollNetMapStreamWG sync.WaitGroup
|
||||
|
||||
mapSessions map[types.NodeID]*mapSession
|
||||
mapSessionMu sync.Mutex
|
||||
}
|
||||
|
||||
var (
|
||||
profilingEnabled = envknob.Bool("HEADSCALE_PROFILING_ENABLED")
|
||||
profilingEnabled = envknob.Bool("HEADSCALE_DEBUG_PROFILING_ENABLED")
|
||||
profilingPath = envknob.String("HEADSCALE_DEBUG_PROFILING_PATH")
|
||||
tailsqlEnabled = envknob.Bool("HEADSCALE_DEBUG_TAILSQL_ENABLED")
|
||||
tailsqlStateDir = envknob.String("HEADSCALE_DEBUG_TAILSQL_STATE_DIR")
|
||||
tailsqlTSKey = envknob.String("TS_AUTHKEY")
|
||||
dumpConfig = envknob.Bool("HEADSCALE_DEBUG_DUMP_CONFIG")
|
||||
)
|
||||
|
||||
func NewHeadscale(cfg *types.Config) (*Headscale, error) {
|
||||
@@ -138,7 +138,6 @@ func NewHeadscale(cfg *types.Config) (*Headscale, error) {
|
||||
registrationCache: registrationCache,
|
||||
pollNetMapStreamWG: sync.WaitGroup{},
|
||||
nodeNotifier: notifier.NewNotifier(cfg),
|
||||
mapSessions: make(map[types.NodeID]*mapSession),
|
||||
}
|
||||
|
||||
app.db, err = db.NewHeadscaleDatabase(
|
||||
@@ -330,7 +329,7 @@ func (h *Headscale) grpcAuthenticationInterceptor(ctx context.Context,
|
||||
// Check if the request is coming from the on-server client.
|
||||
// This is not secure, but it is to maintain maintainability
|
||||
// with the "legacy" database-based client
|
||||
// It is also neede for grpc-gateway to be able to connect to
|
||||
// It is also needed for grpc-gateway to be able to connect to
|
||||
// the server
|
||||
client, _ := peer.FromContext(ctx)
|
||||
|
||||
@@ -502,14 +501,14 @@ func (h *Headscale) createRouter(grpcMux *grpcRuntime.ServeMux) *mux.Router {
|
||||
|
||||
// Serve launches the HTTP and gRPC server service Headscale and the API.
|
||||
func (h *Headscale) Serve() error {
|
||||
if _, enableProfile := os.LookupEnv("HEADSCALE_PROFILING_ENABLED"); enableProfile {
|
||||
if profilePath, ok := os.LookupEnv("HEADSCALE_PROFILING_PATH"); ok {
|
||||
err := os.MkdirAll(profilePath, os.ModePerm)
|
||||
if profilingEnabled {
|
||||
if profilingPath != "" {
|
||||
err := os.MkdirAll(profilingPath, os.ModePerm)
|
||||
if err != nil {
|
||||
log.Fatal().Err(err).Msg("failed to create profiling directory")
|
||||
}
|
||||
|
||||
defer profile.Start(profile.ProfilePath(profilePath)).Stop()
|
||||
defer profile.Start(profile.ProfilePath(profilingPath)).Stop()
|
||||
} else {
|
||||
defer profile.Start().Stop()
|
||||
}
|
||||
@@ -517,6 +516,10 @@ func (h *Headscale) Serve() error {
|
||||
|
||||
var err error
|
||||
|
||||
if dumpConfig {
|
||||
spew.Dump(h.cfg)
|
||||
}
|
||||
|
||||
// Fetch an initial DERP Map before we start serving
|
||||
h.DERPMap = derp.GetDERPMap(h.cfg.DERP)
|
||||
h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier)
|
||||
@@ -729,19 +732,6 @@ func (h *Headscale) Serve() error {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(h.nodeNotifier.String()))
|
||||
})
|
||||
debugMux.HandleFunc("/debug/mapresp", func(w http.ResponseWriter, r *http.Request) {
|
||||
h.mapSessionMu.Lock()
|
||||
defer h.mapSessionMu.Unlock()
|
||||
|
||||
var b strings.Builder
|
||||
b.WriteString("mapresponders:\n")
|
||||
for k, v := range h.mapSessions {
|
||||
fmt.Fprintf(&b, "\t%d: %p\n", k, v)
|
||||
}
|
||||
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(b.String()))
|
||||
})
|
||||
debugMux.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
debugHTTPServer := &http.Server{
|
||||
@@ -822,17 +812,6 @@ func (h *Headscale) Serve() error {
|
||||
expireNodeCancel()
|
||||
expireEphemeralCancel()
|
||||
|
||||
trace("closing map sessions")
|
||||
wg := sync.WaitGroup{}
|
||||
for _, mapSess := range h.mapSessions {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
mapSess.close()
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
trace("waiting for netmap stream to close")
|
||||
h.pollNetMapStreamWG.Wait()
|
||||
|
||||
|
@@ -661,7 +661,7 @@ func GenerateGivenName(
|
||||
}
|
||||
|
||||
func DeleteExpiredEphemeralNodes(tx *gorm.DB,
|
||||
inactivityThreshhold time.Duration,
|
||||
inactivityThreshold time.Duration,
|
||||
) ([]types.NodeID, []types.NodeID) {
|
||||
users, err := ListUsers(tx)
|
||||
if err != nil {
|
||||
@@ -679,7 +679,7 @@ func DeleteExpiredEphemeralNodes(tx *gorm.DB,
|
||||
for idx, node := range nodes {
|
||||
if node.IsEphemeral() && node.LastSeen != nil &&
|
||||
time.Now().
|
||||
After(node.LastSeen.Add(inactivityThreshhold)) {
|
||||
After(node.LastSeen.Add(inactivityThreshold)) {
|
||||
expired = append(expired, node.ID)
|
||||
|
||||
log.Info().
|
||||
|
@@ -393,7 +393,7 @@ func (s *Suite) TestSetTags(c *check.C) {
|
||||
c.Assert(err, check.IsNil)
|
||||
c.Assert(node.ForcedTags, check.DeepEquals, types.StringList(sTags))
|
||||
|
||||
// assign duplicat tags, expect no errors but no doubles in DB
|
||||
// assign duplicate tags, expect no errors but no doubles in DB
|
||||
eTags := []string{"tag:bar", "tag:test", "tag:unknown", "tag:test"}
|
||||
err = db.SetTags(node.ID, eTags)
|
||||
c.Assert(err, check.IsNil)
|
||||
|
@@ -83,7 +83,7 @@ func CreatePreAuthKey(
|
||||
if !seenTags[tag] {
|
||||
if err := tx.Save(&types.PreAuthKeyACLTag{PreAuthKeyID: key.ID, Tag: tag}).Error; err != nil {
|
||||
return nil, fmt.Errorf(
|
||||
"failed to ceate key tag in the database: %w",
|
||||
"failed to create key tag in the database: %w",
|
||||
err,
|
||||
)
|
||||
}
|
||||
|
@@ -204,7 +204,7 @@ func DERPProbeHandler(
|
||||
}
|
||||
}
|
||||
|
||||
// DERPBootstrapDNSHandler implements the /bootsrap-dns endpoint
|
||||
// DERPBootstrapDNSHandler implements the /bootstrap-dns endpoint
|
||||
// Described in https://github.com/tailscale/tailscale/issues/1405,
|
||||
// this endpoint provides a way to help a client when it fails to start up
|
||||
// because its DNS are broken.
|
||||
|
@@ -7,8 +7,23 @@ import (
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"tailscale.com/envknob"
|
||||
)
|
||||
|
||||
var debugHighCardinalityMetrics = envknob.Bool("HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS")
|
||||
|
||||
var mapResponseLastSentSeconds *prometheus.GaugeVec
|
||||
|
||||
func init() {
|
||||
if debugHighCardinalityMetrics {
|
||||
mapResponseLastSentSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "mapresponse_last_sent_seconds",
|
||||
Help: "last sent metric to node.id",
|
||||
}, []string{"type", "id"})
|
||||
}
|
||||
}
|
||||
|
||||
const prometheusNamespace = "headscale"
|
||||
|
||||
var (
|
||||
@@ -37,16 +52,16 @@ var (
|
||||
Name: "mapresponse_readonly_requests_total",
|
||||
Help: "total count of readonly requests received",
|
||||
}, []string{"status"})
|
||||
mapResponseSessions = promauto.NewGauge(prometheus.GaugeOpts{
|
||||
mapResponseEnded = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "mapresponse_current_sessions_total",
|
||||
Help: "total count open map response sessions",
|
||||
})
|
||||
mapResponseRejected = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "mapresponse_rejected_new_sessions_total",
|
||||
Help: "total count of new mapsessions rejected",
|
||||
Name: "mapresponse_ended_total",
|
||||
Help: "total count of new mapsessions ended",
|
||||
}, []string{"reason"})
|
||||
mapResponseClosed = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "mapresponse_closed_total",
|
||||
Help: "total count of calls to mapresponse close",
|
||||
}, []string{"return"})
|
||||
httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "http_duration_seconds",
|
||||
|
@@ -231,62 +231,12 @@ func (ns *noiseServer) NoisePollNetMapHandler(
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
sess := ns.headscale.newMapSession(req.Context(), mapRequest, writer, node)
|
||||
|
||||
sess.tracef("a node sending a MapRequest with Noise protocol")
|
||||
|
||||
// If a streaming mapSession exists for this node, close it
|
||||
// and start a new one.
|
||||
if sess.isStreaming() {
|
||||
sess.tracef("aquiring lock to check stream")
|
||||
|
||||
ns.headscale.mapSessionMu.Lock()
|
||||
if _, ok := ns.headscale.mapSessions[node.ID]; ok {
|
||||
// NOTE/TODO(kradalby): From how I understand the protocol, when
|
||||
// a client connects with stream=true, and already has a streaming
|
||||
// connection open, the correct way is to close the current channel
|
||||
// and replace it. However, I cannot manage to get that working with
|
||||
// some sort of lock/block happening on the cancelCh in the streaming
|
||||
// session.
|
||||
// Not closing the channel and replacing it puts us in a weird state
|
||||
// which keeps a ghost stream open, receiving keep alives, but no updates.
|
||||
//
|
||||
// Typically a new connection is opened when one exists as a client which
|
||||
// is already authenticated reconnects (e.g. down, then up). The client will
|
||||
// start auth and streaming at the same time, and then cancel the streaming
|
||||
// when the auth has finished successfully, opening a new connection.
|
||||
//
|
||||
// As a work-around to not replacing, abusing the clients "resilience"
|
||||
// by reject the new connection which will cause the client to immediately
|
||||
// reconnect and "fix" the issue, as the other connection typically has been
|
||||
// closed, meaning there is nothing to replace.
|
||||
//
|
||||
// sess.infof("node has an open stream(%p), replacing with %p", oldSession, sess)
|
||||
// oldSession.close()
|
||||
|
||||
defer ns.headscale.mapSessionMu.Unlock()
|
||||
|
||||
sess.infof("node has an open stream(%p), rejecting new stream", sess)
|
||||
mapResponseRejected.WithLabelValues("exists").Inc()
|
||||
return
|
||||
}
|
||||
|
||||
ns.headscale.mapSessions[node.ID] = sess
|
||||
mapResponseSessions.Inc()
|
||||
ns.headscale.mapSessionMu.Unlock()
|
||||
sess.tracef("releasing lock to check stream")
|
||||
}
|
||||
|
||||
sess.serve()
|
||||
|
||||
if sess.isStreaming() {
|
||||
sess.tracef("aquiring lock to remove stream")
|
||||
ns.headscale.mapSessionMu.Lock()
|
||||
defer ns.headscale.mapSessionMu.Unlock()
|
||||
|
||||
delete(ns.headscale.mapSessions, node.ID)
|
||||
mapResponseSessions.Dec()
|
||||
|
||||
sess.tracef("releasing lock to remove stream")
|
||||
if !sess.isStreaming() {
|
||||
sess.serve()
|
||||
} else {
|
||||
sess.serveLongPoll()
|
||||
}
|
||||
}
|
||||
|
@@ -3,22 +3,43 @@ package notifier
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"tailscale.com/envknob"
|
||||
)
|
||||
|
||||
const prometheusNamespace = "headscale"
|
||||
|
||||
var debugHighCardinalityMetrics = envknob.Bool("HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS")
|
||||
|
||||
var notifierUpdateSent *prometheus.CounterVec
|
||||
|
||||
func init() {
|
||||
if debugHighCardinalityMetrics {
|
||||
notifierUpdateSent = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_update_sent_total",
|
||||
Help: "total count of update sent on nodes channel",
|
||||
}, []string{"status", "type", "trigger", "id"})
|
||||
} else {
|
||||
notifierUpdateSent = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_update_sent_total",
|
||||
Help: "total count of update sent on nodes channel",
|
||||
}, []string{"status", "type", "trigger"})
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
notifierWaitersForLock = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_waiters_for_lock",
|
||||
Help: "gauge of waiters for the notifier lock",
|
||||
}, []string{"type", "action"})
|
||||
notifierWaitForLock = promauto.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_wait_for_lock_seconds",
|
||||
Help: "histogram of time spent waiting for the notifier lock",
|
||||
Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.5, 1, 3, 5, 10},
|
||||
}, []string{"action"})
|
||||
notifierUpdateSent = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_update_sent_total",
|
||||
Help: "total count of update sent on nodes channel",
|
||||
}, []string{"status", "type", "trigger"})
|
||||
notifierUpdateReceived = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_update_received_total",
|
||||
@@ -29,4 +50,19 @@ var (
|
||||
Name: "notifier_open_channels_total",
|
||||
Help: "total count open channels in notifier",
|
||||
})
|
||||
notifierBatcherWaitersForLock = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_batcher_waiters_for_lock",
|
||||
Help: "gauge of waiters for the notifier batcher lock",
|
||||
}, []string{"type", "action"})
|
||||
notifierBatcherChanges = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_batcher_changes_pending",
|
||||
Help: "gauge of full changes pending in the notifier batcher",
|
||||
}, []string{})
|
||||
notifierBatcherPatches = promauto.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: prometheusNamespace,
|
||||
Name: "notifier_batcher_patches_pending",
|
||||
Help: "gauge of patches pending in the notifier batcher",
|
||||
}, []string{})
|
||||
)
|
||||
|
@@ -11,25 +11,40 @@ import (
|
||||
"github.com/juanfont/headscale/hscontrol/types"
|
||||
"github.com/puzpuzpuz/xsync/v3"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/sasha-s/go-deadlock"
|
||||
"tailscale.com/envknob"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/util/set"
|
||||
)
|
||||
|
||||
var debugDeadlock = envknob.Bool("HEADSCALE_DEBUG_DEADLOCK")
|
||||
var debugDeadlockTimeout = envknob.RegisterDuration("HEADSCALE_DEBUG_DEADLOCK_TIMEOUT")
|
||||
|
||||
func init() {
|
||||
deadlock.Opts.Disable = !debugDeadlock
|
||||
if debugDeadlock {
|
||||
deadlock.Opts.DeadlockTimeout = debugDeadlockTimeout()
|
||||
deadlock.Opts.PrintAllCurrentGoroutines = true
|
||||
}
|
||||
}
|
||||
|
||||
type Notifier struct {
|
||||
l sync.RWMutex
|
||||
l deadlock.Mutex
|
||||
nodes map[types.NodeID]chan<- types.StateUpdate
|
||||
connected *xsync.MapOf[types.NodeID, bool]
|
||||
b *batcher
|
||||
cfg *types.Config
|
||||
}
|
||||
|
||||
func NewNotifier(cfg *types.Config) *Notifier {
|
||||
n := &Notifier{
|
||||
nodes: make(map[types.NodeID]chan<- types.StateUpdate),
|
||||
connected: xsync.NewMapOf[types.NodeID, bool](),
|
||||
cfg: cfg,
|
||||
}
|
||||
b := newBatcher(cfg.Tuning.BatchChangeDelay, n)
|
||||
n.b = b
|
||||
// TODO(kradalby): clean this up
|
||||
|
||||
go b.doWork()
|
||||
return n
|
||||
}
|
||||
@@ -39,59 +54,75 @@ func (n *Notifier) Close() {
|
||||
n.b.close()
|
||||
}
|
||||
|
||||
func (n *Notifier) AddNode(nodeID types.NodeID, c chan<- types.StateUpdate) {
|
||||
log.Trace().Caller().Uint64("node.id", nodeID.Uint64()).Msg("acquiring lock to add node")
|
||||
defer log.Trace().
|
||||
Caller().
|
||||
Uint64("node.id", nodeID.Uint64()).
|
||||
Msg("releasing lock to add node")
|
||||
func (n *Notifier) tracef(nID types.NodeID, msg string, args ...any) {
|
||||
log.Trace().
|
||||
Uint64("node.id", nID.Uint64()).
|
||||
Int("open_chans", len(n.nodes)).Msgf(msg, args...)
|
||||
}
|
||||
|
||||
func (n *Notifier) AddNode(nodeID types.NodeID, c chan<- types.StateUpdate) {
|
||||
start := time.Now()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "add").Inc()
|
||||
n.l.Lock()
|
||||
defer n.l.Unlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "add").Dec()
|
||||
notifierWaitForLock.WithLabelValues("add").Observe(time.Since(start).Seconds())
|
||||
|
||||
// If a channel exists, it means the node has opened a new
|
||||
// connection. Close the old channel and replace it.
|
||||
if curr, ok := n.nodes[nodeID]; ok {
|
||||
n.tracef(nodeID, "channel present, closing and replacing")
|
||||
close(curr)
|
||||
}
|
||||
|
||||
n.nodes[nodeID] = c
|
||||
n.connected.Store(nodeID, true)
|
||||
|
||||
log.Trace().
|
||||
Uint64("node.id", nodeID.Uint64()).
|
||||
Int("open_chans", len(n.nodes)).
|
||||
Msg("Added new channel")
|
||||
n.tracef(nodeID, "added new channel")
|
||||
notifierNodeUpdateChans.Inc()
|
||||
}
|
||||
|
||||
func (n *Notifier) RemoveNode(nodeID types.NodeID) {
|
||||
log.Trace().Caller().Uint64("node.id", nodeID.Uint64()).Msg("acquiring lock to remove node")
|
||||
defer log.Trace().
|
||||
Caller().
|
||||
Uint64("node.id", nodeID.Uint64()).
|
||||
Msg("releasing lock to remove node")
|
||||
|
||||
// RemoveNode removes a node and a given channel from the notifier.
|
||||
// It checks that the channel is the same as currently being updated
|
||||
// and ignores the removal if it is not.
|
||||
// RemoveNode reports if the node/chan was removed.
|
||||
func (n *Notifier) RemoveNode(nodeID types.NodeID, c chan<- types.StateUpdate) bool {
|
||||
start := time.Now()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "remove").Inc()
|
||||
n.l.Lock()
|
||||
defer n.l.Unlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "remove").Dec()
|
||||
notifierWaitForLock.WithLabelValues("remove").Observe(time.Since(start).Seconds())
|
||||
|
||||
if len(n.nodes) == 0 {
|
||||
return
|
||||
return true
|
||||
}
|
||||
|
||||
// If the channel exist, but it does not belong
|
||||
// to the caller, ignore.
|
||||
if curr, ok := n.nodes[nodeID]; ok {
|
||||
if curr != c {
|
||||
n.tracef(nodeID, "channel has been replaced, not removing")
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
delete(n.nodes, nodeID)
|
||||
n.connected.Store(nodeID, false)
|
||||
|
||||
log.Trace().
|
||||
Uint64("node.id", nodeID.Uint64()).
|
||||
Int("open_chans", len(n.nodes)).
|
||||
Msg("Removed channel")
|
||||
n.tracef(nodeID, "removed channel")
|
||||
notifierNodeUpdateChans.Dec()
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// IsConnected reports if a node is connected to headscale and has a
|
||||
// poll session open.
|
||||
func (n *Notifier) IsConnected(nodeID types.NodeID) bool {
|
||||
n.l.RLock()
|
||||
defer n.l.RUnlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "conncheck").Inc()
|
||||
n.l.Lock()
|
||||
defer n.l.Unlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "conncheck").Dec()
|
||||
|
||||
if val, ok := n.connected.Load(nodeID); ok {
|
||||
return val
|
||||
@@ -130,15 +161,11 @@ func (n *Notifier) NotifyByNodeID(
|
||||
update types.StateUpdate,
|
||||
nodeID types.NodeID,
|
||||
) {
|
||||
log.Trace().Caller().Str("type", update.Type.String()).Msg("acquiring lock to notify")
|
||||
defer log.Trace().
|
||||
Caller().
|
||||
Str("type", update.Type.String()).
|
||||
Msg("releasing lock, finished notifying")
|
||||
|
||||
start := time.Now()
|
||||
n.l.RLock()
|
||||
defer n.l.RUnlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "notify").Inc()
|
||||
n.l.Lock()
|
||||
defer n.l.Unlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "notify").Dec()
|
||||
notifierWaitForLock.WithLabelValues("notify").Observe(time.Since(start).Seconds())
|
||||
|
||||
if c, ok := n.nodes[nodeID]; ok {
|
||||
@@ -150,50 +177,94 @@ func (n *Notifier) NotifyByNodeID(
|
||||
Any("origin", types.NotifyOriginKey.Value(ctx)).
|
||||
Any("origin-hostname", types.NotifyHostnameKey.Value(ctx)).
|
||||
Msgf("update not sent, context cancelled")
|
||||
notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
|
||||
if debugHighCardinalityMetrics {
|
||||
notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), types.NotifyOriginKey.Value(ctx), nodeID.String()).Inc()
|
||||
} else {
|
||||
notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
|
||||
}
|
||||
|
||||
return
|
||||
case c <- update:
|
||||
log.Trace().
|
||||
Uint64("node.id", nodeID.Uint64()).
|
||||
Any("origin", ctx.Value("origin")).
|
||||
Any("origin-hostname", ctx.Value("hostname")).
|
||||
Msgf("update successfully sent on chan")
|
||||
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
|
||||
n.tracef(nodeID, "update successfully sent on chan, origin: %s, origin-hostname: %s", ctx.Value("origin"), ctx.Value("hostname"))
|
||||
if debugHighCardinalityMetrics {
|
||||
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), types.NotifyOriginKey.Value(ctx), nodeID.String()).Inc()
|
||||
} else {
|
||||
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Notifier) sendAll(update types.StateUpdate) {
|
||||
start := time.Now()
|
||||
n.l.RLock()
|
||||
defer n.l.RUnlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "send-all").Inc()
|
||||
n.l.Lock()
|
||||
defer n.l.Unlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "send-all").Dec()
|
||||
notifierWaitForLock.WithLabelValues("send-all").Observe(time.Since(start).Seconds())
|
||||
|
||||
for _, c := range n.nodes {
|
||||
c <- update
|
||||
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), "send-all").Inc()
|
||||
for id, c := range n.nodes {
|
||||
// Whenever an update is sent to all nodes, there is a chance that the node
|
||||
// has disconnected and the goroutine that was supposed to consume the update
|
||||
// has shut down the channel and is waiting for the lock held here in RemoveNode.
|
||||
// This means that there is potential for a deadlock which would stop all updates
|
||||
// going out to clients. This timeout prevents that from happening by moving on to the
|
||||
// next node if the context is cancelled. Afther sendAll releases the lock, the add/remove
|
||||
// call will succeed and the update will go to the correct nodes on the next call.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), n.cfg.Tuning.NotifierSendTimeout)
|
||||
defer cancel()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Error().
|
||||
Err(ctx.Err()).
|
||||
Uint64("node.id", id.Uint64()).
|
||||
Msgf("update not sent, context cancelled")
|
||||
if debugHighCardinalityMetrics {
|
||||
notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), "send-all", id.String()).Inc()
|
||||
} else {
|
||||
notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), "send-all").Inc()
|
||||
}
|
||||
|
||||
return
|
||||
case c <- update:
|
||||
if debugHighCardinalityMetrics {
|
||||
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), "send-all", id.String()).Inc()
|
||||
} else {
|
||||
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), "send-all").Inc()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (n *Notifier) String() string {
|
||||
n.l.RLock()
|
||||
defer n.l.RUnlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "string").Inc()
|
||||
n.l.Lock()
|
||||
defer n.l.Unlock()
|
||||
notifierWaitersForLock.WithLabelValues("lock", "string").Dec()
|
||||
|
||||
var b strings.Builder
|
||||
b.WriteString("chans:\n")
|
||||
fmt.Fprintf(&b, "chans (%d):\n", len(n.nodes))
|
||||
|
||||
for k, v := range n.nodes {
|
||||
fmt.Fprintf(&b, "\t%d: %p\n", k, v)
|
||||
var keys []types.NodeID
|
||||
n.connected.Range(func(key types.NodeID, value bool) bool {
|
||||
keys = append(keys, key)
|
||||
return true
|
||||
})
|
||||
sort.Slice(keys, func(i, j int) bool {
|
||||
return keys[i] < keys[j]
|
||||
})
|
||||
|
||||
for _, key := range keys {
|
||||
fmt.Fprintf(&b, "\t%d: %p\n", key, n.nodes[key])
|
||||
}
|
||||
|
||||
b.WriteString("\n")
|
||||
b.WriteString("connected:\n")
|
||||
fmt.Fprintf(&b, "connected (%d):\n", len(n.nodes))
|
||||
|
||||
n.connected.Range(func(k types.NodeID, v bool) bool {
|
||||
fmt.Fprintf(&b, "\t%d: %t\n", k, v)
|
||||
return true
|
||||
})
|
||||
for _, key := range keys {
|
||||
val, _ := n.connected.Load(key)
|
||||
fmt.Fprintf(&b, "\t%d: %t\n", key, val)
|
||||
}
|
||||
|
||||
return b.String()
|
||||
}
|
||||
@@ -230,13 +301,16 @@ func (b *batcher) close() {
|
||||
// addOrPassthrough adds the update to the batcher, if it is not a
|
||||
// type that is currently batched, it will be sent immediately.
|
||||
func (b *batcher) addOrPassthrough(update types.StateUpdate) {
|
||||
notifierBatcherWaitersForLock.WithLabelValues("lock", "add").Inc()
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
notifierBatcherWaitersForLock.WithLabelValues("lock", "add").Dec()
|
||||
|
||||
switch update.Type {
|
||||
case types.StatePeerChanged:
|
||||
b.changedNodeIDs.Add(update.ChangeNodes...)
|
||||
b.nodesChanged = true
|
||||
notifierBatcherChanges.WithLabelValues().Set(float64(b.changedNodeIDs.Len()))
|
||||
|
||||
case types.StatePeerChangedPatch:
|
||||
for _, newPatch := range update.ChangePatches {
|
||||
@@ -248,6 +322,7 @@ func (b *batcher) addOrPassthrough(update types.StateUpdate) {
|
||||
}
|
||||
}
|
||||
b.patchesChanged = true
|
||||
notifierBatcherPatches.WithLabelValues().Set(float64(len(b.patches)))
|
||||
|
||||
default:
|
||||
b.n.sendAll(update)
|
||||
@@ -257,8 +332,10 @@ func (b *batcher) addOrPassthrough(update types.StateUpdate) {
|
||||
// flush sends all the accumulated patches to all
|
||||
// nodes in the notifier.
|
||||
func (b *batcher) flush() {
|
||||
notifierBatcherWaitersForLock.WithLabelValues("lock", "flush").Inc()
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
notifierBatcherWaitersForLock.WithLabelValues("lock", "flush").Dec()
|
||||
|
||||
if b.nodesChanged || b.patchesChanged {
|
||||
var patches []*tailcfg.PeerChange
|
||||
@@ -296,8 +373,10 @@ func (b *batcher) flush() {
|
||||
}
|
||||
|
||||
b.changedNodeIDs = set.Slice[types.NodeID]{}
|
||||
notifierBatcherChanges.WithLabelValues().Set(0)
|
||||
b.nodesChanged = false
|
||||
b.patches = make(map[types.NodeID]tailcfg.PeerChange, len(b.patches))
|
||||
notifierBatcherPatches.WithLabelValues().Set(0)
|
||||
b.patchesChanged = false
|
||||
}
|
||||
}
|
||||
|
@@ -227,7 +227,7 @@ func TestBatcher(t *testing.T) {
|
||||
ch := make(chan types.StateUpdate, 30)
|
||||
defer close(ch)
|
||||
n.AddNode(1, ch)
|
||||
defer n.RemoveNode(1)
|
||||
defer n.RemoveNode(1, ch)
|
||||
|
||||
for _, u := range tt.updates {
|
||||
n.NotifyAll(context.Background(), u)
|
||||
|
@@ -532,7 +532,7 @@ func (s *Suite) TestRuleInvalidGeneration(c *check.C) {
|
||||
"example-host-2:80"
|
||||
],
|
||||
"deny": [
|
||||
"exapmle-host-2:100"
|
||||
"example-host-2:100"
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -635,7 +635,7 @@ func Test_expandGroup(t *testing.T) {
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "InexistantGroup",
|
||||
name: "InexistentGroup",
|
||||
field: field{
|
||||
pol: ACLPolicy{
|
||||
Groups: Groups{
|
||||
@@ -2604,7 +2604,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
|
||||
{
|
||||
name: "all hosts can talk to each other",
|
||||
args: args{
|
||||
nodes: types.Nodes{ // list of all nodess in the database
|
||||
nodes: types.Nodes{ // list of all nodes in the database
|
||||
&types.Node{
|
||||
ID: 1,
|
||||
IPv4: iap("100.64.0.1"),
|
||||
@@ -2651,7 +2651,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
|
||||
{
|
||||
name: "One host can talk to another, but not all hosts",
|
||||
args: args{
|
||||
nodes: types.Nodes{ // list of all nodess in the database
|
||||
nodes: types.Nodes{ // list of all nodes in the database
|
||||
&types.Node{
|
||||
ID: 1,
|
||||
IPv4: iap("100.64.0.1"),
|
||||
@@ -2693,7 +2693,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
|
||||
{
|
||||
name: "host cannot directly talk to destination, but return path is authorized",
|
||||
args: args{
|
||||
nodes: types.Nodes{ // list of all nodess in the database
|
||||
nodes: types.Nodes{ // list of all nodes in the database
|
||||
&types.Node{
|
||||
ID: 1,
|
||||
IPv4: iap("100.64.0.1"),
|
||||
@@ -2735,7 +2735,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
|
||||
{
|
||||
name: "rules allows all hosts to reach one destination",
|
||||
args: args{
|
||||
nodes: types.Nodes{ // list of all nodess in the database
|
||||
nodes: types.Nodes{ // list of all nodes in the database
|
||||
&types.Node{
|
||||
ID: 1,
|
||||
IPv4: iap("100.64.0.1"),
|
||||
@@ -2777,7 +2777,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
|
||||
{
|
||||
name: "rules allows all hosts to reach one destination, destination can reach all hosts",
|
||||
args: args{
|
||||
nodes: types.Nodes{ // list of all nodess in the database
|
||||
nodes: types.Nodes{ // list of all nodes in the database
|
||||
&types.Node{
|
||||
ID: 1,
|
||||
IPv4: iap("100.64.0.1"),
|
||||
@@ -2824,7 +2824,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
|
||||
{
|
||||
name: "rule allows all hosts to reach all destinations",
|
||||
args: args{
|
||||
nodes: types.Nodes{ // list of all nodess in the database
|
||||
nodes: types.Nodes{ // list of all nodes in the database
|
||||
&types.Node{
|
||||
ID: 1,
|
||||
IPv4: iap("100.64.0.1"),
|
||||
@@ -2871,7 +2871,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
|
||||
{
|
||||
name: "without rule all communications are forbidden",
|
||||
args: args{
|
||||
nodes: types.Nodes{ // list of all nodess in the database
|
||||
nodes: types.Nodes{ // list of all nodes in the database
|
||||
&types.Node{
|
||||
ID: 1,
|
||||
IPv4: iap("100.64.0.1"),
|
||||
|
@@ -9,13 +9,13 @@ import (
|
||||
"net/netip"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/juanfont/headscale/hscontrol/db"
|
||||
"github.com/juanfont/headscale/hscontrol/mapper"
|
||||
"github.com/juanfont/headscale/hscontrol/types"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/sasha-s/go-deadlock"
|
||||
xslices "golang.org/x/exp/slices"
|
||||
"gorm.io/gorm"
|
||||
"tailscale.com/tailcfg"
|
||||
@@ -29,11 +29,6 @@ type contextKey string
|
||||
|
||||
const nodeNameContextKey = contextKey("nodeName")
|
||||
|
||||
type sessionManager struct {
|
||||
mu sync.RWMutex
|
||||
sess map[types.NodeID]*mapSession
|
||||
}
|
||||
|
||||
type mapSession struct {
|
||||
h *Headscale
|
||||
req tailcfg.MapRequest
|
||||
@@ -41,12 +36,13 @@ type mapSession struct {
|
||||
capVer tailcfg.CapabilityVersion
|
||||
mapper *mapper.Mapper
|
||||
|
||||
serving bool
|
||||
servingMu sync.Mutex
|
||||
cancelChMu deadlock.Mutex
|
||||
|
||||
ch chan types.StateUpdate
|
||||
cancelCh chan struct{}
|
||||
ch chan types.StateUpdate
|
||||
cancelCh chan struct{}
|
||||
cancelChOpen bool
|
||||
|
||||
keepAlive time.Duration
|
||||
keepAliveTicker *time.Ticker
|
||||
|
||||
node *types.Node
|
||||
@@ -77,6 +73,8 @@ func (h *Headscale) newMapSession(
|
||||
}
|
||||
}
|
||||
|
||||
ka := keepAliveInterval + (time.Duration(rand.IntN(9000)) * time.Millisecond)
|
||||
|
||||
return &mapSession{
|
||||
h: h,
|
||||
ctx: ctx,
|
||||
@@ -86,13 +84,12 @@ func (h *Headscale) newMapSession(
|
||||
capVer: req.Version,
|
||||
mapper: h.mapper,
|
||||
|
||||
// serving indicates if a client is being served.
|
||||
serving: false,
|
||||
ch: updateChan,
|
||||
cancelCh: make(chan struct{}),
|
||||
cancelChOpen: true,
|
||||
|
||||
ch: updateChan,
|
||||
cancelCh: make(chan struct{}),
|
||||
|
||||
keepAliveTicker: time.NewTicker(keepAliveInterval + (time.Duration(rand.IntN(9000)) * time.Millisecond)),
|
||||
keepAlive: ka,
|
||||
keepAliveTicker: nil,
|
||||
|
||||
// Loggers
|
||||
warnf: warnf,
|
||||
@@ -103,15 +100,23 @@ func (h *Headscale) newMapSession(
|
||||
}
|
||||
|
||||
func (m *mapSession) close() {
|
||||
m.servingMu.Lock()
|
||||
defer m.servingMu.Unlock()
|
||||
if !m.serving {
|
||||
m.cancelChMu.Lock()
|
||||
defer m.cancelChMu.Unlock()
|
||||
|
||||
if !m.cancelChOpen {
|
||||
mapResponseClosed.WithLabelValues("chanclosed").Inc()
|
||||
return
|
||||
}
|
||||
|
||||
m.tracef("mapSession (%p) sending message on cancel chan")
|
||||
m.cancelCh <- struct{}{}
|
||||
m.tracef("mapSession (%p) sent message on cancel chan")
|
||||
m.tracef("mapSession (%p) sending message on cancel chan", m)
|
||||
select {
|
||||
case m.cancelCh <- struct{}{}:
|
||||
mapResponseClosed.WithLabelValues("sent").Inc()
|
||||
m.tracef("mapSession (%p) sent message on cancel chan", m)
|
||||
case <-time.After(30 * time.Second):
|
||||
mapResponseClosed.WithLabelValues("timeout").Inc()
|
||||
m.tracef("mapSession (%p) timed out sending close message", m)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *mapSession) isStreaming() bool {
|
||||
@@ -126,40 +131,12 @@ func (m *mapSession) isReadOnlyUpdate() bool {
|
||||
return !m.req.Stream && m.req.OmitPeers && m.req.ReadOnly
|
||||
}
|
||||
|
||||
// handlePoll ensures the node gets the appropriate updates from either
|
||||
// polling or immediate responses.
|
||||
//
|
||||
//nolint:gocyclo
|
||||
func (m *mapSession) resetKeepAlive() {
|
||||
m.keepAliveTicker.Reset(m.keepAlive)
|
||||
}
|
||||
|
||||
// serve handles non-streaming requests.
|
||||
func (m *mapSession) serve() {
|
||||
// Register with the notifier if this is a streaming
|
||||
// session
|
||||
if m.isStreaming() {
|
||||
// defers are called in reverse order,
|
||||
// so top one is executed last.
|
||||
|
||||
// Failover the node's routes if any.
|
||||
defer m.infof("node has disconnected, mapSession: %p", m)
|
||||
defer m.pollFailoverRoutes("node closing connection", m.node)
|
||||
|
||||
defer m.h.updateNodeOnlineStatus(false, m.node)
|
||||
defer m.h.nodeNotifier.RemoveNode(m.node.ID)
|
||||
|
||||
defer func() {
|
||||
m.servingMu.Lock()
|
||||
defer m.servingMu.Unlock()
|
||||
|
||||
m.serving = false
|
||||
close(m.cancelCh)
|
||||
}()
|
||||
|
||||
m.serving = true
|
||||
|
||||
m.h.nodeNotifier.AddNode(m.node.ID, m.ch)
|
||||
m.h.updateNodeOnlineStatus(true, m.node)
|
||||
|
||||
m.infof("node has connected, mapSession: %p", m)
|
||||
}
|
||||
|
||||
// TODO(kradalby): A set todos to harden:
|
||||
// - func to tell the stream to die, readonly -> false, !stream && omitpeers -> false, true
|
||||
|
||||
@@ -196,13 +173,43 @@ func (m *mapSession) serve() {
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// serveLongPoll ensures the node gets the appropriate updates from either
|
||||
// polling or immediate responses.
|
||||
//
|
||||
//nolint:gocyclo
|
||||
func (m *mapSession) serveLongPoll() {
|
||||
// Clean up the session when the client disconnects
|
||||
defer func() {
|
||||
m.cancelChMu.Lock()
|
||||
m.cancelChOpen = false
|
||||
close(m.cancelCh)
|
||||
m.cancelChMu.Unlock()
|
||||
|
||||
// only update node status if the node channel was removed.
|
||||
// in principal, it will be removed, but the client rapidly
|
||||
// reconnects, the channel might be of another connection.
|
||||
// In that case, it is not closed and the node is still online.
|
||||
if m.h.nodeNotifier.RemoveNode(m.node.ID, m.ch) {
|
||||
// Failover the node's routes if any.
|
||||
m.h.updateNodeOnlineStatus(false, m.node)
|
||||
m.pollFailoverRoutes("node closing connection", m.node)
|
||||
}
|
||||
|
||||
m.infof("node has disconnected, mapSession: %p, chan: %p", m, m.ch)
|
||||
}()
|
||||
|
||||
// From version 68, all streaming requests can be treated as read only.
|
||||
// TODO: Remove when we drop support for 1.48
|
||||
if m.capVer < 68 {
|
||||
// Error has been handled/written to client in the func
|
||||
// return
|
||||
err := m.handleSaveNode()
|
||||
if err != nil {
|
||||
mapResponseWriteUpdatesInStream.WithLabelValues("error").Inc()
|
||||
|
||||
m.close()
|
||||
return
|
||||
}
|
||||
mapResponseWriteUpdatesInStream.WithLabelValues("ok").Inc()
|
||||
@@ -224,6 +231,13 @@ func (m *mapSession) serve() {
|
||||
ctx, cancel := context.WithCancel(context.WithValue(m.ctx, nodeNameContextKey, m.node.Hostname))
|
||||
defer cancel()
|
||||
|
||||
m.keepAliveTicker = time.NewTicker(m.keepAlive)
|
||||
|
||||
m.h.nodeNotifier.AddNode(m.node.ID, m.ch)
|
||||
go m.h.updateNodeOnlineStatus(true, m.node)
|
||||
|
||||
m.infof("node has connected, mapSession: %p, chan: %p", m, m.ch)
|
||||
|
||||
// Loop through updates and continuously send them to the
|
||||
// client.
|
||||
for {
|
||||
@@ -231,13 +245,21 @@ func (m *mapSession) serve() {
|
||||
select {
|
||||
case <-m.cancelCh:
|
||||
m.tracef("poll cancelled received")
|
||||
return
|
||||
case <-ctx.Done():
|
||||
m.tracef("poll context done")
|
||||
mapResponseEnded.WithLabelValues("cancelled").Inc()
|
||||
return
|
||||
|
||||
// Consume all updates sent to node
|
||||
case update := <-m.ch:
|
||||
case <-ctx.Done():
|
||||
m.tracef("poll context done")
|
||||
mapResponseEnded.WithLabelValues("done").Inc()
|
||||
return
|
||||
|
||||
// Consume updates sent to node
|
||||
case update, ok := <-m.ch:
|
||||
if !ok {
|
||||
m.tracef("update channel closed, streaming session is likely being replaced")
|
||||
return
|
||||
}
|
||||
|
||||
m.tracef("received stream update: %s %s", update.Type.String(), update.Message)
|
||||
mapResponseUpdateReceived.WithLabelValues(update.Type.String()).Inc()
|
||||
|
||||
@@ -303,15 +325,13 @@ func (m *mapSession) serve() {
|
||||
return
|
||||
}
|
||||
|
||||
// log.Trace().Str("node", m.node.Hostname).TimeDiff("timeSpent", time.Now(), startMapResp).Str("mkey", m.node.MachineKey.String()).Int("type", int(update.Type)).Msg("finished making map response")
|
||||
|
||||
// Only send update if there is change
|
||||
if data != nil {
|
||||
startWrite := time.Now()
|
||||
_, err = m.w.Write(data)
|
||||
if err != nil {
|
||||
mapResponseSent.WithLabelValues("error", updateType).Inc()
|
||||
m.errf(err, "Could not write the map response, for mapSession: %p", m)
|
||||
m.errf(err, "could not write the map response(%s), for mapSession: %p", update.Type.String(), m)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -324,8 +344,12 @@ func (m *mapSession) serve() {
|
||||
|
||||
log.Trace().Str("node", m.node.Hostname).TimeDiff("timeSpent", time.Now(), startWrite).Str("mkey", m.node.MachineKey.String()).Msg("finished writing mapresp to node")
|
||||
|
||||
if debugHighCardinalityMetrics {
|
||||
mapResponseLastSentSeconds.WithLabelValues(updateType, m.node.ID.String()).Set(float64(time.Now().Unix()))
|
||||
}
|
||||
mapResponseSent.WithLabelValues("ok", updateType).Inc()
|
||||
m.tracef("update sent")
|
||||
m.resetKeepAlive()
|
||||
}
|
||||
|
||||
case <-m.keepAliveTicker.C:
|
||||
@@ -348,6 +372,9 @@ func (m *mapSession) serve() {
|
||||
return
|
||||
}
|
||||
|
||||
if debugHighCardinalityMetrics {
|
||||
mapResponseLastSentSeconds.WithLabelValues("keepalive", m.node.ID.String()).Set(float64(time.Now().Unix()))
|
||||
}
|
||||
mapResponseSent.WithLabelValues("ok", "keepalive").Inc()
|
||||
}
|
||||
}
|
||||
@@ -404,16 +431,6 @@ func (h *Headscale) updateNodeOnlineStatus(online bool, node *types.Node) {
|
||||
}, node.ID)
|
||||
}
|
||||
|
||||
func closeChanWithLog[C chan []byte | chan struct{} | chan types.StateUpdate](channel C, node, name string) {
|
||||
log.Trace().
|
||||
Str("handler", "PollNetMap").
|
||||
Str("node", node).
|
||||
Str("channel", "Done").
|
||||
Msg(fmt.Sprintf("Closing %s channel", name))
|
||||
|
||||
close(channel)
|
||||
}
|
||||
|
||||
func (m *mapSession) handleEndpointUpdate() {
|
||||
m.tracef("received endpoint update")
|
||||
|
||||
@@ -425,6 +442,17 @@ func (m *mapSession) handleEndpointUpdate() {
|
||||
m.node.ApplyPeerChange(&change)
|
||||
|
||||
sendUpdate, routesChanged := hostInfoChanged(m.node.Hostinfo, m.req.Hostinfo)
|
||||
|
||||
// The node might not set NetInfo if it has not changed and if
|
||||
// the full HostInfo object is overrwritten, the information is lost.
|
||||
// If there is no NetInfo, keep the previous one.
|
||||
// From 1.66 the client only sends it if changed:
|
||||
// https://github.com/tailscale/tailscale/commit/e1011f138737286ecf5123ff887a7a5800d129a2
|
||||
// TODO(kradalby): evaulate if we need better comparing of hostinfo
|
||||
// before we take the changes.
|
||||
if m.req.Hostinfo.NetInfo == nil {
|
||||
m.req.Hostinfo.NetInfo = m.node.Hostinfo.NetInfo
|
||||
}
|
||||
m.node.Hostinfo = m.req.Hostinfo
|
||||
|
||||
logTracePeerChange(m.node.Hostname, sendUpdate, &change)
|
||||
|
@@ -171,6 +171,7 @@ type LogConfig struct {
|
||||
}
|
||||
|
||||
type Tuning struct {
|
||||
NotifierSendTimeout time.Duration
|
||||
BatchChangeDelay time.Duration
|
||||
NodeMapSessionBufferedChanSize int
|
||||
}
|
||||
@@ -232,6 +233,7 @@ func LoadConfig(path string, isFile bool) error {
|
||||
|
||||
viper.SetDefault("ephemeral_node_inactivity_timeout", "120s")
|
||||
|
||||
viper.SetDefault("tuning.notifier_send_timeout", "800ms")
|
||||
viper.SetDefault("tuning.batch_change_delay", "800ms")
|
||||
viper.SetDefault("tuning.node_mapsession_buffered_chan_size", 30)
|
||||
|
||||
@@ -640,7 +642,7 @@ func GetHeadscaleConfig() (*Config, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
logConfig := GetLogConfig()
|
||||
logConfig := GetLogConfig()
|
||||
zerolog.SetGlobalLevel(logConfig.Level)
|
||||
|
||||
prefix4, err := PrefixV4()
|
||||
@@ -768,6 +770,7 @@ func GetHeadscaleConfig() (*Config, error) {
|
||||
|
||||
// TODO(kradalby): Document these settings when more stable
|
||||
Tuning: Tuning{
|
||||
NotifierSendTimeout: viper.GetDuration("tuning.notifier_send_timeout"),
|
||||
BatchChangeDelay: viper.GetDuration("tuning.batch_change_delay"),
|
||||
NodeMapSessionBufferedChanSize: viper.GetInt("tuning.node_mapsession_buffered_chan_size"),
|
||||
},
|
||||
|
@@ -43,6 +43,10 @@ func (id NodeID) Uint64() uint64 {
|
||||
return uint64(id)
|
||||
}
|
||||
|
||||
func (id NodeID) String() string {
|
||||
return strconv.FormatUint(id.Uint64(), util.Base10)
|
||||
}
|
||||
|
||||
// Node is a Headscale client.
|
||||
type Node struct {
|
||||
ID NodeID `gorm:"primary_key"`
|
||||
|
@@ -1,6 +1,7 @@
|
||||
package integration
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/netip"
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/samber/lo"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"tailscale.com/client/tailscale/apitype"
|
||||
"tailscale.com/types/key"
|
||||
)
|
||||
@@ -335,14 +337,14 @@ func TestTaildrop(t *testing.T) {
|
||||
IntegrationSkip(t)
|
||||
t.Parallel()
|
||||
|
||||
retry := func(times int, sleepInverval time.Duration, doWork func() error) error {
|
||||
retry := func(times int, sleepInterval time.Duration, doWork func() error) error {
|
||||
var err error
|
||||
for attempts := 0; attempts < times; attempts++ {
|
||||
err = doWork()
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(sleepInverval)
|
||||
time.Sleep(sleepInterval)
|
||||
}
|
||||
|
||||
return err
|
||||
@@ -793,7 +795,7 @@ func TestNodeOnlineStatus(t *testing.T) {
|
||||
continue
|
||||
}
|
||||
|
||||
// All peers of this nodess are reporting to be
|
||||
// All peers of this nodes are reporting to be
|
||||
// connected to the control server
|
||||
assert.Truef(
|
||||
t,
|
||||
@@ -829,24 +831,10 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
|
||||
"user2": len(MustTestVersions),
|
||||
}
|
||||
|
||||
headscaleConfig := map[string]string{
|
||||
"HEADSCALE_DERP_URLS": "",
|
||||
"HEADSCALE_DERP_SERVER_ENABLED": "true",
|
||||
"HEADSCALE_DERP_SERVER_REGION_ID": "999",
|
||||
"HEADSCALE_DERP_SERVER_REGION_CODE": "headscale",
|
||||
"HEADSCALE_DERP_SERVER_REGION_NAME": "Headscale Embedded DERP",
|
||||
"HEADSCALE_DERP_SERVER_STUN_LISTEN_ADDR": "0.0.0.0:3478",
|
||||
"HEADSCALE_DERP_SERVER_PRIVATE_KEY_PATH": "/tmp/derp.key",
|
||||
|
||||
// Envknob for enabling DERP debug logs
|
||||
"DERP_DEBUG_LOGS": "true",
|
||||
"DERP_PROBER_DEBUG_LOGS": "true",
|
||||
}
|
||||
|
||||
err = scenario.CreateHeadscaleEnv(spec,
|
||||
[]tsic.Option{},
|
||||
hsic.WithTestName("pingallbyip"),
|
||||
hsic.WithConfigEnv(headscaleConfig),
|
||||
hsic.WithTestName("pingallbyipmany"),
|
||||
hsic.WithEmbeddedDERPServerOnly(),
|
||||
hsic.WithTLS(),
|
||||
hsic.WithHostnameAsServerURL(),
|
||||
)
|
||||
@@ -870,19 +858,35 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
|
||||
success := pingAllHelper(t, allClients, allAddrs)
|
||||
t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
|
||||
|
||||
wg, _ := errgroup.WithContext(context.Background())
|
||||
|
||||
for run := range 3 {
|
||||
t.Logf("Starting DownUpPing run %d", run+1)
|
||||
|
||||
for _, client := range allClients {
|
||||
t.Logf("taking down %q", client.Hostname())
|
||||
client.Down()
|
||||
c := client
|
||||
wg.Go(func() error {
|
||||
t.Logf("taking down %q", c.Hostname())
|
||||
return c.Down()
|
||||
})
|
||||
}
|
||||
|
||||
if err := wg.Wait(); err != nil {
|
||||
t.Fatalf("failed to take down all nodes: %s", err)
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
|
||||
for _, client := range allClients {
|
||||
t.Logf("bringing up %q", client.Hostname())
|
||||
client.Up()
|
||||
c := client
|
||||
wg.Go(func() error {
|
||||
t.Logf("bringing up %q", c.Hostname())
|
||||
return c.Up()
|
||||
})
|
||||
}
|
||||
|
||||
if err := wg.Wait(); err != nil {
|
||||
t.Fatalf("failed to take down all nodes: %s", err)
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
|
@@ -286,9 +286,13 @@ func New(
|
||||
}
|
||||
|
||||
env := []string{
|
||||
"HEADSCALE_PROFILING_ENABLED=1",
|
||||
"HEADSCALE_PROFILING_PATH=/tmp/profile",
|
||||
"HEADSCALE_DEBUG_PROFILING_ENABLED=1",
|
||||
"HEADSCALE_DEBUG_PROFILING_PATH=/tmp/profile",
|
||||
"HEADSCALE_DEBUG_DUMP_MAPRESPONSE_PATH=/tmp/mapresponses",
|
||||
"HEADSCALE_DEBUG_DEADLOCK=1",
|
||||
"HEADSCALE_DEBUG_DEADLOCK_TIMEOUT=5s",
|
||||
"HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS=1",
|
||||
"HEADSCALE_DEBUG_DUMP_CONFIG=1",
|
||||
}
|
||||
for key, value := range hsic.env {
|
||||
env = append(env, fmt.Sprintf("%s=%s", key, value))
|
||||
@@ -397,7 +401,7 @@ func (t *HeadscaleInContainer) Shutdown() error {
|
||||
)
|
||||
}
|
||||
|
||||
err = t.SaveMetrics("/tmp/control/metrics.txt")
|
||||
err = t.SaveMetrics(fmt.Sprintf("/tmp/control/%s_metrics.txt", t.hostname))
|
||||
if err != nil {
|
||||
log.Printf(
|
||||
"Failed to metrics from control: %s",
|
||||
@@ -747,7 +751,7 @@ func createCertificate(hostname string) ([]byte, []byte, error) {
|
||||
Locality: []string{"Leiden"},
|
||||
},
|
||||
NotBefore: time.Now(),
|
||||
NotAfter: time.Now().Add(60 * time.Minute),
|
||||
NotAfter: time.Now().Add(60 * time.Hour),
|
||||
IsCA: true,
|
||||
ExtKeyUsage: []x509.ExtKeyUsage{
|
||||
x509.ExtKeyUsageClientAuth,
|
||||
|
@@ -51,8 +51,11 @@ var (
|
||||
tailscaleVersions2021 = map[string]bool{
|
||||
"head": true,
|
||||
"unstable": true,
|
||||
"1.60": true, // CapVer: 82
|
||||
"1.58": true, // CapVer: 82
|
||||
"1.66": true, // CapVer: not checked
|
||||
"1.64": true, // CapVer: not checked
|
||||
"1.62": true, // CapVer: not checked
|
||||
"1.60": true, // CapVer: not checked
|
||||
"1.58": true, // CapVer: not checked
|
||||
"1.56": true, // CapVer: 82
|
||||
"1.54": true, // CapVer: 79
|
||||
"1.52": true, // CapVer: 79
|
||||
@@ -423,8 +426,10 @@ func (s *Scenario) WaitForTailscaleSync() error {
|
||||
if err != nil {
|
||||
for _, user := range s.users {
|
||||
for _, client := range user.Clients {
|
||||
peers, _ := client.PrettyPeers()
|
||||
log.Println(peers)
|
||||
peers, allOnline, _ := client.FailingPeersAsString()
|
||||
if !allOnline {
|
||||
log.Println(peers)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -450,7 +455,7 @@ func (s *Scenario) WaitForTailscaleSyncWithPeerCount(peerCount int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateHeadscaleEnv is a conventient method returning a complete Headcale
|
||||
// CreateHeadscaleEnv is a convenient method returning a complete Headcale
|
||||
// test environment with nodes of all versions, joined to the server with X
|
||||
// users.
|
||||
func (s *Scenario) CreateHeadscaleEnv(
|
||||
|
@@ -36,5 +36,8 @@ type TailscaleClient interface {
|
||||
Ping(hostnameOrIP string, opts ...tsic.PingOption) error
|
||||
Curl(url string, opts ...tsic.CurlOption) (string, error)
|
||||
ID() string
|
||||
PrettyPeers() (string, error)
|
||||
|
||||
// FailingPeersAsString returns a formatted-ish multi-line-string of peers in the client
|
||||
// and a bool indicating if the clients online count and peer count is equal.
|
||||
FailingPeersAsString() (string, bool, error)
|
||||
}
|
||||
|
@@ -691,15 +691,18 @@ func (t *TailscaleInContainer) FQDN() (string, error) {
|
||||
return status.Self.DNSName, nil
|
||||
}
|
||||
|
||||
// PrettyPeers returns a formatted-ish table of peers in the client.
|
||||
func (t *TailscaleInContainer) PrettyPeers() (string, error) {
|
||||
// FailingPeersAsString returns a formatted-ish multi-line-string of peers in the client
|
||||
// and a bool indicating if the clients online count and peer count is equal.
|
||||
func (t *TailscaleInContainer) FailingPeersAsString() (string, bool, error) {
|
||||
status, err := t.Status()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get FQDN: %w", err)
|
||||
return "", false, fmt.Errorf("failed to get FQDN: %w", err)
|
||||
}
|
||||
|
||||
str := fmt.Sprintf("Peers of %s\n", t.hostname)
|
||||
str += "Hostname\tOnline\tLastSeen\n"
|
||||
var b strings.Builder
|
||||
|
||||
fmt.Fprintf(&b, "Peers of %s\n", t.hostname)
|
||||
fmt.Fprint(&b, "Hostname\tOnline\tLastSeen\n")
|
||||
|
||||
peerCount := len(status.Peers())
|
||||
onlineCount := 0
|
||||
@@ -711,12 +714,12 @@ func (t *TailscaleInContainer) PrettyPeers() (string, error) {
|
||||
onlineCount++
|
||||
}
|
||||
|
||||
str += fmt.Sprintf("%s\t%t\t%s\n", peer.HostName, peer.Online, peer.LastSeen)
|
||||
fmt.Fprintf(&b, "%s\t%t\t%s\n", peer.HostName, peer.Online, peer.LastSeen)
|
||||
}
|
||||
|
||||
str += fmt.Sprintf("Peer Count: %d, Online Count: %d\n\n", peerCount, onlineCount)
|
||||
fmt.Fprintf(&b, "Peer Count: %d, Online Count: %d\n\n", peerCount, onlineCount)
|
||||
|
||||
return str, nil
|
||||
return b.String(), peerCount == onlineCount, nil
|
||||
}
|
||||
|
||||
// WaitForNeedsLogin blocks until the Tailscale (tailscaled) instance has
|
||||
|
@@ -331,7 +331,7 @@ func dockertestMaxWait() time.Duration {
|
||||
// return timeout
|
||||
// }
|
||||
|
||||
// pingAllNegativeHelper is intended to have 1 or more nodes timeing out from the ping,
|
||||
// pingAllNegativeHelper is intended to have 1 or more nodes timing out from the ping,
|
||||
// it counts failures instead of successes.
|
||||
// func pingAllNegativeHelper(t *testing.T, clients []TailscaleClient, addrs []string) int {
|
||||
// t.Helper()
|
||||
|
Reference in New Issue
Block a user