Mirror of https://github.com/juanfont/headscale.git, synced 2025-12-16 14:32:13 +00:00

Compare commits (3 commits): topic/dock... → v0.23.0-al...
| Author | SHA1 | Date |
| --- | --- | --- |
|  | c8ebbede54 |  |
|  | 8185a70dc7 |  |
|  | 2dc62e981e |  |
@@ -135,7 +135,7 @@ kos:
   - id: ghcr-debug
     repository: ghcr.io/juanfont/headscale
     bare: true
-    base_image: "debian:12"
+    base_image: gcr.io/distroless/base-debian12:debug
     build: headscale
     main: ./cmd/headscale
     env:
@@ -160,7 +160,7 @@ kos:
   - id: dockerhub-debug
     build: headscale
-    base_image: "debian:12"
+    base_image: gcr.io/distroless/base-debian12:debug
     repository: headscale/headscale
     bare: true
     platforms:
CHANGELOG.md (14 changed lines)
@@ -26,7 +26,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
 - Code reorganisation, a lot of code has moved, please review the following PRs accordingly [#1473](https://github.com/juanfont/headscale/pull/1473)
 - Change the structure of database configuration, see [config-example.yaml](./config-example.yaml) for the new structure. [#1700](https://github.com/juanfont/headscale/pull/1700)
   - Old structure has been remove and the configuration _must_ be converted.
-  - Adds additional configuration for PostgreSQL for setting max open, idle conection and idle connection lifetime.
+  - Adds additional configuration for PostgreSQL for setting max open, idle connection and idle connection lifetime.
 - API: Machine is now Node [#1553](https://github.com/juanfont/headscale/pull/1553)
 - Remove support for older Tailscale clients [#1611](https://github.com/juanfont/headscale/pull/1611)
   - The latest supported client is 1.38
@@ -70,7 +70,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
 ### Changes
 
 - Add environment flags to enable pprof (profiling) [#1382](https://github.com/juanfont/headscale/pull/1382)
-  - Profiles are continously generated in our integration tests.
+  - Profiles are continuously generated in our integration tests.
 - Fix systemd service file location in `.deb` packages [#1391](https://github.com/juanfont/headscale/pull/1391)
 - Improvements on Noise implementation [#1379](https://github.com/juanfont/headscale/pull/1379)
 - Replace node filter logic, ensuring nodes with access can see eachother [#1381](https://github.com/juanfont/headscale/pull/1381)
@@ -161,7 +161,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
 - SSH ACLs status:
   - Support `accept` and `check` (SSH can be enabled and used for connecting and authentication)
   - Rejecting connections **are not supported**, meaning that if you enable SSH, then assume that _all_ `ssh` connections **will be allowed**.
-    - If you decied to try this feature, please carefully managed permissions by blocking port `22` with regular ACLs or do _not_ set `--ssh` on your clients.
+    - If you decided to try this feature, please carefully managed permissions by blocking port `22` with regular ACLs or do _not_ set `--ssh` on your clients.
   - We are currently improving our testing of the SSH ACLs, help us get an overview by testing and giving feedback.
   - This feature should be considered dangerous and it is disabled by default. Enable by setting `HEADSCALE_EXPERIMENTAL_FEATURE_SSH=1`.
 
@@ -211,7 +211,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
 ### Changes
 
 - Updated dependencies (including the library that lacked armhf support) [#722](https://github.com/juanfont/headscale/pull/722)
-- Fix missing group expansion in function `excludeCorretlyTaggedNodes` [#563](https://github.com/juanfont/headscale/issues/563)
+- Fix missing group expansion in function `excludeCorrectlyTaggedNodes` [#563](https://github.com/juanfont/headscale/issues/563)
 - Improve registration protocol implementation and switch to NodeKey as main identifier [#725](https://github.com/juanfont/headscale/pull/725)
 - Add ability to connect to PostgreSQL via unix socket [#734](https://github.com/juanfont/headscale/pull/734)
 
@@ -231,7 +231,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
 - Fix send on closed channel crash in polling [#542](https://github.com/juanfont/headscale/pull/542)
 - Fixed spurious calls to setLastStateChangeToNow from ephemeral nodes [#566](https://github.com/juanfont/headscale/pull/566)
 - Add command for moving nodes between namespaces [#362](https://github.com/juanfont/headscale/issues/362)
-- Added more configuration parameters for OpenID Connect (scopes, free-form paramters, domain and user allowlist)
+- Added more configuration parameters for OpenID Connect (scopes, free-form parameters, domain and user allowlist)
 - Add command to set tags on a node [#525](https://github.com/juanfont/headscale/issues/525)
 - Add command to view tags of nodes [#356](https://github.com/juanfont/headscale/issues/356)
 - Add --all (-a) flag to enable routes command [#360](https://github.com/juanfont/headscale/issues/360)
@@ -279,10 +279,10 @@ after improving the test harness as part of adopting [#1460](https://github.com/
 
 - Fix a bug were the same IP could be assigned to multiple hosts if joined in quick succession [#346](https://github.com/juanfont/headscale/pull/346)
 - Simplify the code behind registration of machines [#366](https://github.com/juanfont/headscale/pull/366)
-  - Nodes are now only written to database if they are registrated successfully
+  - Nodes are now only written to database if they are registered successfully
 - Fix a limitation in the ACLs that prevented users to write rules with `*` as source [#374](https://github.com/juanfont/headscale/issues/374)
 - Reduce the overhead of marshal/unmarshal for Hostinfo, routes and endpoints by using specific types in Machine [#371](https://github.com/juanfont/headscale/pull/371)
-- Apply normalization function to FQDN on hostnames when hosts registers and retrieve informations [#363](https://github.com/juanfont/headscale/issues/363)
+- Apply normalization function to FQDN on hostnames when hosts registers and retrieve information [#363](https://github.com/juanfont/headscale/issues/363)
 - Fix a bug that prevented the use of `tailscale logout` with OIDC [#508](https://github.com/juanfont/headscale/issues/508)
 - Added Tailscale repo HEAD and unstable releases channel to the integration tests targets [#513](https://github.com/juanfont/headscale/pull/513)
 
@@ -1,21 +1,43 @@
-# This Dockerfile and the images produced are for testing headscale,
-# and are in no way endorsed by Headscale's maintainers as an
-# official nor supported release or distribution.
+# Copyright (c) Tailscale Inc & AUTHORS
+# SPDX-License-Identifier: BSD-3-Clause
 
-FROM golang:latest
+# This Dockerfile is more or less lifted from tailscale/tailscale
+# to ensure a similar build process when testing the HEAD of tailscale.
 
-RUN apt-get update \
-    && apt-get install -y dnsutils git iptables ssh ca-certificates \
-    && rm -rf /var/lib/apt/lists/*
+FROM golang:1.22-alpine AS build-env
 
-RUN useradd --shell=/bin/bash --create-home ssh-it-user
+WORKDIR /go/src
 
+RUN apk add --no-cache git
+
+# Replace `RUN git...` with `COPY` and a local checked out version of Tailscale in `./tailscale`
+# to test specific commits of the Tailscale client. This is useful when trying to find out why
+# something specific broke between two versions of Tailscale with for example `git bisect`.
+# COPY ./tailscale .
 RUN git clone https://github.com/tailscale/tailscale.git
 
-WORKDIR /go/tailscale
+WORKDIR /go/src/tailscale
 
-RUN git checkout main \
-    && sh build_dist.sh tailscale.com/cmd/tailscale \
-    && sh build_dist.sh tailscale.com/cmd/tailscaled \
-    && cp tailscale /usr/local/bin/ \
-    && cp tailscaled /usr/local/bin/
+# see build_docker.sh
+ARG VERSION_LONG=""
+ENV VERSION_LONG=$VERSION_LONG
+ARG VERSION_SHORT=""
+ENV VERSION_SHORT=$VERSION_SHORT
+ARG VERSION_GIT_HASH=""
+ENV VERSION_GIT_HASH=$VERSION_GIT_HASH
+ARG TARGETARCH
+
+RUN GOARCH=$TARGETARCH go install -ldflags="\
+      -X tailscale.com/version.longStamp=$VERSION_LONG \
+      -X tailscale.com/version.shortStamp=$VERSION_SHORT \
+      -X tailscale.com/version.gitCommitStamp=$VERSION_GIT_HASH" \
+      -v ./cmd/tailscale ./cmd/tailscaled ./cmd/containerboot
+
+FROM alpine:3.18
+RUN apk add --no-cache ca-certificates iptables iproute2 ip6tables curl
+
+COPY --from=build-env /go/bin/* /usr/local/bin/
+# For compat with the previous run.sh, although ideally you should be
+# using build_docker.sh which sets an entrypoint for the image.
+RUN mkdir /tailscale && ln -s /usr/local/bin/containerboot /tailscale/run.sh
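The `-ldflags="-X ..."` stanza in the new build stage stamps version metadata into the binaries at link time. A minimal, self-contained sketch of the same technique; the package and variable names here are illustrative, not headscale's or tailscale's:

```go
// Build with:
//   go build -ldflags "-X main.versionLong=v0.0.1-example"
// and the linker replaces the default value below at link time.
package main

import "fmt"

// versionLong must be a package-level string variable for -X to target it.
var versionLong = "dev"

func main() {
	fmt.Println("version:", versionLong)
}
```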
@@ -99,7 +99,7 @@ Please read the [CONTRIBUTING.md](./CONTRIBUTING.md) file.
 
 ### Requirements
 
-To contribute to headscale you would need the lastest version of [Go](https://golang.org)
+To contribute to headscale you would need the latest version of [Go](https://golang.org)
 and [Buf](https://buf.build)(Protobuf generator).
 
 We recommend using [Nix](https://nixos.org/) to setup a development environment. This can
@@ -105,7 +105,7 @@ derp:
   automatically_add_embedded_derp_region: true
 
   # For better connection stability (especially when using an Exit-Node and DNS is not working),
-  # it is possible to optionall add the public IPv4 and IPv6 address to the Derp-Map using:
+  # it is possible to optionally add the public IPv4 and IPv6 address to the Derp-Map using:
   ipv4: 1.2.3.4
   ipv6: 2001:db8::1
 
@@ -199,7 +199,7 @@ log:
   format: text
   level: info
 
-# Path to a file containg ACL policies.
+# Path to a file containing ACL policies.
 # ACLs can be defined as YAML or HUJSON.
 # https://tailscale.com/kb/1018/acls/
 acl_policy_path: ""
@@ -14,7 +14,7 @@ If the node is already registered, it can advertise exit capabilities like this:
 $ sudo tailscale set --advertise-exit-node
 ```
 
-To use a node as an exit node, IP forwarding must be enabled on the node. Check the official [Tailscale documentation](https://tailscale.com/kb/1019/subnets/?tab=linux#enable-ip-forwarding) for how to enable IP fowarding.
+To use a node as an exit node, IP forwarding must be enabled on the node. Check the official [Tailscale documentation](https://tailscale.com/kb/1019/subnets/?tab=linux#enable-ip-forwarding) for how to enable IP forwarding.
 
 ## On the control server
 
@@ -36,7 +36,7 @@ We don't know. We might be working on it. If you want to help, please send us a
 Please be aware that there are a number of reasons why we might not accept specific contributions:
 
 - It is not possible to implement the feature in a way that makes sense in a self-hosted environment.
-- Given that we are reverse-engineering Tailscale to satify our own curiosity, we might be interested in implementing the feature ourselves.
+- Given that we are reverse-engineering Tailscale to satisfy our own curiosity, we might be interested in implementing the feature ourselves.
 - You are not sending unit and integration tests with it.
 
 ## Do you support Y method of deploying Headscale?
@@ -58,12 +58,12 @@ A solution could be to consider a headscale server (in it's entirety) as a
 tailnet.
 
 For personal users the default behavior could either allow all communications
-between all namespaces (like tailscale) or dissallow all communications between
+between all namespaces (like tailscale) or disallow all communications between
 namespaces (current behavior).
 
 For businesses and organisations, viewing a headscale instance a single tailnet
 would allow users (namespace) to talk to each other with the ACLs. As described
-in tailscale's documentation [[1]], a server should be tagged and personnal
+in tailscale's documentation [[1]], a server should be tagged and personal
 devices should be tied to a user. Translated in headscale's terms each user can
 have multiple devices and all those devices should be in the same namespace.
 The servers should be tagged and used as such.
@@ -88,7 +88,7 @@ the ability to rules in either format (HuJSON or YAML).
 Let's build an example use case for a small business (It may be the place where
 ACL's are the most useful).
 
-We have a small company with a boss, an admin, two developper and an intern.
+We have a small company with a boss, an admin, two developer and an intern.
 
 The boss should have access to all servers but not to the users hosts. Admin
 should also have access to all hosts except that their permissions should be
@@ -173,7 +173,7 @@ need to add the following ACLs
     "ports": ["prod:*", "dev:*", "internal:*"]
   },
 
-  // admin have access to adminstration port (lets only consider port 22 here)
+  // admin have access to administration port (lets only consider port 22 here)
   {
     "action": "accept",
     "users": ["group:admin"],
@@ -1,13 +1,13 @@
 # Controlling `headscale` with remote CLI
 
-## Prerequisit
+## Prerequisite
 
 - A workstation to run `headscale` (could be Linux, macOS, other supported platforms)
 - A `headscale` server (version `0.13.0` or newer)
 - Access to create API keys (local access to the `headscale` server)
 - `headscale` _must_ be served over TLS/HTTPS
   - Remote access does _not_ support unencrypted traffic.
-- Port `50443` must be open in the firewall (or port overriden by `grpc_listen_addr` option)
+- Port `50443` must be open in the firewall (or port overridden by `grpc_listen_addr` option)
 
 ## Goal
 
@@ -97,4 +97,4 @@ Checklist:
 - Make sure you use version `0.13.0` or newer.
 - Verify that your TLS certificate is valid and trusted
   - If you do not have access to a trusted certificate (e.g. from Let's Encrypt), add your self signed certificate to the trust store of your OS or
-  - Set `HEADSCALE_CLI_INSECURE` to 0 in your environement
+  - Set `HEADSCALE_CLI_INSECURE` to 0 in your environment
@@ -115,7 +115,7 @@ The following Caddyfile is all that is necessary to use Caddy as a reverse proxy
 }
 ```
 
-Caddy v2 will [automatically](https://caddyserver.com/docs/automatic-https) provision a certficate for your domain/subdomain, force HTTPS, and proxy websockets - no further configuration is necessary.
+Caddy v2 will [automatically](https://caddyserver.com/docs/automatic-https) provision a certificate for your domain/subdomain, force HTTPS, and proxy websockets - no further configuration is necessary.
 
 For a slightly more complex configuration which utilizes Docker containers to manage Caddy, Headscale, and Headscale-UI, [Guru Computing's guide](https://blog.gurucomputing.com.au/smart-vpns-with-headscale/) is an excellent reference.
 
@@ -30,7 +30,7 @@ describing how to make `headscale` run properly in a server environment.
 cd headscale
 
 # optionally checkout a release
-# option a. you can find offical relase at https://github.com/juanfont/headscale/releases/latest
+# option a. you can find official release at https://github.com/juanfont/headscale/releases/latest
 # option b. get latest tag, this may be a beta release
 latestTag=$(git describe --tags `git rev-list --tags --max-count=1`)
 
@@ -57,7 +57,7 @@ describing how to make `headscale` run properly in a server environment.
 cd headscale
 
 # optionally checkout a release
-# option a. you can find offical relase at https://github.com/juanfont/headscale/releases/latest
+# option a. you can find official release at https://github.com/juanfont/headscale/releases/latest
 # option b. get latest tag, this may be a beta release
 latestTag=$(git describe --tags `git rev-list --tags --max-count=1`)
 
@@ -31,7 +31,7 @@
 
           # When updating go.mod or go.sum, a new sha will need to be calculated,
           # update this if you have a mismatch after doing a change to thos files.
-          vendorHash = "sha256-wXfKeiJaGe6ahOsONrQhvbuMN8flQ13b0ZjxdbFs1e8=";
+          vendorHash = "sha256-EorT2AVwA3usly/LcNor6r5UIhLCdj3L4O4ilgTIC2o=";
 
           subPackages = ["cmd/headscale"];
 
go.mod (2 changed lines)
@@ -29,6 +29,7 @@ require (
 	github.com/puzpuzpuz/xsync/v3 v3.1.0
 	github.com/rs/zerolog v1.32.0
 	github.com/samber/lo v1.39.0
+	github.com/sasha-s/go-deadlock v0.3.1
 	github.com/spf13/cobra v1.8.0
 	github.com/spf13/viper v1.18.2
 	github.com/stretchr/testify v1.9.0
@@ -155,6 +156,7 @@ require (
 	github.com/opencontainers/image-spec v1.1.0 // indirect
 	github.com/opencontainers/runc v1.1.12 // indirect
 	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 // indirect
 	github.com/pierrec/lz4/v4 v4.1.21 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
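The new direct dependency `github.com/sasha-s/go-deadlock` (with `petermattis/goid` pulled in as its indirect dependency) provides mutexes that are drop-in replacements for `sync.Mutex` and can report suspected deadlocks; the notifier diff further down switches to it. A minimal sketch of how the library is typically configured, assuming only its documented `Opts` fields:

```go
package main

import (
	"time"

	"github.com/sasha-s/go-deadlock"
)

func main() {
	// deadlock.Mutex is a drop-in replacement for sync.Mutex. When detection
	// is enabled, a Lock() that waits longer than DeadlockTimeout is reported
	// together with the stacks of the goroutines involved.
	deadlock.Opts.DeadlockTimeout = 5 * time.Second
	deadlock.Opts.PrintAllCurrentGoroutines = true

	var mu deadlock.Mutex
	mu.Lock()
	// ... critical section ...
	mu.Unlock()
}
```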
go.sum (4 changed lines)
@@ -367,6 +367,8 @@ github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaR
 github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
 github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
 github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5 h1:q2e307iGHPdTGp0hoxKjt1H5pDo6utceo3dQVK3I5XQ=
+github.com/petermattis/goid v0.0.0-20180202154549-b0b1615b78e5/go.mod h1:jvVRKCrJTQWu0XVbaOlby/2lO20uSCHEMzzplHXte1o=
 github.com/philip-bui/grpc-zerolog v1.0.1 h1:EMacvLRUd2O1K0eWod27ZP5CY1iTNkhBDLSN+Q4JEvA=
 github.com/philip-bui/grpc-zerolog v1.0.1/go.mod h1:qXbiq/2X4ZUMMshsqlWyTHOcw7ns+GZmlqZZN05ZHcQ=
 github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
@@ -423,6 +425,8 @@ github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6g
 github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
 github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA=
 github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
+github.com/sasha-s/go-deadlock v0.3.1 h1:sqv7fDNShgjcaxkO0JNcOAlr8B9+cV5Ey/OB71efZx0=
+github.com/sasha-s/go-deadlock v0.3.1/go.mod h1:F73l+cr82YSh10GxyRI6qZiCgK64VaZjwesgfQ1/iLM=
 github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
 github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
 github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
@@ -19,6 +19,7 @@ import (
 	"time"
 
 	"github.com/coreos/go-oidc/v3/oidc"
+	"github.com/davecgh/go-spew/spew"
 	"github.com/gorilla/mux"
 	grpcMiddleware "github.com/grpc-ecosystem/go-grpc-middleware"
 	grpcRuntime "github.com/grpc-ecosystem/grpc-gateway/v2/runtime"
@@ -104,16 +105,15 @@ type Headscale struct {
 	registrationCache *cache.Cache
 
 	pollNetMapStreamWG sync.WaitGroup
-
-	mapSessions  map[types.NodeID]*mapSession
-	mapSessionMu sync.Mutex
 }
 
 var (
-	profilingEnabled = envknob.Bool("HEADSCALE_PROFILING_ENABLED")
+	profilingEnabled = envknob.Bool("HEADSCALE_DEBUG_PROFILING_ENABLED")
+	profilingPath    = envknob.String("HEADSCALE_DEBUG_PROFILING_PATH")
 	tailsqlEnabled   = envknob.Bool("HEADSCALE_DEBUG_TAILSQL_ENABLED")
 	tailsqlStateDir  = envknob.String("HEADSCALE_DEBUG_TAILSQL_STATE_DIR")
 	tailsqlTSKey     = envknob.String("TS_AUTHKEY")
+	dumpConfig       = envknob.Bool("HEADSCALE_DEBUG_DUMP_CONFIG")
 )
 
 func NewHeadscale(cfg *types.Config) (*Headscale, error) {
@@ -138,7 +138,6 @@ func NewHeadscale(cfg *types.Config) (*Headscale, error) {
 		registrationCache:  registrationCache,
 		pollNetMapStreamWG: sync.WaitGroup{},
 		nodeNotifier:       notifier.NewNotifier(cfg),
-		mapSessions:        make(map[types.NodeID]*mapSession),
 	}
 
 	app.db, err = db.NewHeadscaleDatabase(
@@ -330,7 +329,7 @@ func (h *Headscale) grpcAuthenticationInterceptor(ctx context.Context,
 	// Check if the request is coming from the on-server client.
 	// This is not secure, but it is to maintain maintainability
 	// with the "legacy" database-based client
-	// It is also neede for grpc-gateway to be able to connect to
+	// It is also needed for grpc-gateway to be able to connect to
 	// the server
 	client, _ := peer.FromContext(ctx)
 
@@ -502,14 +501,14 @@ func (h *Headscale) createRouter(grpcMux *grpcRuntime.ServeMux) *mux.Router {
 
 // Serve launches the HTTP and gRPC server service Headscale and the API.
 func (h *Headscale) Serve() error {
-	if _, enableProfile := os.LookupEnv("HEADSCALE_PROFILING_ENABLED"); enableProfile {
-		if profilePath, ok := os.LookupEnv("HEADSCALE_PROFILING_PATH"); ok {
-			err := os.MkdirAll(profilePath, os.ModePerm)
+	if profilingEnabled {
+		if profilingPath != "" {
+			err := os.MkdirAll(profilingPath, os.ModePerm)
 			if err != nil {
 				log.Fatal().Err(err).Msg("failed to create profiling directory")
 			}
 
-			defer profile.Start(profile.ProfilePath(profilePath)).Stop()
+			defer profile.Start(profile.ProfilePath(profilingPath)).Stop()
 		} else {
 			defer profile.Start().Stop()
 		}
@@ -517,6 +516,10 @@ func (h *Headscale) Serve() error {
 
 	var err error
 
+	if dumpConfig {
+		spew.Dump(h.cfg)
+	}
+
 	// Fetch an initial DERP Map before we start serving
 	h.DERPMap = derp.GetDERPMap(h.cfg.DERP)
 	h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier)
@@ -729,19 +732,6 @@ func (h *Headscale) Serve() error {
 			w.WriteHeader(http.StatusOK)
 			w.Write([]byte(h.nodeNotifier.String()))
 		})
-		debugMux.HandleFunc("/debug/mapresp", func(w http.ResponseWriter, r *http.Request) {
-			h.mapSessionMu.Lock()
-			defer h.mapSessionMu.Unlock()
-
-			var b strings.Builder
-			b.WriteString("mapresponders:\n")
-			for k, v := range h.mapSessions {
-				fmt.Fprintf(&b, "\t%d: %p\n", k, v)
-			}
-
-			w.WriteHeader(http.StatusOK)
-			w.Write([]byte(b.String()))
-		})
 		debugMux.Handle("/metrics", promhttp.Handler())
 
 		debugHTTPServer := &http.Server{
@@ -822,17 +812,6 @@ func (h *Headscale) Serve() error {
 		expireNodeCancel()
 		expireEphemeralCancel()
 
-		trace("closing map sessions")
-		wg := sync.WaitGroup{}
-		for _, mapSess := range h.mapSessions {
-			wg.Add(1)
-			go func() {
-				mapSess.close()
-				wg.Done()
-			}()
-		}
-		wg.Wait()
-
 		trace("waiting for netmap stream to close")
 		h.pollNetMapStreamWG.Wait()
 
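The hunks above replace ad-hoc `os.LookupEnv` calls with package-level `tailscale.com/envknob` variables, so every debug switch is read once at startup and shares the `HEADSCALE_DEBUG_` prefix. A minimal sketch of the pattern; the `EXAMPLE_` variable names are illustrative, not headscale's:

```go
package main

import (
	"fmt"

	"tailscale.com/envknob"
)

// Knobs are evaluated once, when the package-level vars are initialized.
var (
	debugEnabled = envknob.Bool("EXAMPLE_DEBUG")        // true for "1"/"true"
	debugPath    = envknob.String("EXAMPLE_DEBUG_PATH") // "" when unset
)

func main() {
	if debugEnabled {
		fmt.Println("debug mode enabled, path:", debugPath)
	}
}
```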
@@ -661,7 +661,7 @@ func GenerateGivenName(
 }
 
 func DeleteExpiredEphemeralNodes(tx *gorm.DB,
-	inactivityThreshhold time.Duration,
+	inactivityThreshold time.Duration,
 ) ([]types.NodeID, []types.NodeID) {
 	users, err := ListUsers(tx)
 	if err != nil {
@@ -679,7 +679,7 @@ func DeleteExpiredEphemeralNodes(tx *gorm.DB,
 	for idx, node := range nodes {
 		if node.IsEphemeral() && node.LastSeen != nil &&
 			time.Now().
-				After(node.LastSeen.Add(inactivityThreshhold)) {
+				After(node.LastSeen.Add(inactivityThreshold)) {
 			expired = append(expired, node.ID)
 
 			log.Info().
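The renamed `inactivityThreshold` parameter feeds the expiry predicate in the second hunk: a node counts as expired once now is after `LastSeen + inactivityThreshold`. Isolated as a standalone sketch:

```go
package main

import (
	"fmt"
	"time"
)

// expired mirrors the check above: ephemeral nodes are reaped once they have
// been silent for longer than the inactivity threshold.
func expired(lastSeen time.Time, inactivityThreshold time.Duration) bool {
	return time.Now().After(lastSeen.Add(inactivityThreshold))
}

func main() {
	lastSeen := time.Now().Add(-2 * time.Hour)
	fmt.Println(expired(lastSeen, time.Hour))   // true: idle past the threshold
	fmt.Println(expired(lastSeen, 3*time.Hour)) // false: still within it
}
```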
@@ -393,7 +393,7 @@ func (s *Suite) TestSetTags(c *check.C) {
 	c.Assert(err, check.IsNil)
 	c.Assert(node.ForcedTags, check.DeepEquals, types.StringList(sTags))
 
-	// assign duplicat tags, expect no errors but no doubles in DB
+	// assign duplicate tags, expect no errors but no doubles in DB
 	eTags := []string{"tag:bar", "tag:test", "tag:unknown", "tag:test"}
 	err = db.SetTags(node.ID, eTags)
 	c.Assert(err, check.IsNil)
@@ -83,7 +83,7 @@ func CreatePreAuthKey(
 			if !seenTags[tag] {
 				if err := tx.Save(&types.PreAuthKeyACLTag{PreAuthKeyID: key.ID, Tag: tag}).Error; err != nil {
 					return nil, fmt.Errorf(
-						"failed to ceate key tag in the database: %w",
+						"failed to create key tag in the database: %w",
 						err,
 					)
 				}
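The `seenTags` guard above is the usual seen-map deduplication: each tag is persisted once even if the caller passes duplicates, which is exactly what the `TestSetTags` hunk earlier expects. A standalone sketch of the pattern:

```go
package main

import "fmt"

// dedup keeps the first occurrence of each tag, dropping later duplicates.
func dedup(tags []string) []string {
	seen := make(map[string]bool, len(tags))
	out := make([]string, 0, len(tags))
	for _, tag := range tags {
		if !seen[tag] {
			seen[tag] = true
			out = append(out, tag)
		}
	}
	return out
}

func main() {
	fmt.Println(dedup([]string{"tag:bar", "tag:test", "tag:unknown", "tag:test"}))
	// Output: [tag:bar tag:test tag:unknown]
}
```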
@@ -204,7 +204,7 @@ func DERPProbeHandler(
 	}
 }
 
-// DERPBootstrapDNSHandler implements the /bootsrap-dns endpoint
+// DERPBootstrapDNSHandler implements the /bootstrap-dns endpoint
 // Described in https://github.com/tailscale/tailscale/issues/1405,
 // this endpoint provides a way to help a client when it fails to start up
 // because its DNS are broken.
@@ -7,8 +7,23 @@ import (
 	"github.com/gorilla/mux"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
+	"tailscale.com/envknob"
 )
 
+var debugHighCardinalityMetrics = envknob.Bool("HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS")
+
+var mapResponseLastSentSeconds *prometheus.GaugeVec
+
+func init() {
+	if debugHighCardinalityMetrics {
+		mapResponseLastSentSeconds = promauto.NewGaugeVec(prometheus.GaugeOpts{
+			Namespace: prometheusNamespace,
+			Name:      "mapresponse_last_sent_seconds",
+			Help:      "last sent metric to node.id",
+		}, []string{"type", "id"})
+	}
+}
+
 const prometheusNamespace = "headscale"
 
 var (
@@ -37,16 +52,16 @@ var (
 		Name:      "mapresponse_readonly_requests_total",
 		Help:      "total count of readonly requests received",
 	}, []string{"status"})
-	mapResponseSessions = promauto.NewGauge(prometheus.GaugeOpts{
-		Namespace: prometheusNamespace,
-		Name:      "mapresponse_current_sessions_total",
-		Help:      "total count open map response sessions",
-	})
-	mapResponseRejected = promauto.NewCounterVec(prometheus.CounterOpts{
-		Namespace: prometheusNamespace,
-		Name:      "mapresponse_rejected_new_sessions_total",
-		Help:      "total count of new mapsessions rejected",
+	mapResponseEnded = promauto.NewCounterVec(prometheus.CounterOpts{
+		Namespace: prometheusNamespace,
+		Name:      "mapresponse_ended_total",
+		Help:      "total count of new mapsessions ended",
 	}, []string{"reason"})
+	mapResponseClosed = promauto.NewCounterVec(prometheus.CounterOpts{
+		Namespace: prometheusNamespace,
+		Name:      "mapresponse_closed_total",
+		Help:      "total count of calls to mapresponse close",
+	}, []string{"return"})
 	httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
 		Namespace: prometheusNamespace,
 		Name:      "http_duration_seconds",
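Why gate the per-node `id` label behind `HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS`? Every distinct label value creates a separate Prometheus time series, so an `id` label multiplies the series count by the number of nodes; and since a vec's label set is fixed when it is registered, the choice has to be made once, at `init()` time. A minimal sketch of the same gating, with illustrative metric names:

```go
package main

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

var updatesSent *prometheus.CounterVec

// setup registers the counter once; promauto panics on duplicate registration,
// so the label set cannot be changed after this point.
func setup(highCardinality bool) {
	labels := []string{"status", "type"}
	if highCardinality {
		// One extra label, one extra time series per node ID: debug only.
		labels = append(labels, "id")
	}
	updatesSent = promauto.NewCounterVec(prometheus.CounterOpts{
		Namespace: "example",
		Name:      "updates_sent_total",
		Help:      "total count of updates sent",
	}, labels)
}

func main() {
	setup(false)
	updatesSent.WithLabelValues("ok", "full").Inc()
}
```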
@@ -231,62 +231,12 @@ func (ns *noiseServer) NoisePollNetMapHandler(
 
 		return
 	}
 
 	sess := ns.headscale.newMapSession(req.Context(), mapRequest, writer, node)
 
 	sess.tracef("a node sending a MapRequest with Noise protocol")
-	// If a streaming mapSession exists for this node, close it
-	// and start a new one.
-	if sess.isStreaming() {
-		sess.tracef("aquiring lock to check stream")
-
-		ns.headscale.mapSessionMu.Lock()
-		if _, ok := ns.headscale.mapSessions[node.ID]; ok {
-			// NOTE/TODO(kradalby): From how I understand the protocol, when
-			// a client connects with stream=true, and already has a streaming
-			// connection open, the correct way is to close the current channel
-			// and replace it. However, I cannot manage to get that working with
-			// some sort of lock/block happening on the cancelCh in the streaming
-			// session.
-			// Not closing the channel and replacing it puts us in a weird state
-			// which keeps a ghost stream open, receiving keep alives, but no updates.
-			//
-			// Typically a new connection is opened when one exists as a client which
-			// is already authenticated reconnects (e.g. down, then up). The client will
-			// start auth and streaming at the same time, and then cancel the streaming
-			// when the auth has finished successfully, opening a new connection.
-			//
-			// As a work-around to not replacing, abusing the clients "resilience"
-			// by reject the new connection which will cause the client to immediately
-			// reconnect and "fix" the issue, as the other connection typically has been
-			// closed, meaning there is nothing to replace.
-			//
-			// sess.infof("node has an open stream(%p), replacing with %p", oldSession, sess)
-			// oldSession.close()
-			defer ns.headscale.mapSessionMu.Unlock()
-
-			sess.infof("node has an open stream(%p), rejecting new stream", sess)
-			mapResponseRejected.WithLabelValues("exists").Inc()
-			return
-		}
-
-		ns.headscale.mapSessions[node.ID] = sess
-		mapResponseSessions.Inc()
-		ns.headscale.mapSessionMu.Unlock()
-		sess.tracef("releasing lock to check stream")
-	}
-
-	sess.serve()
-
-	if sess.isStreaming() {
-		sess.tracef("aquiring lock to remove stream")
-		ns.headscale.mapSessionMu.Lock()
-		defer ns.headscale.mapSessionMu.Unlock()
-
-		delete(ns.headscale.mapSessions, node.ID)
-		mapResponseSessions.Dec()
-
-		sess.tracef("releasing lock to remove stream")
-	}
+	if !sess.isStreaming() {
+		sess.serve()
+	} else {
+		sess.serveLongPoll()
+	}
 }
@@ -3,22 +3,43 @@ package notifier
 import (
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
+	"tailscale.com/envknob"
 )
 
 const prometheusNamespace = "headscale"
 
+var debugHighCardinalityMetrics = envknob.Bool("HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS")
+
+var notifierUpdateSent *prometheus.CounterVec
+
+func init() {
+	if debugHighCardinalityMetrics {
+		notifierUpdateSent = promauto.NewCounterVec(prometheus.CounterOpts{
+			Namespace: prometheusNamespace,
+			Name:      "notifier_update_sent_total",
+			Help:      "total count of update sent on nodes channel",
+		}, []string{"status", "type", "trigger", "id"})
+	} else {
+		notifierUpdateSent = promauto.NewCounterVec(prometheus.CounterOpts{
+			Namespace: prometheusNamespace,
+			Name:      "notifier_update_sent_total",
+			Help:      "total count of update sent on nodes channel",
+		}, []string{"status", "type", "trigger"})
+	}
+}
+
 var (
+	notifierWaitersForLock = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: prometheusNamespace,
+		Name:      "notifier_waiters_for_lock",
+		Help:      "gauge of waiters for the notifier lock",
+	}, []string{"type", "action"})
 	notifierWaitForLock = promauto.NewHistogramVec(prometheus.HistogramOpts{
 		Namespace: prometheusNamespace,
 		Name:      "notifier_wait_for_lock_seconds",
 		Help:      "histogram of time spent waiting for the notifier lock",
 		Buckets:   []float64{0.001, 0.01, 0.1, 0.3, 0.5, 1, 3, 5, 10},
 	}, []string{"action"})
-	notifierUpdateSent = promauto.NewCounterVec(prometheus.CounterOpts{
-		Namespace: prometheusNamespace,
-		Name:      "notifier_update_sent_total",
-		Help:      "total count of update sent on nodes channel",
-	}, []string{"status", "type", "trigger"})
 	notifierUpdateReceived = promauto.NewCounterVec(prometheus.CounterOpts{
 		Namespace: prometheusNamespace,
 		Name:      "notifier_update_received_total",
@@ -29,4 +50,19 @@ var (
 		Name: "notifier_open_channels_total",
 		Help: "total count open channels in notifier",
 	})
+	notifierBatcherWaitersForLock = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: prometheusNamespace,
+		Name:      "notifier_batcher_waiters_for_lock",
+		Help:      "gauge of waiters for the notifier batcher lock",
+	}, []string{"type", "action"})
+	notifierBatcherChanges = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: prometheusNamespace,
+		Name:      "notifier_batcher_changes_pending",
+		Help:      "gauge of full changes pending in the notifier batcher",
+	}, []string{})
+	notifierBatcherPatches = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Namespace: prometheusNamespace,
+		Name:      "notifier_batcher_patches_pending",
+		Help:      "gauge of patches pending in the notifier batcher",
+	}, []string{})
 )
@@ -11,25 +11,40 @@ import (
|
|||||||
"github.com/juanfont/headscale/hscontrol/types"
|
"github.com/juanfont/headscale/hscontrol/types"
|
||||||
"github.com/puzpuzpuz/xsync/v3"
|
"github.com/puzpuzpuz/xsync/v3"
|
||||||
"github.com/rs/zerolog/log"
|
"github.com/rs/zerolog/log"
|
||||||
|
"github.com/sasha-s/go-deadlock"
|
||||||
|
"tailscale.com/envknob"
|
||||||
"tailscale.com/tailcfg"
|
"tailscale.com/tailcfg"
|
||||||
"tailscale.com/util/set"
|
"tailscale.com/util/set"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var debugDeadlock = envknob.Bool("HEADSCALE_DEBUG_DEADLOCK")
|
||||||
|
var debugDeadlockTimeout = envknob.RegisterDuration("HEADSCALE_DEBUG_DEADLOCK_TIMEOUT")
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
deadlock.Opts.Disable = !debugDeadlock
|
||||||
|
if debugDeadlock {
|
||||||
|
deadlock.Opts.DeadlockTimeout = debugDeadlockTimeout()
|
||||||
|
deadlock.Opts.PrintAllCurrentGoroutines = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type Notifier struct {
|
type Notifier struct {
|
||||||
l sync.RWMutex
|
l deadlock.Mutex
|
||||||
nodes map[types.NodeID]chan<- types.StateUpdate
|
nodes map[types.NodeID]chan<- types.StateUpdate
|
||||||
connected *xsync.MapOf[types.NodeID, bool]
|
connected *xsync.MapOf[types.NodeID, bool]
|
||||||
b *batcher
|
b *batcher
|
||||||
|
cfg *types.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewNotifier(cfg *types.Config) *Notifier {
|
func NewNotifier(cfg *types.Config) *Notifier {
|
||||||
n := &Notifier{
|
n := &Notifier{
|
||||||
nodes: make(map[types.NodeID]chan<- types.StateUpdate),
|
nodes: make(map[types.NodeID]chan<- types.StateUpdate),
|
||||||
connected: xsync.NewMapOf[types.NodeID, bool](),
|
connected: xsync.NewMapOf[types.NodeID, bool](),
|
||||||
|
cfg: cfg,
|
||||||
}
|
}
|
||||||
b := newBatcher(cfg.Tuning.BatchChangeDelay, n)
|
b := newBatcher(cfg.Tuning.BatchChangeDelay, n)
|
||||||
n.b = b
|
n.b = b
|
||||||
// TODO(kradalby): clean this up
|
|
||||||
go b.doWork()
|
go b.doWork()
|
||||||
return n
|
return n
|
||||||
}
|
}
|
||||||
@@ -39,59 +54,75 @@ func (n *Notifier) Close() {
|
|||||||
n.b.close()
|
n.b.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *Notifier) AddNode(nodeID types.NodeID, c chan<- types.StateUpdate) {
|
func (n *Notifier) tracef(nID types.NodeID, msg string, args ...any) {
|
||||||
log.Trace().Caller().Uint64("node.id", nodeID.Uint64()).Msg("acquiring lock to add node")
|
log.Trace().
|
||||||
defer log.Trace().
|
Uint64("node.id", nID.Uint64()).
|
||||||
Caller().
|
Int("open_chans", len(n.nodes)).Msgf(msg, args...)
|
||||||
Uint64("node.id", nodeID.Uint64()).
|
}
|
||||||
Msg("releasing lock to add node")
|
|
||||||
|
|
||||||
|
func (n *Notifier) AddNode(nodeID types.NodeID, c chan<- types.StateUpdate) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
notifierWaitersForLock.WithLabelValues("lock", "add").Inc()
|
||||||
n.l.Lock()
|
n.l.Lock()
|
||||||
defer n.l.Unlock()
|
defer n.l.Unlock()
|
||||||
|
notifierWaitersForLock.WithLabelValues("lock", "add").Dec()
|
||||||
notifierWaitForLock.WithLabelValues("add").Observe(time.Since(start).Seconds())
|
notifierWaitForLock.WithLabelValues("add").Observe(time.Since(start).Seconds())
|
||||||
|
|
||||||
|
// If a channel exists, it means the node has opened a new
|
||||||
|
// connection. Close the old channel and replace it.
|
||||||
|
if curr, ok := n.nodes[nodeID]; ok {
|
||||||
|
n.tracef(nodeID, "channel present, closing and replacing")
|
||||||
|
close(curr)
|
||||||
|
}
|
||||||
|
|
||||||
n.nodes[nodeID] = c
|
n.nodes[nodeID] = c
|
||||||
n.connected.Store(nodeID, true)
|
n.connected.Store(nodeID, true)
|
||||||
|
|
||||||
log.Trace().
|
n.tracef(nodeID, "added new channel")
|
||||||
Uint64("node.id", nodeID.Uint64()).
|
|
||||||
Int("open_chans", len(n.nodes)).
|
|
||||||
Msg("Added new channel")
|
|
||||||
notifierNodeUpdateChans.Inc()
|
notifierNodeUpdateChans.Inc()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (n *Notifier) RemoveNode(nodeID types.NodeID) {
|
// RemoveNode removes a node and a given channel from the notifier.
|
||||||
log.Trace().Caller().Uint64("node.id", nodeID.Uint64()).Msg("acquiring lock to remove node")
|
// It checks that the channel is the same as currently being updated
|
||||||
defer log.Trace().
|
// and ignores the removal if it is not.
|
||||||
Caller().
|
// RemoveNode reports if the node/chan was removed.
|
||||||
Uint64("node.id", nodeID.Uint64()).
|
func (n *Notifier) RemoveNode(nodeID types.NodeID, c chan<- types.StateUpdate) bool {
|
||||||
Msg("releasing lock to remove node")
|
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
notifierWaitersForLock.WithLabelValues("lock", "remove").Inc()
|
||||||
n.l.Lock()
|
n.l.Lock()
|
||||||
defer n.l.Unlock()
|
defer n.l.Unlock()
|
||||||
|
notifierWaitersForLock.WithLabelValues("lock", "remove").Dec()
|
||||||
notifierWaitForLock.WithLabelValues("remove").Observe(time.Since(start).Seconds())
|
notifierWaitForLock.WithLabelValues("remove").Observe(time.Since(start).Seconds())
|
||||||
|
|
||||||
if len(n.nodes) == 0 {
|
if len(n.nodes) == 0 {
|
||||||
return
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the channel exist, but it does not belong
|
||||||
|
// to the caller, ignore.
|
||||||
|
if curr, ok := n.nodes[nodeID]; ok {
|
||||||
|
if curr != c {
|
||||||
|
n.tracef(nodeID, "channel has been replaced, not removing")
|
||||||
|
return false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
delete(n.nodes, nodeID)
|
delete(n.nodes, nodeID)
|
||||||
n.connected.Store(nodeID, false)
|
n.connected.Store(nodeID, false)
|
||||||
|
|
||||||
log.Trace().
|
n.tracef(nodeID, "removed channel")
|
||||||
Uint64("node.id", nodeID.Uint64()).
|
|
||||||
Int("open_chans", len(n.nodes)).
|
|
||||||
Msg("Removed channel")
|
|
||||||
notifierNodeUpdateChans.Dec()
|
notifierNodeUpdateChans.Dec()
|
||||||
|
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsConnected reports if a node is connected to headscale and has a
|
// IsConnected reports if a node is connected to headscale and has a
|
||||||
// poll session open.
|
// poll session open.
|
||||||
func (n *Notifier) IsConnected(nodeID types.NodeID) bool {
|
func (n *Notifier) IsConnected(nodeID types.NodeID) bool {
|
||||||
n.l.RLock()
|
notifierWaitersForLock.WithLabelValues("lock", "conncheck").Inc()
|
||||||
defer n.l.RUnlock()
|
n.l.Lock()
|
||||||
|
defer n.l.Unlock()
|
||||||
|
notifierWaitersForLock.WithLabelValues("lock", "conncheck").Dec()
|
||||||
|
|
||||||
if val, ok := n.connected.Load(nodeID); ok {
|
if val, ok := n.connected.Load(nodeID); ok {
|
||||||
return val
|
return val
|
||||||
@@ -130,15 +161,11 @@ func (n *Notifier) NotifyByNodeID(
|
|||||||
update types.StateUpdate,
|
update types.StateUpdate,
|
||||||
nodeID types.NodeID,
|
nodeID types.NodeID,
|
||||||
) {
|
) {
|
||||||
log.Trace().Caller().Str("type", update.Type.String()).Msg("acquiring lock to notify")
|
|
||||||
defer log.Trace().
|
|
||||||
Caller().
|
|
||||||
Str("type", update.Type.String()).
|
|
||||||
Msg("releasing lock, finished notifying")
|
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
n.l.RLock()
|
notifierWaitersForLock.WithLabelValues("lock", "notify").Inc()
|
||||||
defer n.l.RUnlock()
|
n.l.Lock()
|
||||||
|
defer n.l.Unlock()
|
||||||
|
notifierWaitersForLock.WithLabelValues("lock", "notify").Dec()
|
||||||
notifierWaitForLock.WithLabelValues("notify").Observe(time.Since(start).Seconds())
|
notifierWaitForLock.WithLabelValues("notify").Observe(time.Since(start).Seconds())
|
||||||
|
|
||||||
if c, ok := n.nodes[nodeID]; ok {
|
if c, ok := n.nodes[nodeID]; ok {
|
||||||
@@ -150,50 +177,94 @@ func (n *Notifier) NotifyByNodeID(
 				Any("origin", types.NotifyOriginKey.Value(ctx)).
 				Any("origin-hostname", types.NotifyHostnameKey.Value(ctx)).
 				Msgf("update not sent, context cancelled")
-			notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
+			if debugHighCardinalityMetrics {
+				notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), types.NotifyOriginKey.Value(ctx), nodeID.String()).Inc()
+			} else {
+				notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
+			}
 
 			return
 		case c <- update:
-			log.Trace().
-				Uint64("node.id", nodeID.Uint64()).
-				Any("origin", ctx.Value("origin")).
-				Any("origin-hostname", ctx.Value("hostname")).
-				Msgf("update successfully sent on chan")
-			notifierUpdateSent.WithLabelValues("ok", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
+			n.tracef(nodeID, "update successfully sent on chan, origin: %s, origin-hostname: %s", ctx.Value("origin"), ctx.Value("hostname"))
+			if debugHighCardinalityMetrics {
+				notifierUpdateSent.WithLabelValues("ok", update.Type.String(), types.NotifyOriginKey.Value(ctx), nodeID.String()).Inc()
+			} else {
+				notifierUpdateSent.WithLabelValues("ok", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
+			}
 		}
 	}
 }
 
 func (n *Notifier) sendAll(update types.StateUpdate) {
 	start := time.Now()
-	n.l.RLock()
-	defer n.l.RUnlock()
+	notifierWaitersForLock.WithLabelValues("lock", "send-all").Inc()
+	n.l.Lock()
+	defer n.l.Unlock()
+	notifierWaitersForLock.WithLabelValues("lock", "send-all").Dec()
 	notifierWaitForLock.WithLabelValues("send-all").Observe(time.Since(start).Seconds())
 
-	for _, c := range n.nodes {
-		c <- update
-		notifierUpdateSent.WithLabelValues("ok", update.Type.String(), "send-all").Inc()
+	for id, c := range n.nodes {
+		// Whenever an update is sent to all nodes, there is a chance that the node
+		// has disconnected and the goroutine that was supposed to consume the update
+		// has shut down the channel and is waiting for the lock held here in RemoveNode.
+		// This means that there is potential for a deadlock which would stop all updates
+		// going out to clients. This timeout prevents that from happening by moving on to the
+		// next node if the context is cancelled. After sendAll releases the lock, the add/remove
+		// call will succeed and the update will go to the correct nodes on the next call.
+		ctx, cancel := context.WithTimeout(context.Background(), n.cfg.Tuning.NotifierSendTimeout)
+		defer cancel()
+		select {
+		case <-ctx.Done():
+			log.Error().
+				Err(ctx.Err()).
+				Uint64("node.id", id.Uint64()).
+				Msgf("update not sent, context cancelled")
+			if debugHighCardinalityMetrics {
+				notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), "send-all", id.String()).Inc()
+			} else {
+				notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), "send-all").Inc()
+			}
+
+			return
+		case c <- update:
+			if debugHighCardinalityMetrics {
+				notifierUpdateSent.WithLabelValues("ok", update.Type.String(), "send-all", id.String()).Inc()
+			} else {
+				notifierUpdateSent.WithLabelValues("ok", update.Type.String(), "send-all").Inc()
+			}
+		}
 	}
 }
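The comment in the hunk above explains the deadlock this timeout breaks: a receiver that has gone away can leave the sender blocked forever while it holds the notifier lock. The pattern in isolation, as a simplified sketch (function and channel names are illustrative only):

```go
package main

import (
	"fmt"
	"time"
)

// sendWithTimeout gives up after a deadline instead of blocking forever,
// so the caller can release its lock and let the stuck peer clean up.
func sendWithTimeout(c chan<- string, msg string, timeout time.Duration) bool {
	select {
	case c <- msg:
		return true
	case <-time.After(timeout):
		// Receiver did not consume in time; drop and move on.
		return false
	}
}

func main() {
	live := make(chan string, 1)
	stuck := make(chan string) // unbuffered, nobody reading

	fmt.Println(sendWithTimeout(live, "update", 100*time.Millisecond))  // true
	fmt.Println(sendWithTimeout(stuck, "update", 100*time.Millisecond)) // false
}
```

One design note on the hunk itself: it defers cancel() inside the loop, so each timer's resources are only released when sendAll returns; a per-iteration cancel call (or a plain time.After, as sketched) would free them sooner, which presumably matters little here since the node count per call is bounded.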
 
 func (n *Notifier) String() string {
-	n.l.RLock()
-	defer n.l.RUnlock()
+	notifierWaitersForLock.WithLabelValues("lock", "string").Inc()
+	n.l.Lock()
+	defer n.l.Unlock()
+	notifierWaitersForLock.WithLabelValues("lock", "string").Dec()
 
 	var b strings.Builder
-	b.WriteString("chans:\n")
+	fmt.Fprintf(&b, "chans (%d):\n", len(n.nodes))
 
-	for k, v := range n.nodes {
-		fmt.Fprintf(&b, "\t%d: %p\n", k, v)
+	var keys []types.NodeID
+	n.connected.Range(func(key types.NodeID, value bool) bool {
+		keys = append(keys, key)
+		return true
+	})
+	sort.Slice(keys, func(i, j int) bool {
+		return keys[i] < keys[j]
+	})
+
+	for _, key := range keys {
+		fmt.Fprintf(&b, "\t%d: %p\n", key, n.nodes[key])
 	}
 
 	b.WriteString("\n")
-	b.WriteString("connected:\n")
+	fmt.Fprintf(&b, "connected (%d):\n", len(n.nodes))
 
-	n.connected.Range(func(k types.NodeID, v bool) bool {
-		fmt.Fprintf(&b, "\t%d: %t\n", k, v)
-		return true
-	})
+	for _, key := range keys {
+		val, _ := n.connected.Load(key)
+		fmt.Fprintf(&b, "\t%d: %t\n", key, val)
+	}
 
 	return b.String()
 }
@@ -230,13 +301,16 @@ func (b *batcher) close() {
 // addOrPassthrough adds the update to the batcher, if it is not a
 // type that is currently batched, it will be sent immediately.
 func (b *batcher) addOrPassthrough(update types.StateUpdate) {
+	notifierBatcherWaitersForLock.WithLabelValues("lock", "add").Inc()
 	b.mu.Lock()
 	defer b.mu.Unlock()
+	notifierBatcherWaitersForLock.WithLabelValues("lock", "add").Dec()
 
 	switch update.Type {
 	case types.StatePeerChanged:
 		b.changedNodeIDs.Add(update.ChangeNodes...)
 		b.nodesChanged = true
+		notifierBatcherChanges.WithLabelValues().Set(float64(b.changedNodeIDs.Len()))
 
 	case types.StatePeerChangedPatch:
 		for _, newPatch := range update.ChangePatches {
@@ -248,6 +322,7 @@ func (b *batcher) addOrPassthrough(update types.StateUpdate) {
 			}
 		}
 		b.patchesChanged = true
+		notifierBatcherPatches.WithLabelValues().Set(float64(len(b.patches)))
 
 	default:
 		b.n.sendAll(update)
@@ -257,8 +332,10 @@ func (b *batcher) addOrPassthrough(update types.StateUpdate) {
 // flush sends all the accumulated patches to all
 // nodes in the notifier.
 func (b *batcher) flush() {
+	notifierBatcherWaitersForLock.WithLabelValues("lock", "flush").Inc()
 	b.mu.Lock()
 	defer b.mu.Unlock()
+	notifierBatcherWaitersForLock.WithLabelValues("lock", "flush").Dec()
 
 	if b.nodesChanged || b.patchesChanged {
 		var patches []*tailcfg.PeerChange
@@ -296,8 +373,10 @@ func (b *batcher) flush() {
 		}
 
 		b.changedNodeIDs = set.Slice[types.NodeID]{}
+		notifierBatcherChanges.WithLabelValues().Set(0)
 		b.nodesChanged = false
 		b.patches = make(map[types.NodeID]tailcfg.PeerChange, len(b.patches))
+		notifierBatcherPatches.WithLabelValues().Set(0)
 		b.patchesChanged = false
 	}
 }
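The Inc/Dec pairs wrapped around every Lock() in these hunks expose lock contention: the gauge counts goroutines currently waiting, and a histogram records how long each wait took. A standalone sketch of that instrumentation shape, assuming the standard Prometheus Go client (metric names here are illustrative, not headscale's):

```go
package main

import (
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	waiters = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "example_waiters_for_lock",
	}, []string{"action"})
	waitFor = prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name: "example_wait_for_lock_seconds",
	}, []string{"action"})
)

type guarded struct {
	mu sync.Mutex
}

// do runs fn under the mutex while reporting how many goroutines are
// queued behind the lock and how long this caller waited for it.
func (g *guarded) do(action string, fn func()) {
	start := time.Now()
	waiters.WithLabelValues(action).Inc() // waiting begins
	g.mu.Lock()
	defer g.mu.Unlock()
	waiters.WithLabelValues(action).Dec() // lock acquired
	waitFor.WithLabelValues(action).Observe(time.Since(start).Seconds())

	fn()
}

func main() {
	prometheus.MustRegister(waiters, waitFor)
	g := &guarded{}
	g.do("add", func() { time.Sleep(10 * time.Millisecond) })
}
```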
@@ -227,7 +227,7 @@ func TestBatcher(t *testing.T) {
 			ch := make(chan types.StateUpdate, 30)
 			defer close(ch)
 			n.AddNode(1, ch)
-			defer n.RemoveNode(1)
+			defer n.RemoveNode(1, ch)
 
 			for _, u := range tt.updates {
 				n.NotifyAll(context.Background(), u)
@@ -532,7 +532,7 @@ func (s *Suite) TestRuleInvalidGeneration(c *check.C) {
 			"example-host-2:80"
 		],
 		"deny": [
-			"exapmle-host-2:100"
+			"example-host-2:100"
 		],
 	},
 	{
@@ -635,7 +635,7 @@ func Test_expandGroup(t *testing.T) {
 			wantErr: false,
 		},
 		{
-			name: "InexistantGroup",
+			name: "InexistentGroup",
 			field: field{
 				pol: ACLPolicy{
 					Groups: Groups{
@@ -2604,7 +2604,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
 		{
 			name: "all hosts can talk to each other",
 			args: args{
-				nodes: types.Nodes{ // list of all nodess in the database
+				nodes: types.Nodes{ // list of all nodes in the database
 					&types.Node{
 						ID:   1,
 						IPv4: iap("100.64.0.1"),
@@ -2651,7 +2651,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
 		{
 			name: "One host can talk to another, but not all hosts",
 			args: args{
-				nodes: types.Nodes{ // list of all nodess in the database
+				nodes: types.Nodes{ // list of all nodes in the database
 					&types.Node{
 						ID:   1,
 						IPv4: iap("100.64.0.1"),
@@ -2693,7 +2693,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
 		{
 			name: "host cannot directly talk to destination, but return path is authorized",
 			args: args{
-				nodes: types.Nodes{ // list of all nodess in the database
+				nodes: types.Nodes{ // list of all nodes in the database
 					&types.Node{
 						ID:   1,
 						IPv4: iap("100.64.0.1"),
@@ -2735,7 +2735,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
 		{
 			name: "rules allows all hosts to reach one destination",
 			args: args{
-				nodes: types.Nodes{ // list of all nodess in the database
+				nodes: types.Nodes{ // list of all nodes in the database
 					&types.Node{
 						ID:   1,
 						IPv4: iap("100.64.0.1"),
@@ -2777,7 +2777,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
 		{
 			name: "rules allows all hosts to reach one destination, destination can reach all hosts",
 			args: args{
-				nodes: types.Nodes{ // list of all nodess in the database
+				nodes: types.Nodes{ // list of all nodes in the database
 					&types.Node{
 						ID:   1,
 						IPv4: iap("100.64.0.1"),
@@ -2824,7 +2824,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
 		{
 			name: "rule allows all hosts to reach all destinations",
 			args: args{
-				nodes: types.Nodes{ // list of all nodess in the database
+				nodes: types.Nodes{ // list of all nodes in the database
 					&types.Node{
 						ID:   1,
 						IPv4: iap("100.64.0.1"),
@@ -2871,7 +2871,7 @@ func Test_getFilteredByACLPeers(t *testing.T) {
 		{
 			name: "without rule all communications are forbidden",
 			args: args{
-				nodes: types.Nodes{ // list of all nodess in the database
+				nodes: types.Nodes{ // list of all nodes in the database
 					&types.Node{
 						ID:   1,
 						IPv4: iap("100.64.0.1"),
@@ -9,13 +9,13 @@ import (
 	"net/netip"
 	"sort"
 	"strings"
-	"sync"
 	"time"
 
 	"github.com/juanfont/headscale/hscontrol/db"
 	"github.com/juanfont/headscale/hscontrol/mapper"
 	"github.com/juanfont/headscale/hscontrol/types"
 	"github.com/rs/zerolog/log"
+	"github.com/sasha-s/go-deadlock"
 	xslices "golang.org/x/exp/slices"
 	"gorm.io/gorm"
 	"tailscale.com/tailcfg"
@@ -29,11 +29,6 @@ type contextKey string
 
 const nodeNameContextKey = contextKey("nodeName")
 
-type sessionManager struct {
-	mu   sync.RWMutex
-	sess map[types.NodeID]*mapSession
-}
-
 type mapSession struct {
 	h   *Headscale
 	req tailcfg.MapRequest
@@ -41,12 +36,13 @@ type mapSession struct {
 	capVer tailcfg.CapabilityVersion
 	mapper *mapper.Mapper
 
-	serving   bool
-	servingMu sync.Mutex
+	cancelChMu deadlock.Mutex
 
 	ch           chan types.StateUpdate
 	cancelCh     chan struct{}
+	cancelChOpen bool
+
+	keepAlive       time.Duration
 	keepAliveTicker *time.Ticker
 
 	node *types.Node
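The sync.Mutex here becomes a deadlock.Mutex from github.com/sasha-s/go-deadlock, the package added to the imports above. It is a drop-in replacement that reports lock-order inversions and locks held past a configurable timeout. A minimal sketch of how it is typically configured and used (values illustrative; the real knobs are wired up via environment variables in the integration harness further down):

```go
package main

import (
	"fmt"
	"time"

	"github.com/sasha-s/go-deadlock"
)

func main() {
	// Tune detection: report any lock held longer than five seconds.
	deadlock.Opts.DeadlockTimeout = 5 * time.Second
	deadlock.Opts.OnPotentialDeadlock = func() {
		fmt.Println("potential deadlock detected")
	}

	var mu deadlock.Mutex // same API as sync.Mutex
	mu.Lock()
	defer mu.Unlock()
	fmt.Println("locked without incident")
}
```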
@@ -77,6 +73,8 @@ func (h *Headscale) newMapSession(
 		}
 	}
 
+	ka := keepAliveInterval + (time.Duration(rand.IntN(9000)) * time.Millisecond)
+
 	return &mapSession{
 		h:   h,
 		ctx: ctx,
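The ka calculation above adds up to nine seconds of random jitter to the keepalive interval so that many clients connected at the same moment do not all tick in lockstep. A small demonstration of the spread, using math/rand/v2 as the hunk does (the 50s base is an illustrative stand-in for the real keepAliveInterval constant):

```go
package main

import (
	"fmt"
	"math/rand/v2"
	"time"
)

const keepAliveInterval = 50 * time.Second // illustrative value

func main() {
	for i := 0; i < 3; i++ {
		// Each session gets its own offset in [0s, 9s).
		ka := keepAliveInterval + (time.Duration(rand.IntN(9000)) * time.Millisecond)
		fmt.Println(ka) // somewhere in [50s, 59s)
	}
}
```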
@@ -86,13 +84,12 @@ func (h *Headscale) newMapSession(
 		capVer: req.Version,
 		mapper: h.mapper,
 
-		// serving indicates if a client is being served.
-		serving: false,
-
-		ch:       updateChan,
-		cancelCh: make(chan struct{}),
-
-		keepAliveTicker: time.NewTicker(keepAliveInterval + (time.Duration(rand.IntN(9000)) * time.Millisecond)),
+		ch:           updateChan,
+		cancelCh:     make(chan struct{}),
+		cancelChOpen: true,
+
+		keepAlive:       ka,
+		keepAliveTicker: nil,
 
 		// Loggers
 		warnf: warnf,
@@ -103,15 +100,23 @@ func (h *Headscale) newMapSession(
 }
 
 func (m *mapSession) close() {
-	m.servingMu.Lock()
-	defer m.servingMu.Unlock()
-	if !m.serving {
+	m.cancelChMu.Lock()
+	defer m.cancelChMu.Unlock()
+
+	if !m.cancelChOpen {
+		mapResponseClosed.WithLabelValues("chanclosed").Inc()
 		return
 	}
 
-	m.tracef("mapSession (%p) sending message on cancel chan")
-	m.cancelCh <- struct{}{}
-	m.tracef("mapSession (%p) sent message on cancel chan")
+	m.tracef("mapSession (%p) sending message on cancel chan", m)
+	select {
+	case m.cancelCh <- struct{}{}:
+		mapResponseClosed.WithLabelValues("sent").Inc()
+		m.tracef("mapSession (%p) sent message on cancel chan", m)
+	case <-time.After(30 * time.Second):
+		mapResponseClosed.WithLabelValues("timeout").Inc()
+		m.tracef("mapSession (%p) timed out sending close message", m)
+	}
 }
 
 func (m *mapSession) isStreaming() bool {
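The rewritten close() pairs a mutex-protected "open" flag with a bounded send, so a late close neither panics on an already-closed cancel channel nor blocks forever if nobody is listening. A condensed sketch of that shape, with simplified names and a shorter timeout than the 30 seconds used above:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

type session struct {
	mu       sync.Mutex
	open     bool
	cancelCh chan struct{}
}

// close signals the consumer, but gives up after a deadline and
// short-circuits entirely if the consumer already shut the channel down.
func (s *session) close() {
	s.mu.Lock()
	defer s.mu.Unlock()
	if !s.open {
		return // consumer already closed the channel; sending would panic
	}
	select {
	case s.cancelCh <- struct{}{}:
	case <-time.After(time.Second): // bounded, unlike a bare send
	}
}

// shutdown is the consumer side: flip the flag, then close.
func (s *session) shutdown() {
	s.mu.Lock()
	s.open = false
	close(s.cancelCh)
	s.mu.Unlock()
}

func main() {
	s := &session{open: true, cancelCh: make(chan struct{}, 1)}
	s.close()
	<-s.cancelCh // the queued cancel signal
	fmt.Println("cancel delivered")
}
```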
@@ -126,40 +131,12 @@ func (m *mapSession) isReadOnlyUpdate() bool {
 	return !m.req.Stream && m.req.OmitPeers && m.req.ReadOnly
 }
 
-// handlePoll ensures the node gets the appropriate updates from either
-// polling or immediate responses.
-//
-//nolint:gocyclo
+func (m *mapSession) resetKeepAlive() {
+	m.keepAliveTicker.Reset(m.keepAlive)
+}
+
+// serve handles non-streaming requests.
 func (m *mapSession) serve() {
-	// Register with the notifier if this is a streaming
-	// session
-	if m.isStreaming() {
-		// defers are called in reverse order,
-		// so top one is executed last.
-
-		// Failover the node's routes if any.
-		defer m.infof("node has disconnected, mapSession: %p", m)
-		defer m.pollFailoverRoutes("node closing connection", m.node)
-
-		defer m.h.updateNodeOnlineStatus(false, m.node)
-		defer m.h.nodeNotifier.RemoveNode(m.node.ID)
-
-		defer func() {
-			m.servingMu.Lock()
-			defer m.servingMu.Unlock()
-
-			m.serving = false
-			close(m.cancelCh)
-		}()
-
-		m.serving = true
-
-		m.h.nodeNotifier.AddNode(m.node.ID, m.ch)
-		m.h.updateNodeOnlineStatus(true, m.node)
-
-		m.infof("node has connected, mapSession: %p", m)
-	}
-
 	// TODO(kradalby): A set todos to harden:
 	// - func to tell the stream to die, readonly -> false, !stream && omitpeers -> false, true
 
@@ -196,13 +173,43 @@ func (m *mapSession) serve() {
 		return
 	}
 
+}
+
+// serveLongPoll ensures the node gets the appropriate updates from either
+// polling or immediate responses.
+//
+//nolint:gocyclo
+func (m *mapSession) serveLongPoll() {
+	// Clean up the session when the client disconnects
+	defer func() {
+		m.cancelChMu.Lock()
+		m.cancelChOpen = false
+		close(m.cancelCh)
+		m.cancelChMu.Unlock()
+
+		// Only update node status if the node channel was removed.
+		// In principle it will be removed, but if the client rapidly
+		// reconnects, the channel might be that of another connection.
+		// In that case, it is not closed and the node is still online.
+		if m.h.nodeNotifier.RemoveNode(m.node.ID, m.ch) {
+			// Failover the node's routes if any.
+			m.h.updateNodeOnlineStatus(false, m.node)
+			m.pollFailoverRoutes("node closing connection", m.node)
+		}
+
+		m.infof("node has disconnected, mapSession: %p, chan: %p", m, m.ch)
+	}()
+
 	// From version 68, all streaming requests can be treated as read only.
+	// TODO: Remove when we drop support for 1.48
 	if m.capVer < 68 {
 		// Error has been handled/written to client in the func
 		// return
 		err := m.handleSaveNode()
 		if err != nil {
 			mapResponseWriteUpdatesInStream.WithLabelValues("error").Inc()
+
+			m.close()
 			return
 		}
 		mapResponseWriteUpdatesInStream.WithLabelValues("ok").Inc()
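The old serve() stacked five separate defers and relied on their reverse-order execution; the new serveLongPoll() folds teardown into a single deferred closure, which makes the ordering explicit and lets the offline-status update be gated on RemoveNode's return value. As a quick reminder of the LIFO rule the old code depended on:

```go
package main

import "fmt"

func main() {
	defer fmt.Println("3: log disconnect")  // registered first, runs last
	defer fmt.Println("2: failover routes") // runs second
	defer fmt.Println("1: remove channel")  // registered last, runs first
}
```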
@@ -224,6 +231,13 @@ func (m *mapSession) serve() {
 	ctx, cancel := context.WithCancel(context.WithValue(m.ctx, nodeNameContextKey, m.node.Hostname))
 	defer cancel()
 
+	m.keepAliveTicker = time.NewTicker(m.keepAlive)
+
+	m.h.nodeNotifier.AddNode(m.node.ID, m.ch)
+	go m.h.updateNodeOnlineStatus(true, m.node)
+
+	m.infof("node has connected, mapSession: %p, chan: %p", m, m.ch)
+
 	// Loop through updates and continuously send them to the
 	// client.
 	for {
@@ -231,13 +245,21 @@ func (m *mapSession) serve() {
 		select {
 		case <-m.cancelCh:
 			m.tracef("poll cancelled received")
+			mapResponseEnded.WithLabelValues("cancelled").Inc()
 			return
+
 		case <-ctx.Done():
 			m.tracef("poll context done")
+			mapResponseEnded.WithLabelValues("done").Inc()
 			return
 
-		// Consume all updates sent to node
-		case update := <-m.ch:
+		// Consume updates sent to node
+		case update, ok := <-m.ch:
+			if !ok {
+				m.tracef("update channel closed, streaming session is likely being replaced")
+				return
+			}
+
 			m.tracef("received stream update: %s %s", update.Type.String(), update.Message)
 			mapResponseUpdateReceived.WithLabelValues(update.Type.String()).Inc()
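The switch from `update := <-m.ch` to `update, ok := <-m.ch` matters because a receive from a closed channel succeeds immediately with the zero value; without the ok check, a session whose channel was closed by its replacement would spin on empty updates instead of exiting. In isolation:

```go
package main

import "fmt"

func main() {
	ch := make(chan string, 1)
	ch <- "real update"
	close(ch)

	for {
		update, ok := <-ch
		if !ok {
			// Closed channel: receive returns ("", false) without blocking.
			fmt.Println("channel closed, stop streaming")
			return
		}
		fmt.Println("got:", update)
	}
}
```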
@@ -303,15 +325,13 @@ func (m *mapSession) serve() {
 				return
 			}
 
-			// log.Trace().Str("node", m.node.Hostname).TimeDiff("timeSpent", time.Now(), startMapResp).Str("mkey", m.node.MachineKey.String()).Int("type", int(update.Type)).Msg("finished making map response")
-
 			// Only send update if there is change
 			if data != nil {
 				startWrite := time.Now()
 				_, err = m.w.Write(data)
 				if err != nil {
 					mapResponseSent.WithLabelValues("error", updateType).Inc()
-					m.errf(err, "Could not write the map response, for mapSession: %p", m)
+					m.errf(err, "could not write the map response(%s), for mapSession: %p", update.Type.String(), m)
 					return
 				}
@@ -324,8 +344,12 @@ func (m *mapSession) serve() {
 
 				log.Trace().Str("node", m.node.Hostname).TimeDiff("timeSpent", time.Now(), startWrite).Str("mkey", m.node.MachineKey.String()).Msg("finished writing mapresp to node")
 
+				if debugHighCardinalityMetrics {
+					mapResponseLastSentSeconds.WithLabelValues(updateType, m.node.ID.String()).Set(float64(time.Now().Unix()))
+				}
 				mapResponseSent.WithLabelValues("ok", updateType).Inc()
 				m.tracef("update sent")
+				m.resetKeepAlive()
 			}
 
 		case <-m.keepAliveTicker.C:
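The new m.resetKeepAlive() call (defined earlier in this diff as a thin wrapper over time.Ticker.Reset) pushes the next keepalive out whenever a real map response was just written, so idle connections still get pinged but busy ones are not double-written. A self-contained sketch of that select-plus-Reset shape, with compressed timings:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	interval := 200 * time.Millisecond
	t := time.NewTicker(interval)
	defer t.Stop()

	work := time.After(150 * time.Millisecond)     // stand-in for a real update
	deadline := time.After(500 * time.Millisecond) // end the demo
	for {
		select {
		case <-work:
			fmt.Println("wrote real update; postponing keepalive")
			t.Reset(interval) // next tick is a full interval from now
		case <-t.C:
			fmt.Println("keepalive")
		case <-deadline:
			return
		}
	}
}
```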
@@ -348,6 +372,9 @@ func (m *mapSession) serve() {
 				return
 			}
 
+			if debugHighCardinalityMetrics {
+				mapResponseLastSentSeconds.WithLabelValues("keepalive", m.node.ID.String()).Set(float64(time.Now().Unix()))
+			}
 			mapResponseSent.WithLabelValues("ok", "keepalive").Inc()
 		}
 	}
@@ -404,16 +431,6 @@ func (h *Headscale) updateNodeOnlineStatus(online bool, node *types.Node) {
 	}, node.ID)
 }
 
-func closeChanWithLog[C chan []byte | chan struct{} | chan types.StateUpdate](channel C, node, name string) {
-	log.Trace().
-		Str("handler", "PollNetMap").
-		Str("node", node).
-		Str("channel", "Done").
-		Msg(fmt.Sprintf("Closing %s channel", name))
-
-	close(channel)
-}
-
 func (m *mapSession) handleEndpointUpdate() {
 	m.tracef("received endpoint update")
 
@@ -425,6 +442,17 @@ func (m *mapSession) handleEndpointUpdate() {
 	m.node.ApplyPeerChange(&change)
 
 	sendUpdate, routesChanged := hostInfoChanged(m.node.Hostinfo, m.req.Hostinfo)
+
+	// The node might not set NetInfo if it has not changed and if
+	// the full HostInfo object is overwritten, the information is lost.
+	// If there is no NetInfo, keep the previous one.
+	// From 1.66 the client only sends it if changed:
+	// https://github.com/tailscale/tailscale/commit/e1011f138737286ecf5123ff887a7a5800d129a2
+	// TODO(kradalby): evaluate if we need better comparing of hostinfo
+	// before we take the changes.
+	if m.req.Hostinfo.NetInfo == nil {
+		m.req.Hostinfo.NetInfo = m.node.Hostinfo.NetInfo
+	}
 	m.node.Hostinfo = m.req.Hostinfo
 
 	logTracePeerChange(m.node.Hostname, sendUpdate, &change)
@@ -171,6 +171,7 @@ type LogConfig struct {
 }
 
 type Tuning struct {
+	NotifierSendTimeout            time.Duration
 	BatchChangeDelay               time.Duration
 	NodeMapSessionBufferedChanSize int
 }
@@ -232,6 +233,7 @@ func LoadConfig(path string, isFile bool) error {
 
 	viper.SetDefault("ephemeral_node_inactivity_timeout", "120s")
 
+	viper.SetDefault("tuning.notifier_send_timeout", "800ms")
 	viper.SetDefault("tuning.batch_change_delay", "800ms")
 	viper.SetDefault("tuning.node_mapsession_buffered_chan_size", 30)
 
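This default is what the sendAll timeout earlier in the diff falls back to when the operator sets nothing. The resolution order is the usual viper one: an explicit value from the config file or environment wins, otherwise the SetDefault value applies. A standalone sketch (the env-variable spelling depends on headscale's env binding, so treat it as an assumption):

```go
package main

import (
	"fmt"

	"github.com/spf13/viper"
)

func main() {
	viper.SetDefault("tuning.notifier_send_timeout", "800ms")

	// An override from file or env (e.g. tuning.notifier_send_timeout: 2s
	// in the YAML config) would win; with none, the default applies.
	timeout := viper.GetDuration("tuning.notifier_send_timeout")
	fmt.Println(timeout) // 800ms
}
```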
@@ -640,7 +642,7 @@ func GetHeadscaleConfig() (*Config, error) {
 		}, nil
 	}
 
 	logConfig := GetLogConfig()
 	zerolog.SetGlobalLevel(logConfig.Level)
 
 	prefix4, err := PrefixV4()
@@ -768,6 +770,7 @@ func GetHeadscaleConfig() (*Config, error) {
 
 		// TODO(kradalby): Document these settings when more stable
 		Tuning: Tuning{
+			NotifierSendTimeout:            viper.GetDuration("tuning.notifier_send_timeout"),
 			BatchChangeDelay:               viper.GetDuration("tuning.batch_change_delay"),
 			NodeMapSessionBufferedChanSize: viper.GetInt("tuning.node_mapsession_buffered_chan_size"),
 		},
@@ -43,6 +43,10 @@ func (id NodeID) Uint64() uint64 {
 	return uint64(id)
 }
 
+func (id NodeID) String() string {
+	return strconv.FormatUint(id.Uint64(), util.Base10)
+}
+
 // Node is a Headscale client.
 type Node struct {
 	ID NodeID `gorm:"primary_key"`
@@ -1,6 +1,7 @@
 package integration
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"net/netip"
@@ -15,6 +16,7 @@ import (
 	"github.com/rs/zerolog/log"
 	"github.com/samber/lo"
 	"github.com/stretchr/testify/assert"
+	"golang.org/x/sync/errgroup"
 	"tailscale.com/client/tailscale/apitype"
 	"tailscale.com/types/key"
 )
@@ -335,14 +337,14 @@ func TestTaildrop(t *testing.T) {
 	IntegrationSkip(t)
 	t.Parallel()
 
-	retry := func(times int, sleepInverval time.Duration, doWork func() error) error {
+	retry := func(times int, sleepInterval time.Duration, doWork func() error) error {
 		var err error
 		for attempts := 0; attempts < times; attempts++ {
 			err = doWork()
 			if err == nil {
 				return nil
 			}
-			time.Sleep(sleepInverval)
+			time.Sleep(sleepInterval)
 		}
 
 		return err
@@ -793,7 +795,7 @@ func TestNodeOnlineStatus(t *testing.T) {
 				continue
 			}
 
-			// All peers of this nodess are reporting to be
+			// All peers of this node are reporting to be
 			// connected to the control server
 			assert.Truef(
 				t,
@@ -829,24 +831,10 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
 		"user2": len(MustTestVersions),
 	}
 
-	headscaleConfig := map[string]string{
-		"HEADSCALE_DERP_URLS":                    "",
-		"HEADSCALE_DERP_SERVER_ENABLED":          "true",
-		"HEADSCALE_DERP_SERVER_REGION_ID":        "999",
-		"HEADSCALE_DERP_SERVER_REGION_CODE":      "headscale",
-		"HEADSCALE_DERP_SERVER_REGION_NAME":      "Headscale Embedded DERP",
-		"HEADSCALE_DERP_SERVER_STUN_LISTEN_ADDR": "0.0.0.0:3478",
-		"HEADSCALE_DERP_SERVER_PRIVATE_KEY_PATH": "/tmp/derp.key",
-
-		// Envknob for enabling DERP debug logs
-		"DERP_DEBUG_LOGS":        "true",
-		"DERP_PROBER_DEBUG_LOGS": "true",
-	}
-
 	err = scenario.CreateHeadscaleEnv(spec,
 		[]tsic.Option{},
-		hsic.WithTestName("pingallbyip"),
-		hsic.WithConfigEnv(headscaleConfig),
+		hsic.WithTestName("pingallbyipmany"),
+		hsic.WithEmbeddedDERPServerOnly(),
 		hsic.WithTLS(),
 		hsic.WithHostnameAsServerURL(),
 	)
@@ -870,19 +858,35 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
 	success := pingAllHelper(t, allClients, allAddrs)
 	t.Logf("%d successful pings out of %d", success, len(allClients)*len(allIps))
 
+	wg, _ := errgroup.WithContext(context.Background())
+
 	for run := range 3 {
 		t.Logf("Starting DownUpPing run %d", run+1)
 
 		for _, client := range allClients {
-			t.Logf("taking down %q", client.Hostname())
-			client.Down()
+			c := client
+			wg.Go(func() error {
+				t.Logf("taking down %q", c.Hostname())
+
+				return c.Down()
+			})
+		}
+
+		if err := wg.Wait(); err != nil {
+			t.Fatalf("failed to take down all nodes: %s", err)
 		}
 
 		time.Sleep(5 * time.Second)
 
 		for _, client := range allClients {
-			t.Logf("bringing up %q", client.Hostname())
-			client.Up()
+			c := client
+			wg.Go(func() error {
+				t.Logf("bringing up %q", c.Hostname())
+
+				return c.Up()
+			})
+		}
+
+		if err := wg.Wait(); err != nil {
+			t.Fatalf("failed to bring up all nodes: %s", err)
 		}
 
 		time.Sleep(5 * time.Second)
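The rewrite above fans the Down/Up calls out through golang.org/x/sync/errgroup so one slow client no longer serialises the whole run; Wait blocks until every goroutine returns and reports the first non-nil error. The minimal shape of that pattern:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/sync/errgroup"
)

func main() {
	wg, _ := errgroup.WithContext(context.Background())

	for _, name := range []string{"ts-a", "ts-b", "ts-c"} {
		name := name // capture per iteration, as `c := client` does above
		wg.Go(func() error {
			time.Sleep(10 * time.Millisecond) // stand-in for client.Down()
			fmt.Println("down:", name)
			return nil
		})
	}

	if err := wg.Wait(); err != nil {
		fmt.Println("failed:", err)
	}
}
```

Reusing the same group for the subsequent up phase, as the test does, works because Wait has already drained it; since the derived context is discarded, its cancellation after the first Wait has no effect here.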
@@ -286,9 +286,13 @@ func New(
 	}
 
 	env := []string{
-		"HEADSCALE_PROFILING_ENABLED=1",
-		"HEADSCALE_PROFILING_PATH=/tmp/profile",
+		"HEADSCALE_DEBUG_PROFILING_ENABLED=1",
+		"HEADSCALE_DEBUG_PROFILING_PATH=/tmp/profile",
 		"HEADSCALE_DEBUG_DUMP_MAPRESPONSE_PATH=/tmp/mapresponses",
+		"HEADSCALE_DEBUG_DEADLOCK=1",
+		"HEADSCALE_DEBUG_DEADLOCK_TIMEOUT=5s",
+		"HEADSCALE_DEBUG_HIGH_CARDINALITY_METRICS=1",
+		"HEADSCALE_DEBUG_DUMP_CONFIG=1",
 	}
 	for key, value := range hsic.env {
 		env = append(env, fmt.Sprintf("%s=%s", key, value))
@@ -397,7 +401,7 @@ func (t *HeadscaleInContainer) Shutdown() error {
 		)
 	}
 
-	err = t.SaveMetrics("/tmp/control/metrics.txt")
+	err = t.SaveMetrics(fmt.Sprintf("/tmp/control/%s_metrics.txt", t.hostname))
 	if err != nil {
 		log.Printf(
 			"Failed to metrics from control: %s",
@@ -747,7 +751,7 @@ func createCertificate(hostname string) ([]byte, []byte, error) {
 			Locality:     []string{"Leiden"},
 		},
 		NotBefore: time.Now(),
-		NotAfter:  time.Now().Add(60 * time.Minute),
+		NotAfter:  time.Now().Add(60 * time.Hour),
 		IsCA:      true,
 		ExtKeyUsage: []x509.ExtKeyUsage{
 			x509.ExtKeyUsageClientAuth,
@@ -51,8 +51,11 @@ var (
 	tailscaleVersions2021 = map[string]bool{
 		"head":     true,
 		"unstable": true,
-		"1.60": true, // CapVer: 82
-		"1.58": true, // CapVer: 82
+		"1.66": true, // CapVer: not checked
+		"1.64": true, // CapVer: not checked
+		"1.62": true, // CapVer: not checked
+		"1.60": true, // CapVer: not checked
+		"1.58": true, // CapVer: not checked
 		"1.56": true, // CapVer: 82
 		"1.54": true, // CapVer: 79
 		"1.52": true, // CapVer: 79
@@ -423,8 +426,10 @@ func (s *Scenario) WaitForTailscaleSync() error {
 	if err != nil {
 		for _, user := range s.users {
 			for _, client := range user.Clients {
-				peers, _ := client.PrettyPeers()
-				log.Println(peers)
+				peers, allOnline, _ := client.FailingPeersAsString()
+				if !allOnline {
+					log.Println(peers)
+				}
 			}
 		}
 	}
@@ -450,7 +455,7 @@ func (s *Scenario) WaitForTailscaleSyncWithPeerCount(peerCount int) error {
 	return nil
 }
 
-// CreateHeadscaleEnv is a conventient method returning a complete Headcale
+// CreateHeadscaleEnv is a convenient method returning a complete Headscale
 // test environment with nodes of all versions, joined to the server with X
 // users.
 func (s *Scenario) CreateHeadscaleEnv(
@@ -36,5 +36,8 @@ type TailscaleClient interface {
 	Ping(hostnameOrIP string, opts ...tsic.PingOption) error
 	Curl(url string, opts ...tsic.CurlOption) (string, error)
 	ID() string
-	PrettyPeers() (string, error)
+
+	// FailingPeersAsString returns a formatted-ish multi-line-string of peers in the client
+	// and a bool indicating if the client's online count and peer count is equal.
+	FailingPeersAsString() (string, bool, error)
 }
@@ -691,15 +691,18 @@ func (t *TailscaleInContainer) FQDN() (string, error) {
 	return status.Self.DNSName, nil
 }
 
-// PrettyPeers returns a formatted-ish table of peers in the client.
-func (t *TailscaleInContainer) PrettyPeers() (string, error) {
+// FailingPeersAsString returns a formatted-ish multi-line-string of peers in the client
+// and a bool indicating if the client's online count and peer count is equal.
+func (t *TailscaleInContainer) FailingPeersAsString() (string, bool, error) {
 	status, err := t.Status()
 	if err != nil {
-		return "", fmt.Errorf("failed to get FQDN: %w", err)
+		return "", false, fmt.Errorf("failed to get FQDN: %w", err)
 	}
 
-	str := fmt.Sprintf("Peers of %s\n", t.hostname)
-	str += "Hostname\tOnline\tLastSeen\n"
+	var b strings.Builder
+
+	fmt.Fprintf(&b, "Peers of %s\n", t.hostname)
+	fmt.Fprint(&b, "Hostname\tOnline\tLastSeen\n")
 
 	peerCount := len(status.Peers())
 	onlineCount := 0
@@ -711,12 +714,12 @@ func (t *TailscaleInContainer) PrettyPeers() (string, error) {
 			onlineCount++
 		}
 
-		str += fmt.Sprintf("%s\t%t\t%s\n", peer.HostName, peer.Online, peer.LastSeen)
+		fmt.Fprintf(&b, "%s\t%t\t%s\n", peer.HostName, peer.Online, peer.LastSeen)
 	}
 
-	str += fmt.Sprintf("Peer Count: %d, Online Count: %d\n\n", peerCount, onlineCount)
+	fmt.Fprintf(&b, "Peer Count: %d, Online Count: %d\n\n", peerCount, onlineCount)
 
-	return str, nil
+	return b.String(), peerCount == onlineCount, nil
 }
 
 // WaitForNeedsLogin blocks until the Tailscale (tailscaled) instance has
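Besides the new return values, the hunk above swaps repeated string concatenation for strings.Builder, which grows a single buffer instead of re-allocating the whole string on every `str +=`. The pattern in isolation (peer names and timestamps are made up for the demo):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	var b strings.Builder
	fmt.Fprintf(&b, "Peers of %s\n", "ts-head")
	fmt.Fprint(&b, "Hostname\tOnline\tLastSeen\n")
	for _, p := range []string{"peer-1", "peer-2"} {
		fmt.Fprintf(&b, "%s\t%t\t%s\n", p, true, "2024-04-16")
	}
	fmt.Print(b.String()) // one final allocation for the assembled string
}
```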
@@ -331,7 +331,7 @@ func dockertestMaxWait() time.Duration {
 // 	return timeout
 // }
 
-// pingAllNegativeHelper is intended to have 1 or more nodes timeing out from the ping,
+// pingAllNegativeHelper is intended to have 1 or more nodes timing out from the ping,
 // it counts failures instead of successes.
 // func pingAllNegativeHelper(t *testing.T, clients []TailscaleClient, addrs []string) int {
 // 	t.Helper()