Compare commits


23 Commits

Author SHA1 Message Date
Kristoffer Dalby
87e2ae4d52 add autogroup:internet, fix reduce filter rules (#1917) 2024-04-30 07:23:16 +02:00
Juan Font
ff427ccb78 Apply suggestions from code review
Co-authored-by: ohdearaugustin <ohdearaugustin@users.noreply.github.com>
2024-04-29 23:04:02 +02:00
Juan Font
39277844dd Apply suggestions from code review
Co-authored-by: ohdearaugustin <ohdearaugustin@users.noreply.github.com>
2024-04-29 23:04:02 +02:00
Juan Font
50a7d15769 Update CONTRIBUTING.md
Co-authored-by: ohdearaugustin <ohdearaugustin@users.noreply.github.com>
2024-04-29 23:04:02 +02:00
Juan Font
d740ee489e Update CONTRIBUTING.md
Co-authored-by: ohdearaugustin <ohdearaugustin@users.noreply.github.com>
2024-04-29 23:04:02 +02:00
Juan Font
10e37ec28d Add contributing document 2024-04-29 23:04:02 +02:00
Kristoffer Dalby
cb0b495ea9 batch updates in notifier (#1905) 2024-04-27 10:47:39 +02:00
Juan Font
fef8261339 Do not access node ID when node is not found (#1912) 2024-04-24 07:44:35 +02:00
Kristoffer Dalby
c62d5570f2 remove multistep build, build go last, allowing cached build layers (#1903) 2024-04-24 07:44:07 +02:00
Kristoffer Dalby
318d5d2b21 replace issue templates with github issue forms
Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
2024-04-22 23:10:34 +02:00
Kristoffer Dalby
9229d17bbe remove examples/, and kustomize (#1906)
this directory is unmaintained and not verified, if it should be restored, it should end up
under the community docs effort.

Signed-off-by: Kristoffer Dalby <kristoffer@tailscale.com>
2024-04-22 21:21:38 +02:00
Juan Font
aba4b36030 Clarify relation with Tailscale (#1908) 2024-04-22 20:37:59 +02:00
Juan Font
bd047928f7 Move pprof to metrics router (#1902) 2024-04-21 22:08:59 +02:00
ChengenH
9375b09206 chore: use errors.New to replace fmt.Errorf with no parameters will much better
Signed-off-by: ChengenH <hce19970702@gmail.com>
2024-04-21 20:23:25 +02:00
Kristoffer Dalby
ba614a5e6c metrics, tuning in tests, db cleanups, fix concurrency issue (#1895) 2024-04-21 18:28:17 +02:00
oftenoccur
7d8178406d chore: fix function names in comment (#1866)
* chore: fix function names in comment

Signed-off-by: oftenoccur <ezc5@sina.com>

---------

Signed-off-by: oftenoccur <ezc5@sina.com>
Co-authored-by: ohdearaugustin <ohdearaugustin@users.noreply.github.com>
2024-04-21 18:19:38 +02:00
ohdearaugustin
8394208856 fix prettier 2024-04-21 17:32:41 +02:00
Arnaud Dezandee
803269a64c docs(readme): change contributors section (#1889) 2024-04-21 16:48:33 +02:00
Carson Yang
d6ec31c4e0 docs: Add docs for running headscale on sealos (#1666)
* docs: Add docs for running headscale on sealos

Signed-off-by: Carson Yang <yangchuansheng33@gmail.com>

* run prettier

---------

Signed-off-by: Carson Yang <yangchuansheng33@gmail.com>
Co-authored-by: ohdearaugustin <ohdearaugustin@users.noreply.github.com>
2024-04-21 16:43:31 +02:00
Juan Font
68503581a0 Add test stage to docs (#1893)
* Add test stage to docs

Add new file with docs tets

Run only in pulls

* set explicit python version

* Revert "set explicit python version"

This reverts commit 4dd7b81f26.

* docs/requirements: update mkdocs-material

---------

Co-authored-by: ohdearaugustin <ohdearaugustin@users.noreply.github.com>
2024-04-21 16:33:22 +02:00
Juan Font
e2afd30b1c Add the latest UI to the website 2024-04-18 14:55:59 +02:00
Juan Font
c906aaf927 Allow to remove forced tags of a node
Set as empty StringList
2024-04-18 09:55:55 +02:00
Juan Font
580f96ce83 Remove unused node check interval 2024-04-17 20:20:44 +02:00
70 changed files with 1843 additions and 2095 deletions


@@ -1,65 +0,0 @@
---
name: "Bug report"
about: "Create a bug report to help us improve"
title: ""
labels: ["bug"]
assignees: ""
---
<!--
Before posting a bug report, discuss the behaviour you are expecting with the Discord community
to make sure that it is truly a bug.
The issue tracker is not the place to ask for support or how to set up Headscale.
Bug reports without sufficient information will be closed.
Headscale is a multinational community across the globe. Our language is English.
All bug reports need to be in English.
-->
## Bug description
<!-- A clear and concise description of what the bug is. Describe the expected behavior
and how it is currently different. If you are unsure if it is a bug, consider discussing
it on our Discord server first. -->
## Environment
<!-- Please add relevant information about your system. For example:
- Version of headscale used
- Version of tailscale client
- OS (e.g. Linux, Mac, Cygwin, WSL, etc.) and version
- Kernel version
- The relevant config parameters you used
- Log output
-->
- OS:
- Headscale version:
- Tailscale version:
<!--
We do not support running Headscale in a container nor behind a (reverse) proxy.
If either of these are true for your environment, ask the community in Discord
instead of filing a bug report.
-->
- [ ] Headscale is behind a (reverse) proxy
- [ ] Headscale runs in a container
## To Reproduce
<!-- Steps to reproduce the behavior. -->
## Logs and attachments
<!-- Please attach files with:
- Client netmap dump (see below)
- ACL configuration
- Headscale configuration
Dump the netmap of tailscale clients:
`tailscale debug netmap > DESCRIPTIVE_NAME.json`
Please provide information describing the netmap, which client, which headscale version etc.
-->

.github/ISSUE_TEMPLATE/bug_report.yaml vendored Normal file

@@ -0,0 +1,83 @@
name: 🐞 Bug
description: File a bug/issue
title: "[Bug] <title>"
labels: ["bug", "needs triage"]
body:
- type: checkboxes
attributes:
label: Is this a support request?
description: This issue tracker is for bugs and feature requests only. If you need help, please ask in our Discord community
options:
- label: This is not a support request
required: true
- type: checkboxes
attributes:
label: Is there an existing issue for this?
description: Please search to see if an issue already exists for the bug you encountered.
options:
- label: I have searched the existing issues
required: true
- type: textarea
attributes:
label: Current Behavior
description: A concise description of what you're experiencing.
validations:
required: true
- type: textarea
attributes:
label: Expected Behavior
description: A concise description of what you expected to happen.
validations:
required: true
- type: textarea
attributes:
label: Steps To Reproduce
description: Steps to reproduce the behavior.
placeholder: |
1. In this environment...
1. With this config...
1. Run '...'
1. See error...
validations:
required: true
- type: textarea
attributes:
label: Environment
description: |
examples:
- **OS**: Ubuntu 20.04
- **Headscale version**: 0.22.3
- **Tailscale version**: 1.64.0
value: |
- OS:
- Headscale version:
- Tailscale version:
render: markdown
validations:
required: true
- type: checkboxes
attributes:
label: Runtime environment
options:
- label: Headscale is behind a (reverse) proxy
required: false
- label: Headscale runs in a container
required: false
- type: textarea
attributes:
label: Anything else?
description: |
Links? References? Anything that will give us more context about the issue you are encountering!
- Client netmap dump (see below)
- ACL configuration
- Headscale configuration
Dump the netmap of tailscale clients:
`tailscale debug netmap > DESCRIPTIVE_NAME.json`
Please provide information describing the netmap, which client, which headscale version etc.
Tip: You can attach images or log files by clicking this area to highlight it and then dragging files in.
validations:
required: false


@@ -1,26 +0,0 @@
---
name: "Feature request"
about: "Suggest an idea for headscale"
title: ""
labels: ["enhancement"]
assignees: ""
---
<!--
We typically have a clear roadmap for what we want to improve and reserve the right
to close feature requests that do not fit the roadmap or the scope
of the project, or that we want to implement ourselves.
Headscale is a multinational community across the globe. Our language is English.
All feature requests need to be in English.
-->
## Why
<!-- Include the reason why you would need the feature. E.g., what problem
does it solve? Or which workflow is currently frustrating and will be improved by
this? -->
## Description
<!-- A clear and precise description of the new or changed feature you want. -->


@@ -0,0 +1,36 @@
name: 🚀 Feature Request
description: Suggest an idea for Headscale
title: "[Feature] <title>"
labels: [enhancement]
body:
- type: textarea
attributes:
label: Use case
description: Please describe the use case for this feature.
placeholder: |
<!-- Include the reason why you would need the feature. E.g., what problem
does it solve? Or which workflow is currently frustrating and will be improved by
this? -->
validations:
required: true
- type: textarea
attributes:
label: Description
description: A clear and precise description of the new or changed feature you want.
validations:
required: true
- type: checkboxes
attributes:
label: Contribution
description: Are you willing to contribute to the implementation of this feature?
options:
- label: I can write the design doc for this feature
required: true
- label: I can contribute this feature
required: true
- type: textarea
attributes:
label: How can it be implemented?
description: Free text for your ideas on how this feature could be implemented.
validations:
required: false


@@ -12,7 +12,7 @@ If you find mistakes in the documentation, please submit a fix to the documentat
<!-- Please tick if the following things apply. You… -->
- [ ] read the [CONTRIBUTING guidelines](README.md#contributing)
- [ ] have read the [CONTRIBUTING.md](./CONTRIBUTING.md) file
- [ ] raised a GitHub issue or discussed it on the projects chat beforehand
- [ ] added unit tests
- [ ] added integration tests


@@ -1,36 +0,0 @@
name: Contributors
on:
push:
branches:
- main
workflow_dispatch:
jobs:
add-contributors:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Delete upstream contributor branch
# Allow continue on failure to account for when the
# upstream branch is deleted or does not exist.
continue-on-error: true
run: git push origin --delete update-contributors
- name: Create up-to-date contributors branch
run: git checkout -B update-contributors
- name: Push empty contributors branch
run: git push origin update-contributors
- name: Switch back to main
run: git checkout main
- uses: BobAnkh/add-contributors@v0.2.2
with:
CONTRIBUTOR: "## Contributors"
COLUMN_PER_ROW: "6"
ACCESS_TOKEN: ${{secrets.GITHUB_TOKEN}}
IMG_WIDTH: "100"
FONT_SIZE: "14"
PATH: "/README.md"
COMMIT_MESSAGE: "docs(README): update contributors"
AVATAR_SHAPE: "round"
BRANCH: "update-contributors"
PULL_REQUEST: "main"

.github/workflows/docs-test.yml vendored Normal file

@@ -0,0 +1,27 @@
name: Test documentation build
on: [pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Install python
uses: actions/setup-python@v4
with:
python-version: 3.x
- name: Setup cache
uses: actions/cache@v2
with:
key: ${{ github.ref }}
path: .cache
- name: Setup dependencies
run: pip install -r docs/requirements.txt
- name: Build docs
run: mkdocs build --strict


@@ -56,6 +56,7 @@ after improving the test harness as part of adopting [#1460](https://github.com/
- Add support for deleting api keys [#1702](https://github.com/juanfont/headscale/pull/1702)
- Add command to backfill IP addresses for nodes missing IPs from configured prefixes. [#1869](https://github.com/juanfont/headscale/pull/1869)
- Log available update as warning [#1877](https://github.com/juanfont/headscale/pull/1877)
- Add `autogroup:internet` to Policy [#1917](https://github.com/juanfont/headscale/pull/1917)
## 0.22.3 (2023-05-12)

CONTRIBUTING.md Normal file

@@ -0,0 +1,34 @@
# Contributing
Headscale is "Open Source, acknowledged contribution", this means that any contribution will have to be discussed with the maintainers before being added to the project.
This model has been chosen to reduce the risk of burnout by limiting the maintenance overhead of reviewing and validating third-party code.
## Why do we have this model?
Headscale has a small maintainer team that tries to balance working on the project, fixing bugs and reviewing contributions.
When we work on issues ourselves, we develop first hand knowledge of the code and it makes it possible for us to maintain and own the code as the project develops.
Code contributions are seen as a positive thing. People enjoy and engage with our project, but it also comes with some challenges; we have to understand the code, we have to understand the feature, we might have to become familiar with external libraries or services, and we have to think about security implications. All those steps are required during the reviewing process. After the code has been merged, the feature has to be maintained. Any changes reliant on external services must be updated and expanded accordingly.
The review and day-1 maintenance add a significant burden on the maintainers. Often we hope that the contributor will help out, but we found that most of the time, they disappear after their new feature has been added.
This means that when someone contributes, we are mostly happy about it, but we do have to run it through a series of checks to establish whether we can actually maintain this feature.
## What do we require?
A general description is provided here and an explicit list is provided in our pull request template.
All new features have to start out with a design document, which should be discussed on the issue tracker (not Discord). It should include a use case for the feature, how it can be implemented, who will implement it and a plan for maintaining it.
All features have to be end-to-end tested (integration tests) and have good unit test coverage to ensure that they work as expected. This will also ensure that the feature continues to work as expected over time. If a change cannot be tested, a strong case for why this is not possible needs to be presented.
The contributor should help to maintain the feature over time. If the feature is not maintained properly, the maintainers reserve the right to remove features they deem unmaintainable. This should help improve the quality of the software and keep it in a maintainable state.
## Bug fixes
Headscale is open to code contributions for bug fixes without discussion.
## Documentation
If you find mistakes in the documentation, please submit a fix to the documentation.


@@ -2,31 +2,24 @@
# and are in no way endorsed by Headscale's maintainers as an
# official nor supported release or distribution.
FROM docker.io/golang:1.22-bookworm AS build
FROM docker.io/golang:1.22-bookworm
ARG VERSION=dev
ENV GOPATH /go
WORKDIR /go/src/headscale
COPY go.mod go.sum /go/src/headscale/
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go install -ldflags="-s -w -X github.com/juanfont/headscale/cmd/headscale/cli.Version=$VERSION" -a ./cmd/headscale
RUN test -e /go/bin/headscale
# Debug image
FROM docker.io/golang:1.22-bookworm
COPY --from=build /go/bin/headscale /bin/headscale
ENV TZ UTC
RUN apt-get update \
&& apt-get install --no-install-recommends --yes less jq \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean
RUN mkdir -p /var/run/headscale
COPY go.mod go.sum /go/src/headscale/
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go install -ldflags="-s -w -X github.com/juanfont/headscale/cmd/headscale/cli.Version=$VERSION" -a ./cmd/headscale && test -e /go/bin/headscale
# Need to reset the entrypoint or everything will run as a busybox script
ENTRYPOINT []
EXPOSE 8080/tcp

README.md: file diff suppressed because it is too large (1052 changed lines).

Two binary files added (images, 35 KiB each; content not shown).


@@ -1,5 +1,4 @@
cairosvg~=2.7.1
mkdocs-material~=9.4.14
mkdocs-material~=9.5.18
mkdocs-minify-plugin~=0.7.1
pillow~=10.1.0


@@ -0,0 +1,136 @@
# Running headscale on Sealos
!!! warning "Community documentation"
This page is not actively maintained by the headscale authors and is
written by community members. It is _not_ verified by `headscale` developers.
**It might be outdated and it might miss necessary steps**.
## Goal
This documentation shows how to run `headscale` on Sealos.
## Running headscale server
1. Click the following prebuilt template (version [0.23.0-alpha2](https://github.com/juanfont/headscale/releases/tag/v0.23.0-alpha2)):
[![](https://cdn.jsdelivr.net/gh/labring-actions/templates@main/Deploy-on-Sealos.svg)](https://cloud.sealos.io/?openapp=system-template%3FtemplateName%3Dheadscale)
2. Click "Deploy Application" on the template page to start deployment. Upon completion, two applications appear: Headscale, and its [visual interface](https://github.com/GoodiesHQ/headscale-admin).
3. Once deployment concludes, click 'Details' on the Headscale application page to navigate to the application's details.
4. Wait for the application's status to switch to running. For accessing the headscale server, the Public Address associated with port 8080 is the address of the headscale server. To access the Headscale console, simply append `/admin/` to the Headscale public URL.
![](./images/headscale-sealos-url.png)
5. Click the 'Terminal' button on the right side of the details page to access the terminal of the headscale application, then create a user ([tailnet](https://tailscale.com/kb/1136/tailnet/)):
```bash
headscale users create myfirstuser
```
### Register a machine (normal login)
On a client machine, execute the `tailscale` login command:
```bash
# replace <YOUR_HEADSCALE_URL> with the public domain provided by Sealos
tailscale up --login-server <YOUR_HEADSCALE_URL>
```
To register a machine when running headscale in [Sealos](https://sealos.io), click the 'Terminal' button on the right side of the headscale application's detail page to access its terminal, then run the headscale command:
```bash
headscale --user myfirstuser nodes register --key <YOUR_MACHINE_KEY>
```
### Register machine using a pre authenticated key
Click the 'Terminal' button on the right side of the headscale application's detail page to access its terminal, then generate a key using the command line:
```bash
headscale --user myfirstuser preauthkeys create --reusable --expiration 24h
```
This will return a pre-authenticated key that can be used to connect a node to `headscale` via the `tailscale up` command:
```bash
tailscale up --login-server <YOUR_HEADSCALE_URL> --authkey <YOUR_AUTH_KEY>
```
## Controlling headscale with remote CLI
This section shows how to control a headscale instance from a remote machine with the `headscale` command-line binary.
### Create an API key
We need to create an API key to authenticate with the remote headscale instance from our workstation.
To create an API key, click the 'Terminal' button on the right side of the headscale application's detail page to access its terminal, then generate a key:
```bash
headscale apikeys create --expiration 90d
```
Copy the output of the command and save it for later. Note that you cannot retrieve a key again; if the key is lost, expire the old one and create a new key.
To list the keys currently associated with the server:
```bash
headscale apikeys list
```
and to expire a key:
```bash
headscale apikeys expire --prefix "<PREFIX>"
```
### Download and configure `headscale` client
1. Download the latest [`headscale` binary from GitHub's release page](https://github.com/juanfont/headscale/releases):
2. Put the binary somewhere in your `PATH`, e.g. `/usr/local/bin/headscale`
3. Make `headscale` executable:
```shell
chmod +x /usr/local/bin/headscale
```
4. Configure the CLI through environment variables
```shell
export HEADSCALE_CLI_ADDRESS="<HEADSCALE ADDRESS>:443"
export HEADSCALE_CLI_API_KEY="<API KEY FROM PREVIOUS STAGE>"
```
On the headscale application's detail page, the Public Address corresponding to port 50443 is the value of `<HEADSCALE ADDRESS>`.
![](./images/headscale-sealos-grpc-url.png)
For example:
```shell
export HEADSCALE_CLI_ADDRESS="pwnjnnly.cloud.sealos.io:443"
export HEADSCALE_CLI_API_KEY="abcde12345"
```
This will tell the `headscale` binary to connect to a remote instance, instead of looking
for a local instance.
The API key is needed to make sure that you are allowed to access the server. The key is _not_
needed when running directly on the server, as the connection is local.
5. Test the connection
Let us run the headscale command to verify that we can connect by listing our nodes:
```shell
headscale nodes list
```
You should now see a list of your nodes, and you can control the
`headscale` server from your workstation.
> Reference: [Headscale Deployment and Usage Guide: Mastering Tailscale's Self-Hosting Basics](https://icloudnative.io/en/posts/how-to-set-up-or-migrate-headscale/)


@@ -5,10 +5,11 @@
This page contains community contributions. The projects listed here are not
maintained by the Headscale authors and are written by community members.
| Name | Repository Link | Description | Status |
| --------------- | ------------------------------------------------------- | ------------------------------------------------------------------------- | ------ |
| headscale-webui | [Github](https://github.com/ifargle/headscale-webui) | A simple Headscale web UI for small-scale deployments. | Alpha |
| headscale-ui | [Github](https://github.com/gurucomputing/headscale-ui) | A web frontend for the headscale Tailscale-compatible coordination server | Alpha |
| HeadscaleUi | [GitHub](https://github.com/simcu/headscale-ui) | A static headscale admin ui, no backend enviroment required | Alpha |
| Name | Repository Link | Description | Status |
| --------------- | ------------------------------------------------------- | --------------------------------------------------------------------------- | ------ |
| headscale-webui | [Github](https://github.com/ifargle/headscale-webui) | A simple Headscale web UI for small-scale deployments. | Alpha |
| headscale-ui | [Github](https://github.com/gurucomputing/headscale-ui) | A web frontend for the headscale Tailscale-compatible coordination server | Alpha |
| HeadscaleUi | [GitHub](https://github.com/simcu/headscale-ui) | A static headscale admin ui, no backend environment required | Alpha |
| headscale-admin | [Github](https://github.com/GoodiesHQ/headscale-admin) | Headscale-Admin is meant to be a simple, modern web interface for Headscale | Beta |
You can ask for support on our dedicated [Discord channel](https://discord.com/channels/896711691637780480/1105842846386356294).


@@ -1,5 +0,0 @@
# Examples
This directory contains examples on how to run `headscale` on different platforms.
All examples are provided by the community and they are not verified by the `headscale` authors.


@@ -1,2 +0,0 @@
/**/site
/**/secrets


@@ -1,100 +0,0 @@
# Deploying headscale on Kubernetes
**Note:** This is contributed by the community and not verified by the headscale authors.
This directory contains [Kustomize](https://kustomize.io) templates that deploy
headscale in various configurations.
These templates currently support Rancher k3s. Other clusters may require
adaptation, especially around volume claims and ingress.
Commands below assume this directory is your current working directory.
# Generate secrets and site configuration
Run `./init.bash` to generate keys, passwords, and site configuration files.
Edit `base/site/public.env`, changing `public-hostname` to the public DNS name
that will be used for your headscale deployment.
Set `public-proto` to "https" if you're planning to use TLS & Let's Encrypt.
Configure DERP servers by editing `base/site/derp.yaml` if needed.
# Add the image to the registry
You'll somehow need to get `headscale:latest` into your cluster image registry.
An easy way to do this with k3s:
- Reconfigure k3s to use docker instead of containerd (`k3s server --docker`)
- `docker build -t headscale:latest ..` from here
# Create the namespace
If it doesn't already exist, `kubectl create ns headscale`.
# Deploy headscale
## sqlite
`kubectl -n headscale apply -k ./sqlite`
## postgres
`kubectl -n headscale apply -k ./postgres`
# TLS & Let's Encrypt
Test a staging certificate with your configured DNS name and Let's Encrypt.
`kubectl -n headscale apply -k ./staging-tls`
Replace with a production certificate.
`kubectl -n headscale apply -k ./production-tls`
## Static / custom TLS certificates
Only Let's Encrypt is supported. If you need other TLS settings, modify or patch the ingress.
# Administration
Use the wrapper script to operate headscale remotely and perform administrative
tasks like creating namespaces, authkeys, etc.
```
[c@nix-slate:~/Projects/headscale/k8s]$ ./headscale.bash
headscale is an open source implementation of the Tailscale control server
https://github.com/juanfont/headscale
Usage:
headscale [command]
Available Commands:
help Help about any command
namespace Manage the namespaces of headscale
node Manage the nodes of headscale
preauthkey Handle the preauthkeys in headscale
routes Manage the routes of headscale
serve Launches the headscale server
version Print the version.
Flags:
-h, --help help for headscale
-o, --output string Output format. Empty for human-readable, 'json' or 'json-line'
Use "headscale [command] --help" for more information about a command.
```
# TODO / Ideas
- Interpolate `email:` option to the ClusterIssuer from site configuration.
This probably needs to be done with a transformer; kustomize vars don't seem to work.
- Add kustomize examples for cloud-native ingress, load balancer
- CockroachDB for the backend
- DERP server deployment
- Tor hidden service


@@ -1,9 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: headscale-config
data:
server_url: $(PUBLIC_PROTO)://$(PUBLIC_HOSTNAME)
listen_addr: "0.0.0.0:8080"
metrics_listen_addr: "127.0.0.1:9090"
ephemeral_node_inactivity_timeout: "30m"


@@ -1,18 +0,0 @@
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: headscale
annotations:
kubernetes.io/ingress.class: traefik
spec:
rules:
- host: $(PUBLIC_HOSTNAME)
http:
paths:
- backend:
service:
name: headscale
port:
number: 8080
path: /
pathType: Prefix


@@ -1,42 +0,0 @@
namespace: headscale
resources:
- configmap.yaml
- ingress.yaml
- service.yaml
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- name: headscale-site
files:
- derp.yaml=site/derp.yaml
envs:
- site/public.env
- name: headscale-etc
literals:
- config.json={}
secretGenerator:
- name: headscale
files:
- secrets/private-key
vars:
- name: PUBLIC_PROTO
objRef:
kind: ConfigMap
name: headscale-site
apiVersion: v1
fieldRef:
fieldPath: data.public-proto
- name: PUBLIC_HOSTNAME
objRef:
kind: ConfigMap
name: headscale-site
apiVersion: v1
fieldRef:
fieldPath: data.public-hostname
- name: CONTACT_EMAIL
objRef:
kind: ConfigMap
name: headscale-site
apiVersion: v1
fieldRef:
fieldPath: data.contact-email


@@ -1,13 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: headscale
labels:
app: headscale
spec:
selector:
app: headscale
ports:
- name: http
targetPort: http
port: 8080


@@ -1,3 +0,0 @@
#!/usr/bin/env bash
set -eu
exec kubectl -n headscale exec -ti pod/headscale-0 -- /go/bin/headscale "$@"


@@ -1,22 +0,0 @@
#!/usr/bin/env bash
set -eux
cd $(dirname $0)
umask 022
mkdir -p base/site/
[ ! -e base/site/public.env ] && (
cat >base/site/public.env <<EOF
public-hostname=localhost
public-proto=http
contact-email=headscale@example.com
EOF
)
[ ! -e base/site/derp.yaml ] && cp ../derp.yaml base/site/derp.yaml
umask 077
mkdir -p base/secrets/
[ ! -e base/secrets/private-key ] && (
wg genkey > base/secrets/private-key
)
mkdir -p postgres/secrets/
[ ! -e postgres/secrets/password ] && (head -c 32 /dev/urandom | base64 -w0 > postgres/secrets/password)


@@ -1,3 +0,0 @@
#!/usr/bin/env bash
set -eux
kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.4.0/cert-manager.yaml


@@ -1,81 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: headscale
spec:
replicas: 2
selector:
matchLabels:
app: headscale
template:
metadata:
labels:
app: headscale
spec:
containers:
- name: headscale
image: "headscale:latest"
imagePullPolicy: IfNotPresent
command: ["/go/bin/headscale", "serve"]
env:
- name: SERVER_URL
value: $(PUBLIC_PROTO)://$(PUBLIC_HOSTNAME)
- name: LISTEN_ADDR
valueFrom:
configMapKeyRef:
name: headscale-config
key: listen_addr
- name: METRICS_LISTEN_ADDR
valueFrom:
configMapKeyRef:
name: headscale-config
key: metrics_listen_addr
- name: DERP_MAP_PATH
value: /vol/config/derp.yaml
- name: EPHEMERAL_NODE_INACTIVITY_TIMEOUT
valueFrom:
configMapKeyRef:
name: headscale-config
key: ephemeral_node_inactivity_timeout
- name: DB_TYPE
value: postgres
- name: DB_HOST
value: postgres.headscale.svc.cluster.local
- name: DB_PORT
value: "5432"
- name: DB_USER
value: headscale
- name: DB_PASS
valueFrom:
secretKeyRef:
name: postgresql
key: password
- name: DB_NAME
value: headscale
ports:
- name: http
protocol: TCP
containerPort: 8080
livenessProbe:
tcpSocket:
port: http
initialDelaySeconds: 30
timeoutSeconds: 5
periodSeconds: 15
volumeMounts:
- name: config
mountPath: /vol/config
- name: secret
mountPath: /vol/secret
- name: etc
mountPath: /etc/headscale
volumes:
- name: config
configMap:
name: headscale-site
- name: etc
configMap:
name: headscale-etc
- name: secret
secret:
secretName: headscale


@@ -1,13 +0,0 @@
namespace: headscale
bases:
- ../base
resources:
- deployment.yaml
- postgres-service.yaml
- postgres-statefulset.yaml
generatorOptions:
disableNameSuffixHash: true
secretGenerator:
- name: postgresql
files:
- secrets/password


@@ -1,13 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: postgres
labels:
app: postgres
spec:
selector:
app: postgres
ports:
- name: postgres
targetPort: postgres
port: 5432


@@ -1,49 +0,0 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: postgres
spec:
serviceName: postgres
replicas: 1
selector:
matchLabels:
app: postgres
template:
metadata:
labels:
app: postgres
spec:
containers:
- name: postgres
image: "postgres:13"
imagePullPolicy: IfNotPresent
env:
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: postgresql
key: password
- name: POSTGRES_USER
value: headscale
ports:
- name: postgres
protocol: TCP
containerPort: 5432
livenessProbe:
tcpSocket:
port: 5432
initialDelaySeconds: 30
timeoutSeconds: 5
periodSeconds: 15
volumeMounts:
- name: pgdata
mountPath: /var/lib/postgresql/data
volumeClaimTemplates:
- metadata:
name: pgdata
spec:
storageClassName: local-path
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi


@@ -1,11 +0,0 @@
kind: Ingress
metadata:
name: headscale
annotations:
cert-manager.io/cluster-issuer: letsencrypt-production
traefik.ingress.kubernetes.io/router.tls: "true"
spec:
tls:
- hosts:
- $(PUBLIC_HOSTNAME)
secretName: production-cert


@@ -1,9 +0,0 @@
namespace: headscale
bases:
- ../base
resources:
- production-issuer.yaml
patches:
- path: ingress-patch.yaml
target:
kind: Ingress


@@ -1,16 +0,0 @@
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-production
spec:
acme:
# TODO: figure out how to get kustomize to interpolate this, or use a transformer
#email: $(CONTACT_EMAIL)
server: https://acme-v02.api.letsencrypt.org/directory
privateKeySecretRef:
# Secret resource used to store the account's private key.
name: letsencrypt-production-acc-key
solvers:
- http01:
ingress:
class: traefik


@@ -1,5 +0,0 @@
namespace: headscale
bases:
- ../base
resources:
- statefulset.yaml


@@ -1,82 +0,0 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: headscale
spec:
serviceName: headscale
replicas: 1
selector:
matchLabels:
app: headscale
template:
metadata:
labels:
app: headscale
spec:
containers:
- name: headscale
image: "headscale:latest"
imagePullPolicy: IfNotPresent
command: ["/go/bin/headscale", "serve"]
env:
- name: SERVER_URL
value: $(PUBLIC_PROTO)://$(PUBLIC_HOSTNAME)
- name: LISTEN_ADDR
valueFrom:
configMapKeyRef:
name: headscale-config
key: listen_addr
- name: METRICS_LISTEN_ADDR
valueFrom:
configMapKeyRef:
name: headscale-config
key: metrics_listen_addr
- name: DERP_MAP_PATH
value: /vol/config/derp.yaml
- name: EPHEMERAL_NODE_INACTIVITY_TIMEOUT
valueFrom:
configMapKeyRef:
name: headscale-config
key: ephemeral_node_inactivity_timeout
- name: DB_TYPE
value: sqlite3
- name: DB_PATH
value: /vol/data/db.sqlite
ports:
- name: http
protocol: TCP
containerPort: 8080
livenessProbe:
tcpSocket:
port: http
initialDelaySeconds: 30
timeoutSeconds: 5
periodSeconds: 15
volumeMounts:
- name: config
mountPath: /vol/config
- name: data
mountPath: /vol/data
- name: secret
mountPath: /vol/secret
- name: etc
mountPath: /etc/headscale
volumes:
- name: config
configMap:
name: headscale-site
- name: etc
configMap:
name: headscale-etc
- name: secret
secret:
secretName: headscale
volumeClaimTemplates:
- metadata:
name: data
spec:
storageClassName: local-path
accessModes: ["ReadWriteOnce"]
resources:
requests:
storage: 1Gi


@@ -1,11 +0,0 @@
kind: Ingress
metadata:
name: headscale
annotations:
cert-manager.io/cluster-issuer: letsencrypt-staging
traefik.ingress.kubernetes.io/router.tls: "true"
spec:
tls:
- hosts:
- $(PUBLIC_HOSTNAME)
secretName: staging-cert


@@ -1,9 +0,0 @@
namespace: headscale
bases:
- ../base
resources:
- staging-issuer.yaml
patches:
- path: ingress-patch.yaml
target:
kind: Ingress


@@ -1,16 +0,0 @@
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-staging
spec:
acme:
# TODO: figure out how to get kustomize to interpolate this, or use a transformer
#email: $(CONTACT_EMAIL)
server: https://acme-staging-v02.api.letsencrypt.org/directory
privateKeySecretRef:
# Secret resource used to store the account's private key.
name: letsencrypt-staging-acc-key
solvers:
- http01:
ingress:
class: traefik


@@ -137,7 +137,7 @@ func NewHeadscale(cfg *types.Config) (*Headscale, error) {
noisePrivateKey: noisePrivateKey,
registrationCache: registrationCache,
pollNetMapStreamWG: sync.WaitGroup{},
nodeNotifier: notifier.NewNotifier(),
nodeNotifier: notifier.NewNotifier(cfg),
mapSessions: make(map[types.NodeID]*mapSession),
}
@@ -225,7 +225,7 @@ func (h *Headscale) deleteExpireEphemeralNodes(milliSeconds int64) {
for range ticker.C {
var removed []types.NodeID
var changed []types.NodeID
if err := h.db.DB.Transaction(func(tx *gorm.DB) error {
if err := h.db.Write(func(tx *gorm.DB) error {
removed, changed = db.DeleteExpiredEphemeralNodes(tx, h.cfg.EphemeralNodeInactivityTimeout)
return nil
@@ -263,7 +263,7 @@ func (h *Headscale) expireExpiredMachines(intervalMs int64) {
var changed bool
for range ticker.C {
if err := h.db.DB.Transaction(func(tx *gorm.DB) error {
if err := h.db.Write(func(tx *gorm.DB) error {
lastCheck, update, changed = db.ExpireExpiredNodes(tx, lastCheck)
return nil
@@ -452,7 +452,7 @@ func (h *Headscale) ensureUnixSocketIsAbsent() error {
func (h *Headscale) createRouter(grpcMux *grpcRuntime.ServeMux) *mux.Router {
router := mux.NewRouter()
router.PathPrefix("/debug/pprof/").Handler(http.DefaultServeMux)
router.Use(prometheusMiddleware)
router.HandleFunc(ts2021UpgradePath, h.NoiseUpgradeHandler).Methods(http.MethodPost)
@@ -508,7 +508,7 @@ func (h *Headscale) Serve() error {
// Fetch an initial DERP Map before we start serving
h.DERPMap = derp.GetDERPMap(h.cfg.DERP)
h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier.ConnectedMap())
h.mapper = mapper.NewMapper(h.db, h.cfg, h.DERPMap, h.nodeNotifier)
if h.cfg.DERP.ServerEnabled {
// When embedded DERP is enabled we always need a STUN server
@@ -680,7 +680,7 @@ func (h *Headscale) Serve() error {
// HTTP setup
//
// This is the regular router that we expose
// over our main Addr. It also serves the legacy Tailcale API
// over our main Addr
router := h.createRouter(grpcGatewayMux)
httpServer := &http.Server{
@@ -710,11 +710,10 @@ func (h *Headscale) Serve() error {
Msgf("listening and serving HTTP on: %s", h.cfg.Addr)
debugMux := http.NewServeMux()
debugMux.Handle("/debug/pprof/", http.DefaultServeMux)
debugMux.HandleFunc("/debug/notifier", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Write([]byte(h.nodeNotifier.String()))
return
})
debugMux.HandleFunc("/debug/mapresp", func(w http.ResponseWriter, r *http.Request) {
h.mapSessionMu.Lock()
@@ -728,8 +727,6 @@ func (h *Headscale) Serve() error {
w.WriteHeader(http.StatusOK)
w.Write([]byte(b.String()))
return
})
debugMux.Handle("/metrics", promhttp.Handler())


@@ -273,8 +273,6 @@ func (h *Headscale) handleAuthKey(
Err(err).
Msg("Cannot encode message")
http.Error(writer, "Internal server error", http.StatusInternalServerError)
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
return
}
@@ -294,13 +292,6 @@ func (h *Headscale) handleAuthKey(
Str("node", registerRequest.Hostinfo.Hostname).
Msg("Failed authentication via AuthKey")
if pak != nil {
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
} else {
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", "unknown").Inc()
}
return
}
@@ -404,15 +395,13 @@ func (h *Headscale) handleAuthKey(
Caller().
Err(err).
Msg("could not register node")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)
return
}
}
err = h.db.DB.Transaction(func(tx *gorm.DB) error {
h.db.Write(func(tx *gorm.DB) error {
return db.UsePreAuthKey(tx, pak)
})
if err != nil {
@@ -420,8 +409,6 @@ func (h *Headscale) handleAuthKey(
Caller().
Err(err).
Msg("Failed to use pre-auth key")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)
return
@@ -440,14 +427,10 @@ func (h *Headscale) handleAuthKey(
Str("node", registerRequest.Hostinfo.Hostname).
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "error", pak.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)
return
}
nodeRegistrations.WithLabelValues("new", util.RegisterMethodAuthKey, "success", pak.User.Name).
Inc()
writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
_, err = writer.Write(respBody)
@@ -563,7 +546,7 @@ func (h *Headscale) handleNodeLogOut(
}
if node.IsEphemeral() {
changedNodes, err := h.db.DeleteNode(&node, h.nodeNotifier.ConnectedMap())
changedNodes, err := h.db.DeleteNode(&node, h.nodeNotifier.LikelyConnectedMap())
if err != nil {
log.Error().
Err(err).
@@ -616,14 +599,10 @@ func (h *Headscale) handleNodeWithValidRegistration(
Caller().
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("update", "web", "error", node.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)
return
}
nodeRegistrations.WithLabelValues("update", "web", "success", node.User.Name).
Inc()
writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)
@@ -654,7 +633,7 @@ func (h *Headscale) handleNodeKeyRefresh(
Str("node", node.Hostname).
Msg("We have the OldNodeKey in the database. This is a key refresh")
err := h.db.DB.Transaction(func(tx *gorm.DB) error {
err := h.db.Write(func(tx *gorm.DB) error {
return db.NodeSetNodeKey(tx, &node, registerRequest.NodeKey)
})
if err != nil {
@@ -737,14 +716,10 @@ func (h *Headscale) handleNodeExpiredOrLoggedOut(
Caller().
Err(err).
Msg("Cannot encode message")
nodeRegistrations.WithLabelValues("reauth", "web", "error", node.User.Name).
Inc()
http.Error(writer, "Internal server error", http.StatusInternalServerError)
return
}
nodeRegistrations.WithLabelValues("reauth", "web", "success", node.User.Name).
Inc()
writer.Header().Set("Content-Type", "application/json; charset=utf-8")
writer.WriteHeader(http.StatusOK)


@@ -33,7 +33,6 @@ func (ns *noiseServer) NoiseRegistrationHandler(
Caller().
Err(err).
Msg("Cannot parse RegisterRequest")
nodeRegistrations.WithLabelValues("unknown", "web", "error", "unknown").Inc()
http.Error(writer, "Internal error", http.StatusInternalServerError)
return


@@ -10,6 +10,7 @@ import (
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/patrickmn/go-cache"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/tailcfg"
@@ -206,6 +207,11 @@ func SetTags(
tags []string,
) error {
if len(tags) == 0 {
// if no tags are provided, we remove all forced tags
if err := tx.Model(&types.Node{}).Where("id = ?", nodeID).Update("forced_tags", types.StringList{}).Error; err != nil {
return fmt.Errorf("failed to remove tags for node in the database: %w", err)
}
return nil
}
@@ -255,9 +261,9 @@ func NodeSetExpiry(tx *gorm.DB,
return tx.Model(&types.Node{}).Where("id = ?", nodeID).Update("expiry", expiry).Error
}
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func (hsdb *HSDatabase) DeleteNode(node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteNode(tx, node, isConnected)
return DeleteNode(tx, node, isLikelyConnected)
})
}
@@ -265,9 +271,9 @@ func (hsdb *HSDatabase) DeleteNode(node *types.Node, isConnected types.NodeConne
// Caller is responsible for notifying all of change.
func DeleteNode(tx *gorm.DB,
node *types.Node,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
changed, err := deleteNodeRoutes(tx, node, isConnected)
changed, err := deleteNodeRoutes(tx, node, isLikelyConnected)
if err != nil {
return changed, err
}


@@ -11,6 +11,7 @@ import (
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/puzpuzpuz/xsync/v3"
"gopkg.in/check.v1"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
@@ -120,7 +121,7 @@ func (s *Suite) TestHardDeleteNode(c *check.C) {
}
db.DB.Save(&node)
_, err = db.DeleteNode(&node, types.NodeConnectedMap{})
_, err = db.DeleteNode(&node, xsync.NewMapOf[types.NodeID, bool]())
c.Assert(err, check.IsNil)
_, err = db.getNode(user.Name, "testnode3")
@@ -386,6 +387,13 @@ func (s *Suite) TestSetTags(c *check.C) {
check.DeepEquals,
types.StringList([]string{"tag:bar", "tag:test", "tag:unknown"}),
)
// test removing tags
err = db.SetTags(node.ID, []string{})
c.Assert(err, check.IsNil)
node, err = db.getNode("test", "testnode")
c.Assert(err, check.IsNil)
c.Assert(node.ForcedTags, check.DeepEquals, types.StringList([]string{}))
}
func TestHeadscale_generateGivenName(t *testing.T) {


@@ -147,7 +147,7 @@ func (*Suite) TestEphemeralKeyReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)
db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})
@@ -181,7 +181,7 @@ func (*Suite) TestEphemeralKeyNotReusable(c *check.C) {
_, err = db.getNode("test7", "testest")
c.Assert(err, check.IsNil)
db.DB.Transaction(func(tx *gorm.DB) error {
db.Write(func(tx *gorm.DB) error {
DeleteExpiredEphemeralNodes(tx, time.Second*20)
return nil
})


@@ -8,6 +8,7 @@ import (
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"gorm.io/gorm"
"tailscale.com/util/set"
@@ -126,7 +127,7 @@ func EnableRoute(tx *gorm.DB, id uint64) (*types.StateUpdate, error) {
func DisableRoute(tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
@@ -147,7 +148,7 @@ func DisableRoute(tx *gorm.DB,
return nil, err
}
update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, err
}
@@ -182,17 +183,17 @@ func DisableRoute(tx *gorm.DB,
func (hsdb *HSDatabase) DeleteRoute(
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
return Write(hsdb.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return DeleteRoute(tx, id, isConnected)
return DeleteRoute(tx, id, isLikelyConnected)
})
}
func DeleteRoute(
tx *gorm.DB,
id uint64,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
) ([]types.NodeID, error) {
route, err := GetRoute(tx, id)
if err != nil {
@@ -207,7 +208,7 @@ func DeleteRoute(
// https://github.com/juanfont/headscale/issues/804#issuecomment-1399314002
var update []types.NodeID
if !route.IsExitRoute() {
update, err = failoverRouteTx(tx, isConnected, route)
update, err = failoverRouteTx(tx, isLikelyConnected, route)
if err != nil {
return nil, nil
}
@@ -252,7 +253,7 @@ func DeleteRoute(
return update, nil
}
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConnectedMap) ([]types.NodeID, error) {
func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isLikelyConnected *xsync.MapOf[types.NodeID, bool]) ([]types.NodeID, error) {
routes, err := GetNodeRoutes(tx, node)
if err != nil {
return nil, fmt.Errorf("getting node routes: %w", err)
@@ -266,7 +267,7 @@ func deleteNodeRoutes(tx *gorm.DB, node *types.Node, isConnected types.NodeConne
// TODO(kradalby): This is a bit too aggressive, we could probably
// figure out which routes needs to be failed over rather than all.
chn, err := failoverRouteTx(tx, isConnected, &routes[i])
chn, err := failoverRouteTx(tx, isLikelyConnected, &routes[i])
if err != nil {
return changed, fmt.Errorf("failing over route after delete: %w", err)
}
@@ -409,7 +410,7 @@ func SaveNodeRoutes(tx *gorm.DB, node *types.Node) (bool, error) {
// If needed, the failover will be attempted.
func FailoverNodeRoutesIfNeccessary(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
node *types.Node,
) (*types.StateUpdate, error) {
nodeRoutes, err := GetNodeRoutes(tx, node)
@@ -430,12 +431,12 @@ nodeRouteLoop:
if route.IsPrimary {
// if we have a primary route, and the node is connected
// nothing needs to be done.
if conn, ok := isConnected[route.Node.ID]; conn && ok {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
continue nodeRouteLoop
}
// if not, we need to failover the route
failover := failoverRoute(isConnected, &route, routes)
failover := failoverRoute(isLikelyConnected, &route, routes)
if failover != nil {
err := failover.save(tx)
if err != nil {
@@ -477,7 +478,7 @@ nodeRouteLoop:
// If the given route was not primary, it returns early.
func failoverRouteTx(
tx *gorm.DB,
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
r *types.Route,
) ([]types.NodeID, error) {
if r == nil {
@@ -500,7 +501,7 @@ func failoverRouteTx(
return nil, fmt.Errorf("getting routes by prefix: %w", err)
}
fo := failoverRoute(isConnected, r, routes)
fo := failoverRoute(isLikelyConnected, r, routes)
if fo == nil {
return nil, nil
}
@@ -538,7 +539,7 @@ func (f *failover) save(tx *gorm.DB) error {
}
func failoverRoute(
isConnected types.NodeConnectedMap,
isLikelyConnected *xsync.MapOf[types.NodeID, bool],
routeToReplace *types.Route,
altRoutes types.Routes,
@@ -570,9 +571,11 @@ func failoverRoute(
continue
}
if isConnected != nil && isConnected[route.Node.ID] {
newPrimary = &altRoutes[idx]
break
if isLikelyConnected != nil {
if val, ok := isLikelyConnected.Load(route.Node.ID); ok && val {
newPrimary = &altRoutes[idx]
break
}
}
}
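The hunks above migrate connectivity tracking from the plain `types.NodeConnectedMap` to `*xsync.MapOf[types.NodeID, bool]`, a concurrent map that can be read without external locking. A small self-contained sketch of the lookup pattern used throughout these diffs (`NodeID` here is a stand-in for `types.NodeID`):

```go
package main

import (
	"fmt"

	"github.com/puzpuzpuz/xsync/v3"
)

// NodeID stands in for types.NodeID from the diffs above.
type NodeID uint64

func main() {
	connected := xsync.NewMapOf[NodeID, bool]()
	connected.Store(1, true)
	connected.Store(2, false)

	// A node counts as "likely connected" only when the key exists and
	// the stored value is true; absent keys read as offline.
	for _, id := range []NodeID{1, 2, 3} {
		if val, ok := connected.Load(id); ok && val {
			fmt.Printf("node %d: likely connected\n", id)
		} else {
			fmt.Printf("node %d: likely offline\n", id)
		}
	}
}
```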


@@ -10,11 +10,22 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"github.com/puzpuzpuz/xsync/v3"
"gopkg.in/check.v1"
"gorm.io/gorm"
"tailscale.com/tailcfg"
)
var smap = func(m map[types.NodeID]bool) *xsync.MapOf[types.NodeID, bool] {
s := xsync.NewMapOf[types.NodeID, bool]()
for k, v := range m {
s.Store(k, v)
}
return s
}
func (s *Suite) TestGetRoutes(c *check.C) {
user, err := db.CreateUser("test")
c.Assert(err, check.IsNil)
@@ -331,7 +342,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
name string
nodes types.Nodes
routes types.Routes
isConnected []types.NodeConnectedMap
isConnected []map[types.NodeID]bool
want []*types.StateUpdate
wantErr bool
}{
@@ -346,7 +357,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@@ -384,7 +395,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 up recon = noop
{
1: true,
@@ -428,7 +439,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@@ -486,7 +497,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), false, false),
r(3, 3, ipp("10.0.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@@ -516,7 +527,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.1.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@@ -539,7 +550,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.1.0.0/24"), false, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@@ -562,7 +573,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, false),
r(3, 3, ipp("10.1.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: false,
@@ -585,7 +596,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, true),
r(3, 3, ipp("10.1.0.0/24"), true, false),
},
isConnected: []types.NodeConnectedMap{
isConnected: []map[types.NodeID]bool{
// n1 goes down
{
1: true,
@@ -618,7 +629,7 @@ func TestFailoverNodeRoutesIfNeccessary(t *testing.T) {
want := tt.want[step]
got, err := Write(db.DB, func(tx *gorm.DB) (*types.StateUpdate, error) {
return FailoverNodeRoutesIfNeccessary(tx, isConnected, node)
return FailoverNodeRoutesIfNeccessary(tx, smap(isConnected), node)
})
if (err != nil) != tt.wantErr {
@@ -640,7 +651,7 @@ func TestFailoverRouteTx(t *testing.T) {
name string
failingRoute types.Route
routes types.Routes
isConnected types.NodeConnectedMap
isConnected map[types.NodeID]bool
want []types.NodeID
wantErr bool
}{
@@ -743,7 +754,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
},
@@ -841,7 +852,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
2: true,
3: true,
@@ -889,7 +900,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
4: false,
},
@@ -945,7 +956,7 @@ func TestFailoverRouteTx(t *testing.T) {
Enabled: true,
},
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
4: false,
@@ -1010,7 +1021,7 @@ func TestFailoverRouteTx(t *testing.T) {
}
got, err := Write(db.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return failoverRouteTx(tx, tt.isConnected, &tt.failingRoute)
return failoverRouteTx(tx, smap(tt.isConnected), &tt.failingRoute)
})
if (err != nil) != tt.wantErr {
@@ -1048,7 +1059,7 @@ func TestFailoverRoute(t *testing.T) {
name string
failingRoute types.Route
routes types.Routes
isConnected types.NodeConnectedMap
isConnected map[types.NodeID]bool
want *failover
}{
{
@@ -1085,7 +1096,7 @@ func TestFailoverRoute(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
},
@@ -1111,7 +1122,7 @@ func TestFailoverRoute(t *testing.T) {
r(2, 2, ipp("10.0.0.0/24"), true, true),
r(3, 3, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
2: true,
3: true,
@@ -1128,7 +1139,7 @@ func TestFailoverRoute(t *testing.T) {
r(1, 1, ipp("10.0.0.0/24"), true, true),
r(2, 4, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: true,
4: false,
},
@@ -1142,7 +1153,7 @@ func TestFailoverRoute(t *testing.T) {
r(2, 4, ipp("10.0.0.0/24"), true, false),
r(3, 2, ipp("10.0.0.0/24"), true, false),
},
isConnected: types.NodeConnectedMap{
isConnected: map[types.NodeID]bool{
1: false,
2: true,
4: false,
@@ -1172,7 +1183,7 @@ func TestFailoverRoute(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotf := failoverRoute(tt.isConnected, &tt.failingRoute, tt.routes)
gotf := failoverRoute(smap(tt.isConnected), &tt.failingRoute, tt.routes)
if tt.want == nil && gotf != nil {
t.Fatalf("expected nil, got %+v", gotf)


@@ -4,7 +4,6 @@ package hscontrol
import (
"context"
"errors"
"fmt"
"sort"
"strings"
"time"
@@ -145,7 +144,7 @@ func (api headscaleV1APIServer) ExpirePreAuthKey(
ctx context.Context,
request *v1.ExpirePreAuthKeyRequest,
) (*v1.ExpirePreAuthKeyResponse, error) {
err := api.h.db.DB.Transaction(func(tx *gorm.DB) error {
err := api.h.db.Write(func(tx *gorm.DB) error {
preAuthKey, err := db.GetPreAuthKey(tx, request.GetUser(), request.Key)
if err != nil {
return err
@@ -279,13 +278,13 @@ func (api headscaleV1APIServer) SetTags(
func validateTag(tag string) error {
if strings.Index(tag, "tag:") != 0 {
return fmt.Errorf("tag must start with the string 'tag:'")
return errors.New("tag must start with the string 'tag:'")
}
if strings.ToLower(tag) != tag {
return fmt.Errorf("tag should be lowercase")
return errors.New("tag should be lowercase")
}
if len(strings.Fields(tag)) > 1 {
return fmt.Errorf("tag should not contains space")
return errors.New("tag should not contains space")
}
return nil
}
@@ -301,7 +300,7 @@ func (api headscaleV1APIServer) DeleteNode(
changedNodes, err := api.h.db.DeleteNode(
node,
api.h.nodeNotifier.ConnectedMap(),
api.h.nodeNotifier.LikelyConnectedMap(),
)
if err != nil {
return nil, err
@@ -343,7 +342,7 @@ func (api headscaleV1APIServer) ExpireNode(
}
ctx = types.NotifyCtx(ctx, "cli-expirenode-self", node.Hostname)
api.h.nodeNotifier.NotifyByMachineKey(
api.h.nodeNotifier.NotifyByNodeID(
ctx,
types.StateUpdate{
Type: types.StateSelfUpdate,
@@ -401,7 +400,7 @@ func (api headscaleV1APIServer) ListNodes(
ctx context.Context,
request *v1.ListNodesRequest,
) (*v1.ListNodesResponse, error) {
isConnected := api.h.nodeNotifier.ConnectedMap()
isLikelyConnected := api.h.nodeNotifier.LikelyConnectedMap()
if request.GetUser() != "" {
nodes, err := db.Read(api.h.db.DB, func(rx *gorm.DB) (types.Nodes, error) {
return db.ListNodesByUser(rx, request.GetUser())
@@ -416,7 +415,9 @@ func (api headscaleV1APIServer) ListNodes(
// Populate the online field based on
// currently connected nodes.
resp.Online = isConnected[node.ID]
if val, ok := isLikelyConnected.Load(node.ID); ok && val {
resp.Online = true
}
response[index] = resp
}
@@ -439,7 +440,9 @@ func (api headscaleV1APIServer) ListNodes(
// Populate the online field based on
// currently connected nodes.
resp.Online = isConnected[node.ID]
if val, ok := isLikelyConnected.Load(node.ID); ok && val {
resp.Online = true
}
validTags, invalidTags := api.h.ACLPolicy.TagsOfNode(
node,
@@ -528,7 +531,7 @@ func (api headscaleV1APIServer) DisableRoute(
request *v1.DisableRouteRequest,
) (*v1.DisableRouteResponse, error) {
update, err := db.Write(api.h.db.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return db.DisableRoute(tx, request.GetRouteId(), api.h.nodeNotifier.ConnectedMap())
return db.DisableRoute(tx, request.GetRouteId(), api.h.nodeNotifier.LikelyConnectedMap())
})
if err != nil {
return nil, err
@@ -568,7 +571,7 @@ func (api headscaleV1APIServer) DeleteRoute(
ctx context.Context,
request *v1.DeleteRouteRequest,
) (*v1.DeleteRouteResponse, error) {
isConnected := api.h.nodeNotifier.ConnectedMap()
isConnected := api.h.nodeNotifier.LikelyConnectedMap()
update, err := db.Write(api.h.db.DB, func(tx *gorm.DB) ([]types.NodeID, error) {
return db.DeleteRoute(tx, request.GetRouteId(), isConnected)
})

View File

@@ -17,6 +17,7 @@ import (
mapset "github.com/deckarep/golang-set/v2"
"github.com/juanfont/headscale/hscontrol/db"
"github.com/juanfont/headscale/hscontrol/notifier"
"github.com/juanfont/headscale/hscontrol/policy"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
@@ -51,10 +52,10 @@ var debugDumpMapResponsePath = envknob.String("HEADSCALE_DEBUG_DUMP_MAPRESPONSE_
type Mapper struct {
// Configuration
// TODO(kradalby): figure out if this is the format we want this in
db *db.HSDatabase
cfg *types.Config
derpMap *tailcfg.DERPMap
isLikelyConnected types.NodeConnectedMap
db *db.HSDatabase
cfg *types.Config
derpMap *tailcfg.DERPMap
notif *notifier.Notifier
uid string
created time.Time
@@ -70,15 +71,15 @@ func NewMapper(
db *db.HSDatabase,
cfg *types.Config,
derpMap *tailcfg.DERPMap,
isLikelyConnected types.NodeConnectedMap,
notif *notifier.Notifier,
) *Mapper {
uid, _ := util.GenerateRandomStringDNSSafe(mapperIDLength)
return &Mapper{
db: db,
cfg: cfg,
derpMap: derpMap,
isLikelyConnected: isLikelyConnected,
db: db,
cfg: cfg,
derpMap: derpMap,
notif: notif,
uid: uid,
created: time.Now(),
@@ -517,7 +518,7 @@ func (m *Mapper) ListPeers(nodeID types.NodeID) (types.Nodes, error) {
}
for _, peer := range peers {
online := m.isLikelyConnected[peer.ID]
online := m.notif.IsLikelyConnected(peer.ID)
peer.IsOnline = &online
}

View File

@@ -1,6 +1,10 @@
package hscontrol
import (
"net/http"
"strconv"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
@@ -8,18 +12,94 @@ import (
const prometheusNamespace = "headscale"
var (
// This is a high cardinality metric (user x node), we might want to make this
// configurable/opt-in in the future.
nodeRegistrations = promauto.NewCounterVec(prometheus.CounterOpts{
mapResponseSent = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "node_registrations_total",
Help: "The total amount of registered node attempts",
}, []string{"action", "auth", "status", "user"})
updateRequestsSentToNode = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "mapresponse_sent_total",
Help: "total count of mapresponses sent to clients",
}, []string{"status", "type"})
mapResponseUpdateReceived = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "update_request_sent_to_node_total",
Help: "The number of calls/messages issued on a specific nodes update channel",
}, []string{"user", "node", "status"})
// TODO(kradalby): This is very much a debugging metric; we might want to remove it.
Name: "mapresponse_updates_received_total",
Help: "total count of mapresponse updates received on update channel",
}, []string{"type"})
mapResponseWriteUpdatesInStream = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_write_updates_in_stream_total",
Help: "total count of writes that occured in a stream session, pre-68 nodes",
}, []string{"status"})
mapResponseEndpointUpdates = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_endpoint_updates_total",
Help: "total count of endpoint updates received",
}, []string{"status"})
mapResponseReadOnly = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_readonly_requests_total",
Help: "total count of readonly requests received",
}, []string{"status"})
mapResponseSessions = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_current_sessions_total",
Help: "total count open map response sessions",
})
mapResponseRejected = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "mapresponse_rejected_new_sessions_total",
Help: "total count of new mapsessions rejected",
}, []string{"reason"})
httpDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: prometheusNamespace,
Name: "http_duration_seconds",
Help: "Duration of HTTP requests.",
}, []string{"path"})
httpCounter = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "http_requests_total",
Help: "Total number of http requests processed",
}, []string{"code", "method", "path"},
)
)
// prometheusMiddleware implements mux.MiddlewareFunc.
func prometheusMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
route := mux.CurrentRoute(r)
path, _ := route.GetPathTemplate()
// Ignore streaming and Noise sessions;
// they have their own router further down.
if path == "/ts2021" || path == "/machine/map" || path == "/derp" || path == "/derp/probe" || path == "/bootstrap-dns" {
next.ServeHTTP(w, r)
return
}
rw := &respWriterProm{ResponseWriter: w}
timer := prometheus.NewTimer(httpDuration.WithLabelValues(path))
next.ServeHTTP(rw, r)
timer.ObserveDuration()
httpCounter.WithLabelValues(strconv.Itoa(rw.status), r.Method, path).Inc()
})
}
type respWriterProm struct {
http.ResponseWriter
status int
written int64
wroteHeader bool
}
func (r *respWriterProm) WriteHeader(code int) {
r.status = code
r.wroteHeader = true
r.ResponseWriter.WriteHeader(code)
}
func (r *respWriterProm) Write(b []byte) (int, error) {
if !r.wroteHeader {
r.WriteHeader(http.StatusOK)
}
n, err := r.ResponseWriter.Write(b)
r.written += int64(n)
return n, err
}
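A minimal sketch (not part of this changeset) of how the middleware is wired into a mux router; the /health handler is hypothetical:

	func newInstrumentedRouter() *mux.Router {
		router := mux.NewRouter()
		router.Use(prometheusMiddleware)
		router.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
			w.WriteHeader(http.StatusOK) // recorded as code=200, path=/health
		}).Methods(http.MethodGet)
		return router
	}

Because respWriterProm backfills a 200 in Write, handlers that write a body without calling WriteHeader are still labelled correctly.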

View File

@@ -95,6 +95,7 @@ func (h *Headscale) NoiseUpgradeHandler(
// The HTTP2 server that exposes this router is created for
// a single hijacked connection from /ts2021, using netutil.NewOneConnListener
router := mux.NewRouter()
router.Use(prometheusMiddleware)
router.HandleFunc("/machine/register", noiseServer.NoiseRegistrationHandler).
Methods(http.MethodPost)
@@ -225,7 +226,6 @@ func (ns *noiseServer) NoisePollNetMapHandler(
if err != nil {
log.Error().
Str("handler", "NoisePollNetMap").
Uint64("node.id", node.ID.Uint64()).
Msgf("Failed to fetch node from the database with node key: %s", mapRequest.NodeKey.String())
http.Error(writer, "Internal error", http.StatusInternalServerError)
@@ -267,10 +267,12 @@ func (ns *noiseServer) NoisePollNetMapHandler(
defer ns.headscale.mapSessionMu.Unlock()
sess.infof("node has an open stream(%p), rejecting new stream", sess)
mapResponseRejected.WithLabelValues("exists").Inc()
return
}
ns.headscale.mapSessions[node.ID] = sess
mapResponseSessions.Inc()
ns.headscale.mapSessionMu.Unlock()
sess.tracef("releasing lock to check stream")
}
@@ -283,6 +285,7 @@ func (ns *noiseServer) NoisePollNetMapHandler(
defer ns.headscale.mapSessionMu.Unlock()
delete(ns.headscale.mapSessions, node.ID)
mapResponseSessions.Dec()
sess.tracef("releasing lock to remove stream")
}

View File

@@ -0,0 +1,32 @@
package notifier
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
const prometheusNamespace = "headscale"
var (
notifierWaitForLock = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: prometheusNamespace,
Name: "notifier_wait_for_lock_seconds",
Help: "histogram of time spent waiting for the notifier lock",
Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.5, 1, 3, 5, 10},
}, []string{"action"})
notifierUpdateSent = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "notifier_update_sent_total",
Help: "total count of update sent on nodes channel",
}, []string{"status", "type", "trigger"})
notifierUpdateReceived = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: prometheusNamespace,
Name: "notifier_update_received_total",
Help: "total count of updates received by notifier",
}, []string{"type", "trigger"})
notifierNodeUpdateChans = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: prometheusNamespace,
Name: "notifier_open_channels_total",
Help: "total count open channels in notifier",
})
)

View File

@@ -3,25 +3,35 @@ package notifier
import (
"context"
"fmt"
"slices"
"sort"
"strings"
"sync"
"time"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/puzpuzpuz/xsync/v3"
"github.com/rs/zerolog/log"
"tailscale.com/tailcfg"
"tailscale.com/util/set"
)
type Notifier struct {
l sync.RWMutex
nodes map[types.NodeID]chan<- types.StateUpdate
connected types.NodeConnectedMap
connected *xsync.MapOf[types.NodeID, bool]
b *batcher
}
func NewNotifier() *Notifier {
return &Notifier{
func NewNotifier(cfg *types.Config) *Notifier {
n := &Notifier{
nodes: make(map[types.NodeID]chan<- types.StateUpdate),
connected: make(types.NodeConnectedMap),
connected: xsync.NewMapOf[types.NodeID, bool](),
}
b := newBatcher(cfg.Tuning.BatchChangeDelay, n)
n.b = b
// TODO(kradalby): clean this up
go b.doWork()
return n
}
func (n *Notifier) AddNode(nodeID types.NodeID, c chan<- types.StateUpdate) {
@@ -31,16 +41,19 @@ func (n *Notifier) AddNode(nodeID types.NodeID, c chan<- types.StateUpdate) {
Uint64("node.id", nodeID.Uint64()).
Msg("releasing lock to add node")
start := time.Now()
n.l.Lock()
defer n.l.Unlock()
notifierWaitForLock.WithLabelValues("add").Observe(time.Since(start).Seconds())
n.nodes[nodeID] = c
n.connected[nodeID] = true
n.connected.Store(nodeID, true)
log.Trace().
Uint64("node.id", nodeID.Uint64()).
Int("open_chans", len(n.nodes)).
Msg("Added new channel")
notifierNodeUpdateChans.Inc()
}
func (n *Notifier) RemoveNode(nodeID types.NodeID) {
@@ -50,20 +63,23 @@ func (n *Notifier) RemoveNode(nodeID types.NodeID) {
Uint64("node.id", nodeID.Uint64()).
Msg("releasing lock to remove node")
start := time.Now()
n.l.Lock()
defer n.l.Unlock()
notifierWaitForLock.WithLabelValues("remove").Observe(time.Since(start).Seconds())
if len(n.nodes) == 0 {
return
}
delete(n.nodes, nodeID)
n.connected[nodeID] = false
n.connected.Store(nodeID, false)
log.Trace().
Uint64("node.id", nodeID.Uint64()).
Int("open_chans", len(n.nodes)).
Msg("Removed channel")
notifierNodeUpdateChans.Dec()
}
// IsConnected reports if a node is connected to headscale and has a
@@ -72,17 +88,22 @@ func (n *Notifier) IsConnected(nodeID types.NodeID) bool {
n.l.RLock()
defer n.l.RUnlock()
return n.connected[nodeID]
if val, ok := n.connected.Load(nodeID); ok {
return val
}
return false
}
// IsLikelyConnected reports if a node is connected to headscale and has a
// poll session open, but does not lock, so the result might be stale.
func (n *Notifier) IsLikelyConnected(nodeID types.NodeID) bool {
return n.connected[nodeID]
if val, ok := n.connected.Load(nodeID); ok {
return val
}
return false
}
// TODO(kradalby): This returns a pointer and can be dangerous.
func (n *Notifier) ConnectedMap() types.NodeConnectedMap {
func (n *Notifier) LikelyConnectedMap() *xsync.MapOf[types.NodeID, bool] {
return n.connected
}
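A minimal sketch (not part of this changeset) of the xsync.MapOf semantics this relies on: Load is safe to call concurrently with Store, so IsLikelyConnected can skip the notifier lock at the cost of a possibly stale answer:

	func likelyConnectedSketch() {
		m := xsync.NewMapOf[types.NodeID, bool]()
		m.Store(types.NodeID(1), true)
		if online, ok := m.Load(types.NodeID(1)); ok && online {
			// treat node 1 as likely connected
		}
	}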
@@ -95,45 +116,11 @@ func (n *Notifier) NotifyWithIgnore(
update types.StateUpdate,
ignoreNodeIDs ...types.NodeID,
) {
log.Trace().Caller().Str("type", update.Type.String()).Msg("acquiring lock to notify")
defer log.Trace().
Caller().
Str("type", update.Type.String()).
Msg("releasing lock, finished notifying")
n.l.RLock()
defer n.l.RUnlock()
if update.Type == types.StatePeerChangedPatch {
log.Trace().Interface("update", update).Interface("online", n.connected).Msg("PATCH UPDATE SENT")
}
for nodeID, c := range n.nodes {
if slices.Contains(ignoreNodeIDs, nodeID) {
continue
}
select {
case <-ctx.Done():
log.Error().
Err(ctx.Err()).
Uint64("node.id", nodeID.Uint64()).
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Msgf("update not sent, context cancelled")
return
case c <- update:
log.Trace().
Uint64("node.id", nodeID.Uint64()).
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Msgf("update successfully sent on chan")
}
}
notifierUpdateReceived.WithLabelValues(update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
n.b.addOrPassthrough(update)
}
func (n *Notifier) NotifyByMachineKey(
func (n *Notifier) NotifyByNodeID(
ctx context.Context,
update types.StateUpdate,
nodeID types.NodeID,
@@ -144,8 +131,10 @@ func (n *Notifier) NotifyByMachineKey(
Str("type", update.Type.String()).
Msg("releasing lock, finished notifying")
start := time.Now()
n.l.RLock()
defer n.l.RUnlock()
notifierWaitForLock.WithLabelValues("notify").Observe(time.Since(start).Seconds())
if c, ok := n.nodes[nodeID]; ok {
select {
@@ -153,9 +142,10 @@ func (n *Notifier) NotifyByMachineKey(
log.Error().
Err(ctx.Err()).
Uint64("node.id", nodeID.Uint64()).
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Any("origin", types.NotifyOriginKey.Value(ctx)).
Any("origin-hostname", types.NotifyHostnameKey.Value(ctx)).
Msgf("update not sent, context cancelled")
notifierUpdateSent.WithLabelValues("cancelled", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
return
case c <- update:
@@ -164,10 +154,23 @@ func (n *Notifier) NotifyByMachineKey(
Any("origin", ctx.Value("origin")).
Any("origin-hostname", ctx.Value("hostname")).
Msgf("update successfully sent on chan")
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), types.NotifyOriginKey.Value(ctx)).Inc()
}
}
}
func (n *Notifier) sendAll(update types.StateUpdate) {
start := time.Now()
n.l.RLock()
defer n.l.RUnlock()
notifierWaitForLock.WithLabelValues("send-all").Observe(time.Since(start).Seconds())
for _, c := range n.nodes {
c <- update
notifierUpdateSent.WithLabelValues("ok", update.Type.String(), "send-all").Inc()
}
}
func (n *Notifier) String() string {
n.l.RLock()
defer n.l.RUnlock()
@@ -182,9 +185,173 @@ func (n *Notifier) String() string {
b.WriteString("\n")
b.WriteString("connected:\n")
for k, v := range n.connected {
n.connected.Range(func(k types.NodeID, v bool) bool {
fmt.Fprintf(&b, "\t%d: %t\n", k, v)
}
return true
})
return b.String()
}
type batcher struct {
tick *time.Ticker
mu sync.Mutex
cancelCh chan struct{}
changedNodeIDs set.Slice[types.NodeID]
nodesChanged bool
patches map[types.NodeID]tailcfg.PeerChange
patchesChanged bool
n *Notifier
}
func newBatcher(batchTime time.Duration, n *Notifier) *batcher {
return &batcher{
tick: time.NewTicker(batchTime),
cancelCh: make(chan struct{}),
patches: make(map[types.NodeID]tailcfg.PeerChange),
n: n,
}
}
func (b *batcher) close() {
b.cancelCh <- struct{}{}
}
// addOrPassthrough adds the update to the batcher; if it is not a
// type that is currently batched, it is sent immediately.
func (b *batcher) addOrPassthrough(update types.StateUpdate) {
b.mu.Lock()
defer b.mu.Unlock()
switch update.Type {
case types.StatePeerChanged:
b.changedNodeIDs.Add(update.ChangeNodes...)
b.nodesChanged = true
case types.StatePeerChangedPatch:
for _, newPatch := range update.ChangePatches {
if curr, ok := b.patches[types.NodeID(newPatch.NodeID)]; ok {
overwritePatch(&curr, newPatch)
b.patches[types.NodeID(newPatch.NodeID)] = curr
} else {
b.patches[types.NodeID(newPatch.NodeID)] = *newPatch
}
}
b.patchesChanged = true
default:
b.n.sendAll(update)
}
}
// flush sends all accumulated changes and patches to all
// nodes in the notifier.
func (b *batcher) flush() {
b.mu.Lock()
defer b.mu.Unlock()
if b.nodesChanged || b.patchesChanged {
var patches []*tailcfg.PeerChange
// If a node is getting a full update from a changed-node
// update, then the patch can be dropped.
for nodeID, patch := range b.patches {
if b.changedNodeIDs.Contains(nodeID) {
delete(b.patches, nodeID)
} else {
patches = append(patches, &patch)
}
}
changedNodes := b.changedNodeIDs.Slice().AsSlice()
sort.Slice(changedNodes, func(i, j int) bool {
return changedNodes[i] < changedNodes[j]
})
if b.changedNodeIDs.Slice().Len() > 0 {
update := types.StateUpdate{
Type: types.StatePeerChanged,
ChangeNodes: changedNodes,
}
b.n.sendAll(update)
}
if len(patches) > 0 {
patchUpdate := types.StateUpdate{
Type: types.StatePeerChangedPatch,
ChangePatches: patches,
}
b.n.sendAll(patchUpdate)
}
b.changedNodeIDs = set.Slice[types.NodeID]{}
b.nodesChanged = false
b.patches = make(map[types.NodeID]tailcfg.PeerChange, len(b.patches))
b.patchesChanged = false
}
}
func (b *batcher) doWork() {
for {
select {
case <-b.cancelCh:
return
case <-b.tick.C:
b.flush()
}
}
}
// overwritePatch takes the current patch and a newer patch
// and overrides any fields that have changed
func overwritePatch(currPatch, newPatch *tailcfg.PeerChange) {
if newPatch.DERPRegion != 0 {
currPatch.DERPRegion = newPatch.DERPRegion
}
if newPatch.Cap != 0 {
currPatch.Cap = newPatch.Cap
}
if newPatch.CapMap != nil {
currPatch.CapMap = newPatch.CapMap
}
if newPatch.Endpoints != nil {
currPatch.Endpoints = newPatch.Endpoints
}
if newPatch.Key != nil {
currPatch.Key = newPatch.Key
}
if newPatch.KeySignature != nil {
currPatch.KeySignature = newPatch.KeySignature
}
if newPatch.DiscoKey != nil {
currPatch.DiscoKey = newPatch.DiscoKey
}
if newPatch.Online != nil {
currPatch.Online = newPatch.Online
}
if newPatch.LastSeen != nil {
currPatch.LastSeen = newPatch.LastSeen
}
if newPatch.KeyExpiry != nil {
currPatch.KeyExpiry = newPatch.KeyExpiry
}
if newPatch.Capabilities != nil {
currPatch.Capabilities = newPatch.Capabilities
}
}

View File

@@ -0,0 +1,249 @@
package notifier
import (
"context"
"net/netip"
"testing"
"time"
"github.com/google/go-cmp/cmp"
"github.com/juanfont/headscale/hscontrol/types"
"github.com/juanfont/headscale/hscontrol/util"
"tailscale.com/tailcfg"
)
func TestBatcher(t *testing.T) {
tests := []struct {
name string
updates []types.StateUpdate
want []types.StateUpdate
}{
{
name: "full-passthrough",
updates: []types.StateUpdate{
{
Type: types.StateFullUpdate,
},
},
want: []types.StateUpdate{
{
Type: types.StateFullUpdate,
},
},
},
{
name: "derp-passthrough",
updates: []types.StateUpdate{
{
Type: types.StateDERPUpdated,
},
},
want: []types.StateUpdate{
{
Type: types.StateDERPUpdated,
},
},
},
{
name: "single-node-update",
updates: []types.StateUpdate{
{
Type: types.StatePeerChanged,
ChangeNodes: []types.NodeID{
2,
},
},
},
want: []types.StateUpdate{
{
Type: types.StatePeerChanged,
ChangeNodes: []types.NodeID{
2,
},
},
},
},
{
name: "merge-node-update",
updates: []types.StateUpdate{
{
Type: types.StatePeerChanged,
ChangeNodes: []types.NodeID{
2, 4,
},
},
{
Type: types.StatePeerChanged,
ChangeNodes: []types.NodeID{
2, 3,
},
},
},
want: []types.StateUpdate{
{
Type: types.StatePeerChanged,
ChangeNodes: []types.NodeID{
2, 3, 4,
},
},
},
},
{
name: "single-patch-update",
updates: []types.StateUpdate{
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 2,
DERPRegion: 5,
},
},
},
},
want: []types.StateUpdate{
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 2,
DERPRegion: 5,
},
},
},
},
},
{
name: "merge-patch-to-same-node-update",
updates: []types.StateUpdate{
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 2,
DERPRegion: 5,
},
},
},
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 2,
DERPRegion: 6,
},
},
},
},
want: []types.StateUpdate{
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 2,
DERPRegion: 6,
},
},
},
},
},
{
name: "merge-patch-to-multiple-node-update",
updates: []types.StateUpdate{
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 3,
Endpoints: []netip.AddrPort{
netip.MustParseAddrPort("1.1.1.1:9090"),
},
},
},
},
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 3,
Endpoints: []netip.AddrPort{
netip.MustParseAddrPort("1.1.1.1:9090"),
netip.MustParseAddrPort("2.2.2.2:8080"),
},
},
},
},
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 4,
DERPRegion: 6,
},
},
},
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 4,
Cap: tailcfg.CapabilityVersion(54),
},
},
},
},
want: []types.StateUpdate{
{
Type: types.StatePeerChangedPatch,
ChangePatches: []*tailcfg.PeerChange{
{
NodeID: 3,
Endpoints: []netip.AddrPort{
netip.MustParseAddrPort("1.1.1.1:9090"),
netip.MustParseAddrPort("2.2.2.2:8080"),
},
},
{
NodeID: 4,
DERPRegion: 6,
Cap: tailcfg.CapabilityVersion(54),
},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
n := NewNotifier(&types.Config{
Tuning: types.Tuning{
// We will call flush manually for the tests,
// so do not run the worker.
BatchChangeDelay: time.Hour,
},
})
ch := make(chan types.StateUpdate, 30)
defer close(ch)
n.AddNode(1, ch)
defer n.RemoveNode(1)
for _, u := range tt.updates {
n.NotifyAll(context.Background(), u)
}
n.b.flush()
var got []types.StateUpdate
for len(ch) > 0 {
out := <-ch
got = append(got, out)
}
if diff := cmp.Diff(tt.want, got, util.Comparers...); diff != "" {
t.Errorf("batcher() unexpected result (-want +got):\n%s", diff)
}
})
}
}

View File

@@ -602,7 +602,7 @@ func (h *Headscale) registerNodeForOIDCCallback(
return err
}
if err := h.db.DB.Transaction(func(tx *gorm.DB) error {
if err := h.db.Write(func(tx *gorm.DB) error {
if _, err := db.RegisterNodeFromAuthCallback(
// TODO(kradalby): find a better way to use the cache across modules
tx,

View File

@@ -36,6 +36,38 @@ const (
expectedTokenItems = 2
)
var theInternetSet *netipx.IPSet
// theInternet returns the IPSet for the Internet.
// https://www.youtube.com/watch?v=iDbyYGrswtg
func theInternet() *netipx.IPSet {
if theInternetSet != nil {
return theInternetSet
}
var internetBuilder netipx.IPSetBuilder
internetBuilder.AddPrefix(netip.MustParsePrefix("2000::/3"))
internetBuilder.AddPrefix(netip.MustParsePrefix("0.0.0.0/0"))
// Delete Private network addresses
// https://datatracker.ietf.org/doc/html/rfc1918
internetBuilder.RemovePrefix(netip.MustParsePrefix("fc00::/7"))
internetBuilder.RemovePrefix(netip.MustParsePrefix("10.0.0.0/8"))
internetBuilder.RemovePrefix(netip.MustParsePrefix("172.16.0.0/12"))
internetBuilder.RemovePrefix(netip.MustParsePrefix("192.168.0.0/16"))
// Delete Tailscale networks
internetBuilder.RemovePrefix(netip.MustParsePrefix("fd7a:115c:a1e0::/48"))
internetBuilder.RemovePrefix(netip.MustParsePrefix("100.64.0.0/10"))
// Delete "cant find DHCP networks"
internetBuilder.RemovePrefix(netip.MustParsePrefix("fe80::/10")) // link-loca
internetBuilder.RemovePrefix(netip.MustParsePrefix("169.254.0.0/16"))
// Assign to the package-level variable; ":=" would shadow it and
// defeat the memoization check above.
theInternetSet, _ = internetBuilder.IPSet()
return theInternetSet
}
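A minimal sketch (not part of this changeset) of how the resulting set behaves; Contains is the netipx.IPSet membership check:

	func theInternetSketch() {
		inet := theInternet()
		_ = inet.Contains(netip.MustParseAddr("8.8.8.8"))      // true: public address
		_ = inet.Contains(netip.MustParseAddr("100.64.0.1"))   // false: Tailscale CGNAT range
		_ = inet.Contains(netip.MustParseAddr("192.168.1.10")) // false: RFC1918
	}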
// For some reason golang.org/x/net/internal/iana is an internal package.
const (
protocolICMP = 1 // Internet Control Message
@@ -221,28 +253,28 @@ func ReduceFilterRules(node *types.Node, rules []tailcfg.FilterRule) []tailcfg.F
// record if the rule is actually relevant for the given node.
dests := []tailcfg.NetPortRange{}
DEST_LOOP:
for _, dest := range rule.DstPorts {
expanded, err := util.ParseIPSet(dest.IP, nil)
// Fail closed: if we can't parse it, then we should not allow
// access.
if err != nil {
continue
continue DEST_LOOP
}
if node.InIPSet(expanded) {
dests = append(dests, dest)
continue DEST_LOOP
}
// If the node exposes routes, ensure they are not removed
// when the filters are reduced.
if node.Hostinfo != nil {
// TODO(kradalby): Evaluate if we should only keep
// the routes if the route is enabled. This will
// require database access in this part of the code.
if len(node.Hostinfo.RoutableIPs) > 0 {
for _, routableIP := range node.Hostinfo.RoutableIPs {
if expanded.ContainsPrefix(routableIP) {
if expanded.OverlapsPrefix(routableIP) {
dests = append(dests, dest)
continue DEST_LOOP
}
}
}
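A minimal sketch (not part of this changeset) of the difference that motivates the OverlapsPrefix change: a destination of 8.0.0.0/16 does not contain a routed 8.0.0.0/8, but the two do overlap, so the rule must be kept:

	func overlapSketch() {
		var b netipx.IPSetBuilder
		b.AddPrefix(netip.MustParsePrefix("8.0.0.0/16")) // ACL destination
		set, _ := b.IPSet()
		routed := netip.MustParsePrefix("8.0.0.0/8") // node's advertised route
		_ = set.ContainsPrefix(routed) // false: /8 is wider than the /16 set
		_ = set.OverlapsPrefix(routed) // true: they share address space
	}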
@@ -517,6 +549,7 @@ func (pol *ACLPolicy) expandSource(
// - a host
// - an ip
// - a cidr
// - an autogroup
// and transform these in IPAddresses.
func (pol *ACLPolicy) ExpandAlias(
nodes types.Nodes,
@@ -542,6 +575,10 @@ func (pol *ACLPolicy) ExpandAlias(
return pol.expandIPsFromTag(alias, nodes)
}
if isAutoGroup(alias) {
return expandAutoGroup(alias)
}
// if alias is a user
if ips, err := pol.expandIPsFromUser(alias, nodes); ips != nil {
return ips, err
@@ -862,6 +899,16 @@ func (pol *ACLPolicy) expandIPsFromIPPrefix(
return build.IPSet()
}
func expandAutoGroup(alias string) (*netipx.IPSet, error) {
switch {
case strings.HasPrefix(alias, "autogroup:internet"):
return theInternet(), nil
default:
return nil, fmt.Errorf("unknown autogroup %q", alias)
}
}
func isWildcard(str string) bool {
return str == "*"
}
@@ -874,6 +921,10 @@ func isTag(str string) bool {
return strings.HasPrefix(str, "tag:")
}
func isAutoGroup(str string) bool {
return strings.HasPrefix(str, "autogroup:")
}
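A minimal sketch (not part of this changeset) of how an ACL destination resolves through the new autogroup path; the policy and node list are hypothetical:

	func autogroupSketch(pol *ACLPolicy, nodes types.Nodes) (*netipx.IPSet, error) {
		// "autogroup:internet" is the only autogroup recognised so far;
		// anything else falls through to the error in expandAutoGroup.
		return pol.ExpandAlias(nodes, "autogroup:internet")
	}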
// TagsOfNode will return the tags of the current node.
// Invalid tags are tags added by a user on a node, and that user doesn't have authority to add this tag.
// Valid tags are tags added by a user that is allowed in the ACL policy to add this tag.

View File

@@ -1765,6 +1765,108 @@ func TestACLPolicy_generateFilterRules(t *testing.T) {
}
}
// tsExitNodeDest is the list of destination IP ranges that are allowed when
// you dump the filter list from a Tailscale node connected to Tailscale SaaS.
var tsExitNodeDest = []tailcfg.NetPortRange{
{
IP: "0.0.0.0-9.255.255.255",
Ports: tailcfg.PortRangeAny,
},
{
IP: "11.0.0.0-100.63.255.255",
Ports: tailcfg.PortRangeAny,
},
{
IP: "100.128.0.0-169.253.255.255",
Ports: tailcfg.PortRangeAny,
},
{
IP: "169.255.0.0-172.15.255.255",
Ports: tailcfg.PortRangeAny,
},
{
IP: "172.32.0.0-192.167.255.255",
Ports: tailcfg.PortRangeAny,
},
{
IP: "192.169.0.0-255.255.255.255",
Ports: tailcfg.PortRangeAny,
},
{
IP: "2000::-3fff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
Ports: tailcfg.PortRangeAny,
},
}
// hsExitNodeDest is the list of destination IP ranges that are allowed when
// we use headscale "autogroup:internet"
var hsExitNodeDest = []tailcfg.NetPortRange{
{IP: "0.0.0.0/5", Ports: tailcfg.PortRangeAny},
{IP: "8.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "11.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "12.0.0.0/6", Ports: tailcfg.PortRangeAny},
{IP: "16.0.0.0/4", Ports: tailcfg.PortRangeAny},
{IP: "32.0.0.0/3", Ports: tailcfg.PortRangeAny},
{IP: "64.0.0.0/3", Ports: tailcfg.PortRangeAny},
{IP: "96.0.0.0/6", Ports: tailcfg.PortRangeAny},
{IP: "100.0.0.0/10", Ports: tailcfg.PortRangeAny},
{IP: "100.128.0.0/9", Ports: tailcfg.PortRangeAny},
{IP: "101.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "102.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "104.0.0.0/5", Ports: tailcfg.PortRangeAny},
{IP: "112.0.0.0/4", Ports: tailcfg.PortRangeAny},
{IP: "128.0.0.0/3", Ports: tailcfg.PortRangeAny},
{IP: "160.0.0.0/5", Ports: tailcfg.PortRangeAny},
{IP: "168.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "169.0.0.0/9", Ports: tailcfg.PortRangeAny},
{IP: "169.128.0.0/10", Ports: tailcfg.PortRangeAny},
{IP: "169.192.0.0/11", Ports: tailcfg.PortRangeAny},
{IP: "169.224.0.0/12", Ports: tailcfg.PortRangeAny},
{IP: "169.240.0.0/13", Ports: tailcfg.PortRangeAny},
{IP: "169.248.0.0/14", Ports: tailcfg.PortRangeAny},
{IP: "169.252.0.0/15", Ports: tailcfg.PortRangeAny},
{IP: "169.255.0.0/16", Ports: tailcfg.PortRangeAny},
{IP: "170.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "172.0.0.0/12", Ports: tailcfg.PortRangeAny},
{IP: "172.32.0.0/11", Ports: tailcfg.PortRangeAny},
{IP: "172.64.0.0/10", Ports: tailcfg.PortRangeAny},
{IP: "172.128.0.0/9", Ports: tailcfg.PortRangeAny},
{IP: "173.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "174.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "176.0.0.0/4", Ports: tailcfg.PortRangeAny},
{IP: "192.0.0.0/9", Ports: tailcfg.PortRangeAny},
{IP: "192.128.0.0/11", Ports: tailcfg.PortRangeAny},
{IP: "192.160.0.0/13", Ports: tailcfg.PortRangeAny},
{IP: "192.169.0.0/16", Ports: tailcfg.PortRangeAny},
{IP: "192.170.0.0/15", Ports: tailcfg.PortRangeAny},
{IP: "192.172.0.0/14", Ports: tailcfg.PortRangeAny},
{IP: "192.176.0.0/12", Ports: tailcfg.PortRangeAny},
{IP: "192.192.0.0/10", Ports: tailcfg.PortRangeAny},
{IP: "193.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "194.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "196.0.0.0/6", Ports: tailcfg.PortRangeAny},
{IP: "200.0.0.0/5", Ports: tailcfg.PortRangeAny},
{IP: "208.0.0.0/4", Ports: tailcfg.PortRangeAny},
{IP: "224.0.0.0/3", Ports: tailcfg.PortRangeAny},
{IP: "2000::/3", Ports: tailcfg.PortRangeAny},
}
func TestTheInternet(t *testing.T) {
internetSet := theInternet()
internetPrefs := internetSet.Prefixes()
// Compare lengths first so the indexed loop below cannot go out of range.
if len(internetPrefs) != len(hsExitNodeDest) {
t.Fatalf("expected same length of prefixes, internet: %d, hsExit: %d", len(internetPrefs), len(hsExitNodeDest))
}
for i := range internetPrefs {
if internetPrefs[i].String() != hsExitNodeDest[i].IP {
t.Errorf("prefix from internet set %q != hsExit list %q", internetPrefs[i].String(), hsExitNodeDest[i].IP)
}
}
}
func TestReduceFilterRules(t *testing.T) {
tests := []struct {
name string
@@ -1869,15 +1971,473 @@ func TestReduceFilterRules(t *testing.T) {
},
},
},
{
name: "1786-reducing-breaks-exit-nodes-the-client",
pol: ACLPolicy{
Hosts: Hosts{
// Exit node
"internal": netip.MustParsePrefix("100.64.0.100/32"),
},
Groups: Groups{
"group:team": {"user3", "user2", "user1"},
},
ACLs: []ACL{
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"internal:*",
},
},
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"autogroup:internet:*",
},
},
},
},
node: &types.Node{
IPv4: iap("100.64.0.1"),
IPv6: iap("fd7a:115c:a1e0::1"),
User: types.User{Name: "user1"},
},
peers: types.Nodes{
&types.Node{
IPv4: iap("100.64.0.2"),
IPv6: iap("fd7a:115c:a1e0::2"),
User: types.User{Name: "user2"},
},
// "internal" exit node
&types.Node{
IPv4: iap("100.64.0.100"),
IPv6: iap("fd7a:115c:a1e0::100"),
User: types.User{Name: "user100"},
Hostinfo: &tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{types.ExitRouteV4, types.ExitRouteV6},
},
},
},
want: []tailcfg.FilterRule{},
},
{
name: "1786-reducing-breaks-exit-nodes-the-exit",
pol: ACLPolicy{
Hosts: Hosts{
// Exit node
"internal": netip.MustParsePrefix("100.64.0.100/32"),
},
Groups: Groups{
"group:team": {"user3", "user2", "user1"},
},
ACLs: []ACL{
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"internal:*",
},
},
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"autogroup:internet:*",
},
},
},
},
node: &types.Node{
IPv4: iap("100.64.0.100"),
IPv6: iap("fd7a:115c:a1e0::100"),
User: types.User{Name: "user100"},
Hostinfo: &tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{types.ExitRouteV4, types.ExitRouteV6},
},
},
peers: types.Nodes{
&types.Node{
IPv4: iap("100.64.0.2"),
IPv6: iap("fd7a:115c:a1e0::2"),
User: types.User{Name: "user2"},
},
&types.Node{
IPv4: iap("100.64.0.1"),
IPv6: iap("fd7a:115c:a1e0::1"),
User: types.User{Name: "user1"},
},
},
want: []tailcfg.FilterRule{
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: []tailcfg.NetPortRange{
{
IP: "100.64.0.100/32",
Ports: tailcfg.PortRangeAny,
},
{
IP: "fd7a:115c:a1e0::100/128",
Ports: tailcfg.PortRangeAny,
},
},
},
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: hsExitNodeDest,
},
},
},
{
name: "1786-reducing-breaks-exit-nodes-the-example-from-issue",
pol: ACLPolicy{
Hosts: Hosts{
// Exit node
"internal": netip.MustParsePrefix("100.64.0.100/32"),
},
Groups: Groups{
"group:team": {"user3", "user2", "user1"},
},
ACLs: []ACL{
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"internal:*",
},
},
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"0.0.0.0/5:*",
"8.0.0.0/7:*",
"11.0.0.0/8:*",
"12.0.0.0/6:*",
"16.0.0.0/4:*",
"32.0.0.0/3:*",
"64.0.0.0/2:*",
"128.0.0.0/3:*",
"160.0.0.0/5:*",
"168.0.0.0/6:*",
"172.0.0.0/12:*",
"172.32.0.0/11:*",
"172.64.0.0/10:*",
"172.128.0.0/9:*",
"173.0.0.0/8:*",
"174.0.0.0/7:*",
"176.0.0.0/4:*",
"192.0.0.0/9:*",
"192.128.0.0/11:*",
"192.160.0.0/13:*",
"192.169.0.0/16:*",
"192.170.0.0/15:*",
"192.172.0.0/14:*",
"192.176.0.0/12:*",
"192.192.0.0/10:*",
"193.0.0.0/8:*",
"194.0.0.0/7:*",
"196.0.0.0/6:*",
"200.0.0.0/5:*",
"208.0.0.0/4:*",
},
},
},
},
node: &types.Node{
IPv4: iap("100.64.0.100"),
IPv6: iap("fd7a:115c:a1e0::100"),
User: types.User{Name: "user100"},
Hostinfo: &tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{types.ExitRouteV4, types.ExitRouteV6},
},
},
peers: types.Nodes{
&types.Node{
IPv4: iap("100.64.0.2"),
IPv6: iap("fd7a:115c:a1e0::2"),
User: types.User{Name: "user2"},
},
&types.Node{
IPv4: iap("100.64.0.1"),
IPv6: iap("fd7a:115c:a1e0::1"),
User: types.User{Name: "user1"},
},
},
want: []tailcfg.FilterRule{
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: []tailcfg.NetPortRange{
{
IP: "100.64.0.100/32",
Ports: tailcfg.PortRangeAny,
},
{
IP: "fd7a:115c:a1e0::100/128",
Ports: tailcfg.PortRangeAny,
},
},
},
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: []tailcfg.NetPortRange{
{IP: "0.0.0.0/5", Ports: tailcfg.PortRangeAny},
{IP: "8.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "11.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "12.0.0.0/6", Ports: tailcfg.PortRangeAny},
{IP: "16.0.0.0/4", Ports: tailcfg.PortRangeAny},
{IP: "32.0.0.0/3", Ports: tailcfg.PortRangeAny},
{IP: "64.0.0.0/2", Ports: tailcfg.PortRangeAny},
{IP: "fd7a:115c:a1e0::1/128", Ports: tailcfg.PortRangeAny},
{IP: "fd7a:115c:a1e0::2/128", Ports: tailcfg.PortRangeAny},
{IP: "fd7a:115c:a1e0::100/128", Ports: tailcfg.PortRangeAny},
{IP: "128.0.0.0/3", Ports: tailcfg.PortRangeAny},
{IP: "160.0.0.0/5", Ports: tailcfg.PortRangeAny},
{IP: "168.0.0.0/6", Ports: tailcfg.PortRangeAny},
{IP: "172.0.0.0/12", Ports: tailcfg.PortRangeAny},
{IP: "172.32.0.0/11", Ports: tailcfg.PortRangeAny},
{IP: "172.64.0.0/10", Ports: tailcfg.PortRangeAny},
{IP: "172.128.0.0/9", Ports: tailcfg.PortRangeAny},
{IP: "173.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "174.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "176.0.0.0/4", Ports: tailcfg.PortRangeAny},
{IP: "192.0.0.0/9", Ports: tailcfg.PortRangeAny},
{IP: "192.128.0.0/11", Ports: tailcfg.PortRangeAny},
{IP: "192.160.0.0/13", Ports: tailcfg.PortRangeAny},
{IP: "192.169.0.0/16", Ports: tailcfg.PortRangeAny},
{IP: "192.170.0.0/15", Ports: tailcfg.PortRangeAny},
{IP: "192.172.0.0/14", Ports: tailcfg.PortRangeAny},
{IP: "192.176.0.0/12", Ports: tailcfg.PortRangeAny},
{IP: "192.192.0.0/10", Ports: tailcfg.PortRangeAny},
{IP: "193.0.0.0/8", Ports: tailcfg.PortRangeAny},
{IP: "194.0.0.0/7", Ports: tailcfg.PortRangeAny},
{IP: "196.0.0.0/6", Ports: tailcfg.PortRangeAny},
{IP: "200.0.0.0/5", Ports: tailcfg.PortRangeAny},
{IP: "208.0.0.0/4", Ports: tailcfg.PortRangeAny},
},
},
},
},
{
name: "1786-reducing-breaks-exit-nodes-app-connector-like",
pol: ACLPolicy{
Hosts: Hosts{
// Exit node
"internal": netip.MustParsePrefix("100.64.0.100/32"),
},
Groups: Groups{
"group:team": {"user3", "user2", "user1"},
},
ACLs: []ACL{
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"internal:*",
},
},
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"8.0.0.0/8:*",
"16.0.0.0/8:*",
},
},
},
},
node: &types.Node{
IPv4: iap("100.64.0.100"),
IPv6: iap("fd7a:115c:a1e0::100"),
User: types.User{Name: "user100"},
Hostinfo: &tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{netip.MustParsePrefix("8.0.0.0/16"), netip.MustParsePrefix("16.0.0.0/16")},
},
},
peers: types.Nodes{
&types.Node{
IPv4: iap("100.64.0.2"),
IPv6: iap("fd7a:115c:a1e0::2"),
User: types.User{Name: "user2"},
},
&types.Node{
IPv4: iap("100.64.0.1"),
IPv6: iap("fd7a:115c:a1e0::1"),
User: types.User{Name: "user1"},
},
},
want: []tailcfg.FilterRule{
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: []tailcfg.NetPortRange{
{
IP: "100.64.0.100/32",
Ports: tailcfg.PortRangeAny,
},
{
IP: "fd7a:115c:a1e0::100/128",
Ports: tailcfg.PortRangeAny,
},
},
},
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: []tailcfg.NetPortRange{
{
IP: "8.0.0.0/8",
Ports: tailcfg.PortRangeAny,
},
{
IP: "16.0.0.0/8",
Ports: tailcfg.PortRangeAny,
},
},
},
},
},
{
name: "1786-reducing-breaks-exit-nodes-app-connector-like2",
pol: ACLPolicy{
Hosts: Hosts{
// Exit node
"internal": netip.MustParsePrefix("100.64.0.100/32"),
},
Groups: Groups{
"group:team": {"user3", "user2", "user1"},
},
ACLs: []ACL{
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"internal:*",
},
},
{
Action: "accept",
Sources: []string{"group:team"},
Destinations: []string{
"8.0.0.0/16:*",
"16.0.0.0/16:*",
},
},
},
},
node: &types.Node{
IPv4: iap("100.64.0.100"),
IPv6: iap("fd7a:115c:a1e0::100"),
User: types.User{Name: "user100"},
Hostinfo: &tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{netip.MustParsePrefix("8.0.0.0/8"), netip.MustParsePrefix("16.0.0.0/8")},
},
},
peers: types.Nodes{
&types.Node{
IPv4: iap("100.64.0.2"),
IPv6: iap("fd7a:115c:a1e0::2"),
User: types.User{Name: "user2"},
},
&types.Node{
IPv4: iap("100.64.0.1"),
IPv6: iap("fd7a:115c:a1e0::1"),
User: types.User{Name: "user1"},
},
},
want: []tailcfg.FilterRule{
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: []tailcfg.NetPortRange{
{
IP: "100.64.0.100/32",
Ports: tailcfg.PortRangeAny,
},
{
IP: "fd7a:115c:a1e0::100/128",
Ports: tailcfg.PortRangeAny,
},
},
},
{
SrcIPs: []string{"100.64.0.1/32", "100.64.0.2/32", "fd7a:115c:a1e0::1/128", "fd7a:115c:a1e0::2/128"},
DstPorts: []tailcfg.NetPortRange{
{
IP: "8.0.0.0/16",
Ports: tailcfg.PortRangeAny,
},
{
IP: "16.0.0.0/16",
Ports: tailcfg.PortRangeAny,
},
},
},
},
},
{
name: "1817-reduce-breaks-32-mask",
pol: ACLPolicy{
Hosts: Hosts{
"vlan1": netip.MustParsePrefix("172.16.0.0/24"),
"dns1": netip.MustParsePrefix("172.16.0.21/32"),
},
Groups: Groups{
"group:access": {"user1"},
},
ACLs: []ACL{
{
Action: "accept",
Sources: []string{"group:access"},
Destinations: []string{
"tag:access-servers:*",
"dns1:*",
},
},
},
},
node: &types.Node{
IPv4: iap("100.64.0.100"),
IPv6: iap("fd7a:115c:a1e0::100"),
User: types.User{Name: "user100"},
Hostinfo: &tailcfg.Hostinfo{
RoutableIPs: []netip.Prefix{netip.MustParsePrefix("172.16.0.0/24")},
},
ForcedTags: types.StringList{"tag:access-servers"},
},
peers: types.Nodes{
&types.Node{
IPv4: iap("100.64.0.1"),
IPv6: iap("fd7a:115c:a1e0::1"),
User: types.User{Name: "user1"},
},
},
want: []tailcfg.FilterRule{
{
SrcIPs: []string{"100.64.0.1/32", "fd7a:115c:a1e0::1/128"},
DstPorts: []tailcfg.NetPortRange{
{
IP: "100.64.0.100/32",
Ports: tailcfg.PortRangeAny,
},
{
IP: "fd7a:115c:a1e0::100/128",
Ports: tailcfg.PortRangeAny,
},
{
IP: "172.16.0.21/32",
Ports: tailcfg.PortRangeAny,
},
},
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rules, _ := tt.pol.CompileFilterRules(
got, _ := tt.pol.CompileFilterRules(
append(tt.peers, tt.node),
)
got := ReduceFilterRules(tt.node, rules)
got = ReduceFilterRules(tt.node, got)
if diff := cmp.Diff(tt.want, got); diff != "" {
log.Trace().Interface("got", got).Msg("result")

View File

@@ -64,12 +64,18 @@ func (h *Headscale) newMapSession(
w http.ResponseWriter,
node *types.Node,
) *mapSession {
warnf, tracef, infof, errf := logPollFunc(req, node)
warnf, infof, tracef, errf := logPollFunc(req, node)
// Use a buffered channel in case a node is not fully ready
// to receive a message, to make sure we don't block the entire
// notifier.
updateChan := make(chan types.StateUpdate, h.cfg.Tuning.NodeMapSessionBufferedChanSize)
var updateChan chan types.StateUpdate
if req.Stream {
// Use a buffered channel in case a node is not fully ready
// to receive a message, to make sure we don't block the entire
// notifier.
updateChan = make(chan types.StateUpdate, h.cfg.Tuning.NodeMapSessionBufferedChanSize)
updateChan <- types.StateUpdate{
Type: types.StateFullUpdate,
}
}
return &mapSession{
h: h,
@@ -196,8 +202,10 @@ func (m *mapSession) serve() {
// return
err := m.handleSaveNode()
if err != nil {
mapResponseWriteUpdatesInStream.WithLabelValues("error").Inc()
return
}
mapResponseWriteUpdatesInStream.WithLabelValues("ok").Inc()
}
// Set up the client stream
@@ -216,33 +224,26 @@ func (m *mapSession) serve() {
ctx, cancel := context.WithCancel(context.WithValue(m.ctx, nodeNameContextKey, m.node.Hostname))
defer cancel()
// TODO(kradalby): Make this available through a tuning envvar
wait := time.Second
// Add a circuit breaker: if the loop is not interrupted
// in between listening on the channels, some updates
// might go stale and get stuck in the "changed" map
// defined below.
blockBreaker := time.NewTicker(wait)
// true means changed, false means removed
var changed map[types.NodeID]bool
var patches []*tailcfg.PeerChange
var derp bool
// Set full to true to immediately send a full mapresponse
full := true
prev := time.Now()
lastMessage := ""
// Loop through updates and continuously send them to the
// client.
for {
// If a full update has been requested or there are patches, then send it immediately;
// otherwise wait for the "batching" of changes or patches.
if full || patches != nil || (changed != nil && time.Since(prev) > wait) {
// consume channels with update, keep alives or "batch" blocking signals
select {
case <-m.cancelCh:
m.tracef("poll cancelled received")
return
case <-ctx.Done():
m.tracef("poll context done")
return
// Consume all updates sent to node
case update := <-m.ch:
m.tracef("received stream update: %s %s", update.Type.String(), update.Message)
mapResponseUpdateReceived.WithLabelValues(update.Type.String()).Inc()
var data []byte
var err error
var lastMessage string
// Ensure the node object is updated, for example, there
// might have been a hostinfo update in a sidechannel
@@ -254,61 +255,46 @@ func (m *mapSession) serve() {
return
}
// If there are patches _and_ fully changed nodes, filter the
// patches and remove all patches that are present for the full
// changes updates. This allows us to send them as part of the
// PeerChange update, but only for nodes that are not fully changed.
// The fully changed nodes will be updated from the database and
// have all the updates needed.
// This means that the patches left are for nodes that have no
// updates requiring a full update.
// Patches are not supposed to be mixed in, but can be.
//
// From tailcfg docs:
// These are applied after Peers* above, but in practice the
// control server should only send these on their own, without
//
// Currently, there is no effort to merge patch updates; they
// are all sent, and the client will apply them in order.
// TODO(kradalby): Merge Patches for the same IDs to send less
// data and give the client less work.
if patches != nil && changed != nil {
var filteredPatches []*tailcfg.PeerChange
for _, patch := range patches {
if _, ok := changed[types.NodeID(patch.NodeID)]; !ok {
filteredPatches = append(filteredPatches, patch)
}
}
patches = filteredPatches
}
// When deciding what update to send, the following is considered:
// Full is a superset of all updates; when a full update is requested,
// send only that and move on, since all other updates will be present
// in a full map response.
//
// If a map of changed nodes exists, prefer sending that as it will
// contain all the updates for the node, including patches, as it
// is fetched freshly from the database when building the response.
//
// If there are no full changes registered, but we have patches for
// individual nodes, send them.
//
// Finally, if a DERP map is the only request, send that alone.
if full {
updateType := "full"
switch update.Type {
case types.StateFullUpdate:
m.tracef("Sending Full MapResponse")
data, err = m.mapper.FullMapResponse(m.req, m.node, m.h.ACLPolicy, fmt.Sprintf("from mapSession: %p, stream: %t", m, m.isStreaming()))
} else if changed != nil {
case types.StatePeerChanged:
changed := make(map[types.NodeID]bool, len(update.ChangeNodes))
for _, nodeID := range update.ChangeNodes {
changed[nodeID] = true
}
lastMessage = update.Message
m.tracef(fmt.Sprintf("Sending Changed MapResponse: %v", lastMessage))
data, err = m.mapper.PeerChangedResponse(m.req, m.node, changed, patches, m.h.ACLPolicy, lastMessage)
} else if patches != nil {
data, err = m.mapper.PeerChangedResponse(m.req, m.node, changed, update.ChangePatches, m.h.ACLPolicy, lastMessage)
updateType = "change"
case types.StatePeerChangedPatch:
m.tracef(fmt.Sprintf("Sending Changed Patch MapResponse: %v", lastMessage))
data, err = m.mapper.PeerChangedPatchResponse(m.req, m.node, patches, m.h.ACLPolicy)
} else if derp {
data, err = m.mapper.PeerChangedPatchResponse(m.req, m.node, update.ChangePatches, m.h.ACLPolicy)
updateType = "patch"
case types.StatePeerRemoved:
changed := make(map[types.NodeID]bool, len(update.Removed))
for _, nodeID := range update.Removed {
changed[nodeID] = false
}
m.tracef(fmt.Sprintf("Sending Changed MapResponse: %v", lastMessage))
data, err = m.mapper.PeerChangedResponse(m.req, m.node, changed, update.ChangePatches, m.h.ACLPolicy, lastMessage)
updateType = "remove"
case types.StateSelfUpdate:
lastMessage = update.Message
m.tracef(fmt.Sprintf("Sending Changed MapResponse: %v", lastMessage))
// create the map so an empty (self) update is sent
data, err = m.mapper.PeerChangedResponse(m.req, m.node, make(map[types.NodeID]bool), update.ChangePatches, m.h.ACLPolicy, lastMessage)
updateType = "remove"
case types.StateDERPUpdated:
m.tracef("Sending DERPUpdate MapResponse")
data, err = m.mapper.DERPMapResponse(m.req, m.node, m.h.DERPMap)
updateType = "derp"
}
if err != nil {
@@ -324,107 +310,52 @@ func (m *mapSession) serve() {
startWrite := time.Now()
_, err = m.w.Write(data)
if err != nil {
mapResponseSent.WithLabelValues("error", updateType).Inc()
m.errf(err, "Could not write the map response, for mapSession: %p", m)
return
}
err = rc.Flush()
if err != nil {
mapResponseSent.WithLabelValues("error", updateType).Inc()
m.errf(err, "flushing the map response to client, for mapSession: %p", m)
return
}
log.Trace().Str("node", m.node.Hostname).TimeDiff("timeSpent", time.Now(), startWrite).Str("mkey", m.node.MachineKey.String()).Msg("finished writing mapresp to node")
m.infof("update sent")
}
// reset
changed = nil
patches = nil
lastMessage = ""
full = false
derp = false
prev = time.Now()
}
// consume channels with update, keep alives or "batch" blocking signals
select {
case <-m.cancelCh:
m.tracef("poll cancelled received")
return
case <-ctx.Done():
m.tracef("poll context done")
return
// Avoid infinite block that would potentially leave
// some updates in the changed map.
case <-blockBreaker.C:
continue
// Consume all updates sent to node
case update := <-m.ch:
m.tracef("received stream update: %d %s", update.Type, update.Message)
switch update.Type {
case types.StateFullUpdate:
full = true
case types.StatePeerChanged:
if changed == nil {
changed = make(map[types.NodeID]bool)
}
for _, nodeID := range update.ChangeNodes {
changed[nodeID] = true
}
lastMessage = update.Message
case types.StatePeerChangedPatch:
patches = append(patches, update.ChangePatches...)
case types.StatePeerRemoved:
if changed == nil {
changed = make(map[types.NodeID]bool)
}
for _, nodeID := range update.Removed {
changed[nodeID] = false
}
case types.StateSelfUpdate:
// create the map so an empty (self) update is sent
if changed == nil {
changed = make(map[types.NodeID]bool)
}
lastMessage = update.Message
case types.StateDERPUpdated:
derp = true
mapResponseSent.WithLabelValues("ok", updateType).Inc()
m.tracef("update sent")
}
case <-m.keepAliveTicker.C:
data, err := m.mapper.KeepAliveResponse(m.req, m.node)
if err != nil {
m.errf(err, "Error generating the keep alive msg")
mapResponseSent.WithLabelValues("error", "keepalive").Inc()
return
}
_, err = m.w.Write(data)
if err != nil {
m.errf(err, "Cannot write keep alive message")
mapResponseSent.WithLabelValues("error", "keepalive").Inc()
return
}
err = rc.Flush()
if err != nil {
m.errf(err, "flushing keep alive to client, for mapSession: %p", m)
mapResponseSent.WithLabelValues("error", "keepalive").Inc()
return
}
mapResponseSent.WithLabelValues("ok", "keepalive").Inc()
}
}
}
func (m *mapSession) pollFailoverRoutes(where string, node *types.Node) {
update, err := db.Write(m.h.db.DB, func(tx *gorm.DB) (*types.StateUpdate, error) {
return db.FailoverNodeRoutesIfNeccessary(tx, m.h.nodeNotifier.ConnectedMap(), node)
return db.FailoverNodeRoutesIfNeccessary(tx, m.h.nodeNotifier.LikelyConnectedMap(), node)
})
if err != nil {
m.errf(err, fmt.Sprintf("failed to ensure failover routes, %s", where))
@@ -454,7 +385,7 @@ func (h *Headscale) updateNodeOnlineStatus(online bool, node *types.Node) {
node.LastSeen = &now
change.LastSeen = &now
err := h.db.DB.Transaction(func(tx *gorm.DB) error {
err := h.db.Write(func(tx *gorm.DB) error {
return db.SetLastSeen(tx, node.ID, *node.LastSeen)
})
if err != nil {
@@ -501,6 +432,7 @@ func (m *mapSession) handleEndpointUpdate() {
// If there is no changes and nothing to save,
// return early.
if peerChangeEmpty(change) && !sendUpdate {
mapResponseEndpointUpdates.WithLabelValues("noop").Inc()
return
}
@@ -518,6 +450,7 @@ func (m *mapSession) handleEndpointUpdate() {
if err != nil {
m.errf(err, "Error processing node routes")
http.Error(m.w, "", http.StatusInternalServerError)
mapResponseEndpointUpdates.WithLabelValues("error").Inc()
return
}
@@ -527,6 +460,7 @@ func (m *mapSession) handleEndpointUpdate() {
err := m.h.db.EnableAutoApprovedRoutes(m.h.ACLPolicy, m.node)
if err != nil {
m.errf(err, "Error running auto approved routes")
mapResponseEndpointUpdates.WithLabelValues("error").Inc()
}
}
@@ -534,19 +468,19 @@ func (m *mapSession) handleEndpointUpdate() {
// has an updated packetfilter allowing the new route
// if it is defined in the ACL.
ctx := types.NotifyCtx(context.Background(), "poll-nodeupdate-self-hostinfochange", m.node.Hostname)
m.h.nodeNotifier.NotifyByMachineKey(
m.h.nodeNotifier.NotifyByNodeID(
ctx,
types.StateUpdate{
Type: types.StateSelfUpdate,
ChangeNodes: []types.NodeID{m.node.ID},
},
m.node.ID)
}
if err := m.h.db.DB.Save(m.node).Error; err != nil {
m.errf(err, "Failed to persist/update node in the database")
http.Error(m.w, "", http.StatusInternalServerError)
mapResponseEndpointUpdates.WithLabelValues("error").Inc()
return
}
@@ -562,6 +496,7 @@ func (m *mapSession) handleEndpointUpdate() {
m.node.ID)
m.w.WriteHeader(http.StatusOK)
mapResponseEndpointUpdates.WithLabelValues("ok").Inc()
return
}
@@ -639,7 +574,7 @@ func (m *mapSession) handleReadOnlyRequest() {
if err != nil {
m.errf(err, "Failed to create MapResponse")
http.Error(m.w, "", http.StatusInternalServerError)
mapResponseReadOnly.WithLabelValues("error").Inc()
return
}
@@ -648,9 +583,12 @@ func (m *mapSession) handleReadOnlyRequest() {
_, err = m.w.Write(mapResp)
if err != nil {
m.errf(err, "Failed to write response")
mapResponseReadOnly.WithLabelValues("error").Inc()
return
}
m.w.WriteHeader(http.StatusOK)
mapResponseReadOnly.WithLabelValues("ok").Inc()
return
}

View File

@@ -10,6 +10,7 @@ import (
"time"
"tailscale.com/tailcfg"
"tailscale.com/util/ctxkey"
)
const (
@@ -183,10 +184,14 @@ func StateUpdateExpire(nodeID NodeID, expiry time.Time) StateUpdate {
}
}
var (
NotifyOriginKey = ctxkey.New("notify.origin", "")
NotifyHostnameKey = ctxkey.New("notify.hostname", "")
)
func NotifyCtx(ctx context.Context, origin, hostname string) context.Context {
ctx2, _ := context.WithTimeout(
context.WithValue(context.WithValue(ctx, "hostname", hostname), "origin", origin),
3*time.Second,
)
ctx2, _ := context.WithTimeout(ctx, 3*time.Second)
ctx2 = NotifyOriginKey.WithValue(ctx2, origin)
ctx2 = NotifyHostnameKey.WithValue(ctx2, hostname)
return ctx2
}
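A minimal sketch (not part of this changeset) of reading the typed keys back out, which replaces the stringly-typed ctx.Value("origin") lookups elsewhere in the diff:

	func notifyCtxSketch(ctx context.Context) {
		ctx = NotifyCtx(ctx, "cli-expirenode", "node-1")
		origin := NotifyOriginKey.Value(ctx)     // "cli-expirenode"
		hostname := NotifyHostnameKey.Value(ctx) // "node-1"
		_, _ = origin, hostname
	}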

View File

@@ -28,7 +28,8 @@ var (
)
type NodeID uint64
type NodeConnectedMap map[NodeID]bool
// type NodeConnectedMap *xsync.MapOf[NodeID, bool]
func (id NodeID) StableID() tailcfg.StableNodeID {
return tailcfg.StableNodeID(strconv.FormatUint(uint64(id), util.Base10))

View File

@@ -51,7 +51,7 @@ func aclScenario(
clientsPerUser int,
) *Scenario {
t.Helper()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
spec := map[string]int{
@@ -264,7 +264,7 @@ func TestACLHostsInNetMapTable(t *testing.T) {
for name, testCase := range tests {
t.Run(name, func(t *testing.T) {
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
spec := testCase.users

View File

@@ -42,7 +42,7 @@ func TestOIDCAuthenticationPingAll(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
baseScenario, err := NewScenario()
baseScenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
scenario := AuthOIDCScenario{
@@ -100,7 +100,7 @@ func TestOIDCExpireNodesBasedOnTokenExpiry(t *testing.T) {
shortAccessTTL := 5 * time.Minute
baseScenario, err := NewScenario()
baseScenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
baseScenario.pool.MaxWait = 5 * time.Minute

View File

@@ -26,7 +26,7 @@ func TestAuthWebFlowAuthenticationPingAll(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
baseScenario, err := NewScenario()
baseScenario, err := NewScenario(dockertestMaxWait())
if err != nil {
t.Fatalf("failed to create scenario: %s", err)
}
@@ -67,7 +67,7 @@ func TestAuthWebFlowLogoutAndRelogin(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
baseScenario, err := NewScenario()
baseScenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
scenario := AuthWebFlowScenario{

View File

@@ -32,7 +32,7 @@ func TestUserCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -112,7 +112,7 @@ func TestPreAuthKeyCommand(t *testing.T) {
user := "preauthkeyspace"
count := 3
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -254,7 +254,7 @@ func TestPreAuthKeyCommandWithoutExpiry(t *testing.T) {
user := "pre-auth-key-without-exp-user"
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -317,7 +317,7 @@ func TestPreAuthKeyCommandReusableEphemeral(t *testing.T) {
user := "pre-auth-key-reus-ephm-user"
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -394,7 +394,7 @@ func TestApiKeyCommand(t *testing.T) {
count := 5
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -562,7 +562,7 @@ func TestNodeTagCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -695,7 +695,7 @@ func TestNodeAdvertiseTagNoACLCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -745,7 +745,7 @@ func TestNodeAdvertiseTagWithACLCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -808,7 +808,7 @@ func TestNodeCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -1049,7 +1049,7 @@ func TestNodeExpireCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -1176,7 +1176,7 @@ func TestNodeRenameCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -1343,7 +1343,7 @@ func TestNodeMoveCommand(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
scenario, err := NewScenario()
scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()

View File

@@ -23,7 +23,7 @@ func TestDERPServerScenario(t *testing.T) {
IntegrationSkip(t)
// t.Parallel()
- baseScenario, err := NewScenario()
+ baseScenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
scenario := EmbeddedDERPServerScenario{

View File

@@ -23,7 +23,7 @@ func TestPingAllByIP(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -67,7 +67,7 @@ func TestPingAllByIPPublicDERP(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -105,7 +105,7 @@ func TestAuthKeyLogoutAndRelogin(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -216,7 +216,7 @@ func TestEphemeral(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -299,7 +299,7 @@ func TestPingAllByHostname(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -348,7 +348,7 @@ func TestTaildrop(t *testing.T) {
return err
}
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -509,7 +509,7 @@ func TestResolveMagicDNS(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -577,7 +577,7 @@ func TestExpireNode(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -703,7 +703,7 @@ func TestNodeOnlineStatus(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -818,7 +818,7 @@ func TestPingAllByIPManyUpDown(t *testing.T) {
IntegrationSkip(t)
t.Parallel()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()

View File

@@ -11,6 +11,7 @@ import (
"encoding/pem"
"errors"
"fmt"
"io"
"log"
"math/big"
"net"
@@ -18,6 +19,7 @@ import (
"net/url"
"os"
"path"
"strconv"
"strings"
"time"
@@ -201,6 +203,14 @@ func WithEmbeddedDERPServerOnly() Option {
}
}
+ // WithTuning allows changing the tuning settings easily.
+ func WithTuning(batchTimeout time.Duration, mapSessionChanSize int) Option {
+     return func(hsic *HeadscaleInContainer) {
+         hsic.env["HEADSCALE_TUNING_BATCH_CHANGE_DELAY"] = batchTimeout.String()
+         hsic.env["HEADSCALE_TUNING_NODE_MAPSESSION_BUFFERED_CHAN_SIZE"] = strconv.Itoa(mapSessionChanSize)
+     }
+ }
// New returns a new HeadscaleInContainer instance.
func New(
pool *dockertest.Pool,
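
The new `WithTuning` option above translates its two arguments into the `HEADSCALE_TUNING_BATCH_CHANGE_DELAY` and `HEADSCALE_TUNING_NODE_MAPSESSION_BUFFERED_CHAN_SIZE` environment variables on the container. A hedged usage sketch follows; the test name, spec, and surrounding options are illustrative, and `CreateHeadscaleEnv`'s exact signature is assumed from context rather than shown in this diff:

```go
// Illustrative only: tighten batching to stress the notifier in a test.
err = scenario.CreateHeadscaleEnv(
	spec,
	[]tsic.Option{},
	hsic.WithTestName("tuningexample"), // hypothetical test name
	hsic.WithTuning(100*time.Millisecond, 10),
)
assertNoErr(t, err)
```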
@@ -387,6 +397,14 @@ func (t *HeadscaleInContainer) Shutdown() error {
)
}
+ err = t.SaveMetrics("/tmp/control/metrics.txt")
+ if err != nil {
+     log.Printf(
+         "Failed to save metrics from control: %s",
+         err,
+     )
+ }
// Send an interrupt signal to the "headscale" process inside the container
// allowing it to shut down gracefully and flush the profile to disk.
// The container will live for a bit longer due to the sleep at the end.
@@ -439,6 +457,25 @@ func (t *HeadscaleInContainer) SaveLog(path string) error {
return dockertestutil.SaveLog(t.pool, t.container, path)
}
+ func (t *HeadscaleInContainer) SaveMetrics(savePath string) error {
+     resp, err := http.Get(fmt.Sprintf("http://%s:9090/metrics", t.hostname))
+     if err != nil {
+         return fmt.Errorf("getting metrics: %w", err)
+     }
+     defer resp.Body.Close()
+     out, err := os.Create(savePath)
+     if err != nil {
+         return fmt.Errorf("creating file for metrics: %w", err)
+     }
+     defer out.Close()
+     _, err = io.Copy(out, resp.Body)
+     if err != nil {
+         return fmt.Errorf("copy response to file: %w", err)
+     }
+     return nil
+ }
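
`SaveMetrics` scrapes the control server's Prometheus endpoint on port 9090 and writes the response body to the given path. A minimal hedged sketch of calling it from a test; `hs` is a stand-in name for the `*HeadscaleInContainer` under test (in the diff above, `Shutdown` makes this call itself and only logs failures):

```go
// Illustrative only: snapshot metrics before tearing the container down.
if err := hs.SaveMetrics("/tmp/control/metrics.txt"); err != nil {
	t.Logf("failed to save metrics: %s", err)
}
```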
func (t *HeadscaleInContainer) SaveProfile(savePath string) error {
tarFile, err := t.FetchPath("/tmp/profile")
if err != nil {

View File

@@ -28,7 +28,7 @@ func TestEnablingRoutes(t *testing.T) {
user := "enable-routing"
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErrf(t, "failed to create scenario: %s", err)
defer scenario.Shutdown()
@@ -250,9 +250,9 @@ func TestHASubnetRouterFailover(t *testing.T) {
user := "enable-routing"
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErrf(t, "failed to create scenario: %s", err)
- // defer scenario.Shutdown()
+ defer scenario.Shutdown()
spec := map[string]int{
user: 3,
@@ -822,7 +822,7 @@ func TestEnableDisableAutoApprovedRoute(t *testing.T) {
user := "enable-disable-routing"
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErrf(t, "failed to create scenario: %s", err)
defer scenario.Shutdown()
@@ -966,7 +966,7 @@ func TestSubnetRouteACL(t *testing.T) {
user := "subnet-route-acl"
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErrf(t, "failed to create scenario: %s", err)
defer scenario.Shutdown()

View File

@@ -8,6 +8,7 @@ import (
"os"
"sort"
"sync"
"time"
v1 "github.com/juanfont/headscale/gen/go/headscale/v1"
"github.com/juanfont/headscale/hscontrol/util"
@@ -141,7 +142,7 @@ type Scenario struct {
// NewScenario creates a test Scenario which can be used to bootstrap a ControlServer with
// a set of Users and TailscaleClients.
- func NewScenario() (*Scenario, error) {
+ func NewScenario(maxWait time.Duration) (*Scenario, error) {
hash, err := util.GenerateRandomStringDNSSafe(scenarioHashLength)
if err != nil {
return nil, err
@@ -152,7 +153,7 @@ func NewScenario() (*Scenario, error) {
return nil, fmt.Errorf("could not connect to docker: %w", err)
}
- pool.MaxWait = dockertestMaxWait()
+ pool.MaxWait = maxWait
networkName := fmt.Sprintf("hs-%s", hash)
if overrideNetworkName := os.Getenv("HEADSCALE_TEST_NETWORK_NAME"); overrideNetworkName != "" {
@@ -510,7 +511,7 @@ func (s *Scenario) GetIPs(user string) ([]netip.Addr, error) {
return ips, fmt.Errorf("failed to get ips: %w", errNoUserAvailable)
}
- // GetIPs returns all TailscaleClients associated with a User in a Scenario.
+ // GetClients returns all TailscaleClients associated with a User in a Scenario.
func (s *Scenario) GetClients(user string) ([]TailscaleClient, error) {
var clients []TailscaleClient
if ns, ok := s.users[user]; ok {
@@ -586,7 +587,7 @@ func (s *Scenario) ListTailscaleClientsIPs(users ...string) ([]netip.Addr, error
return allIps, nil
}
- // ListTailscaleClientsIPs returns a list of FQDN based on Users
+ // ListTailscaleClientsFQDNs returns a list of FQDNs based on Users
// passed as parameters.
func (s *Scenario) ListTailscaleClientsFQDNs(users ...string) ([]string, error) {
allFQDNs := make([]string, 0)
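
With `NewScenario(maxWait time.Duration)`, a test that needs a non-default Docker wait can set it at construction time instead of mutating `pool.MaxWait` afterwards, as the OIDC token-expiry test did before this change. A hedged sketch:

```go
// Illustrative only: a slow test opts into a longer wait up front.
baseScenario, err := NewScenario(5 * time.Minute)
if err != nil {
	t.Fatalf("failed to create scenario: %s", err)
}
```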

View File

@@ -33,7 +33,7 @@ func TestHeadscale(t *testing.T) {
user := "test-space"
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -78,7 +78,7 @@ func TestCreateTailscale(t *testing.T) {
user := "only-create-containers"
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()
@@ -114,7 +114,7 @@ func TestTailscaleNodesJoiningHeadcale(t *testing.T) {
count := 1
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
defer scenario.Shutdown()

View File

@@ -44,7 +44,7 @@ var retry = func(times int, sleepInterval time.Duration,
func sshScenario(t *testing.T, policy *policy.ACLPolicy, clientsPerUser int) *Scenario {
t.Helper()
- scenario, err := NewScenario()
+ scenario, err := NewScenario(dockertestMaxWait())
assertNoErr(t, err)
spec := map[string]int{