2023-05-10 09:24:05 +02:00
|
|
|
package hscontrol
|
2022-08-13 20:52:11 +02:00
|
|
|
|
|
|
|
import (
|
2023-05-02 08:15:33 +02:00
|
|
|
"encoding/binary"
|
|
|
|
"encoding/json"
|
|
|
|
"io"
|
2022-08-13 20:52:11 +02:00
|
|
|
"net/http"
|
|
|
|
|
2022-12-09 16:56:43 +00:00
|
|
|
"github.com/gorilla/mux"
|
2023-06-06 10:41:30 +02:00
|
|
|
"github.com/juanfont/headscale/hscontrol/types"
|
2022-08-13 20:52:11 +02:00
|
|
|
"github.com/rs/zerolog/log"
|
|
|
|
"golang.org/x/net/http2"
|
|
|
|
"golang.org/x/net/http2/h2c"
|
2022-12-09 16:56:43 +00:00
|
|
|
"tailscale.com/control/controlbase"
|
2022-08-20 00:06:26 +02:00
|
|
|
"tailscale.com/control/controlhttp"
|
2023-05-02 08:15:33 +02:00
|
|
|
"tailscale.com/tailcfg"
|
|
|
|
"tailscale.com/types/key"
|
2022-08-13 20:52:11 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
// ts2021UpgradePath is the path that the server listens on for the WebSockets upgrade.
|
|
|
|
ts2021UpgradePath = "/ts2021"
|
2023-05-02 08:15:33 +02:00
|
|
|
|
|
|
|
// The first 9 bytes from the server to client over Noise are either an HTTP/2
|
|
|
|
// settings frame (a normal HTTP/2 setup) or, as Tailscale added later, an "early payload"
|
|
|
|
// header that's also 9 bytes long: 5 bytes (earlyPayloadMagic) followed by 4 bytes
|
|
|
|
// of length. Then that many bytes of JSON-encoded tailcfg.EarlyNoise.
|
|
|
|
// The early payload is optional. Some servers may not send it... But we do!
|
|
|
|
earlyPayloadMagic = "\xff\xff\xffTS"
|
|
|
|
|
|
|
|
// EarlyNoise was added in protocol version 49.
|
|
|
|
earlyNoiseCapabilityVersion = 49
|
2022-08-13 20:52:11 +02:00
|
|
|
)
|
|
|
|
|
2023-05-02 08:15:33 +02:00
|
|
|
type noiseServer struct {
|
2022-12-09 16:56:43 +00:00
|
|
|
headscale *Headscale
|
|
|
|
|
2023-05-02 08:15:33 +02:00
|
|
|
httpBaseConfig *http.Server
|
|
|
|
http2Server *http2.Server
|
|
|
|
conn *controlbase.Conn
|
|
|
|
machineKey key.MachinePublic
|
|
|
|
nodeKey key.NodePublic
|
|
|
|
|
|
|
|
// EarlyNoise-related stuff
|
|
|
|
challenge key.ChallengePrivate
|
|
|
|
protocolVersion int
|
2022-12-09 16:56:43 +00:00
|
|
|
}
|
|
|
|
|
2022-08-13 20:52:11 +02:00
|
|
|
// NoiseUpgradeHandler is to upgrade the connection and hijack the net.Conn
|
|
|
|
// in order to use the Noise-based TS2021 protocol. Listens in /ts2021.
|
|
|
|
func (h *Headscale) NoiseUpgradeHandler(
|
|
|
|
writer http.ResponseWriter,
|
|
|
|
req *http.Request,
|
|
|
|
) {
|
|
|
|
log.Trace().Caller().Msgf("Noise upgrade handler for client %s", req.RemoteAddr)
|
|
|
|
|
2022-09-04 16:05:21 +02:00
|
|
|
upgrade := req.Header.Get("Upgrade")
|
|
|
|
if upgrade == "" {
|
|
|
|
// This probably means that the user is running Headscale behind an
|
|
|
|
// improperly configured reverse proxy. TS2021 requires WebSockets to
|
|
|
|
// be passed to Headscale. Let's give them a hint.
|
|
|
|
log.Warn().
|
|
|
|
Caller().
|
2022-09-04 16:13:30 +02:00
|
|
|
Msg("No Upgrade header in TS2021 request. If headscale is behind a reverse proxy, make sure it is configured to pass WebSockets through.")
|
2022-09-04 16:05:21 +02:00
|
|
|
http.Error(writer, "Internal error", http.StatusInternalServerError)
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-02 08:15:33 +02:00
|
|
|
noiseServer := noiseServer{
|
|
|
|
headscale: h,
|
|
|
|
challenge: key.NewChallenge(),
|
|
|
|
}
|
|
|
|
|
|
|
|
noiseConn, err := controlhttp.AcceptHTTP(
|
|
|
|
req.Context(),
|
|
|
|
writer,
|
|
|
|
req,
|
|
|
|
*h.noisePrivateKey,
|
|
|
|
noiseServer.earlyNoise,
|
|
|
|
)
|
2022-08-13 20:52:11 +02:00
|
|
|
if err != nil {
|
|
|
|
log.Error().Err(err).Msg("noise upgrade failed")
|
|
|
|
http.Error(writer, err.Error(), http.StatusInternalServerError)
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-02 08:15:33 +02:00
|
|
|
noiseServer.conn = noiseConn
|
|
|
|
noiseServer.machineKey = noiseServer.conn.Peer()
|
|
|
|
noiseServer.protocolVersion = noiseServer.conn.ProtocolVersion()
|
2022-12-09 16:56:43 +00:00
|
|
|
|
|
|
|
// This router is served only over the Noise connection, and exposes only the new API.
|
|
|
|
//
|
|
|
|
// The HTTP2 server that exposes this router is created for
|
|
|
|
// a single hijacked connection from /ts2021, using netutil.NewOneConnListener
|
|
|
|
router := mux.NewRouter()
|
2024-04-21 18:28:17 +02:00
|
|
|
router.Use(prometheusMiddleware)
|
2022-12-09 16:56:43 +00:00
|
|
|
|
2023-05-02 08:15:33 +02:00
|
|
|
router.HandleFunc("/machine/register", noiseServer.NoiseRegistrationHandler).
|
2022-12-09 16:56:43 +00:00
|
|
|
Methods(http.MethodPost)
|
2023-05-02 08:15:33 +02:00
|
|
|
router.HandleFunc("/machine/map", noiseServer.NoisePollNetMapHandler)
|
2022-12-09 16:56:43 +00:00
|
|
|
|
2022-09-04 11:35:39 +02:00
|
|
|
server := http.Server{
|
2024-04-10 15:35:09 +02:00
|
|
|
ReadTimeout: types.HTTPTimeout,
|
2022-09-04 11:35:39 +02:00
|
|
|
}
|
2023-05-02 08:15:33 +02:00
|
|
|
|
|
|
|
noiseServer.httpBaseConfig = &http.Server{
|
|
|
|
Handler: router,
|
2024-04-10 15:35:09 +02:00
|
|
|
ReadHeaderTimeout: types.HTTPTimeout,
|
2023-05-02 08:15:33 +02:00
|
|
|
}
|
|
|
|
noiseServer.http2Server = &http2.Server{}
|
|
|
|
|
|
|
|
server.Handler = h2c.NewHandler(router, noiseServer.http2Server)
|
|
|
|
|
|
|
|
noiseServer.http2Server.ServeConn(
|
|
|
|
noiseConn,
|
|
|
|
&http2.ServeConnOpts{
|
|
|
|
BaseConfig: noiseServer.httpBaseConfig,
|
|
|
|
},
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ns *noiseServer) earlyNoise(protocolVersion int, writer io.Writer) error {
|
|
|
|
log.Trace().
|
|
|
|
Caller().
|
|
|
|
Int("protocol_version", protocolVersion).
|
|
|
|
Str("challenge", ns.challenge.Public().String()).
|
|
|
|
Msg("earlyNoise called")
|
|
|
|
|
|
|
|
if protocolVersion < earlyNoiseCapabilityVersion {
|
|
|
|
log.Trace().
|
|
|
|
Caller().
|
|
|
|
Msgf("protocol version %d does not support early noise", protocolVersion)
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
earlyJSON, err := json.Marshal(&tailcfg.EarlyNoise{
|
|
|
|
NodeKeyChallenge: ns.challenge.Public(),
|
|
|
|
})
|
2022-08-14 12:44:07 +02:00
|
|
|
if err != nil {
|
2023-05-02 08:15:33 +02:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// 5 bytes that won't be mistaken for an HTTP/2 frame:
|
|
|
|
// https://httpwg.org/specs/rfc7540.html#rfc.section.4.1 (Especially not
|
|
|
|
// an HTTP/2 settings frame, which isn't of type 'T')
|
|
|
|
var notH2Frame [5]byte
|
|
|
|
copy(notH2Frame[:], earlyPayloadMagic)
|
|
|
|
var lenBuf [4]byte
|
|
|
|
binary.BigEndian.PutUint32(lenBuf[:], uint32(len(earlyJSON)))
|
|
|
|
// These writes are all buffered by caller, so fine to do them
|
|
|
|
// separately:
|
|
|
|
if _, err := writer.Write(notH2Frame[:]); err != nil {
|
|
|
|
return err
|
2022-08-14 12:44:07 +02:00
|
|
|
}
|
2023-05-02 08:15:33 +02:00
|
|
|
if _, err := writer.Write(lenBuf[:]); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if _, err := writer.Write(earlyJSON); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2022-08-13 20:52:11 +02:00
|
|
|
}
|
2024-02-23 10:59:24 +01:00
|
|
|
|
|
|
|
const (
|
|
|
|
MinimumCapVersion tailcfg.CapabilityVersion = 58
|
|
|
|
)
|
|
|
|
|
|
|
|
// NoisePollNetMapHandler takes care of /machine/:id/map using the Noise protocol
|
|
|
|
//
|
|
|
|
// This is the busiest endpoint, as it keeps the HTTP long poll that updates
|
|
|
|
// the clients when something in the network changes.
|
|
|
|
//
|
|
|
|
// The clients POST stuff like HostInfo and their Endpoints here, but
|
|
|
|
// only after their first request (marked with the ReadOnly field).
|
|
|
|
//
|
|
|
|
// At this moment the updates are sent in a quite horrendous way, but they kinda work.
|
|
|
|
func (ns *noiseServer) NoisePollNetMapHandler(
|
|
|
|
writer http.ResponseWriter,
|
|
|
|
req *http.Request,
|
|
|
|
) {
|
|
|
|
log.Trace().
|
|
|
|
Str("handler", "NoisePollNetMap").
|
|
|
|
Msg("PollNetMapHandler called")
|
|
|
|
|
|
|
|
log.Trace().
|
|
|
|
Any("headers", req.Header).
|
|
|
|
Caller().
|
|
|
|
Msg("Headers")
|
|
|
|
|
|
|
|
body, _ := io.ReadAll(req.Body)
|
|
|
|
|
|
|
|
mapRequest := tailcfg.MapRequest{}
|
|
|
|
if err := json.Unmarshal(body, &mapRequest); err != nil {
|
|
|
|
log.Error().
|
|
|
|
Caller().
|
|
|
|
Err(err).
|
|
|
|
Msg("Cannot parse MapRequest")
|
|
|
|
http.Error(writer, "Internal error", http.StatusInternalServerError)
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reject unsupported versions
|
|
|
|
if mapRequest.Version < MinimumCapVersion {
|
|
|
|
log.Info().
|
|
|
|
Caller().
|
|
|
|
Int("min_version", int(MinimumCapVersion)).
|
|
|
|
Int("client_version", int(mapRequest.Version)).
|
|
|
|
Msg("unsupported client connected")
|
|
|
|
http.Error(writer, "Internal error", http.StatusBadRequest)
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
ns.nodeKey = mapRequest.NodeKey
|
|
|
|
|
|
|
|
node, err := ns.headscale.db.GetNodeByAnyKey(
|
|
|
|
ns.conn.Peer(),
|
|
|
|
mapRequest.NodeKey,
|
|
|
|
key.NodePublic{},
|
|
|
|
)
|
|
|
|
if err != nil {
|
|
|
|
log.Error().
|
|
|
|
Str("handler", "NoisePollNetMap").
|
|
|
|
Uint64("node.id", node.ID.Uint64()).
|
|
|
|
Msgf("Failed to fetch node from the database with node key: %s", mapRequest.NodeKey.String())
|
|
|
|
http.Error(writer, "Internal error", http.StatusInternalServerError)
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
2024-04-10 15:35:09 +02:00
|
|
|
sess := ns.headscale.newMapSession(req.Context(), mapRequest, writer, node)
|
2024-02-23 10:59:24 +01:00
|
|
|
|
2024-04-10 15:35:09 +02:00
|
|
|
sess.tracef("a node sending a MapRequest with Noise protocol")
|
2024-02-23 10:59:24 +01:00
|
|
|
|
|
|
|
// If a streaming mapSession exists for this node, close it
|
|
|
|
// and start a new one.
|
2024-04-10 15:35:09 +02:00
|
|
|
if sess.isStreaming() {
|
|
|
|
sess.tracef("aquiring lock to check stream")
|
|
|
|
|
2024-02-23 10:59:24 +01:00
|
|
|
ns.headscale.mapSessionMu.Lock()
|
2024-04-10 15:35:09 +02:00
|
|
|
if _, ok := ns.headscale.mapSessions[node.ID]; ok {
|
|
|
|
// NOTE/TODO(kradalby): From how I understand the protocol, when
|
|
|
|
// a client connects with stream=true, and already has a streaming
|
|
|
|
// connection open, the correct way is to close the current channel
|
|
|
|
// and replace it. However, I cannot manage to get that working with
|
|
|
|
// some sort of lock/block happening on the cancelCh in the streaming
|
|
|
|
// session.
|
|
|
|
// Not closing the channel and replacing it puts us in a weird state
|
|
|
|
// which keeps a ghost stream open, receiving keep alives, but no updates.
|
|
|
|
//
|
|
|
|
// Typically a new connection is opened when one exists as a client which
|
|
|
|
// is already authenticated reconnects (e.g. down, then up). The client will
|
|
|
|
// start auth and streaming at the same time, and then cancel the streaming
|
|
|
|
// when the auth has finished successfully, opening a new connection.
|
|
|
|
//
|
|
|
|
// As a work-around to not replacing, abusing the clients "resilience"
|
|
|
|
// by reject the new connection which will cause the client to immediately
|
|
|
|
// reconnect and "fix" the issue, as the other connection typically has been
|
|
|
|
// closed, meaning there is nothing to replace.
|
|
|
|
//
|
|
|
|
// sess.infof("node has an open stream(%p), replacing with %p", oldSession, sess)
|
|
|
|
// oldSession.close()
|
|
|
|
|
|
|
|
defer ns.headscale.mapSessionMu.Unlock()
|
|
|
|
|
|
|
|
sess.infof("node has an open stream(%p), rejecting new stream", sess)
|
2024-04-21 18:28:17 +02:00
|
|
|
mapResponseRejected.WithLabelValues("exists").Inc()
|
2024-04-10 15:35:09 +02:00
|
|
|
return
|
2024-02-23 10:59:24 +01:00
|
|
|
}
|
|
|
|
|
2024-04-10 15:35:09 +02:00
|
|
|
ns.headscale.mapSessions[node.ID] = sess
|
2024-04-21 18:28:17 +02:00
|
|
|
mapResponseSessions.Inc()
|
2024-02-23 10:59:24 +01:00
|
|
|
ns.headscale.mapSessionMu.Unlock()
|
2024-04-10 15:35:09 +02:00
|
|
|
sess.tracef("releasing lock to check stream")
|
2024-02-23 10:59:24 +01:00
|
|
|
}
|
|
|
|
|
2024-04-10 15:35:09 +02:00
|
|
|
sess.serve()
|
2024-02-23 10:59:24 +01:00
|
|
|
|
2024-04-10 15:35:09 +02:00
|
|
|
if sess.isStreaming() {
|
|
|
|
sess.tracef("aquiring lock to remove stream")
|
2024-02-23 10:59:24 +01:00
|
|
|
ns.headscale.mapSessionMu.Lock()
|
2024-04-10 15:35:09 +02:00
|
|
|
defer ns.headscale.mapSessionMu.Unlock()
|
2024-02-23 10:59:24 +01:00
|
|
|
|
|
|
|
delete(ns.headscale.mapSessions, node.ID)
|
2024-04-21 18:28:17 +02:00
|
|
|
mapResponseSessions.Dec()
|
2024-02-23 10:59:24 +01:00
|
|
|
|
2024-04-10 15:35:09 +02:00
|
|
|
sess.tracef("releasing lock to remove stream")
|
2024-02-23 10:59:24 +01:00
|
|
|
}
|
|
|
|
}
|