2023-01-27 21:37:20 +00:00
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
2022-01-17 23:32:20 +00:00
2022-06-02 23:20:42 +00:00
//go:build !js
2022-01-17 23:32:20 +00:00
// Package controlhttp implements the Tailscale 2021 control protocol
// base transport over HTTP.
//
// This tunnels the protocol in control/controlbase over HTTP with a
// variety of compatibility fallbacks for handling picky or deep
// inspecting proxies.
//
// In the happy path, a client makes a single cleartext HTTP request
// to the server, the server responds with 101 Switching Protocols,
// and the control base protocol takes place over plain TCP.
//
// In the compatibility path, the client does the above over HTTPS,
// resulting in double encryption (once for the control transport, and
// once for the outer TLS layer).
package controlhttp
import (
"context"
"crypto/tls"
"encoding/base64"
"errors"
"fmt"
"io"
2022-09-23 17:06:55 +00:00
"math"
2022-01-17 23:32:20 +00:00
"net"
"net/http"
"net/http/httptrace"
2022-09-23 17:06:55 +00:00
"net/netip"
2022-01-17 23:32:20 +00:00
"net/url"
2024-06-19 20:22:14 +00:00
"runtime"
2022-09-23 17:06:55 +00:00
"sort"
"sync/atomic"
2022-04-27 15:19:45 +00:00
"time"
2022-01-17 23:32:20 +00:00
"tailscale.com/control/controlbase"
2024-11-07 23:59:19 +00:00
"tailscale.com/control/controlhttp/controlhttpcommon"
2022-09-23 17:06:55 +00:00
"tailscale.com/envknob"
2024-06-19 20:22:14 +00:00
"tailscale.com/health"
2022-01-17 23:32:20 +00:00
"tailscale.com/net/dnscache"
"tailscale.com/net/dnsfallback"
2022-03-24 16:44:40 +00:00
"tailscale.com/net/netutil"
2023-02-03 20:07:58 +00:00
"tailscale.com/net/sockstats"
2022-01-17 23:32:20 +00:00
"tailscale.com/net/tlsdial"
"tailscale.com/net/tshttpproxy"
2024-08-03 12:37:01 +00:00
"tailscale.com/syncs"
2022-09-23 17:06:55 +00:00
"tailscale.com/tailcfg"
2023-08-04 23:29:44 +00:00
"tailscale.com/tstime"
2022-09-23 17:06:55 +00:00
"tailscale.com/util/multierr"
2022-01-17 23:32:20 +00:00
)
2022-09-16 19:06:25 +00:00
// stdDialer is a zero-value net.Dialer. tryURLUpgrade uses its DialContext
// method as the dial function when the Dialer's own Dialer field is nil.
var stdDialer net . Dialer
// Dial connects to the HTTP server at this Dialer's Host:HTTPPort, requests to
// switch to the Tailscale control protocol, and returns an established control
2022-01-17 23:32:20 +00:00
// protocol connection.
//
2022-09-16 19:06:25 +00:00
// If Dial fails to connect using HTTP, it also tries to tunnel over TLS to the
// Dialer's Host:HTTPSPort as a compatibility fallback.
2022-02-26 17:19:18 +00:00
//
// The provided ctx is only used for the initial connection, until
// Dial returns. It does not affect the connection once established.
2022-10-17 21:50:52 +00:00
func ( a * Dialer ) Dial ( ctx context . Context ) ( * ClientConn , error ) {
2022-09-16 19:06:25 +00:00
if a . Hostname == "" {
return nil , errors . New ( "required Dialer.Hostname empty" )
2022-01-17 23:32:20 +00:00
}
2022-04-28 15:10:26 +00:00
return a . dial ( ctx )
2022-01-17 23:32:20 +00:00
}
2022-09-16 19:06:25 +00:00
// logf forwards to a.Logf, and is a no-op when no logger is configured.
func (a *Dialer) logf(format string, args ...any) {
	if a.Logf == nil {
		return
	}
	a.Logf(format, args...)
}
func ( a * Dialer ) getProxyFunc ( ) func ( * http . Request ) ( * url . URL , error ) {
if a . proxyFunc != nil {
return a . proxyFunc
}
return tshttpproxy . ProxyFromEnvironment
2022-01-17 23:32:20 +00:00
}
2022-09-16 19:06:25 +00:00
// httpsFallbackDelay is how long we'll wait for a.HTTPPort to work before
// starting to try a.HTTPSPort.
func ( a * Dialer ) httpsFallbackDelay ( ) time . Duration {
2022-04-28 15:10:26 +00:00
if v := a . testFallbackDelay ; v != 0 {
return v
}
return 500 * time . Millisecond
}
2022-09-23 17:06:55 +00:00
// Register the TS_USE_CONTROL_DIAL_PLAN envknob at package init. The value
// itself is read in dial via envknob.BoolDefaultTrue, so the result of the
// registration is discarded here.
var _ = envknob . RegisterBool ( "TS_USE_CONTROL_DIAL_PLAN" ) // to record at init time whether it's in use
2022-10-17 21:50:52 +00:00
func ( a * Dialer ) dial ( ctx context . Context ) ( * ClientConn , error ) {
2022-09-23 17:06:55 +00:00
// If we don't have a dial plan, just fall back to dialing the single
// host we know about.
useDialPlan := envknob . BoolDefaultTrue ( "TS_USE_CONTROL_DIAL_PLAN" )
if ! useDialPlan || a . DialPlan == nil || len ( a . DialPlan . Candidates ) == 0 {
return a . dialHost ( ctx , netip . Addr { } )
}
candidates := a . DialPlan . Candidates
// Otherwise, we try dialing per the plan. Store the highest priority
// in the list, so that if we get a connection to one of those
// candidates we can return quickly.
var highestPriority int = math . MinInt
for _ , c := range candidates {
if c . Priority > highestPriority {
highestPriority = c . Priority
}
}
// This context allows us to cancel in-flight connections if we get a
// highest-priority connection before we're all done.
ctx , cancel := context . WithCancel ( ctx )
defer cancel ( )
// Now, for each candidate, kick off a dial in parallel.
type dialResult struct {
2022-10-17 21:50:52 +00:00
conn * ClientConn
2022-09-23 17:06:55 +00:00
err error
addr netip . Addr
priority int
}
resultsCh := make ( chan dialResult , len ( candidates ) )
var pending atomic . Int32
pending . Store ( int32 ( len ( candidates ) ) )
for _ , c := range candidates {
go func ( ctx context . Context , c tailcfg . ControlIPCandidate ) {
var (
2022-10-17 21:50:52 +00:00
conn * ClientConn
2022-09-23 17:06:55 +00:00
err error
)
// Always send results back to our channel.
defer func ( ) {
resultsCh <- dialResult { conn , err , c . IP , c . Priority }
if pending . Add ( - 1 ) == 0 {
close ( resultsCh )
}
} ( )
// If non-zero, wait the configured start timeout
// before we do anything.
if c . DialStartDelaySec > 0 {
a . logf ( "[v2] controlhttp: waiting %.2f seconds before dialing %q @ %v" , c . DialStartDelaySec , a . Hostname , c . IP )
2024-10-01 16:13:57 +00:00
tmr , tmrChannel := a . clock ( ) . NewTimer ( time . Duration ( c . DialStartDelaySec * float64 ( time . Second ) ) )
2022-09-23 17:06:55 +00:00
defer tmr . Stop ( )
select {
case <- ctx . Done ( ) :
err = ctx . Err ( )
return
2023-08-04 23:29:44 +00:00
case <- tmrChannel :
2022-09-23 17:06:55 +00:00
}
}
// Now, create a sub-context with the given timeout and
// try dialing the provided host.
ctx , cancel := context . WithTimeout ( ctx , time . Duration ( c . DialTimeoutSec * float64 ( time . Second ) ) )
defer cancel ( )
// This will dial, and the defer above sends it back to our parent.
a . logf ( "[v2] controlhttp: trying to dial %q @ %v" , a . Hostname , c . IP )
conn , err = a . dialHost ( ctx , c . IP )
} ( ctx , c )
}
var results [ ] dialResult
for res := range resultsCh {
// If we get a response that has the highest priority, we don't
// need to wait for any of the other connections to finish; we
// can just return this connection.
//
// TODO(andrew): we could make this better by keeping track of
// the highest remaining priority dynamically, instead of just
// checking for the highest total
if res . priority == highestPriority && res . conn != nil {
a . logf ( "[v1] controlhttp: high-priority success dialing %q @ %v from dial plan" , a . Hostname , res . addr )
// Drain the channel and any existing connections in
// the background.
go func ( ) {
for _ , res := range results {
if res . conn != nil {
res . conn . Close ( )
}
}
for res := range resultsCh {
if res . conn != nil {
res . conn . Close ( )
}
}
if a . drainFinished != nil {
close ( a . drainFinished )
}
} ( )
return res . conn , nil
}
// This isn't a highest-priority result, so just store it until
// we're done.
results = append ( results , res )
}
// After we finish this function, close any remaining open connections.
defer func ( ) {
for _ , result := range results {
// Note: below, we nil out the returned connection (if
// any) in the slice so we don't close it.
if result . conn != nil {
result . conn . Close ( )
}
}
// We don't drain asynchronously after this point, so notify our
// channel when we return.
if a . drainFinished != nil {
close ( a . drainFinished )
}
} ( )
// Sort by priority, then take the first non-error response.
sort . Slice ( results , func ( i , j int ) bool {
// NOTE: intentionally inverted so that the highest priority
// item comes first
return results [ i ] . priority > results [ j ] . priority
} )
var (
2022-10-17 21:50:52 +00:00
conn * ClientConn
2022-09-23 17:06:55 +00:00
errs [ ] error
)
for i , result := range results {
if result . err != nil {
errs = append ( errs , result . err )
continue
}
a . logf ( "[v1] controlhttp: succeeded dialing %q @ %v from dial plan" , a . Hostname , result . addr )
conn = result . conn
results [ i ] . conn = nil // so we don't close it in the defer
return conn , nil
}
merr := multierr . New ( errs ... )
// If we get here, then we didn't get anywhere with our dial plan; fall back to just using DNS.
a . logf ( "controlhttp: failed dialing using DialPlan, falling back to DNS; errs=%s" , merr . Error ( ) )
return a . dialHost ( ctx , netip . Addr { } )
}
2023-01-17 19:13:17 +00:00
// forceNoise443 is the TS_FORCE_NOISE_443 envknob. When set, it forces the
// controlclient noise dialer to always use port 443 HTTPS connections to the
// controlplane and not try the port 80 HTTP fast path. It is consulted by
// the (*Dialer).forceNoise443 method.
//
// This is currently (2023-01-17) needed for Docker Desktop's "VPNKit" proxy
// that breaks port 80 for us post-Noise-handshake, causing us to never try port
// 443. Until one of Docker's proxy and/or this package's port 443 fallback is
// fixed, this is a workaround. It might also be useful for future debugging.
var forceNoise443 = envknob . RegisterBool ( "TS_FORCE_NOISE_443" )
2024-10-01 16:13:57 +00:00
// forceNoise443 reports whether the controlclient noise dialer should always
// use HTTPS connections as its underlay connection (double crypto). This can
// be necessary when networks or middle boxes are messing with port 80.
func ( d * Dialer ) forceNoise443 ( ) bool {
if forceNoise443 ( ) {
return true
}
2024-09-26 17:24:08 +00:00
if d . HealthTracker . LastNoiseDialWasRecent ( ) {
// If we dialed recently, assume there was a recent failure and fall
// back to HTTPS dials for the subsequent retries.
//
// This heuristic works around networks where port 80 is MITMed and
// appears to work for a bit post-Upgrade but then gets closed,
// such as seen in https://github.com/tailscale/tailscale/issues/13597.
d . logf ( "controlhttp: forcing port 443 dial due to recent noise dial" )
return true
}
2024-10-01 16:13:57 +00:00
return false
}
// clock returns the clock used for this Dialer's timers: a.Clock when set,
// otherwise a tstime.StdClock.
func (a *Dialer) clock() tstime.Clock {
	if c := a.Clock; c != nil {
		return c
	}
	return tstime.StdClock{}
}
2023-01-17 19:13:17 +00:00
// debugNoiseDial is the TS_DEBUG_NOISE_DIAL envknob. When set, dialHost logs
// each noise dial attempt and its result.
var debugNoiseDial = envknob . RegisterBool ( "TS_DEBUG_NOISE_DIAL" )
2022-09-23 17:06:55 +00:00
// dialHost connects to the configured Dialer.Hostname and upgrades the
2024-10-02 03:07:58 +00:00
// connection into a controlbase.Conn.
//
// If optAddr is valid, then no DNS is used and the connection will be made to the
// provided address.
func ( a * Dialer ) dialHost ( ctx context . Context , optAddr netip . Addr ) ( * ClientConn , error ) {
2022-04-27 15:19:45 +00:00
// Create one shared context used by both port 80 and port 443 dials.
// If port 80 is still in flight when 443 returns, this deferred cancel
// will stop the port 80 dial.
2022-04-28 15:10:26 +00:00
ctx , cancel := context . WithCancel ( ctx )
2022-04-27 15:19:45 +00:00
defer cancel ( )
2023-04-13 01:23:22 +00:00
ctx = sockstats . WithSockStats ( ctx , sockstats . LabelControlClientDialer , a . logf )
2023-02-03 20:07:58 +00:00
2022-04-27 15:19:45 +00:00
// u80 and u443 are the URLs we'll try to hit over HTTP or HTTPS,
// respectively, in order to do the HTTP upgrade to a net.Conn over which
// we'll speak Noise.
u80 := & url . URL {
2022-01-17 23:32:20 +00:00
Scheme : "http" ,
2022-09-16 19:06:25 +00:00
Host : net . JoinHostPort ( a . Hostname , strDef ( a . HTTPPort , "80" ) ) ,
2022-02-26 17:19:18 +00:00
Path : serverUpgradePath ,
2022-01-17 23:32:20 +00:00
}
2022-04-27 15:19:45 +00:00
u443 := & url . URL {
Scheme : "https" ,
2022-09-16 19:06:25 +00:00
Host : net . JoinHostPort ( a . Hostname , strDef ( a . HTTPSPort , "443" ) ) ,
2022-04-27 15:19:45 +00:00
Path : serverUpgradePath ,
2022-01-17 23:32:20 +00:00
}
2024-10-02 03:08:55 +00:00
if a . HTTPSPort == NoPort {
u443 = nil
}
2022-04-28 15:10:26 +00:00
2022-04-27 15:19:45 +00:00
type tryURLRes struct {
2022-10-17 21:50:52 +00:00
u * url . URL // input (the URL conn+err are for/from)
conn * ClientConn // result (mutually exclusive with err)
2022-04-27 15:19:45 +00:00
err error
2022-01-17 23:32:20 +00:00
}
2022-04-27 15:19:45 +00:00
ch := make ( chan tryURLRes ) // must be unbuffered
try := func ( u * url . URL ) {
2023-01-17 19:13:17 +00:00
if debugNoiseDial ( ) {
2024-10-02 03:07:58 +00:00
a . logf ( "trying noise dial (%v, %v) ..." , u , optAddr )
2023-01-17 19:13:17 +00:00
}
2024-10-02 03:07:58 +00:00
cbConn , err := a . dialURL ( ctx , u , optAddr )
2023-01-17 19:13:17 +00:00
if debugNoiseDial ( ) {
2024-10-02 03:07:58 +00:00
a . logf ( "noise dial (%v, %v) = (%v, %v)" , u , optAddr , cbConn , err )
2023-01-17 19:13:17 +00:00
}
2022-04-27 15:19:45 +00:00
select {
2022-04-28 15:10:26 +00:00
case ch <- tryURLRes { u , cbConn , err } :
2022-04-27 15:19:45 +00:00
case <- ctx . Done ( ) :
2022-04-28 15:10:26 +00:00
if cbConn != nil {
cbConn . Close ( )
2022-04-27 15:19:45 +00:00
}
2022-01-17 23:32:20 +00:00
}
}
2024-10-02 03:08:55 +00:00
forceTLS := a . forceNoise443 ( )
2023-01-17 19:13:17 +00:00
// Start the plaintext HTTP attempt first, unless disabled by the envknob.
2024-10-02 03:08:55 +00:00
if ! forceTLS || u443 == nil {
2023-01-17 19:13:17 +00:00
go try ( u80 )
}
2022-04-27 15:19:45 +00:00
// In case outbound port 80 blocked or MITM'ed poorly, start a backup timer
// to dial port 443 if port 80 doesn't either succeed or fail quickly.
2024-10-02 03:08:55 +00:00
var try443Timer tstime . TimerController
if u443 != nil {
delay := a . httpsFallbackDelay ( )
if forceTLS {
delay = 0
}
try443Timer = a . clock ( ) . AfterFunc ( delay , func ( ) { try ( u443 ) } )
defer try443Timer . Stop ( )
}
2022-04-27 15:19:45 +00:00
var err80 , err443 error
for {
select {
case <- ctx . Done ( ) :
return nil , fmt . Errorf ( "connection attempts aborted by context: %w" , ctx . Err ( ) )
case res := <- ch :
if res . err == nil {
2022-04-28 15:10:26 +00:00
return res . conn , nil
2022-04-27 15:19:45 +00:00
}
switch res . u {
case u80 :
// Connecting over plain HTTP failed; assume it's an HTTP proxy
// being difficult and see if we can get through over HTTPS.
err80 = res . err
// Stop the fallback timer and run it immediately. We don't use
// Timer.Reset(0) here because on AfterFuncs, that can run it
// again.
2024-10-02 03:08:55 +00:00
if try443Timer != nil && try443Timer . Stop ( ) {
2022-04-27 15:19:45 +00:00
go try ( u443 )
} // else we lost the race and it started already which is what we want
case u443 :
err443 = res . err
default :
panic ( "invalid" )
}
if err80 != nil && err443 != nil {
return nil , fmt . Errorf ( "all connection attempts failed (HTTP: %v, HTTPS: %v)" , err80 , err443 )
}
}
}
2022-01-17 23:32:20 +00:00
}
2022-04-28 15:10:26 +00:00
// dialURL attempts to connect to the given URL.
2024-10-02 03:07:58 +00:00
//
// If optAddr is valid, then no DNS is used and the connection will be made to the
// provided address.
func ( a * Dialer ) dialURL ( ctx context . Context , u * url . URL , optAddr netip . Addr ) ( * ClientConn , error ) {
2022-09-16 19:06:25 +00:00
init , cont , err := controlbase . ClientDeferred ( a . MachineKey , a . ControlKey , a . ProtocolVersion )
2022-04-28 15:10:26 +00:00
if err != nil {
return nil , err
}
2024-10-02 03:07:58 +00:00
netConn , err := a . tryURLUpgrade ( ctx , u , optAddr , init )
2022-04-28 15:10:26 +00:00
if err != nil {
return nil , err
}
cbConn , err := cont ( ctx , netConn )
if err != nil {
netConn . Close ( )
return nil , err
}
2022-10-17 21:50:52 +00:00
return & ClientConn {
2022-10-28 19:14:58 +00:00
Conn : cbConn ,
2022-10-17 21:50:52 +00:00
} , nil
2022-04-28 15:10:26 +00:00
}
2023-04-30 03:25:37 +00:00
// resolver returns the DNS resolver to use for dials: a.DNSCache when it is
// non-nil, otherwise a freshly constructed *dnscache.Resolver that forwards
// to the shared dnscache resolver's Forward and uses dnsfallback as its
// lookup fallback.
func (a *Dialer) resolver() *dnscache.Resolver {
	if r := a.DNSCache; r != nil {
		return r
	}
	forward := dnscache.Get().Forward
	fallback := dnsfallback.MakeLookupFunc(a.logf, a.NetMon)
	return &dnscache.Resolver{
		Forward:          forward,
		LookupIPFallback: fallback,
		UseLastGood:      true,
		Logf:             a.Logf, // not a.logf method; we want to propagate nil-ness
	}
}
2024-06-19 20:22:14 +00:00
// isLoopback reports whether a is a TCP address whose IP is a loopback
// address. Any other kind of address (including nil) is not.
func isLoopback(a net.Addr) bool {
	tcp, isTCP := a.(*net.TCPAddr)
	return isTCP && tcp.IP.IsLoopback()
}
var macOSScreenTime = health . Register ( & health . Warnable {
2024-06-20 18:09:50 +00:00
Code : "macos-screen-time" ,
Severity : health . SeverityHigh ,
Title : "Tailscale blocked by Screen Time" ,
2024-06-19 20:22:14 +00:00
Text : func ( args health . Args ) string {
return "macOS Screen Time seems to be blocking Tailscale. Try disabling Screen Time in System Settings > Screen Time > Content & Privacy > Access to Web Content."
} ,
ImpactsConnectivity : true ,
} )
2024-10-02 03:07:58 +00:00
// tryURLUpgrade connects to u, and tries to upgrade it to a net.Conn.
//
// If optAddr is valid, then no DNS is used and the connection will be made to
// the provided address.
2022-04-27 15:19:45 +00:00
//
// Only the provided ctx is used, not a.ctx.
2024-10-02 03:07:58 +00:00
func ( a * Dialer ) tryURLUpgrade ( ctx context . Context , u * url . URL , optAddr netip . Addr , init [ ] byte ) ( _ net . Conn , retErr error ) {
2022-09-23 17:06:55 +00:00
var dns * dnscache . Resolver
// If we were provided an address to dial, then create a resolver that just
// returns that value; otherwise, fall back to DNS.
2024-10-02 03:07:58 +00:00
if optAddr . IsValid ( ) {
2022-09-23 17:06:55 +00:00
dns = & dnscache . Resolver {
2024-10-02 03:07:58 +00:00
SingleHostStaticResult : [ ] netip . Addr { optAddr } ,
2022-09-23 17:06:55 +00:00
SingleHost : u . Hostname ( ) ,
2023-03-12 14:58:11 +00:00
Logf : a . Logf , // not a.logf method; we want to propagate nil-ness
2022-09-23 17:06:55 +00:00
}
} else {
2023-04-30 03:25:37 +00:00
dns = a . resolver ( )
2022-01-17 23:32:20 +00:00
}
2022-09-16 19:06:25 +00:00
var dialer dnscache . DialContextFunc
if a . Dialer != nil {
dialer = a . Dialer
} else {
dialer = stdDialer . DialContext
}
2024-06-19 20:22:14 +00:00
// On macOS, see if Screen Time is blocking things.
if runtime . GOOS == "darwin" {
var proxydIntercepted atomic . Bool // intercepted by macOS webfilterproxyd
origDialer := dialer
dialer = func ( ctx context . Context , network , address string ) ( net . Conn , error ) {
c , err := origDialer ( ctx , network , address )
if err != nil {
return nil , err
}
if isLoopback ( c . LocalAddr ( ) ) && isLoopback ( c . RemoteAddr ( ) ) {
proxydIntercepted . Store ( true )
}
return c , nil
}
defer func ( ) {
if retErr != nil && proxydIntercepted . Load ( ) {
a . HealthTracker . SetUnhealthy ( macOSScreenTime , nil )
retErr = fmt . Errorf ( "macOS Screen Time is blocking network access: %w" , retErr )
} else {
a . HealthTracker . SetHealthy ( macOSScreenTime )
}
} ( )
}
2022-01-17 23:32:20 +00:00
tr := http . DefaultTransport . ( * http . Transport ) . Clone ( )
defer tr . CloseIdleConnections ( )
2022-09-16 19:06:25 +00:00
tr . Proxy = a . getProxyFunc ( )
2022-01-17 23:32:20 +00:00
tshttpproxy . SetTransportGetProxyConnectHeader ( tr )
2022-09-16 19:06:25 +00:00
tr . DialContext = dnscache . Dialer ( dialer , dns )
2022-01-17 23:32:20 +00:00
// Disable HTTP2, since h2 can't do protocol switching.
tr . TLSClientConfig . NextProtos = [ ] string { }
tr . TLSNextProto = map [ string ] func ( string , * tls . Conn ) http . RoundTripper { }
2024-04-26 17:12:46 +00:00
tr . TLSClientConfig = tlsdial . Config ( a . Hostname , a . HealthTracker , tr . TLSClientConfig )
2023-02-08 18:24:30 +00:00
if ! tr . TLSClientConfig . InsecureSkipVerify {
panic ( "unexpected" ) // should be set by tlsdial.Config
2022-01-17 23:32:20 +00:00
}
2023-02-08 18:24:30 +00:00
verify := tr . TLSClientConfig . VerifyConnection
if verify == nil {
panic ( "unexpected" ) // should be set by tlsdial.Config
}
// Demote all cert verification errors to log messages. We don't actually
// care about the TLS security (because we just do the Noise crypto atop whatever
// connection we get, including HTTP port 80 plaintext) so this permits
// middleboxes to MITM their users. All they'll see is some Noise.
tr . TLSClientConfig . VerifyConnection = func ( cs tls . ConnectionState ) error {
if err := verify ( cs ) ; err != nil && a . Logf != nil && ! a . omitCertErrorLogging {
a . Logf ( "warning: TLS cert verificication for %q failed: %v" , a . Hostname , err )
}
return nil // regardless
}
2022-09-16 19:06:25 +00:00
tr . DialTLSContext = dnscache . TLSDialer ( dialer , dns , tr . TLSClientConfig )
2022-01-17 23:32:20 +00:00
tr . DisableCompression = true
// (mis)use httptrace to extract the underlying net.Conn from the
2024-08-03 12:37:01 +00:00
// transport. The transport handles 101 Switching Protocols correctly,
// such that the Conn will not be reused or kept alive by the transport
// once the response has been handed back from RoundTrip.
2022-01-17 23:32:20 +00:00
//
// In theory, the machinery of net/http should make it such that
// the trace callback happens-before we get the response, but
// there's no promise of that. So, to make sure, we use a buffered
// channel as a synchronization step to avoid data races.
//
// Note that even though we're able to extract a net.Conn via this
// mechanism, we must still keep using the eventual resp.Body to
// read from, because it includes a buffer we can't get rid of. If
// the server never sends any data after sending the HTTP
// response, we could get away with it, but violating this
// assumption leads to very mysterious transport errors (lockups,
// unexpected EOFs...), and we're bound to forget someday and
// introduce a protocol optimization at a higher level that starts
// eagerly transmitting from the server.
2024-08-03 12:37:01 +00:00
var lastConn syncs . AtomicValue [ net . Conn ]
2022-01-17 23:32:20 +00:00
trace := httptrace . ClientTrace {
2024-08-03 12:37:01 +00:00
// Even though we only make a single HTTP request which should
// require a single connection, the context (with the attached
// trace configuration) might be used by our custom dialer to
// make other HTTP requests (e.g. BootstrapDNS). We only care
// about the last connection made, which should be the one to
// the control server.
2022-01-17 23:32:20 +00:00
GotConn : func ( info httptrace . GotConnInfo ) {
2024-08-03 12:37:01 +00:00
lastConn . Store ( info . Conn )
2022-01-17 23:32:20 +00:00
} ,
}
2022-04-27 15:19:45 +00:00
ctx = httptrace . WithClientTrace ( ctx , & trace )
2022-01-17 23:32:20 +00:00
req := & http . Request {
Method : "POST" ,
URL : u ,
Header : http . Header {
2024-11-07 23:59:19 +00:00
"Upgrade" : [ ] string { controlhttpcommon . UpgradeHeaderValue } ,
"Connection" : [ ] string { "upgrade" } ,
controlhttpcommon . HandshakeHeaderName : [ ] string { base64 . StdEncoding . EncodeToString ( init ) } ,
2022-01-17 23:32:20 +00:00
} ,
}
req = req . WithContext ( ctx )
resp , err := tr . RoundTrip ( req )
if err != nil {
2022-10-28 19:14:58 +00:00
return nil , err
2022-01-17 23:32:20 +00:00
}
if resp . StatusCode != http . StatusSwitchingProtocols {
2022-10-28 19:14:58 +00:00
return nil , fmt . Errorf ( "unexpected HTTP response: %s" , resp . Status )
2022-01-17 23:32:20 +00:00
}
// From here on, the underlying net.Conn is ours to use, but there
// is still a read buffer attached to it within resp.Body. So, we
// must direct I/O through resp.Body, but we can still use the
// underlying net.Conn for stuff like deadlines.
2024-08-03 12:37:01 +00:00
switchedConn := lastConn . Load ( )
2022-01-17 23:32:20 +00:00
if switchedConn == nil {
resp . Body . Close ( )
2022-10-28 19:14:58 +00:00
return nil , fmt . Errorf ( "httptrace didn't provide a connection" )
2022-01-17 23:32:20 +00:00
}
2024-11-07 23:59:19 +00:00
if next := resp . Header . Get ( "Upgrade" ) ; next != controlhttpcommon . UpgradeHeaderValue {
2022-01-17 23:32:20 +00:00
resp . Body . Close ( )
2022-10-28 19:14:58 +00:00
return nil , fmt . Errorf ( "server switched to unexpected protocol %q" , next )
2022-01-17 23:32:20 +00:00
}
rwc , ok := resp . Body . ( io . ReadWriteCloser )
if ! ok {
resp . Body . Close ( )
2022-10-28 19:14:58 +00:00
return nil , errors . New ( "http Transport did not provide a writable body" )
2022-01-17 23:32:20 +00:00
}
2022-10-28 19:14:58 +00:00
return netutil . NewAltReadWriteCloserConn ( rwc , switchedConn ) , nil
2022-01-17 23:32:20 +00:00
}