health, net/tlsdial: add healthcheck for self-signed cert

When we make a connection to a server, we previously would verify with
the system roots, and then fall back to verifying with our baked-in
Let's Encrypt root if the system root cert verification failed.

We now explicitly check for, and log a health error on, self-signed
certificates. Additionally, we now always verify against our baked-in
Let's Encrypt root certificate and log an error if that isn't
successful. We don't consider this a health failure, since if we ever
change our server certificate issuer in the future, older non-updated
versions of Tailscale would no longer be healthy despite being able to
connect.

Updates #3198

Change-Id: I00be5ceb8afee544ee795e3c7a2815476abc4abf
Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
This commit is contained in:
Andrew Dunham
2023-02-01 14:29:44 -05:00
parent 7393ce5e4f
commit 2755f3843c
4 changed files with 65 additions and 11 deletions

View File

@@ -11,9 +11,11 @@
package tlsdial
import (
"bytes"
"crypto/tls"
"crypto/x509"
"errors"
"fmt"
"log"
"os"
"sync"
@@ -21,6 +23,7 @@ import (
"time"
"tailscale.com/envknob"
"tailscale.com/health"
)
var counterFallbackOK int32 // atomic
@@ -33,6 +36,11 @@ var sslKeyLogFile = os.Getenv("SSLKEYLOGFILE")
var debug = envknob.RegisterBool("TS_DEBUG_TLS_DIAL")
// tlsdialWarningPrinted tracks whether we've printed a warning about a given
// hostname already, to avoid log spam for users with custom DERP servers,
// Headscale, etc.
//
// Keys are hostnames and the stored value is always true. The
// VerifyConnection hook calls LoadOrStore(host, true) and only logs its
// certificate warning when the key was not already present.
var tlsdialWarningPrinted sync.Map // map[string]bool
// Config returns a tls.Config for connecting to a server.
// If base is non-nil, it's cloned as the base config before
// being configured and returned.
@@ -66,6 +74,16 @@ func Config(host string, base *tls.Config) *tls.Config {
// (with the baked-in fallback root) in the VerifyConnection hook.
conf.InsecureSkipVerify = true
conf.VerifyConnection = func(cs tls.ConnectionState) error {
// Perform some health checks on this certificate before we do
// any verification.
if certIsSelfSigned(cs.PeerCertificates[0]) {
// Self-signed certs are never valid.
health.SetTLSConnectionError(cs.ServerName, fmt.Errorf("certificate is self-signed"))
} else {
// Ensure we clear any error state for this ServerName.
health.SetTLSConnectionError(cs.ServerName, nil)
}
// First try doing x509 verification with the system's
// root CA pool.
opts := x509.VerifyOptions{
@@ -79,18 +97,27 @@ func Config(host string, base *tls.Config) *tls.Config {
if debug() {
log.Printf("tlsdial(sys %q): %v", host, errSys)
}
if errSys == nil {
return nil
// Always verify with our baked-in Let's Encrypt certificate,
// so we can log an informational message. This is useful for
// detecting SSL MiTM.
opts.Roots = bakedInRoots()
_, bakedErr := cs.PeerCertificates[0].Verify(opts)
if debug() {
log.Printf("tlsdial(bake %q): %v", host, bakedErr)
} else if bakedErr != nil {
if _, loaded := tlsdialWarningPrinted.LoadOrStore(host, true); !loaded {
if errSys == nil {
log.Printf("tlsdial: warning: server cert for %q is not a Let's Encrypt cert", host)
} else {
log.Printf("tlsdial: error: server cert for %q failed to verify and is not a Let's Encrypt cert", host)
}
}
}
// If that failed, because the system's CA roots are old
// or broken, fall back to trying LetsEncrypt at least.
opts.Roots = bakedInRoots()
_, err := cs.PeerCertificates[0].Verify(opts)
if debug() {
log.Printf("tlsdial(bake %q): %v", host, err)
}
if err == nil {
if errSys == nil {
return nil
} else if bakedErr == nil {
atomic.AddInt32(&counterFallbackOK, 1)
return nil
}
@@ -99,6 +126,12 @@ func Config(host string, base *tls.Config) *tls.Config {
return conf
}
// certIsSelfSigned reports whether cert names itself as its own issuer.
//
// The check compares the raw DER-encoded subject and issuer names
// byte-for-byte; it does not verify the certificate's signature.
func certIsSelfSigned(cert *x509.Certificate) bool {
	subject, issuer := cert.RawSubject, cert.RawIssuer
	return bytes.Equal(subject, issuer)
}
// SetConfigExpectedCert modifies c to expect and verify that the server returns
// a certificate for the provided certDNSName.
//