fix: introduce measures to avoid bots crawling and indexing activities (#5728)

* fix: 404 for robots.txt and meta robots tags

* fix: add unit tests for robots txt and tag

* fix: add meta tag robots none for login pages

* fix: weird format issue in header.go

* fix: add x-robots-tag=none to grpcwebserver

* fix linting

---------

Co-authored-by: Silvan <silvan.reusser@gmail.com>
Co-authored-by: Livio Spring <livio.a@gmail.com>
This commit is contained in:
Miguel Cabrerizo 2023-05-05 10:25:02 +02:00 committed by GitHub
parent d224172a31
commit 3ca7147808
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 99 additions and 0 deletions

View File

@ -39,6 +39,7 @@ import (
http_util "github.com/zitadel/zitadel/internal/api/http"
"github.com/zitadel/zitadel/internal/api/http/middleware"
"github.com/zitadel/zitadel/internal/api/oidc"
"github.com/zitadel/zitadel/internal/api/robots_txt"
"github.com/zitadel/zitadel/internal/api/saml"
"github.com/zitadel/zitadel/internal/api/ui/console"
"github.com/zitadel/zitadel/internal/api/ui/login"
@ -305,6 +306,13 @@ func startAPIs(
return err
}
// robots.txt handler
robotsTxtHandler, err := robots_txt.Start()
if err != nil {
return fmt.Errorf("unable to start robots txt handler: %w", err)
}
apis.RegisterHandlerOnPrefix(robots_txt.HandlerPrefix, robotsTxtHandler)
// TODO: Record openapi access logs?
openAPIHandler, err := openapi.Start()
if err != nil {

View File

@ -21,6 +21,7 @@
<meta name="twitter:title" content="ZITADEL Console" />
<meta name="twitter:description" content="Management Platform for ZITADEL IAM" />
<meta name="twitter:image" content="https://www.zitadel.com/images/preview.png" />
<meta name="robots" content="none" />
</head>
<body>

View File

@ -16,6 +16,7 @@ import (
internal_authz "github.com/zitadel/zitadel/internal/api/authz"
"github.com/zitadel/zitadel/internal/api/grpc/server"
http_util "github.com/zitadel/zitadel/internal/api/http"
http_mw "github.com/zitadel/zitadel/internal/api/http/middleware"
"github.com/zitadel/zitadel/internal/api/ui/login"
"github.com/zitadel/zitadel/internal/errors"
"github.com/zitadel/zitadel/internal/logstore"
@ -167,6 +168,7 @@ func (a *API) routeGRPCWeb() {
return true
}),
)
a.router.Use(http_mw.RobotsTagHandler)
a.router.NewRoute().
Methods(http.MethodPost, http.MethodOptions).
MatcherFunc(

View File

@ -149,6 +149,7 @@ func addInterceptors(handler http.Handler, http1HostName string) http.Handler {
handler = http_mw.CallDurationHandler(handler)
handler = http1Host(handler, http1HostName)
handler = http_mw.CORSInterceptor(handler)
handler = http_mw.RobotsTagHandler(handler)
handler = http_mw.DefaultTelemetryHandler(handler)
return http_mw.DefaultMetricsHandler(handler)
}

View File

@ -23,6 +23,7 @@ const (
XUserAgent = "x-user-agent"
XGrpcWeb = "x-grpc-web"
XRequestedWith = "x-requested-with"
XRobotsTag = "x-robots-tag"
IfNoneMatch = "If-None-Match"
LastModified = "Last-Modified"
Etag = "Etag"

View File

@ -0,0 +1,14 @@
package middleware
import (
"net/http"
http_utils "github.com/zitadel/zitadel/internal/api/http"
)
// RobotsTagHandler wraps the given handler and stamps every response
// with the `X-Robots-Tag: none` header, telling crawlers neither to
// index nor to follow anything served beneath it.
func RobotsTagHandler(handler http.Handler) http.Handler {
	fn := func(w http.ResponseWriter, r *http.Request) {
		// Set the header before delegating so it is present even if
		// the wrapped handler writes the status immediately.
		w.Header().Set(http_utils.XRobotsTag, "none")
		handler.ServeHTTP(w, r)
	}
	return http.HandlerFunc(fn)
}

View File

@ -0,0 +1,24 @@
package middleware
import (
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// Test_RobotsTagInterceptor ensures the middleware both sets the
// X-Robots-Tag header on the response and still invokes the wrapped
// handler (the original test never verified the latter).
func Test_RobotsTagInterceptor(t *testing.T) {
	nextCalled := false
	testHandler := func(w http.ResponseWriter, r *http.Request) {
		nextCalled = true
	}
	req := httptest.NewRequest(http.MethodGet, "/", nil)
	recorder := httptest.NewRecorder()

	handler := RobotsTagHandler(http.HandlerFunc(testHandler))
	handler.ServeHTTP(recorder, req)

	res := recorder.Result()
	// Close the recorded body before asserting, not after.
	defer res.Body.Close()

	assert.True(t, nextCalled)
	assert.Equal(t, "none", res.Header.Get("X-Robots-Tag"))
}

View File

@ -0,0 +1,19 @@
package robots_txt
import (
"fmt"
"net/http"
)
const (
HandlerPrefix = "/robots.txt"
)
func Start() (http.Handler, error) {
handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
w.Header().Set("Content-Type", "application/text")
fmt.Fprintf(w, "User-agent: *\nDisallow: /\n")
})
return handler, nil
}

View File

@ -0,0 +1,28 @@
package robots_txt
import (
"io"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// Test_RobotsTxt ensures the robots.txt handler serves a
// disallow-all robots file with a 200 status.
func Test_RobotsTxt(t *testing.T) {
	// Check the Start error BEFORE using the handler; the original
	// asserted it only after ServeHTTP, risking a nil-handler panic.
	handler, err := Start()
	if !assert.NoError(t, err) {
		return
	}

	req := httptest.NewRequest(http.MethodGet, "/robots.txt", nil)
	recorder := httptest.NewRecorder()
	handler.ServeHTTP(recorder, req)

	res := recorder.Result()
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	assert.NoError(t, err)
	assert.Equal(t, http.StatusOK, res.StatusCode)
	assert.Equal(t, "User-agent: *\nDisallow: /\n", string(body))
}

View File

@ -23,6 +23,7 @@
<title>{{ .Title }}</title>
<meta name="description" content="{{ .Description }}"/>
<meta name="robots" content="none" />
<script src="{{ resourceUrl "scripts/theme.js" }}"></script>
</head>