feat: await initial database connection (#10869)

# Which Problems Are Solved

If Postgres was not ready when the API started, the API failed
immediately.
This made task orchestration hard, especially in a platform agnostic
way:

- The current health check in the Nx target `@zitadel/api:prod` uses the
`timeout` command, which is not installed on all platforms and behaves
unpredictably
- The current health check in the Nx target `@zitadel/api:prod` requires
the DB to have been started using `@zitadel/zitadel:db`

# How the Problems Are Solved

- An additional configuration option `Database.Postgres.AwaitInitialConn`
is added; it defaults to *0m* (no waiting) for backwards compatibility.
- If a duration is configured, the API retries pinging the database
until a ping succeeds or the configured duration elapses
- The API sleeps for a second between each ping.
- It emits an info-level log with the error on each try.
- When the configured duration elapses before a ping succeeds,
the last error is returned and the command exits with a failure code.
- When the ping succeeds within the configured duration, the API goes on
with the init, setup or start phase.

# Additional Context

- Relates to internally reported problems with the current DB health
check command
[here](https://zitadel.slack.com/archives/C07EUL5H83A/p1759915009839269?thread_ts=1759912259.410789&cid=C07EUL5H83A)
and
[here](https://zitadel.slack.com/archives/C07EUL5H83A/p1759918324246249?thread_ts=1759912259.410789&cid=C07EUL5H83A).

(cherry picked from commit 7ba6870baf)
This commit is contained in:
Elio Bischof
2025-10-09 13:18:34 +02:00
committed by Livio Spring
parent d45d19f575
commit 4f313093f9
7 changed files with 27 additions and 12 deletions

View File

@@ -2,6 +2,7 @@ ExternalSecure: false
TLS.Enabled: false
Database.Postgres:
Database: zitadel
AwaitInitialConn: 5m
MaxOpenConns: 20
MaxIdleConns: 20
ConnMaxLifetime: 60m

View File

@@ -33,7 +33,6 @@
"options": {
"parallel": false,
"commands": [
"timeout 300 bash -c 'until nx run @zitadel/devcontainer:compose exec ${API_AWAIT_DB_SERVICE} pg_isready -U postgres -h localhost; do echo \"Awaiting DB\"; sleep 2; done' || (echo \"Database readiness check timed out after 5 minutes\" && exit 1)",
"./.artifacts/bin/$(go env GOOS)/$(go env GOARCH)/${ZITADEL_BINARY:-zitadel.local} start-from-init --config ${API_CONFIG_FILE} --steps ${API_CONFIG_FILE} --masterkey MasterkeyNeedsToHave32Characters"
]
},

View File

@@ -7,6 +7,7 @@ Database:
postgres:
# This makes the e2e config reusable with an out-of-docker zitadel process and an /etc/hosts entry
database: zitadel
AwaitInitialConn: 5m
MaxOpenConns: 15
MaxIdleConns: 10
Database: zitadel

View File

@@ -1,5 +1,6 @@
Database:
Postgres:
AwaitInitialConn: 5m
MaxOpenConns: 20
MaxIdleConns: 20
MaxConnLifetime: 1h

View File

@@ -118,6 +118,7 @@ Database:
Host: localhost # ZITADEL_DATABASE_POSTGRES_HOST
Port: 5432 # ZITADEL_DATABASE_POSTGRES_PORT
Database: zitadel # ZITADEL_DATABASE_POSTGRES_DATABASE
AwaitInitialConn: 0m # ZITADEL_DATABASE_POSTGRES_AWAITINITIALCONN
MaxOpenConns: 10 # ZITADEL_DATABASE_POSTGRES_MAXOPENCONNS
MaxIdleConns: 5 # ZITADEL_DATABASE_POSTGRES_MAXIDLECONNS
MaxConnLifetime: 30m # ZITADEL_DATABASE_POSTGRES_MAXCONNLIFETIME

View File

@@ -13,6 +13,7 @@ Database:
Host: localhost
Port: 5432
Database: zitadel
AwaitInitialConn: 5m
MaxOpenConns: 15
MaxIdleConns: 10
MaxConnLifetime: 1h

View File

@@ -32,6 +32,7 @@ type Config struct {
Host string
Port int32
Database string
AwaitInitialConn time.Duration
MaxOpenConns uint32
MaxIdleConns uint32
MaxConnLifetime time.Duration
@@ -127,8 +128,18 @@ func (c *Config) Connect(useAdmin bool) (*sql.DB, *pgxpool.Pool, error) {
if err != nil {
return nil, nil, err
}
if err := pool.Ping(context.Background()); err != nil {
if err = pool.Ping(context.Background()); err != nil && c.AwaitInitialConn > 0 {
waitUntil := time.Now().Add(c.AwaitInitialConn)
for time.Now().Before(waitUntil) {
logging.Infof("retrying initial database connection in a second: %v", err)
time.Sleep(time.Second)
if err = pool.Ping(context.Background()); err == nil {
break
}
}
}
if err != nil {
pool.Close()
return nil, nil, err
}