fix(permissions): chunked synchronization of role permission events (#9403)

# Which Problems Are Solved

Setup fails to push all role permission events when running Zitadel with
CockroachDB. `TransactionRetryError`s were visible in logs which finally
times out the setup job with `timeout: context deadline exceeded`

# How the Problems Are Solved

As suggested in the [Cockroach documentation](timeout: context deadline
exceeded), _"break down larger transactions"_. The commands to be pushed
for the role permissions are chunked in 50 events per push. This
chunking is only done with CockroachDB.

# Additional Changes

- gci run fixed some unrelated imports
- access to `command.Commands` for the setup job, so we can reuse the
sync logic.

# Additional Context

Closes #9293

---------

Co-authored-by: Silvan <27845747+adlerhurst@users.noreply.github.com>
This commit is contained in:
Tim Möhlmann
2025-02-26 18:06:50 +02:00
committed by GitHub
parent 77499ce603
commit e670b9126c
13 changed files with 461 additions and 169 deletions

View File

@@ -180,33 +180,6 @@ func Setup(ctx context.Context, config *Config, steps *Steps, masterKey string)
err = projection.Create(ctx, dbClient, eventstoreClient, config.Projections, nil, nil, nil)
logging.OnError(err).Fatal("unable to start projections")
repeatableSteps := []migration.RepeatableMigration{
&externalConfigChange{
es: eventstoreClient,
ExternalDomain: config.ExternalDomain,
ExternalPort: config.ExternalPort,
ExternalSecure: config.ExternalSecure,
defaults: config.SystemDefaults,
},
&projectionTables{
es: eventstoreClient,
Version: build.Version(),
},
&DeleteStaleOrgFields{
eventstore: eventstoreClient,
},
&FillFieldsForInstanceDomains{
eventstore: eventstoreClient,
},
&SyncRolePermissions{
eventstore: eventstoreClient,
rolePermissionMappings: config.InternalAuthZ.RolePermissionMappings,
},
&RiverMigrateRepeatable{
client: dbClient,
},
}
for _, step := range []migration.Migration{
steps.s14NewEventsTable,
steps.s40InitPushFunc,
@@ -214,6 +187,7 @@ func Setup(ctx context.Context, config *Config, steps *Steps, masterKey string)
steps.s2AssetsTable,
steps.s28AddFieldTable,
steps.s31AddAggregateIndexToFields,
steps.s46InitPermissionFunctions,
steps.FirstInstance,
steps.s5LastFailed,
steps.s6OwnerRemoveColumns,
@@ -238,7 +212,6 @@ func Setup(ctx context.Context, config *Config, steps *Steps, masterKey string)
steps.s38BackChannelLogoutNotificationStart,
steps.s44ReplaceCurrentSequencesIndex,
steps.s45CorrectProjectOwners,
steps.s46InitPermissionFunctions,
steps.s47FillMembershipFields,
steps.s49InitPermittedOrgsFunction,
steps.s50IDPTemplate6UsePKCE,
@@ -246,6 +219,36 @@ func Setup(ctx context.Context, config *Config, steps *Steps, masterKey string)
mustExecuteMigration(ctx, eventstoreClient, step, "migration failed")
}
commands, _, _, _ := startCommandsQueries(ctx, eventstoreClient, eventstoreV4, dbClient, masterKey, config)
repeatableSteps := []migration.RepeatableMigration{
&externalConfigChange{
es: eventstoreClient,
ExternalDomain: config.ExternalDomain,
ExternalPort: config.ExternalPort,
ExternalSecure: config.ExternalSecure,
defaults: config.SystemDefaults,
},
&projectionTables{
es: eventstoreClient,
Version: build.Version(),
},
&DeleteStaleOrgFields{
eventstore: eventstoreClient,
},
&FillFieldsForInstanceDomains{
eventstore: eventstoreClient,
},
&SyncRolePermissions{
commands: commands,
eventstore: eventstoreClient,
rolePermissionMappings: config.InternalAuthZ.RolePermissionMappings,
},
&RiverMigrateRepeatable{
client: dbClient,
},
}
for _, repeatableStep := range repeatableSteps {
mustExecuteMigration(ctx, eventstoreClient, repeatableStep, "unable to migrate repeatable step")
}
@@ -271,11 +274,6 @@ func Setup(ctx context.Context, config *Config, steps *Steps, masterKey string)
initProjections(
ctx,
eventstoreClient,
eventstoreV4,
dbClient,
dbClient,
masterKey,
config,
)
}
}
@@ -336,18 +334,20 @@ func readStatements(fs embed.FS, folder, typ string) ([]statement, error) {
return statements, nil
}
func initProjections(
func startCommandsQueries(
ctx context.Context,
eventstoreClient *eventstore.Eventstore,
eventstoreV4 *es_v4.EventStore,
queryDBClient,
projectionDBClient *database.DB,
dbClient *database.DB,
masterKey string,
config *Config,
) (
*command.Commands,
*query.Queries,
*admin_view.View,
*auth_view.View,
) {
logging.Info("init-projections is currently in beta")
keyStorage, err := cryptoDB.NewKeyStorage(queryDBClient, masterKey)
keyStorage, err := cryptoDB.NewKeyStorage(dbClient, masterKey)
logging.OnError(err).Fatal("unable to start key storage")
keys, err := encryption.EnsureEncryptionKeys(ctx, config.EncryptionKeys, keyStorage)
@@ -355,7 +355,7 @@ func initProjections(
err = projection.Create(
ctx,
queryDBClient,
dbClient,
eventstoreClient,
projection.Config{
RetryFailedAfter: config.InitProjections.RetryFailedAfter,
@@ -367,19 +367,15 @@ func initProjections(
config.SystemAPIUsers,
)
logging.OnError(err).Fatal("unable to start projections")
for _, p := range projection.Projections() {
err := migration.Migrate(ctx, eventstoreClient, p)
logging.WithFields("name", p.String()).OnError(err).Fatal("migration failed")
}
staticStorage, err := config.AssetStorage.NewStorage(queryDBClient.DB)
staticStorage, err := config.AssetStorage.NewStorage(dbClient.DB)
logging.OnError(err).Fatal("unable to start asset storage")
adminView, err := admin_view.StartView(queryDBClient)
adminView, err := admin_view.StartView(dbClient)
logging.OnError(err).Fatal("unable to start admin view")
admin_handler.Register(ctx,
admin_handler.Config{
Client: queryDBClient,
Client: dbClient,
Eventstore: eventstoreClient,
BulkLimit: config.InitProjections.BulkLimit,
FailureCountUntilSkip: uint64(config.InitProjections.MaxFailureCount),
@@ -387,22 +383,18 @@ func initProjections(
adminView,
staticStorage,
)
for _, p := range admin_handler.Projections() {
err := migration.Migrate(ctx, eventstoreClient, p)
logging.WithFields("name", p.String()).OnError(err).Fatal("migration failed")
}
sessionTokenVerifier := internal_authz.SessionTokenVerifier(keys.OIDC)
cacheConnectors, err := connector.StartConnectors(config.Caches, queryDBClient)
cacheConnectors, err := connector.StartConnectors(config.Caches, dbClient)
logging.OnError(err).Fatal("unable to start caches")
queries, err := query.StartQueries(
ctx,
eventstoreClient,
eventstoreV4.Querier,
queryDBClient,
projectionDBClient,
dbClient,
dbClient,
cacheConnectors,
config.Projections,
config.SystemDefaults,
@@ -424,11 +416,11 @@ func initProjections(
)
logging.OnError(err).Fatal("unable to start queries")
authView, err := auth_view.StartView(queryDBClient, keys.OIDC, queries, eventstoreClient)
authView, err := auth_view.StartView(dbClient, keys.OIDC, queries, eventstoreClient)
logging.OnError(err).Fatal("unable to start admin view")
auth_handler.Register(ctx,
auth_handler.Config{
Client: queryDBClient,
Client: dbClient,
Eventstore: eventstoreClient,
BulkLimit: config.InitProjections.BulkLimit,
FailureCountUntilSkip: uint64(config.InitProjections.MaxFailureCount),
@@ -436,16 +428,13 @@ func initProjections(
authView,
queries,
)
for _, p := range auth_handler.Projections() {
err := migration.Migrate(ctx, eventstoreClient, p)
logging.WithFields("name", p.String()).OnError(err).Fatal("migration failed")
}
authZRepo, err := authz.Start(queries, eventstoreClient, queryDBClient, keys.OIDC, config.ExternalSecure)
authZRepo, err := authz.Start(queries, eventstoreClient, dbClient, keys.OIDC, config.ExternalSecure)
logging.OnError(err).Fatal("unable to start authz repo")
permissionCheck := func(ctx context.Context, permission, orgID, resourceID string) (err error) {
return internal_authz.CheckPermission(ctx, authZRepo, config.InternalAuthZ.RolePermissionMappings, permission, orgID, resourceID)
}
commands, err := command.StartCommands(ctx,
eventstoreClient,
cacheConnectors,
@@ -477,6 +466,7 @@ func initProjections(
config.DefaultInstance.SecretGenerators,
)
logging.OnError(err).Fatal("unable to start commands")
notify_handler.Register(
ctx,
config.Projections.Customizations["notifications"],
@@ -498,8 +488,33 @@ func initProjections(
keys.SMS,
keys.OIDC,
config.OIDC.DefaultBackChannelLogoutLifetime,
queryDBClient,
dbClient,
)
return commands, queries, adminView, authView
}
func initProjections(
ctx context.Context,
eventstoreClient *eventstore.Eventstore,
) {
logging.Info("init-projections is currently in beta")
for _, p := range projection.Projections() {
err := migration.Migrate(ctx, eventstoreClient, p)
logging.WithFields("name", p.String()).OnError(err).Fatal("migration failed")
}
for _, p := range admin_handler.Projections() {
err := migration.Migrate(ctx, eventstoreClient, p)
logging.WithFields("name", p.String()).OnError(err).Fatal("migration failed")
}
for _, p := range auth_handler.Projections() {
err := migration.Migrate(ctx, eventstoreClient, p)
logging.WithFields("name", p.String()).OnError(err).Fatal("migration failed")
}
for _, p := range notify_handler.Projections() {
err := migration.Migrate(ctx, eventstoreClient, p)
logging.WithFields("name", p.String()).OnError(err).Fatal("migration failed")

View File

@@ -2,29 +2,22 @@ package setup
import (
"context"
"database/sql"
_ "embed"
"fmt"
"strings"
"github.com/zitadel/logging"
"github.com/zitadel/zitadel/internal/api/authz"
"github.com/zitadel/zitadel/internal/database"
"github.com/zitadel/zitadel/internal/command"
"github.com/zitadel/zitadel/internal/eventstore"
"github.com/zitadel/zitadel/internal/repository/instance"
"github.com/zitadel/zitadel/internal/repository/permission"
)
var (
//go:embed sync_role_permissions.sql
getRolePermissionOperationsQuery string
)
// SyncRolePermissions is a repeatable step which synchronizes the InternalAuthZ
// RolePermissionMappings from the configuration to the database.
// This is needed until role permissions are manageable over the API.
type SyncRolePermissions struct {
commands *command.Commands
eventstore *eventstore.Eventstore
rolePermissionMappings []authz.RoleMapping
}
@@ -38,18 +31,11 @@ func (mig *SyncRolePermissions) Execute(ctx context.Context, _ eventstore.Event)
func (mig *SyncRolePermissions) executeSystem(ctx context.Context) error {
logging.WithFields("migration", mig.String()).Info("prepare system role permission sync events")
target := rolePermissionMappingsToDatabaseMap(mig.rolePermissionMappings, true)
cmds, err := mig.synchronizeCommands(ctx, "SYSTEM", target)
details, err := mig.commands.SynchronizeRolePermission(ctx, "SYSTEM", mig.rolePermissionMappings)
if err != nil {
return err
}
events, err := mig.eventstore.Push(ctx, cmds...)
if err != nil {
return err
}
logging.WithFields("migration", mig.String(), "pushed_events", len(events)).Info("pushed system role permission sync events")
logging.WithFields("migration", mig.String(), "sequence", details.Sequence).Info("pushed system role permission sync events")
return nil
}
@@ -70,51 +56,17 @@ func (mig *SyncRolePermissions) executeInstances(ctx context.Context) error {
if err != nil {
return err
}
target := rolePermissionMappingsToDatabaseMap(mig.rolePermissionMappings, false)
for i, instanceID := range instances {
logging.WithFields("instance_id", instanceID, "migration", mig.String(), "progress", fmt.Sprintf("%d/%d", i+1, len(instances))).Info("prepare instance role permission sync events")
cmds, err := mig.synchronizeCommands(ctx, instanceID, target)
details, err := mig.commands.SynchronizeRolePermission(ctx, instanceID, mig.rolePermissionMappings)
if err != nil {
return err
}
events, err := mig.eventstore.Push(ctx, cmds...)
if err != nil {
return err
}
logging.WithFields("instance_id", instanceID, "migration", mig.String(), "pushed_events", len(events)).Info("pushed instance role permission sync events")
logging.WithFields("instance_id", instanceID, "migration", mig.String(), "sequence", details.Sequence).Info("pushed instance role permission sync events")
}
return nil
}
// synchronizeCommands checks the current state of role permissions in the eventstore for the aggregate.
// It returns the commands required to reach the desired state passed in target.
// For system level permissions aggregateID must be set to `SYSTEM`,
// else it is the instance ID.
func (mig *SyncRolePermissions) synchronizeCommands(ctx context.Context, aggregateID string, target database.Map[[]string]) (cmds []eventstore.Command, err error) {
aggregate := permission.NewAggregate(aggregateID)
err = mig.eventstore.Client().QueryContext(ctx, func(rows *sql.Rows) error {
for rows.Next() {
var operation, role, perm string
if err := rows.Scan(&operation, &role, &perm); err != nil {
return err
}
logging.WithFields("aggregate_id", aggregateID, "migration", mig.String(), "operation", operation, "role", role, "permission", perm).Debug("sync role permission")
switch operation {
case "add":
cmds = append(cmds, permission.NewAddedEvent(ctx, aggregate, role, perm))
case "remove":
cmds = append(cmds, permission.NewRemovedEvent(ctx, aggregate, role, perm))
}
}
return rows.Close()
}, getRolePermissionOperationsQuery, aggregateID, target)
if err != nil {
return nil, err
}
return cmds, err
}
func (*SyncRolePermissions) String() string {
return "repeatable_sync_role_permissions"
}
@@ -122,13 +74,3 @@ func (*SyncRolePermissions) String() string {
func (*SyncRolePermissions) Check(lastRun map[string]interface{}) bool {
return true
}
func rolePermissionMappingsToDatabaseMap(mappings []authz.RoleMapping, system bool) database.Map[[]string] {
out := make(database.Map[[]string], len(mappings))
for _, m := range mappings {
if system == strings.HasPrefix(m.Role, "SYSTEM") {
out[m.Role] = m.Permissions
}
}
return out
}

View File

@@ -1,52 +0,0 @@
/*
This query creates a change set of permissions that need to be added or removed.
It compares the current state in the fields table (thru the role_permissions view)
against a passed role permission mapping as JSON, created from Zitadel's config:
{
"IAM_ADMIN_IMPERSONATOR": ["admin.impersonation", "impersonation"],
"IAM_END_USER_IMPERSONATOR": ["impersonation"],
"FOO_BAR": ["foo.bar", "bar.foo"]
}
It uses an aggregate_id as first argument which may be an instance_id or 'SYSTEM'
for system level permissions.
*/
WITH target AS (
-- unmarshal JSON representation into flattened tabular data
SELECT
key AS role,
jsonb_array_elements_text(value) AS permission
FROM jsonb_each($2::jsonb)
), add AS (
-- find all role permissions that exist in `target` and not in `role_permissions`
SELECT t.role, t.permission
FROM eventstore.role_permissions p
RIGHT JOIN target t
ON p.aggregate_id = $1::text
AND p.role = t.role
AND p.permission = t.permission
WHERE p.role IS NULL
), remove AS (
-- find all role permissions that exist `role_permissions` and not in `target`
SELECT p.role, p.permission
FROM eventstore.role_permissions p
LEFT JOIN target t
ON p.role = t.role
AND p.permission = t.permission
WHERE p.aggregate_id = $1::text
AND t.role IS NULL
)
-- return the required operations
SELECT
'add' AS operation,
role,
permission
FROM add
UNION ALL
SELECT
'remove' AS operation,
role,
permission
FROM remove
;