zitadel/internal/eventstore/repository/search_query.go

325 lines
9.8 KiB
Go
Raw Normal View History

2020-09-30 08:00:05 +00:00
package repository
2020-09-24 06:52:10 +00:00
import (
"database/sql"
"github.com/zitadel/zitadel/internal/database"
"github.com/zitadel/zitadel/internal/eventstore"
"github.com/zitadel/zitadel/internal/zerrors"
)
2020-09-30 08:00:05 +00:00
// SearchQuery defines the which and how data are queried
2020-09-30 08:00:05 +00:00
type SearchQuery struct {
Columns eventstore.Columns
2020-09-30 08:00:05 +00:00
SubQueries [][]*Filter
Tx *sql.Tx
LockRows bool
LockOption eventstore.LockOption
AllowTimeTravel bool
AwaitOpenTransactions bool
Limit uint64
Offset uint32
Desc bool
2020-09-24 06:52:10 +00:00
feat(eventstore): exclude aggregate IDs when event_type occurred (#8940) # Which Problems Are Solved For truly event-based notification handler, we need to be able to filter out events of aggregates which are already handled. For example when an event like `notify.success` or `notify.failed` was created on an aggregate, we no longer require events from that aggregate ID. # How the Problems Are Solved Extend the query builder to use a `NOT IN` clause which excludes aggregate IDs when they have certain events for a certain aggregate type. For optimization and proper index usages, certain filters are inherited from the parent query, such as: - Instance ID - Instance IDs - Position offset This is a prettified query as used by the unit tests: ```sql SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND event_type = $3 AND "position" > $4 AND aggregate_id NOT IN ( SELECT aggregate_id FROM eventstore.events2 WHERE aggregate_type = $5 AND event_type = ANY($6) AND instance_id = $7 AND "position" > $8 ) ORDER BY "position" DESC, in_tx_order DESC LIMIT $9 ``` I used this query to run it against the `oidc_session` aggregate looking for added events, excluding aggregates where a token was revoked, against a recent position. It fully used index scans: <details> ```json [ { "Plan": { "Node Type": "Index Scan", "Parallel Aware": false, "Async Capable": false, "Scan Direction": "Forward", "Index Name": "es_projection", "Relation Name": "events2", "Alias": "events2", "Actual Rows": 2, "Actual Loops": 1, "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.added'::text) AND (\"position\" > 1731582100.784168))", "Rows Removed by Index Recheck": 0, "Filter": "(NOT (hashed SubPlan 1))", "Rows Removed by Filter": 1, "Plans": [ { "Node Type": "Index Scan", "Parent Relationship": "SubPlan", "Subplan Name": "SubPlan 1", "Parallel Aware": false, "Async Capable": false, "Scan Direction": "Forward", "Index Name": "es_projection", "Relation Name": "events2", "Alias": "events2_1", "Actual Rows": 1, "Actual Loops": 1, "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.access_token.revoked'::text) AND (\"position\" > 1731582100.784168))", "Rows Removed by Index Recheck": 0 } ] }, "Triggers": [ ] } ] ``` </details> # Additional Changes - None # Additional Context - Related to https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: adlerhurst <silvan.reusser@gmail.com>
2024-11-25 15:25:11 +00:00
InstanceID *Filter
InstanceIDs *Filter
ExcludedInstances *Filter
Creator *Filter
Owner *Filter
Position *Filter
Sequence *Filter
CreatedAfter *Filter
CreatedBefore *Filter
ExcludeAggregateIDs []*Filter
2020-10-06 19:28:09 +00:00
}
// Filter represents all fields needed to compare a field of an event with a value
2020-09-24 06:52:10 +00:00
type Filter struct {
2020-10-05 18:39:36 +00:00
Field Field
Value interface{}
Operation Operation
2020-09-24 06:52:10 +00:00
}
// Operation defines how fields are compared
2020-09-24 06:52:10 +00:00
type Operation int32
const (
2020-10-06 19:28:09 +00:00
// OperationEquals compares two values for equality
OperationEquals Operation = iota + 1
// OperationGreater compares if the given values is greater than the stored one
OperationGreater
// OperationLess compares if the given values is less than the stored one
OperationLess
//OperationIn checks if a stored value matches one of the passed value list
OperationIn
2020-11-23 18:31:12 +00:00
//OperationJSONContains checks if a stored value matches the given json
OperationJSONContains
//OperationNotIn checks if a stored value does not match one of the passed value list
OperationNotIn
2020-10-06 19:28:09 +00:00
operationCount
2020-09-24 06:52:10 +00:00
)
// Field is the representation of a field from the event
2020-09-24 06:52:10 +00:00
type Field int32
const (
2020-10-06 19:28:09 +00:00
//FieldAggregateType represents the aggregate type field
FieldAggregateType Field = iota + 1
//FieldAggregateID represents the aggregate id field
FieldAggregateID
//FieldSequence represents the sequence field
FieldSequence
//FieldResourceOwner represents the resource owner field
FieldResourceOwner
//FieldInstanceID represents the instance id field
FieldInstanceID
2020-10-06 19:28:09 +00:00
//FieldEditorService represents the editor service field
FieldEditorService
//FieldEditorUser represents the editor user field
FieldEditorUser
//FieldEventType represents the event type field
FieldEventType
2020-11-23 18:31:12 +00:00
//FieldEventData represents the event data field
FieldEventData
//FieldCreationDate represents the creation date field
FieldCreationDate
// FieldPosition represents the field of the global sequence
FieldPosition
2020-10-06 19:28:09 +00:00
fieldCount
2020-09-24 06:52:10 +00:00
)
// NewFilter is used in tests. Use searchQuery.*Filter() instead
2020-09-24 06:52:10 +00:00
func NewFilter(field Field, value interface{}, operation Operation) *Filter {
return &Filter{
2020-10-05 18:39:36 +00:00
Field: field,
Value: value,
Operation: operation,
2020-09-24 06:52:10 +00:00
}
}
// Validate checks if the fields of the filter have valid values
2020-09-24 06:52:10 +00:00
func (f *Filter) Validate() error {
if f == nil {
return zerrors.ThrowPreconditionFailed(nil, "REPO-z6KcG", "filter is nil")
2020-09-24 06:52:10 +00:00
}
2020-10-06 19:28:09 +00:00
if f.Field <= 0 || f.Field >= fieldCount {
return zerrors.ThrowPreconditionFailed(nil, "REPO-zw62U", "field not definded")
2020-09-24 06:52:10 +00:00
}
2020-10-05 18:39:36 +00:00
if f.Value == nil {
return zerrors.ThrowPreconditionFailed(nil, "REPO-GJ9ct", "no value definded")
2020-09-24 06:52:10 +00:00
}
2020-10-06 19:28:09 +00:00
if f.Operation <= 0 || f.Operation >= operationCount {
return zerrors.ThrowPreconditionFailed(nil, "REPO-RrQTy", "operation not definded")
2020-09-24 06:52:10 +00:00
}
return nil
}
func QueryFromBuilder(builder *eventstore.SearchQueryBuilder) (*SearchQuery, error) {
if builder == nil ||
builder.GetColumns().Validate() != nil {
return nil, zerrors.ThrowPreconditionFailed(nil, "MODEL-4m9gs", "builder invalid")
}
query := &SearchQuery{
Columns: builder.GetColumns(),
Limit: builder.GetLimit(),
Offset: builder.GetOffset(),
Desc: builder.GetDesc(),
Tx: builder.GetTx(),
AllowTimeTravel: builder.GetAllowTimeTravel(),
AwaitOpenTransactions: builder.GetAwaitOpenTransactions(),
SubQueries: make([][]*Filter, len(builder.GetQueries())),
}
query.LockRows, query.LockOption = builder.GetLockRows()
for _, f := range []func(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter{
instanceIDFilter,
instanceIDsFilter,
editorUserFilter,
resourceOwnerFilter,
positionAfterFilter,
eventSequenceGreaterFilter,
creationDateAfterFilter,
creationDateBeforeFilter,
} {
filter := f(builder, query)
if filter == nil {
continue
}
if err := filter.Validate(); err != nil {
return nil, err
}
}
for i, q := range builder.GetQueries() {
for _, f := range []func(query *eventstore.SearchQuery) *Filter{
aggregateTypeFilter,
aggregateIDFilter,
eventTypeFilter,
eventDataFilter,
perf(oidc): nest position clause for session terminated query (#8738) # Which Problems Are Solved Optimize the query that checks for terminated sessions in the access token verifier. The verifier is used in auth middleware, userinfo and introspection. # How the Problems Are Solved The previous implementation built a query for certain events and then appended a single `PositionAfter` clause. This caused the postgreSQL planner to use indexes only for the instance ID, aggregate IDs, aggregate types and event types. Followed by an expensive sequential scan for the position. This resulting in internal over-fetching of rows before the final filter was applied. ![Screenshot_20241007_105803](https://github.com/user-attachments/assets/f2d91976-be87-428b-b604-a211399b821c) Furthermore, the query was searching for events which are not always applicable. For example, there was always a session ID search and if there was a user ID, we would also search for a browser fingerprint in event payload (expensive). Even if those argument string would be empty. This PR changes: 1. Nest the position query, so that a full `instance_id, aggregate_id, aggregate_type, event_type, "position"` index can be matched. 2. Redefine the `es_wm` index to include the `position` column. 3. Only search for events for the IDs that actually have a value. Do not search (noop) if none of session ID, user ID or fingerpint ID are set. New query plan: ![Screenshot_20241007_110648](https://github.com/user-attachments/assets/c3234c33-1b76-4b33-a4a9-796f69f3d775) # Additional Changes - cleanup how we load multi-statement migrations and make that a bit more reusable. # Additional Context - Related to https://github.com/zitadel/zitadel/issues/7639
2024-10-07 12:49:55 +00:00
eventPositionAfterFilter,
} {
filter := f(q)
if filter == nil {
continue
}
if err := filter.Validate(); err != nil {
return nil, err
}
query.SubQueries[i] = append(query.SubQueries[i], filter)
}
}
feat(eventstore): exclude aggregate IDs when event_type occurred (#8940) # Which Problems Are Solved For truly event-based notification handler, we need to be able to filter out events of aggregates which are already handled. For example when an event like `notify.success` or `notify.failed` was created on an aggregate, we no longer require events from that aggregate ID. # How the Problems Are Solved Extend the query builder to use a `NOT IN` clause which excludes aggregate IDs when they have certain events for a certain aggregate type. For optimization and proper index usages, certain filters are inherited from the parent query, such as: - Instance ID - Instance IDs - Position offset This is a prettified query as used by the unit tests: ```sql SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND event_type = $3 AND "position" > $4 AND aggregate_id NOT IN ( SELECT aggregate_id FROM eventstore.events2 WHERE aggregate_type = $5 AND event_type = ANY($6) AND instance_id = $7 AND "position" > $8 ) ORDER BY "position" DESC, in_tx_order DESC LIMIT $9 ``` I used this query to run it against the `oidc_session` aggregate looking for added events, excluding aggregates where a token was revoked, against a recent position. It fully used index scans: <details> ```json [ { "Plan": { "Node Type": "Index Scan", "Parallel Aware": false, "Async Capable": false, "Scan Direction": "Forward", "Index Name": "es_projection", "Relation Name": "events2", "Alias": "events2", "Actual Rows": 2, "Actual Loops": 1, "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.added'::text) AND (\"position\" > 1731582100.784168))", "Rows Removed by Index Recheck": 0, "Filter": "(NOT (hashed SubPlan 1))", "Rows Removed by Filter": 1, "Plans": [ { "Node Type": "Index Scan", "Parent Relationship": "SubPlan", "Subplan Name": "SubPlan 1", "Parallel Aware": false, "Async Capable": false, "Scan Direction": "Forward", "Index Name": "es_projection", "Relation Name": "events2", "Alias": "events2_1", "Actual Rows": 1, "Actual Loops": 1, "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.access_token.revoked'::text) AND (\"position\" > 1731582100.784168))", "Rows Removed by Index Recheck": 0 } ] }, "Triggers": [ ] } ] ``` </details> # Additional Changes - None # Additional Context - Related to https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: adlerhurst <silvan.reusser@gmail.com>
2024-11-25 15:25:11 +00:00
if excludeAggregateIDs := builder.GetExcludeAggregateIDs(); excludeAggregateIDs != nil {
for _, f := range []func(query *eventstore.ExclusionQuery) *Filter{
excludeAggregateTypeFilter,
excludeEventTypeFilter,
} {
filter := f(excludeAggregateIDs)
if filter == nil {
continue
}
if err := filter.Validate(); err != nil {
return nil, err
}
query.ExcludeAggregateIDs = append(query.ExcludeAggregateIDs, filter)
}
}
return query, nil
}
func eventSequenceGreaterFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetEventSequenceGreater() == 0 {
return nil
}
sortOrder := OperationGreater
if builder.GetDesc() {
sortOrder = OperationLess
}
query.Sequence = NewFilter(FieldSequence, builder.GetEventSequenceGreater(), sortOrder)
return query.Sequence
}
func creationDateAfterFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetCreationDateAfter().IsZero() {
return nil
}
query.CreatedAfter = NewFilter(FieldCreationDate, builder.GetCreationDateAfter(), OperationGreater)
return query.CreatedAfter
}
func creationDateBeforeFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetCreationDateBefore().IsZero() {
return nil
}
query.CreatedBefore = NewFilter(FieldCreationDate, builder.GetCreationDateBefore(), OperationLess)
return query.CreatedBefore
}
func resourceOwnerFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetResourceOwner() == "" {
return nil
}
query.Owner = NewFilter(FieldResourceOwner, builder.GetResourceOwner(), OperationEquals)
return query.Owner
}
func editorUserFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetEditorUser() == "" {
return nil
}
query.Creator = NewFilter(FieldEditorUser, builder.GetEditorUser(), OperationEquals)
return query.Creator
}
func instanceIDFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetInstanceID() == nil {
return nil
}
query.InstanceID = NewFilter(FieldInstanceID, *builder.GetInstanceID(), OperationEquals)
return query.InstanceID
}
func instanceIDsFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetInstanceIDs() == nil {
return nil
}
query.InstanceIDs = NewFilter(FieldInstanceID, database.TextArray[string](builder.GetInstanceIDs()), OperationIn)
return query.InstanceIDs
}
func positionAfterFilter(builder *eventstore.SearchQueryBuilder, query *SearchQuery) *Filter {
if builder.GetPositionAfter() == 0 {
return nil
}
query.Position = NewFilter(FieldPosition, builder.GetPositionAfter(), OperationGreater)
return query.Position
}
func aggregateIDFilter(query *eventstore.SearchQuery) *Filter {
if len(query.GetAggregateIDs()) < 1 {
return nil
}
if len(query.GetAggregateIDs()) == 1 {
return NewFilter(FieldAggregateID, query.GetAggregateIDs()[0], OperationEquals)
}
return NewFilter(FieldAggregateID, database.TextArray[string](query.GetAggregateIDs()), OperationIn)
}
func eventTypeFilter(query *eventstore.SearchQuery) *Filter {
if len(query.GetEventTypes()) < 1 {
return nil
}
if len(query.GetEventTypes()) == 1 {
return NewFilter(FieldEventType, query.GetEventTypes()[0], OperationEquals)
}
return NewFilter(FieldEventType, database.TextArray[eventstore.EventType](query.GetEventTypes()), OperationIn)
}
func aggregateTypeFilter(query *eventstore.SearchQuery) *Filter {
if len(query.GetAggregateTypes()) < 1 {
return nil
}
if len(query.GetAggregateTypes()) == 1 {
return NewFilter(FieldAggregateType, query.GetAggregateTypes()[0], OperationEquals)
}
return NewFilter(FieldAggregateType, database.TextArray[eventstore.AggregateType](query.GetAggregateTypes()), OperationIn)
}
func eventDataFilter(query *eventstore.SearchQuery) *Filter {
if len(query.GetEventData()) == 0 {
return nil
}
return NewFilter(FieldEventData, query.GetEventData(), OperationJSONContains)
}
perf(oidc): nest position clause for session terminated query (#8738) # Which Problems Are Solved Optimize the query that checks for terminated sessions in the access token verifier. The verifier is used in auth middleware, userinfo and introspection. # How the Problems Are Solved The previous implementation built a query for certain events and then appended a single `PositionAfter` clause. This caused the postgreSQL planner to use indexes only for the instance ID, aggregate IDs, aggregate types and event types. Followed by an expensive sequential scan for the position. This resulting in internal over-fetching of rows before the final filter was applied. ![Screenshot_20241007_105803](https://github.com/user-attachments/assets/f2d91976-be87-428b-b604-a211399b821c) Furthermore, the query was searching for events which are not always applicable. For example, there was always a session ID search and if there was a user ID, we would also search for a browser fingerprint in event payload (expensive). Even if those argument string would be empty. This PR changes: 1. Nest the position query, so that a full `instance_id, aggregate_id, aggregate_type, event_type, "position"` index can be matched. 2. Redefine the `es_wm` index to include the `position` column. 3. Only search for events for the IDs that actually have a value. Do not search (noop) if none of session ID, user ID or fingerpint ID are set. New query plan: ![Screenshot_20241007_110648](https://github.com/user-attachments/assets/c3234c33-1b76-4b33-a4a9-796f69f3d775) # Additional Changes - cleanup how we load multi-statement migrations and make that a bit more reusable. # Additional Context - Related to https://github.com/zitadel/zitadel/issues/7639
2024-10-07 12:49:55 +00:00
func eventPositionAfterFilter(query *eventstore.SearchQuery) *Filter {
if pos := query.GetPositionAfter(); pos != 0 {
return NewFilter(FieldPosition, pos, OperationGreater)
}
return nil
}
feat(eventstore): exclude aggregate IDs when event_type occurred (#8940) # Which Problems Are Solved For truly event-based notification handler, we need to be able to filter out events of aggregates which are already handled. For example when an event like `notify.success` or `notify.failed` was created on an aggregate, we no longer require events from that aggregate ID. # How the Problems Are Solved Extend the query builder to use a `NOT IN` clause which excludes aggregate IDs when they have certain events for a certain aggregate type. For optimization and proper index usages, certain filters are inherited from the parent query, such as: - Instance ID - Instance IDs - Position offset This is a prettified query as used by the unit tests: ```sql SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND event_type = $3 AND "position" > $4 AND aggregate_id NOT IN ( SELECT aggregate_id FROM eventstore.events2 WHERE aggregate_type = $5 AND event_type = ANY($6) AND instance_id = $7 AND "position" > $8 ) ORDER BY "position" DESC, in_tx_order DESC LIMIT $9 ``` I used this query to run it against the `oidc_session` aggregate looking for added events, excluding aggregates where a token was revoked, against a recent position. It fully used index scans: <details> ```json [ { "Plan": { "Node Type": "Index Scan", "Parallel Aware": false, "Async Capable": false, "Scan Direction": "Forward", "Index Name": "es_projection", "Relation Name": "events2", "Alias": "events2", "Actual Rows": 2, "Actual Loops": 1, "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.added'::text) AND (\"position\" > 1731582100.784168))", "Rows Removed by Index Recheck": 0, "Filter": "(NOT (hashed SubPlan 1))", "Rows Removed by Filter": 1, "Plans": [ { "Node Type": "Index Scan", "Parent Relationship": "SubPlan", "Subplan Name": "SubPlan 1", "Parallel Aware": false, "Async Capable": false, "Scan Direction": "Forward", "Index Name": "es_projection", "Relation Name": "events2", "Alias": "events2_1", "Actual Rows": 1, "Actual Loops": 1, "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.access_token.revoked'::text) AND (\"position\" > 1731582100.784168))", "Rows Removed by Index Recheck": 0 } ] }, "Triggers": [ ] } ] ``` </details> # Additional Changes - None # Additional Context - Related to https://github.com/zitadel/zitadel/issues/8931 --------- Co-authored-by: adlerhurst <silvan.reusser@gmail.com>
2024-11-25 15:25:11 +00:00
func excludeEventTypeFilter(query *eventstore.ExclusionQuery) *Filter {
if len(query.GetEventTypes()) < 1 {
return nil
}
if len(query.GetEventTypes()) == 1 {
return NewFilter(FieldEventType, query.GetEventTypes()[0], OperationEquals)
}
return NewFilter(FieldEventType, database.TextArray[eventstore.EventType](query.GetEventTypes()), OperationIn)
}
func excludeAggregateTypeFilter(query *eventstore.ExclusionQuery) *Filter {
if len(query.GetAggregateTypes()) < 1 {
return nil
}
if len(query.GetAggregateTypes()) == 1 {
return NewFilter(FieldAggregateType, query.GetAggregateTypes()[0], OperationEquals)
}
return NewFilter(FieldAggregateType, database.TextArray[eventstore.AggregateType](query.GetAggregateTypes()), OperationIn)
}