feat(eventstore): exclude aggregate IDs when event_type occurred (#8940)

# Which Problems Are Solved

For truly event-based notification handler, we need to be able to filter
out events of aggregates which are already handled. For example when an
event like `notify.success` or `notify.failed` was created on an
aggregate, we no longer require events from that aggregate ID.

# How the Problems Are Solved

Extend the query builder to use a `NOT IN` clause which excludes
aggregate IDs when they have certain events for a certain aggregate
type. For optimization and proper index usages, certain filters are
inherited from the parent query, such as:

- Instance ID
- Instance IDs
- Position offset

This is a prettified query as used by the unit tests:

```sql
SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision
FROM eventstore.events2
WHERE instance_id = $1
    AND aggregate_type = $2 
    AND event_type = $3
    AND "position" > $4
    AND aggregate_id NOT IN (
        SELECT aggregate_id
        FROM eventstore.events2
        WHERE aggregate_type = $5
        AND event_type = ANY($6)
        AND instance_id = $7
        AND "position" > $8
    )
ORDER BY "position" DESC, in_tx_order DESC
LIMIT $9
```

I used this query to run it against the `oidc_session` aggregate looking
for added events, excluding aggregates where a token was revoked,
against a recent position. It fully used index scans:

<details>

```json
[
  {
    "Plan": {
      "Node Type": "Index Scan",
      "Parallel Aware": false,
      "Async Capable": false,
      "Scan Direction": "Forward",
      "Index Name": "es_projection",
      "Relation Name": "events2",
      "Alias": "events2",
      "Actual Rows": 2,
      "Actual Loops": 1,
      "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.added'::text) AND (\"position\" > 1731582100.784168))",
      "Rows Removed by Index Recheck": 0,
      "Filter": "(NOT (hashed SubPlan 1))",
      "Rows Removed by Filter": 1,
      "Plans": [
        {
          "Node Type": "Index Scan",
          "Parent Relationship": "SubPlan",
          "Subplan Name": "SubPlan 1",
          "Parallel Aware": false,
          "Async Capable": false,
          "Scan Direction": "Forward",
          "Index Name": "es_projection",
          "Relation Name": "events2",
          "Alias": "events2_1",
          "Actual Rows": 1,
          "Actual Loops": 1,
          "Index Cond": "((instance_id = '286399006995644420'::text) AND (aggregate_type = 'oidc_session'::text) AND (event_type = 'oidc_session.access_token.revoked'::text) AND (\"position\" > 1731582100.784168))",
          "Rows Removed by Index Recheck": 0
        }
      ]
    },
    "Triggers": [
    ]
  }
]
```

</details>

# Additional Changes

- None

# Additional Context

- Related to https://github.com/zitadel/zitadel/issues/8931

---------

Co-authored-by: adlerhurst <silvan.reusser@gmail.com>
This commit is contained in:
Tim Möhlmann 2024-11-25 17:25:11 +02:00 committed by GitHub
parent 91290d6195
commit ff70ede7c7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 216 additions and 10 deletions

View File

@ -66,6 +66,39 @@ func TestCRDB_Filter(t *testing.T) {
},
wantErr: false,
},
{
name: "exclude aggregate type and event type",
args: args{
searchQuery: eventstore.NewSearchQueryBuilder(eventstore.ColumnsEvent).
AddQuery().
AggregateTypes(eventstore.AggregateType(t.Name())).
Builder().
ExcludeAggregateIDs().
EventTypes("test.updated").
AggregateTypes(eventstore.AggregateType(t.Name())).
Builder(),
},
fields: fields{
existingEvents: []eventstore.Command{
generateCommand(eventstore.AggregateType(t.Name()), "306"),
generateCommand(
eventstore.AggregateType(t.Name()),
"306",
func(te *testEvent) {
te.EventType = "test.updated"
},
),
generateCommand(
eventstore.AggregateType(t.Name()),
"308",
),
},
},
res: res{
eventCount: 1,
},
wantErr: false,
},
}
for _, tt := range tests {
for querierName, querier := range queriers {

View File

@ -31,6 +31,7 @@ type SearchQuery struct {
Sequence *Filter
CreatedAfter *Filter
CreatedBefore *Filter
ExcludeAggregateIDs []*Filter
}
// Filter represents all fields needed to compare a field of an event with a value
@ -171,6 +172,21 @@ func QueryFromBuilder(builder *eventstore.SearchQueryBuilder) (*SearchQuery, err
query.SubQueries[i] = append(query.SubQueries[i], filter)
}
}
if excludeAggregateIDs := builder.GetExcludeAggregateIDs(); excludeAggregateIDs != nil {
for _, f := range []func(query *eventstore.ExclusionQuery) *Filter{
excludeAggregateTypeFilter,
excludeEventTypeFilter,
} {
filter := f(excludeAggregateIDs)
if filter == nil {
continue
}
if err := filter.Validate(); err != nil {
return nil, err
}
query.ExcludeAggregateIDs = append(query.ExcludeAggregateIDs, filter)
}
}
return query, nil
}
@ -286,3 +302,23 @@ func eventPositionAfterFilter(query *eventstore.SearchQuery) *Filter {
}
return nil
}
func excludeEventTypeFilter(query *eventstore.ExclusionQuery) *Filter {
if len(query.GetEventTypes()) < 1 {
return nil
}
if len(query.GetEventTypes()) == 1 {
return NewFilter(FieldEventType, query.GetEventTypes()[0], OperationEquals)
}
return NewFilter(FieldEventType, database.TextArray[eventstore.EventType](query.GetEventTypes()), OperationIn)
}
func excludeAggregateTypeFilter(query *eventstore.ExclusionQuery) *Filter {
if len(query.GetAggregateTypes()) < 1 {
return nil
}
if len(query.GetAggregateTypes()) == 1 {
return NewFilter(FieldAggregateType, query.GetAggregateTypes()[0], OperationEquals)
}
return NewFilter(FieldAggregateType, database.TextArray[eventstore.AggregateType](query.GetAggregateTypes()), OperationIn)
}

View File

@ -283,6 +283,23 @@ func prepareConditions(criteria querier, query *repository.SearchQuery, useV1 bo
args = append(args, additionalArgs...)
}
excludeAggregateIDs := query.ExcludeAggregateIDs
if len(excludeAggregateIDs) > 0 {
excludeAggregateIDs = append(excludeAggregateIDs, query.InstanceID, query.InstanceIDs, query.Position)
}
excludeAggregateIDsClauses, excludeAggregateIDsArgs := prepareQuery(criteria, useV1, excludeAggregateIDs...)
if excludeAggregateIDsClauses != "" {
if clauses != "" {
clauses += " AND "
}
if useV1 {
clauses += "aggregate_id NOT IN (SELECT aggregate_id FROM eventstore.events WHERE " + excludeAggregateIDsClauses + ")"
} else {
clauses += "aggregate_id NOT IN (SELECT aggregate_id FROM eventstore.events2 WHERE " + excludeAggregateIDsClauses + ")"
}
args = append(args, excludeAggregateIDsArgs...)
}
if query.AwaitOpenTransactions {
instanceIDs := make(database.TextArray[string], 0, 3)
if query.InstanceID != nil {

View File

@ -5,6 +5,7 @@ import (
"database/sql"
"database/sql/driver"
"reflect"
"regexp"
"strconv"
"testing"
"time"
@ -744,6 +745,7 @@ func Test_query_events_mocked(t *testing.T) {
type args struct {
query *eventstore.SearchQueryBuilder
dest interface{}
useV1 bool
}
type res struct {
wantErr bool
@ -767,6 +769,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -789,6 +792,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -811,6 +815,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -834,6 +839,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -855,6 +861,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder().LockRowsDuringTx(nil, eventstore.LockOptionWait),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -876,6 +883,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder().LockRowsDuringTx(nil, eventstore.LockOptionNoWait),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -897,6 +905,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder().LockRowsDuringTx(nil, eventstore.LockOptionSkipLocked),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -919,6 +928,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQueryErr(t,
@ -941,6 +951,7 @@ func Test_query_events_mocked(t *testing.T) {
AddQuery().
AggregateTypes("user").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQueryScanErr(t,
@ -975,6 +986,7 @@ func Test_query_events_mocked(t *testing.T) {
AggregateTypes("org").
AggregateIDs("asdf42").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
@ -986,6 +998,68 @@ func Test_query_events_mocked(t *testing.T) {
wantErr: false,
},
},
{
name: "aggregate / event type, position and exclusion, v1",
args: args{
dest: &[]*repository.Event{},
query: eventstore.NewSearchQueryBuilder(eventstore.ColumnsEvent).
InstanceID("instanceID").
OrderDesc().
Limit(5).
PositionAfter(123.456).
AddQuery().
AggregateTypes("notify").
EventTypes("notify.foo.bar").
Builder().
ExcludeAggregateIDs().
AggregateTypes("notify").
EventTypes("notification.failed", "notification.success").
Builder(),
useV1: true,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
regexp.QuoteMeta(
`SELECT creation_date, event_type, event_sequence, event_data, editor_user, resource_owner, instance_id, aggregate_type, aggregate_id, aggregate_version FROM eventstore.events WHERE instance_id = $1 AND aggregate_type = $2 AND event_type = $3 AND "position" > $4 AND aggregate_id NOT IN (SELECT aggregate_id FROM eventstore.events WHERE aggregate_type = $5 AND event_type = ANY($6) AND instance_id = $7 AND "position" > $8) ORDER BY event_sequence DESC LIMIT $9`,
),
[]driver.Value{"instanceID", eventstore.AggregateType("notify"), eventstore.EventType("notify.foo.bar"), 123.456, eventstore.AggregateType("notify"), []eventstore.EventType{"notification.failed", "notification.success"}, "instanceID", 123.456, uint64(5)},
),
},
res: res{
wantErr: false,
},
},
{
name: "aggregate / event type, position and exclusion, v2",
args: args{
dest: &[]*repository.Event{},
query: eventstore.NewSearchQueryBuilder(eventstore.ColumnsEvent).
InstanceID("instanceID").
OrderDesc().
Limit(5).
PositionAfter(123.456).
AddQuery().
AggregateTypes("notify").
EventTypes("notify.foo.bar").
Builder().
ExcludeAggregateIDs().
AggregateTypes("notify").
EventTypes("notification.failed", "notification.success").
Builder(),
useV1: false,
},
fields: fields{
mock: newMockClient(t).expectQuery(t,
regexp.QuoteMeta(
`SELECT created_at, event_type, "sequence", "position", payload, creator, "owner", instance_id, aggregate_type, aggregate_id, revision FROM eventstore.events2 WHERE instance_id = $1 AND aggregate_type = $2 AND event_type = $3 AND "position" > $4 AND aggregate_id NOT IN (SELECT aggregate_id FROM eventstore.events2 WHERE aggregate_type = $5 AND event_type = ANY($6) AND instance_id = $7 AND "position" > $8) ORDER BY "position" DESC, in_tx_order DESC LIMIT $9`,
),
[]driver.Value{"instanceID", eventstore.AggregateType("notify"), eventstore.EventType("notify.foo.bar"), 123.456, eventstore.AggregateType("notify"), []eventstore.EventType{"notification.failed", "notification.success"}, "instanceID", 123.456, uint64(5)},
),
},
res: res{
wantErr: false,
},
},
}
crdb := NewCRDB(&database.DB{Database: new(testDB)})
for _, tt := range tests {
@ -994,7 +1068,7 @@ func Test_query_events_mocked(t *testing.T) {
crdb.DB.DB = tt.fields.mock.client
}
err := query(context.Background(), crdb, tt.args.query, tt.args.dest, true)
err := query(context.Background(), crdb, tt.args.query, tt.args.dest, tt.args.useV1)
if (err != nil) != tt.res.wantErr {
t.Errorf("query() error = %v, wantErr %v", err, tt.res.wantErr)
}

View File

@ -21,6 +21,7 @@ type SearchQueryBuilder struct {
instanceIDs []string
editorUser string
queries []*SearchQuery
excludeAggregateIDs *ExclusionQuery
tx *sql.Tx
lockRows bool
lockOption LockOption
@ -68,6 +69,10 @@ func (b *SearchQueryBuilder) GetQueries() []*SearchQuery {
return b.queries
}
func (b *SearchQueryBuilder) GetExcludeAggregateIDs() *ExclusionQuery {
return b.excludeAggregateIDs
}
func (b *SearchQueryBuilder) GetTx() *sql.Tx {
return b.tx
}
@ -136,6 +141,20 @@ func (q SearchQuery) GetPositionAfter() float64 {
return q.positionAfter
}
type ExclusionQuery struct {
builder *SearchQueryBuilder
aggregateTypes []AggregateType
eventTypes []EventType
}
func (q ExclusionQuery) GetAggregateTypes() []AggregateType {
return q.aggregateTypes
}
func (q ExclusionQuery) GetEventTypes() []EventType {
return q.eventTypes
}
// Columns defines which fields of the event are needed for the query
type Columns int8
@ -346,6 +365,16 @@ func (builder *SearchQueryBuilder) AddQuery() *SearchQuery {
return query
}
// ExcludeAggregateIDs excludes events from the aggregate IDs returned by the [ExclusionQuery].
// There can be only 1 exclusion query. Subsequent calls overwrite previous definitions.
func (builder *SearchQueryBuilder) ExcludeAggregateIDs() *ExclusionQuery {
query := &ExclusionQuery{
builder: builder,
}
builder.excludeAggregateIDs = query
return query
}
// Or creates a new sub query on the search query builder
func (query SearchQuery) Or() *SearchQuery {
return query.builder.AddQuery()
@ -398,3 +427,20 @@ func (query *SearchQuery) matches(command Command) bool {
}
return true
}
// AggregateTypes filters for events with the given aggregate types
func (query *ExclusionQuery) AggregateTypes(types ...AggregateType) *ExclusionQuery {
query.aggregateTypes = types
return query
}
// EventTypes filters for events with the given event types
func (query *ExclusionQuery) EventTypes(types ...EventType) *ExclusionQuery {
query.eventTypes = types
return query
}
// Builder returns the SearchQueryBuilder of the sub query
func (query *ExclusionQuery) Builder() *SearchQueryBuilder {
return query.builder
}