From 8de928b5e9441d3968e5f79860f4de08d52f24ed Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 7 Mar 2022 11:33:18 +0100 Subject: [PATCH] [performance] Database optimizations (#419) * create first index on notifications * tidy up + add tests * log queries for trace, ops for debug * index commonly used fields * rearrange query * add a few more indexes * remove schema-breaking index (add this back in later) * re-add cleanup query index --- internal/db/bundb/bundb.go | 4 +- .../20220305130328_database_optimizations.go | 291 ++++++++++++++++++ internal/db/bundb/notification.go | 11 +- internal/db/bundb/notification_test.go | 123 ++++++++ internal/db/bundb/trace.go | 6 +- 5 files changed, 427 insertions(+), 8 deletions(-) create mode 100644 internal/db/bundb/migrations/20220305130328_database_optimizations.go create mode 100644 internal/db/bundb/notification_test.go diff --git a/internal/db/bundb/bundb.go b/internal/db/bundb/bundb.go index ebdbc4ba..5ccca062 100644 --- a/internal/db/bundb/bundb.go +++ b/internal/db/bundb/bundb.go @@ -137,8 +137,8 @@ func NewBunDBService(ctx context.Context) (db.DB, error) { } // add a hook to just log queries and the time they take - // only do this for trace logging where performance isn't 1st concern - if logrus.GetLevel() >= logrus.TraceLevel { + // only do this for logging where performance isn't 1st concern + if logrus.GetLevel() >= logrus.DebugLevel { conn.DB.AddQueryHook(newDebugQueryHook()) } diff --git a/internal/db/bundb/migrations/20220305130328_database_optimizations.go b/internal/db/bundb/migrations/20220305130328_database_optimizations.go new file mode 100644 index 00000000..b6a19cac --- /dev/null +++ b/internal/db/bundb/migrations/20220305130328_database_optimizations.go @@ -0,0 +1,291 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package migrations + +import ( + "context" + + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/uptrace/bun" +) + +func init() { + up := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + // ACCOUNTS are often selected by URI, username, or domain + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Account{}). + Index("accounts_uri_idx"). + Column("uri"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Account{}). + Index("accounts_username_idx"). + Column("username"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Account{}). + Index("accounts_domain_idx"). + Column("domain"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Account{}). + Index("accounts_username_domain_idx"). // for selecting local accounts by username + Column("username", "domain"). + Exec(ctx); err != nil { + return err + } + + // NOTIFICATIONS are commonly selected by their target_account_id + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Notification{}). + Index("notifications_target_account_id_idx"). + Column("target_account_id"). + Exec(ctx); err != nil { + return err + } + + // STATUSES are selected in many different ways, so we need quite few indexes + // to avoid queries becoming painfully slow as more statuses get stored + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_uri_idx"). + Column("uri"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_account_id_idx"). + Column("account_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_in_reply_to_account_id_idx"). + Column("in_reply_to_account_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_boost_of_account_id_idx"). + Column("boost_of_account_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_in_reply_to_id_idx"). + Column("in_reply_to_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_boost_of_id_idx"). + Column("boost_of_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_visibility_idx"). + Column("visibility"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Status{}). + Index("statuses_local_idx"). + Column("local"). + Exec(ctx); err != nil { + return err + } + + // DOMAIN BLOCKS are almost always selected by their domain + if _, err := tx. + NewCreateIndex(). + Model(>smodel.DomainBlock{}). + Index("domain_blocks_domain_idx"). + Column("domain"). + Exec(ctx); err != nil { + return err + } + + // INSTANCES are usually selected by their domain + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Instance{}). + Index("instances_domain_idx"). + Column("domain"). + Exec(ctx); err != nil { + return err + } + + // STATUS FAVES are almost always selected by their target status + if _, err := tx. + NewCreateIndex(). + Model(>smodel.StatusFave{}). + Index("status_faves_status_id_idx"). + Column("status_id"). + Exec(ctx); err != nil { + return err + } + + // MENTIONS are almost always selected by their originating status + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Mention{}). + Index("mentions_status_id_idx"). + Column("status_id"). + Exec(ctx); err != nil { + return err + } + + // FOLLOW_REQUESTS and FOLLOWS are usually selected by who they originate from, and who they target + if _, err := tx. + NewCreateIndex(). + Model(>smodel.FollowRequest{}). + Index("follow_requests_account_id_idx"). + Column("account_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.FollowRequest{}). + Index("follow_requests_target_account_id_idx"). + Column("target_account_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Follow{}). + Index("follows_account_id_idx"). + Column("account_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Follow{}). + Index("follows_target_account_id_idx"). + Column("target_account_id"). + Exec(ctx); err != nil { + return err + } + + // BLOCKS are usually selected simultaneously by who they originate from and who they target + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Block{}). + Index("blocks_account_id_target_account_id_idx"). + Column("account_id", "target_account_id"). + Exec(ctx); err != nil { + return err + } + + // MEDIA ATTACHMENTS are often selected by status ID, the account that owns the attachment + if _, err := tx. + NewCreateIndex(). + Model(>smodel.MediaAttachment{}). + Index("media_attachments_status_id_idx"). + Column("status_id"). + Exec(ctx); err != nil { + return err + } + + if _, err := tx. + NewCreateIndex(). + Model(>smodel.MediaAttachment{}). + Index("media_attachments_account_id_idx"). + Column("account_id"). + Exec(ctx); err != nil { + return err + } + + // for media cleanup jobs, attachments will be selected on a bunch of fields so make an index of this... + if _, err := tx. + NewCreateIndex(). + Model(>smodel.MediaAttachment{}). + Index("media_attachments_cleanup_idx"). + Column("cached", "avatar", "header", "created_at", "remote_url"). + Exec(ctx); err != nil { + return err + } + + // TOKENS are usually selected via Access field for user-level tokens + if _, err := tx. + NewCreateIndex(). + Model(>smodel.Token{}). + Index("tokens_access_idx"). + Column("access"). + Exec(ctx); err != nil { + return err + } + + return nil + }) + } + + down := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + return nil + }) + } + + if err := Migrations.Register(up, down); err != nil { + panic(err) + } +} diff --git a/internal/db/bundb/notification.go b/internal/db/bundb/notification.go index 35b7e32a..e030cc62 100644 --- a/internal/db/bundb/notification.go +++ b/internal/db/bundb/notification.go @@ -66,9 +66,7 @@ func (n *notificationDB) GetNotifications(ctx context.Context, accountID string, q := n.conn. NewSelect(). Model(¬ifications). - Column("id"). - Where("target_account_id = ?", accountID). - Order("id DESC") + Column("id") if maxID != "" { q = q.Where("id < ?", maxID) @@ -78,6 +76,10 @@ func (n *notificationDB) GetNotifications(ctx context.Context, accountID string, q = q.Where("id > ?", sinceID) } + q = q. + Where("target_account_id = ?", accountID). + Order("id DESC") + if limit != 0 { q = q.Limit(limit) } @@ -120,8 +122,7 @@ func (n *notificationDB) putNotificationCache(notif *gtsmodel.Notification) { } func (n *notificationDB) getNotificationDB(ctx context.Context, id string, dst *gtsmodel.Notification) error { - q := n.newNotificationQ(dst). - Where("notification.id = ?", id) + q := n.newNotificationQ(dst).WherePK() if err := q.Scan(ctx); err != nil { return n.conn.ProcessError(err) diff --git a/internal/db/bundb/notification_test.go b/internal/db/bundb/notification_test.go new file mode 100644 index 00000000..b14704dc --- /dev/null +++ b/internal/db/bundb/notification_test.go @@ -0,0 +1,123 @@ +/* + GoToSocial + Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . +*/ + +package bundb_test + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/id" +) + +func (suite *NotificationTestSuite) spamNotifs() { + // spam a shit ton of notifs into the database + // half of them will be for zork, the other half + // will be for random accounts + notifCount := 10000 + + zork := suite.testAccounts["local_account_1"] + + for i := 0; i < notifCount; i++ { + notifID, err := id.NewULID() + if err != nil { + panic(err) + } + + var targetAccountID string + if i%2 == 0 { + targetAccountID = zork.ID + } else { + randomAssID, err := id.NewRandomULID() + if err != nil { + panic(err) + } + targetAccountID = randomAssID + } + + statusID, err := id.NewRandomULID() + if err != nil { + panic(err) + } + + originAccountID, err := id.NewRandomULID() + if err != nil { + panic(err) + } + + notif := >smodel.Notification{ + ID: notifID, + NotificationType: gtsmodel.NotificationFave, + CreatedAt: time.Now(), + TargetAccountID: targetAccountID, + OriginAccountID: originAccountID, + StatusID: statusID, + Read: false, + } + + if err := suite.db.Put(context.Background(), notif); err != nil { + panic(err) + } + } + + fmt.Printf("\n\n\nput %d notifs in the db\n\n\n", notifCount) +} + +type NotificationTestSuite struct { + BunDBStandardTestSuite +} + +func (suite *NotificationTestSuite) TestGetNotificationsWithSpam() { + suite.spamNotifs() + testAccount := suite.testAccounts["local_account_1"] + before := time.Now() + notifications, err := suite.db.GetNotifications(context.Background(), testAccount.ID, 20, "ZZZZZZZZZZZZZZZZZZZZZZZZZZ", "00000000000000000000000000") + suite.NoError(err) + timeTaken := time.Since(before) + fmt.Printf("\n\n\n withSpam: got %d notifications in %s\n\n\n", len(notifications), timeTaken) + + suite.NotNil(notifications) + for _, n := range notifications { + suite.Equal(testAccount.ID, n.TargetAccountID) + } +} + +func (suite *NotificationTestSuite) TestGetNotificationsWithoutSpam() { + testAccount := suite.testAccounts["local_account_1"] + before := time.Now() + notifications, err := suite.db.GetNotifications(context.Background(), testAccount.ID, 20, "ZZZZZZZZZZZZZZZZZZZZZZZZZZ", "00000000000000000000000000") + suite.NoError(err) + timeTaken := time.Since(before) + fmt.Printf("\n\n\n withoutSpam: got %d notifications in %s\n\n\n", len(notifications), timeTaken) + + suite.NotNil(notifications) + for _, n := range notifications { + suite.Equal(testAccount.ID, n.TargetAccountID) + suite.NotNil(n.OriginAccount) + suite.NotNil(n.TargetAccount) + suite.NotNil(n.Status) + } +} + +func TestNotificationTestSuite(t *testing.T) { + suite.Run(t, new(NotificationTestSuite)) +} diff --git a/internal/db/bundb/trace.go b/internal/db/bundb/trace.go index 9eaad688..93c23178 100644 --- a/internal/db/bundb/trace.go +++ b/internal/db/bundb/trace.go @@ -47,5 +47,9 @@ func (q *debugQueryHook) AfterQuery(_ context.Context, event *bun.QueryEvent) { "operation": event.Operation(), }) - l.Tracef("[%s] %s", dur, event.Operation()) + if logrus.GetLevel() == logrus.TraceLevel { + l.Tracef("[%s] %s", dur, event.Query) + } else { + l.Debugf("[%s] %s", dur, event.Operation()) + } }