[GITEA] Avoid WHERE IN for comment migration query

- Rewrite `UpdateCommentsMigrationsByType` to not use `WHERE IN` as
that's a performance diaster for MariaDB, it now use batching to query
the the relevant comment IDs via JOINs (which is not possible in a
UPDATE query for SQLite) and then update them in a seperate query.
- Add unit test.
- Resolves https://codeberg.org/forgejo/forgejo/issues/1856
This commit is contained in:
Gusted 2023-12-17 16:39:58 +01:00
parent caff546249
commit 8098ca9d2e
No known key found for this signature in database
GPG key ID: FD821B732837125F
2 changed files with 57 additions and 16 deletions

View file

@ -1177,22 +1177,36 @@ func DeleteComment(ctx context.Context, comment *Comment) error {
// UpdateCommentsMigrationsByType updates comments' migrations information via given git service type and original id and poster id
func UpdateCommentsMigrationsByType(ctx context.Context, tp structs.GitServiceType, originalAuthorID string, posterID int64) error {
_, err := db.GetEngine(ctx).Table("comment").
Where(builder.In("issue_id",
builder.Select("issue.id").
From("issue").
InnerJoin("repository", "issue.repo_id = repository.id").
Where(builder.Eq{
"repository.original_service_type": tp,
}),
)).
And("comment.original_author_id = ?", originalAuthorID).
Update(map[string]any{
"poster_id": posterID,
"original_author": "",
"original_author_id": 0,
})
return err
const batchSize = 50
for {
commentIDs := make([]int64, 0, batchSize)
if err := db.GetEngine(ctx).
Table("comment").
Select("`comment`.id").
Join("INNER", "issue", "`comment`.issue_id = `issue`.id").
Join("INNER", "repository", "`repository`.id = `issue`.repo_id").
Where("`repository`.original_service_type = ? AND `comment`.original_author_id = ?", tp, originalAuthorID).
Limit(batchSize, 0).
Find(&commentIDs); err != nil {
return err
}
for _, commentID := range commentIDs {
if _, err := db.GetEngine(ctx).Table(&Comment{}).ID(commentID).Update(map[string]any{
"poster_id": posterID,
"original_author": "",
"original_author_id": 0,
}); err != nil {
return err
}
}
if len(commentIDs) < batchSize {
break
}
}
return nil
}
// CreateAutoMergeComment is a internal function, only use it for CommentTypePRScheduledToAutoMerge and CommentTypePRUnScheduledToAutoMerge CommentTypes

View file

@ -12,6 +12,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/structs"
"github.com/stretchr/testify/assert"
)
@ -97,3 +98,29 @@ func TestMigrate_InsertIssueComments(t *testing.T) {
unittest.CheckConsistencyFor(t, &issues_model.Issue{})
}
func TestUpdateCommentsMigrationsByType(t *testing.T) {
assert.NoError(t, unittest.PrepareTestDatabase())
issue := unittest.AssertExistsAndLoadBean(t, &issues_model.Issue{ID: 1})
repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: issue.RepoID})
comment := unittest.AssertExistsAndLoadBean(t, &issues_model.Comment{ID: 1, IssueID: issue.ID})
// Set repository to migrated from Gitea.
repo.OriginalServiceType = structs.GiteaService
repo_model.UpdateRepositoryCols(db.DefaultContext, repo, "original_service_type")
// Set comment to have an original author.
comment.OriginalAuthor = "Example User"
comment.OriginalAuthorID = 1
comment.PosterID = 0
_, err := db.GetEngine(db.DefaultContext).ID(comment.ID).Cols("original_author", "original_author_id", "poster_id").Update(comment)
assert.NoError(t, err)
assert.NoError(t, issues_model.UpdateCommentsMigrationsByType(db.DefaultContext, structs.GiteaService, "1", 513))
comment = unittest.AssertExistsAndLoadBean(t, &issues_model.Comment{ID: 1, IssueID: issue.ID})
assert.Empty(t, comment.OriginalAuthor)
assert.Empty(t, comment.OriginalAuthorID)
assert.EqualValues(t, 513, comment.PosterID)
}