forgejo/services/webhook/webhook.go
wxiaoguang 6f9c278559
Rewrite queue (#24505)
# ⚠️ Breaking

Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).

If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.

Example:

```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```

Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.

# The problem

The old queue package has some legacy problems:

* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.

It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.

# The new queue package

It keeps using old config and concept as much as possible.

* It only contains two major kinds of concepts:
    * The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.

There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.

Almost ready for review.

TODO:

* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)

## Code coverage:

![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00

272 lines
6.9 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package webhook
import (
"context"
"fmt"
"strings"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
webhook_model "code.gitea.io/gitea/models/webhook"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/util"
webhook_module "code.gitea.io/gitea/modules/webhook"
"github.com/gobwas/glob"
)
type webhook struct {
name webhook_module.HookType
payloadCreator func(p api.Payloader, event webhook_module.HookEventType, meta string) (api.Payloader, error)
}
var webhooks = map[webhook_module.HookType]*webhook{
webhook_module.SLACK: {
name: webhook_module.SLACK,
payloadCreator: GetSlackPayload,
},
webhook_module.DISCORD: {
name: webhook_module.DISCORD,
payloadCreator: GetDiscordPayload,
},
webhook_module.DINGTALK: {
name: webhook_module.DINGTALK,
payloadCreator: GetDingtalkPayload,
},
webhook_module.TELEGRAM: {
name: webhook_module.TELEGRAM,
payloadCreator: GetTelegramPayload,
},
webhook_module.MSTEAMS: {
name: webhook_module.MSTEAMS,
payloadCreator: GetMSTeamsPayload,
},
webhook_module.FEISHU: {
name: webhook_module.FEISHU,
payloadCreator: GetFeishuPayload,
},
webhook_module.MATRIX: {
name: webhook_module.MATRIX,
payloadCreator: GetMatrixPayload,
},
webhook_module.WECHATWORK: {
name: webhook_module.WECHATWORK,
payloadCreator: GetWechatworkPayload,
},
webhook_module.PACKAGIST: {
name: webhook_module.PACKAGIST,
payloadCreator: GetPackagistPayload,
},
}
// IsValidHookTaskType returns true if a webhook registered
func IsValidHookTaskType(name string) bool {
if name == webhook_module.GITEA || name == webhook_module.GOGS {
return true
}
_, ok := webhooks[name]
return ok
}
// hookQueue is a global queue of web hooks
var hookQueue *queue.WorkerPoolQueue[int64]
// getPayloadBranch returns branch for hook event, if applicable.
func getPayloadBranch(p api.Payloader) string {
switch pp := p.(type) {
case *api.CreatePayload:
if pp.RefType == "branch" {
return pp.Ref
}
case *api.DeletePayload:
if pp.RefType == "branch" {
return pp.Ref
}
case *api.PushPayload:
if strings.HasPrefix(pp.Ref, git.BranchPrefix) {
return pp.Ref[len(git.BranchPrefix):]
}
}
return ""
}
// EventSource represents the source of a webhook action. Repository and/or Owner must be set.
type EventSource struct {
Repository *repo_model.Repository
Owner *user_model.User
}
// handle delivers hook tasks
func handler(items ...int64) []int64 {
ctx := graceful.GetManager().HammerContext()
for _, taskID := range items {
task, err := webhook_model.GetHookTaskByID(ctx, taskID)
if err != nil {
log.Error("GetHookTaskByID[%d] failed: %v", taskID, err)
continue
}
if task.IsDelivered {
// Already delivered in the meantime
log.Trace("Task[%d] has already been delivered", task.ID)
continue
}
if err := Deliver(ctx, task); err != nil {
log.Error("Unable to deliver webhook task[%d]: %v", task.ID, err)
}
}
return nil
}
func enqueueHookTask(taskID int64) error {
err := hookQueue.Push(taskID)
if err != nil && err != queue.ErrAlreadyInQueue {
return err
}
return nil
}
func checkBranch(w *webhook_model.Webhook, branch string) bool {
if w.BranchFilter == "" || w.BranchFilter == "*" {
return true
}
g, err := glob.Compile(w.BranchFilter)
if err != nil {
// should not really happen as BranchFilter is validated
log.Error("CheckBranch failed: %s", err)
return false
}
return g.Match(branch)
}
// PrepareWebhook creates a hook task and enqueues it for processing
func PrepareWebhook(ctx context.Context, w *webhook_model.Webhook, event webhook_module.HookEventType, p api.Payloader) error {
// Skip sending if webhooks are disabled.
if setting.DisableWebhooks {
return nil
}
for _, e := range w.EventCheckers() {
if event == e.Type {
if !e.Has() {
return nil
}
break
}
}
// Avoid sending "0 new commits" to non-integration relevant webhooks (e.g. slack, discord, etc.).
// Integration webhooks (e.g. drone) still receive the required data.
if pushEvent, ok := p.(*api.PushPayload); ok &&
w.Type != webhook_module.GITEA && w.Type != webhook_module.GOGS &&
len(pushEvent.Commits) == 0 {
return nil
}
// If payload has no associated branch (e.g. it's a new tag, issue, etc.),
// branch filter has no effect.
if branch := getPayloadBranch(p); branch != "" {
if !checkBranch(w, branch) {
log.Info("Branch %q doesn't match branch filter %q, skipping", branch, w.BranchFilter)
return nil
}
}
var payloader api.Payloader
var err error
webhook, ok := webhooks[w.Type]
if ok {
payloader, err = webhook.payloadCreator(p, event, w.Meta)
if err != nil {
return fmt.Errorf("create payload for %s[%s]: %w", w.Type, event, err)
}
} else {
payloader = p
}
task, err := webhook_model.CreateHookTask(ctx, &webhook_model.HookTask{
HookID: w.ID,
Payloader: payloader,
EventType: event,
})
if err != nil {
return fmt.Errorf("CreateHookTask: %w", err)
}
return enqueueHookTask(task.ID)
}
// PrepareWebhooks adds new webhooks to task queue for given payload.
func PrepareWebhooks(ctx context.Context, source EventSource, event webhook_module.HookEventType, p api.Payloader) error {
owner := source.Owner
var ws []*webhook_model.Webhook
if source.Repository != nil {
repoHooks, err := webhook_model.ListWebhooksByOpts(ctx, &webhook_model.ListWebhookOptions{
RepoID: source.Repository.ID,
IsActive: util.OptionalBoolTrue,
})
if err != nil {
return fmt.Errorf("ListWebhooksByOpts: %w", err)
}
ws = append(ws, repoHooks...)
owner = source.Repository.MustOwner(ctx)
}
// append additional webhooks of a user or organization
if owner != nil {
ownerHooks, err := webhook_model.ListWebhooksByOpts(ctx, &webhook_model.ListWebhookOptions{
OwnerID: owner.ID,
IsActive: util.OptionalBoolTrue,
})
if err != nil {
return fmt.Errorf("ListWebhooksByOpts: %w", err)
}
ws = append(ws, ownerHooks...)
}
// Add any admin-defined system webhooks
systemHooks, err := webhook_model.GetSystemWebhooks(ctx, util.OptionalBoolTrue)
if err != nil {
return fmt.Errorf("GetSystemWebhooks: %w", err)
}
ws = append(ws, systemHooks...)
if len(ws) == 0 {
return nil
}
for _, w := range ws {
if err := PrepareWebhook(ctx, w, event, p); err != nil {
return err
}
}
return nil
}
// ReplayHookTask replays a webhook task
func ReplayHookTask(ctx context.Context, w *webhook_model.Webhook, uuid string) error {
task, err := webhook_model.ReplayHookTask(ctx, w.ID, uuid)
if err != nil {
return err
}
return enqueueHookTask(task.ID)
}