forgejo/services/webhook/deliver.go
wxiaoguang 6f9c278559
Rewrite queue (#24505)
# ⚠️ Breaking

Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).

If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.

Example:

```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```

Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.

# The problem

The old queue package has some legacy problems:

* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.

It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.

# The new queue package

It keeps using old config and concept as much as possible.

* It only contains two major kinds of concepts:
    * The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.

There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.

Almost ready for review.

TODO:

* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)

## Code coverage:

![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00

326 lines
9.4 KiB
Go

// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package webhook
import (
"context"
"crypto/hmac"
"crypto/sha1"
"crypto/tls"
"encoding/hex"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"sync"
"time"
webhook_model "code.gitea.io/gitea/models/webhook"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/hostmatcher"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/proxy"
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
webhook_module "code.gitea.io/gitea/modules/webhook"
"github.com/gobwas/glob"
"github.com/minio/sha256-simd"
)
// Deliver deliver hook task
func Deliver(ctx context.Context, t *webhook_model.HookTask) error {
w, err := webhook_model.GetWebhookByID(t.HookID)
if err != nil {
return err
}
defer func() {
err := recover()
if err == nil {
return
}
// There was a panic whilst delivering a hook...
log.Error("PANIC whilst trying to deliver webhook task[%d] to webhook %s Panic: %v\nStacktrace: %s", t.ID, w.URL, err, log.Stack(2))
}()
t.IsDelivered = true
var req *http.Request
switch w.HTTPMethod {
case "":
log.Info("HTTP Method for webhook %s empty, setting to POST as default", w.URL)
fallthrough
case http.MethodPost:
switch w.ContentType {
case webhook_model.ContentTypeJSON:
req, err = http.NewRequest("POST", w.URL, strings.NewReader(t.PayloadContent))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
case webhook_model.ContentTypeForm:
forms := url.Values{
"payload": []string{t.PayloadContent},
}
req, err = http.NewRequest("POST", w.URL, strings.NewReader(forms.Encode()))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
}
case http.MethodGet:
u, err := url.Parse(w.URL)
if err != nil {
return fmt.Errorf("unable to deliver webhook task[%d] as cannot parse webhook url %s: %w", t.ID, w.URL, err)
}
vals := u.Query()
vals["payload"] = []string{t.PayloadContent}
u.RawQuery = vals.Encode()
req, err = http.NewRequest("GET", u.String(), nil)
if err != nil {
return fmt.Errorf("unable to deliver webhook task[%d] as unable to create HTTP request for webhook url %s: %w", t.ID, w.URL, err)
}
case http.MethodPut:
switch w.Type {
case webhook_module.MATRIX:
txnID, err := getMatrixTxnID([]byte(t.PayloadContent))
if err != nil {
return err
}
url := fmt.Sprintf("%s/%s", w.URL, url.PathEscape(txnID))
req, err = http.NewRequest("PUT", url, strings.NewReader(t.PayloadContent))
if err != nil {
return fmt.Errorf("unable to deliver webhook task[%d] as cannot create matrix request for webhook url %s: %w", t.ID, w.URL, err)
}
default:
return fmt.Errorf("invalid http method for webhook task[%d] in webhook %s: %v", t.ID, w.URL, w.HTTPMethod)
}
default:
return fmt.Errorf("invalid http method for webhook task[%d] in webhook %s: %v", t.ID, w.URL, w.HTTPMethod)
}
var signatureSHA1 string
var signatureSHA256 string
if len(w.Secret) > 0 {
sig1 := hmac.New(sha1.New, []byte(w.Secret))
sig256 := hmac.New(sha256.New, []byte(w.Secret))
_, err = io.MultiWriter(sig1, sig256).Write([]byte(t.PayloadContent))
if err != nil {
log.Error("prepareWebhooks.sigWrite: %v", err)
}
signatureSHA1 = hex.EncodeToString(sig1.Sum(nil))
signatureSHA256 = hex.EncodeToString(sig256.Sum(nil))
}
event := t.EventType.Event()
eventType := string(t.EventType)
req.Header.Add("X-Gitea-Delivery", t.UUID)
req.Header.Add("X-Gitea-Event", event)
req.Header.Add("X-Gitea-Event-Type", eventType)
req.Header.Add("X-Gitea-Signature", signatureSHA256)
req.Header.Add("X-Gogs-Delivery", t.UUID)
req.Header.Add("X-Gogs-Event", event)
req.Header.Add("X-Gogs-Event-Type", eventType)
req.Header.Add("X-Gogs-Signature", signatureSHA256)
req.Header.Add("X-Hub-Signature", "sha1="+signatureSHA1)
req.Header.Add("X-Hub-Signature-256", "sha256="+signatureSHA256)
req.Header["X-GitHub-Delivery"] = []string{t.UUID}
req.Header["X-GitHub-Event"] = []string{event}
req.Header["X-GitHub-Event-Type"] = []string{eventType}
// Add Authorization Header
authorization, err := w.HeaderAuthorization()
if err != nil {
log.Error("Webhook could not get Authorization header [%d]: %v", w.ID, err)
return err
}
if authorization != "" {
req.Header["Authorization"] = []string{authorization}
}
// Record delivery information.
t.RequestInfo = &webhook_model.HookRequest{
URL: req.URL.String(),
HTTPMethod: req.Method,
Headers: map[string]string{},
}
for k, vals := range req.Header {
t.RequestInfo.Headers[k] = strings.Join(vals, ",")
}
t.ResponseInfo = &webhook_model.HookResponse{
Headers: map[string]string{},
}
// OK We're now ready to attempt to deliver the task - we must double check that it
// has not been delivered in the meantime
updated, err := webhook_model.MarkTaskDelivered(ctx, t)
if err != nil {
log.Error("MarkTaskDelivered[%d]: %v", t.ID, err)
return fmt.Errorf("unable to mark task[%d] delivered in the db: %w", t.ID, err)
}
if !updated {
// This webhook task has already been attempted to be delivered or is in the process of being delivered
log.Trace("Webhook Task[%d] already delivered", t.ID)
return nil
}
// All code from this point will update the hook task
defer func() {
t.Delivered = timeutil.TimeStampNanoNow()
if t.IsSucceed {
log.Trace("Hook delivered: %s", t.UUID)
} else if !w.IsActive {
log.Trace("Hook delivery skipped as webhook is inactive: %s", t.UUID)
} else {
log.Trace("Hook delivery failed: %s", t.UUID)
}
if err := webhook_model.UpdateHookTask(t); err != nil {
log.Error("UpdateHookTask [%d]: %v", t.ID, err)
}
// Update webhook last delivery status.
if t.IsSucceed {
w.LastStatus = webhook_module.HookStatusSucceed
} else {
w.LastStatus = webhook_module.HookStatusFail
}
if err = webhook_model.UpdateWebhookLastStatus(w); err != nil {
log.Error("UpdateWebhookLastStatus: %v", err)
return
}
}()
if setting.DisableWebhooks {
return fmt.Errorf("webhook task skipped (webhooks disabled): [%d]", t.ID)
}
if !w.IsActive {
log.Trace("Webhook %s in Webhook Task[%d] is not active", w.URL, t.ID)
return nil
}
resp, err := webhookHTTPClient.Do(req.WithContext(ctx))
if err != nil {
t.ResponseInfo.Body = fmt.Sprintf("Delivery: %v", err)
return fmt.Errorf("unable to deliver webhook task[%d] in %s due to error in http client: %w", t.ID, w.URL, err)
}
defer resp.Body.Close()
// Status code is 20x can be seen as succeed.
t.IsSucceed = resp.StatusCode/100 == 2
t.ResponseInfo.Status = resp.StatusCode
for k, vals := range resp.Header {
t.ResponseInfo.Headers[k] = strings.Join(vals, ",")
}
p, err := io.ReadAll(resp.Body)
if err != nil {
t.ResponseInfo.Body = fmt.Sprintf("read body: %s", err)
return fmt.Errorf("unable to deliver webhook task[%d] in %s as unable to read response body: %w", t.ID, w.URL, err)
}
t.ResponseInfo.Body = string(p)
return nil
}
var (
webhookHTTPClient *http.Client
once sync.Once
hostMatchers []glob.Glob
)
func webhookProxy() func(req *http.Request) (*url.URL, error) {
if setting.Webhook.ProxyURL == "" {
return proxy.Proxy()
}
once.Do(func() {
for _, h := range setting.Webhook.ProxyHosts {
if g, err := glob.Compile(h); err == nil {
hostMatchers = append(hostMatchers, g)
} else {
log.Error("glob.Compile %s failed: %v", h, err)
}
}
})
return func(req *http.Request) (*url.URL, error) {
for _, v := range hostMatchers {
if v.Match(req.URL.Host) {
return http.ProxyURL(setting.Webhook.ProxyURLFixed)(req)
}
}
return http.ProxyFromEnvironment(req)
}
}
// Init starts the hooks delivery thread
func Init() error {
timeout := time.Duration(setting.Webhook.DeliverTimeout) * time.Second
allowedHostListValue := setting.Webhook.AllowedHostList
if allowedHostListValue == "" {
allowedHostListValue = hostmatcher.MatchBuiltinExternal
}
allowedHostMatcher := hostmatcher.ParseHostMatchList("webhook.ALLOWED_HOST_LIST", allowedHostListValue)
webhookHTTPClient = &http.Client{
Timeout: timeout,
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: setting.Webhook.SkipTLSVerify},
Proxy: webhookProxy(),
DialContext: hostmatcher.NewDialContext("webhook", allowedHostMatcher, nil),
},
}
hookQueue = queue.CreateUniqueQueue("webhook_sender", handler)
if hookQueue == nil {
return fmt.Errorf("Unable to create webhook_sender Queue")
}
go graceful.GetManager().RunWithShutdownFns(hookQueue.Run)
go graceful.GetManager().RunWithShutdownContext(populateWebhookSendingQueue)
return nil
}
func populateWebhookSendingQueue(ctx context.Context) {
ctx, _, finished := process.GetManager().AddContext(ctx, "Webhook: Populate sending queue")
defer finished()
lowerID := int64(0)
for {
taskIDs, err := webhook_model.FindUndeliveredHookTaskIDs(ctx, lowerID)
if err != nil {
log.Error("Unable to populate webhook queue as FindUndeliveredHookTaskIDs failed: %v", err)
return
}
if len(taskIDs) == 0 {
return
}
lowerID = taskIDs[len(taskIDs)-1]
for _, taskID := range taskIDs {
select {
case <-ctx.Done():
log.Warn("Shutdown before Webhook Sending queue finishing being populated")
return
default:
}
if err := enqueueHookTask(taskID); err != nil {
log.Error("Unable to push HookTask[%d] to the Webhook Sending queue: %v", taskID, err)
}
}
}
}