2023-03-19 21:42:21 +00:00
|
|
|
// Copyright 2023 Woodpecker Authors
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2017-06-28 17:21:22 +00:00
|
|
|
package rpc
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"encoding/json"
|
2022-02-24 14:53:44 +00:00
|
|
|
"strings"
|
2019-09-14 12:21:16 +00:00
|
|
|
"time"
|
2017-06-28 17:21:22 +00:00
|
|
|
|
2023-08-18 13:13:13 +00:00
|
|
|
"github.com/cenkalti/backoff/v4"
|
2021-10-12 07:25:13 +00:00
|
|
|
"github.com/rs/zerolog/log"
|
2017-06-28 17:21:22 +00:00
|
|
|
"google.golang.org/grpc"
|
2017-06-29 21:27:06 +00:00
|
|
|
"google.golang.org/grpc/codes"
|
2021-09-24 14:29:26 +00:00
|
|
|
"google.golang.org/grpc/status"
|
2024-09-18 14:29:56 +00:00
|
|
|
grpcproto "google.golang.org/protobuf/proto"
|
2021-10-12 07:25:13 +00:00
|
|
|
|
2023-12-08 07:15:08 +00:00
|
|
|
backend "go.woodpecker-ci.org/woodpecker/v2/pipeline/backend/types"
|
|
|
|
"go.woodpecker-ci.org/woodpecker/v2/pipeline/rpc"
|
|
|
|
"go.woodpecker-ci.org/woodpecker/v2/pipeline/rpc/proto"
|
2017-06-28 17:21:22 +00:00
|
|
|
)
|
|
|
|
|
2024-09-18 14:29:56 +00:00
|
|
|
const (
|
|
|
|
// Set grpc version on compile time to compare against server version response.
|
|
|
|
ClientGrpcVersion int32 = proto.Version
|
|
|
|
|
|
|
|
// Maximum size of an outgoing log message.
|
|
|
|
// Picked to prevent it from going over GRPC size limit (4 MiB) with a large safety margin.
|
|
|
|
maxLogBatchSize int = 1 * 1024 * 1024
|
|
|
|
|
|
|
|
// Maximum amount of time between sending consecutive batched log messages.
|
|
|
|
// Controls the delay between the CI job generating a log record, and web users receiving it.
|
|
|
|
maxLogFlushPeriod time.Duration = time.Second
|
|
|
|
)
|
2023-03-19 21:42:21 +00:00
|
|
|
|
2017-06-28 17:21:22 +00:00
|
|
|
type client struct {
|
2021-09-29 00:10:09 +00:00
|
|
|
client proto.WoodpeckerClient
|
2017-06-28 17:21:22 +00:00
|
|
|
conn *grpc.ClientConn
|
2024-09-18 14:29:56 +00:00
|
|
|
logs chan *proto.LogEntry
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewGrpcClient returns a new grpc Client.
|
2024-09-18 14:29:56 +00:00
|
|
|
func NewGrpcClient(ctx context.Context, conn *grpc.ClientConn) rpc.Peer {
|
2017-06-28 17:21:22 +00:00
|
|
|
client := new(client)
|
2021-09-29 00:10:09 +00:00
|
|
|
client.client = proto.NewWoodpeckerClient(conn)
|
2017-06-28 17:21:22 +00:00
|
|
|
client.conn = conn
|
2024-09-18 14:29:56 +00:00
|
|
|
client.logs = make(chan *proto.LogEntry, 10) // max memory use: 10 lines * 1 MiB
|
|
|
|
go client.processLogs(ctx)
|
2017-06-28 17:21:22 +00:00
|
|
|
return client
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *client) Close() error {
|
2024-09-18 14:29:56 +00:00
|
|
|
close(c.logs)
|
2017-06-28 17:21:22 +00:00
|
|
|
return c.conn.Close()
|
|
|
|
}
|
|
|
|
|
2023-08-18 13:13:13 +00:00
|
|
|
func (c *client) newBackOff() backoff.BackOff {
|
|
|
|
b := backoff.NewExponentialBackOff()
|
agent: Continue to retry indefinitely (#3599)
When the woodpecker server is not reachable (eg. for update,
maintenance, agent connection issue, ...) for a long period of time, the
agent tries continuously to reconnect, without any delay. This creates
**several GB** of logs in a short period of time.
Here is a sample line, repeated indefinitely:
```
{"level":"error","error":"rpc error: code = Unavailable desc = connection error: desc = \"transport: Error while dialing: dial tcp x.x.x.x:xxx: connect: connection refused\"","time":"2024-04-07T17:29:59Z","message":"grpc error: done(): code: Unavailable"}
```
It appears that the [backoff
package](https://pkg.go.dev/github.com/cenkalti/backoff/v4#BackOff),
after a certain amount of time, returns `backoff.Stop` (-1) instead of a
valid delay to wait. It means that no more retry should be made, [as
shown in the
example](https://pkg.go.dev/github.com/cenkalti/backoff/v4#BackOff). But
the code doesn't handle that case and takes -1 as the next delay.
This led to continuous retry with no delay between them and creates a
huge amount of logs.
[`MaxElapsedTime` default is 15
minutes](https://pkg.go.dev/github.com/cenkalti/backoff/v4#pkg-constants),
passed this time, `NextBackOff` returns `backoff.Stop` (-1) instead of
`MaxInterval`.
This commit sets `MaxElapsedTime` to 0, [to avoid `Stop`
return](https://pkg.go.dev/github.com/cenkalti/backoff/v4#ExponentialBackOff).
2024-04-09 00:24:19 +00:00
|
|
|
b.MaxElapsedTime = 0
|
2024-05-13 20:58:21 +00:00
|
|
|
b.MaxInterval = 10 * time.Second //nolint:mnd
|
|
|
|
b.InitialInterval = 10 * time.Millisecond //nolint:mnd
|
2023-08-18 13:13:13 +00:00
|
|
|
return b
|
|
|
|
}
|
|
|
|
|
2024-05-13 20:58:21 +00:00
|
|
|
// Version returns the server- & grpc-version.
|
2023-03-19 21:42:21 +00:00
|
|
|
func (c *client) Version(ctx context.Context) (*rpc.Version, error) {
|
|
|
|
res, err := c.client.Version(ctx, &proto.Empty{})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &rpc.Version{
|
|
|
|
GrpcVersion: res.GrpcVersion,
|
|
|
|
ServerVersion: res.ServerVersion,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2023-08-21 16:30:19 +00:00
|
|
|
// Next returns the next workflow in the queue.
|
2024-08-30 09:44:56 +00:00
|
|
|
func (c *client) Next(ctx context.Context, filter rpc.Filter) (*rpc.Workflow, error) {
|
2023-03-19 21:42:21 +00:00
|
|
|
var res *proto.NextResponse
|
2017-06-28 17:21:22 +00:00
|
|
|
var err error
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2017-06-28 17:21:22 +00:00
|
|
|
req := new(proto.NextRequest)
|
|
|
|
req.Filter = new(proto.Filter)
|
2024-08-30 09:44:56 +00:00
|
|
|
req.Filter.Labels = filter.Labels
|
2017-06-28 17:21:22 +00:00
|
|
|
for {
|
|
|
|
res, err = c.client.Next(ctx, req)
|
|
|
|
if err == nil {
|
|
|
|
break
|
2023-03-18 19:35:27 +00:00
|
|
|
}
|
|
|
|
|
2021-09-24 14:29:26 +00:00
|
|
|
switch status.Code(err) {
|
2024-07-13 01:24:55 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Debug().Err(err).Msgf("grpc error: next(): context canceled")
|
2024-07-13 01:24:55 +00:00
|
|
|
return nil, nil
|
|
|
|
}
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: next(): code: %v", status.Code(err))
|
2024-07-13 01:24:55 +00:00
|
|
|
return nil, err
|
2017-09-12 16:03:32 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 01:24:55 +00:00
|
|
|
// TODO: remove after adding continuous data exchange by something like #536
|
|
|
|
if strings.Contains(err.Error(), "\"too_many_pings\"") {
|
|
|
|
// https://github.com/woodpecker-ci/woodpecker/issues/717#issuecomment-1049365104
|
|
|
|
log.Trace().Err(err).Msg("grpc: to many keepalive pings without sending data")
|
|
|
|
} else {
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: next(): code: %v", status.Code(err))
|
2024-07-13 01:24:55 +00:00
|
|
|
}
|
2017-09-12 16:03:32 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: next(): code: %v", status.Code(err))
|
|
|
|
return nil, err
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
2024-07-13 01:24:55 +00:00
|
|
|
return nil, nil
|
2017-07-20 16:21:15 +00:00
|
|
|
}
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
|
2023-08-28 16:00:52 +00:00
|
|
|
if res.GetWorkflow() == nil {
|
2017-06-28 17:21:22 +00:00
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
|
2023-08-21 16:30:19 +00:00
|
|
|
w := new(rpc.Workflow)
|
2023-08-28 16:00:52 +00:00
|
|
|
w.ID = res.GetWorkflow().GetId()
|
|
|
|
w.Timeout = res.GetWorkflow().GetTimeout()
|
2023-08-21 16:30:19 +00:00
|
|
|
w.Config = new(backend.Config)
|
2023-08-28 16:00:52 +00:00
|
|
|
if err := json.Unmarshal(res.GetWorkflow().GetPayload(), w.Config); err != nil {
|
2023-08-21 16:30:19 +00:00
|
|
|
log.Error().Err(err).Msgf("could not unmarshal workflow config of '%s'", w.ID)
|
2021-11-23 14:36:52 +00:00
|
|
|
}
|
2023-08-21 16:30:19 +00:00
|
|
|
return w, nil
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
|
2023-08-21 16:30:19 +00:00
|
|
|
// Wait blocks until the workflow is complete.
|
2024-08-30 09:44:56 +00:00
|
|
|
func (c *client) Wait(ctx context.Context, workflowID string) (err error) {
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2017-06-28 17:21:22 +00:00
|
|
|
req := new(proto.WaitRequest)
|
2024-08-30 09:44:56 +00:00
|
|
|
req.Id = workflowID
|
2017-06-28 17:21:22 +00:00
|
|
|
for {
|
|
|
|
_, err = c.client.Wait(ctx, req)
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2021-09-24 14:29:26 +00:00
|
|
|
switch status.Code(err) {
|
2024-07-13 23:06:20 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
|
|
|
log.Debug().Err(err).Msgf("grpc error: wait(): context canceled")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
log.Error().Err(err).Msgf("grpc error: wait(): code: %v", status.Code(err))
|
|
|
|
return err
|
2017-09-12 16:03:32 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: wait(): code: %v", status.Code(err))
|
2017-09-12 16:03:32 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: wait(): code: %v", status.Code(err))
|
2017-06-29 21:27:06 +00:00
|
|
|
return err
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-08-21 16:30:19 +00:00
|
|
|
// Init signals the workflow is initialized.
|
2024-07-01 09:20:55 +00:00
|
|
|
func (c *client) Init(ctx context.Context, workflowID string, state rpc.WorkflowState) (err error) {
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2017-06-28 17:21:22 +00:00
|
|
|
req := new(proto.InitRequest)
|
2024-07-01 09:20:55 +00:00
|
|
|
req.Id = workflowID
|
|
|
|
req.State = new(proto.WorkflowState)
|
2017-06-28 17:21:22 +00:00
|
|
|
req.State.Started = state.Started
|
2024-07-01 09:20:55 +00:00
|
|
|
req.State.Finished = state.Finished
|
|
|
|
req.State.Error = state.Error
|
2017-06-28 17:21:22 +00:00
|
|
|
for {
|
|
|
|
_, err = c.client.Init(ctx, req)
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2024-01-10 19:57:12 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2021-09-24 14:29:26 +00:00
|
|
|
switch status.Code(err) {
|
2024-07-13 23:06:20 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
|
|
|
log.Debug().Err(err).Msgf("grpc error: init(): context canceled")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
log.Error().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
|
|
|
|
return err
|
2017-09-12 16:03:32 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
|
2017-09-12 16:03:32 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: init(): code: %v", status.Code(err))
|
2017-06-29 21:27:06 +00:00
|
|
|
return err
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-07-01 09:20:55 +00:00
|
|
|
// Done signals the workflow is complete.
|
|
|
|
func (c *client) Done(ctx context.Context, workflowID string, state rpc.WorkflowState) (err error) {
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2017-06-28 17:21:22 +00:00
|
|
|
req := new(proto.DoneRequest)
|
2024-07-01 09:20:55 +00:00
|
|
|
req.Id = workflowID
|
|
|
|
req.State = new(proto.WorkflowState)
|
2017-06-28 17:21:22 +00:00
|
|
|
req.State.Started = state.Started
|
2024-07-01 09:20:55 +00:00
|
|
|
req.State.Finished = state.Finished
|
|
|
|
req.State.Error = state.Error
|
2017-06-28 17:21:22 +00:00
|
|
|
for {
|
|
|
|
_, err = c.client.Done(ctx, req)
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2024-01-10 19:57:12 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2021-09-24 14:29:26 +00:00
|
|
|
switch status.Code(err) {
|
2024-07-13 23:06:20 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
|
|
|
log.Debug().Err(err).Msgf("grpc error: done(): context canceled")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
log.Error().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
|
|
|
|
return err
|
2017-09-12 16:03:32 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
|
2017-09-12 16:03:32 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: done(): code: %v", status.Code(err))
|
2017-06-29 21:27:06 +00:00
|
|
|
return err
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-05-13 20:58:21 +00:00
|
|
|
// Extend extends the workflow deadline.
|
2024-08-30 09:44:56 +00:00
|
|
|
func (c *client) Extend(ctx context.Context, workflowID string) (err error) {
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2017-06-28 17:21:22 +00:00
|
|
|
req := new(proto.ExtendRequest)
|
2024-08-30 09:44:56 +00:00
|
|
|
req.Id = workflowID
|
2017-06-28 17:21:22 +00:00
|
|
|
for {
|
|
|
|
_, err = c.client.Extend(ctx, req)
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2024-01-10 19:57:12 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2021-09-24 14:29:26 +00:00
|
|
|
switch status.Code(err) {
|
2024-07-13 23:06:20 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
|
|
|
log.Debug().Err(err).Msgf("grpc error: extend(): context canceled")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
log.Error().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
|
|
|
|
return err
|
2017-09-12 16:03:32 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
|
2017-09-12 16:03:32 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: extend(): code: %v", status.Code(err))
|
2017-06-29 21:27:06 +00:00
|
|
|
return err
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-08-21 16:30:19 +00:00
|
|
|
// Update updates the workflow state.
|
2024-08-30 09:44:56 +00:00
|
|
|
func (c *client) Update(ctx context.Context, workflowID string, state rpc.StepState) (err error) {
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2017-06-28 17:21:22 +00:00
|
|
|
req := new(proto.UpdateRequest)
|
2024-08-30 09:44:56 +00:00
|
|
|
req.Id = workflowID
|
2024-07-01 09:20:55 +00:00
|
|
|
req.State = new(proto.StepState)
|
2024-01-09 14:39:09 +00:00
|
|
|
req.State.StepUuid = state.StepUUID
|
2024-07-01 09:20:55 +00:00
|
|
|
req.State.Started = state.Started
|
|
|
|
req.State.Finished = state.Finished
|
|
|
|
req.State.Exited = state.Exited
|
|
|
|
req.State.ExitCode = int32(state.ExitCode)
|
|
|
|
req.State.Error = state.Error
|
2017-06-28 17:21:22 +00:00
|
|
|
for {
|
|
|
|
_, err = c.client.Update(ctx, req)
|
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2024-01-10 19:57:12 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2021-09-24 14:29:26 +00:00
|
|
|
switch status.Code(err) {
|
2024-07-13 23:06:20 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
|
|
|
log.Debug().Err(err).Msgf("grpc error: update(): context canceled")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
log.Error().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
|
|
|
|
return err
|
2017-09-12 16:03:32 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
|
2017-09-12 16:03:32 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: update(): code: %v", status.Code(err))
|
2017-06-29 21:27:06 +00:00
|
|
|
return err
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-09-18 14:29:56 +00:00
|
|
|
// EnqueueLog queues the log entry to be written in a batch later.
|
|
|
|
func (c *client) EnqueueLog(logEntry *rpc.LogEntry) {
|
|
|
|
c.logs <- &proto.LogEntry{
|
|
|
|
StepUuid: logEntry.StepUUID,
|
|
|
|
Data: logEntry.Data,
|
|
|
|
Line: int32(logEntry.Line),
|
|
|
|
Time: logEntry.Time,
|
|
|
|
Type: int32(logEntry.Type),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *client) processLogs(ctx context.Context) {
|
|
|
|
var entries []*proto.LogEntry
|
|
|
|
var bytes int
|
|
|
|
|
|
|
|
send := func() {
|
|
|
|
if len(entries) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debug().
|
|
|
|
Int("entries", len(entries)).
|
|
|
|
Int("bytes", bytes).
|
|
|
|
Msg("log drain: sending queued logs")
|
|
|
|
|
|
|
|
if err := c.sendLogs(ctx, entries); err != nil {
|
|
|
|
log.Error().Err(err).Msg("log drain: could not send logs to server")
|
|
|
|
}
|
|
|
|
|
|
|
|
// even if send failed, we don't have infinite memory; retry has already been used
|
|
|
|
entries = entries[:0]
|
|
|
|
bytes = 0
|
|
|
|
}
|
|
|
|
|
|
|
|
// ctx.Done() is covered by the log channel being closed
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case entry, ok := <-c.logs:
|
|
|
|
if !ok {
|
|
|
|
log.Info().Msg("log drain: channel closed")
|
|
|
|
send()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
entries = append(entries, entry)
|
|
|
|
bytes += grpcproto.Size(entry) // cspell:words grpcproto
|
|
|
|
|
|
|
|
if bytes >= maxLogBatchSize {
|
|
|
|
send()
|
|
|
|
}
|
|
|
|
|
|
|
|
case <-time.After(maxLogFlushPeriod):
|
|
|
|
send()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *client) sendLogs(ctx context.Context, entries []*proto.LogEntry) error {
|
|
|
|
req := &proto.LogRequest{LogEntries: entries}
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2024-09-18 14:29:56 +00:00
|
|
|
|
2017-06-28 17:21:22 +00:00
|
|
|
for {
|
2024-09-18 14:29:56 +00:00
|
|
|
_, err := c.client.Log(ctx, req)
|
2017-06-28 17:21:22 +00:00
|
|
|
if err == nil {
|
|
|
|
break
|
|
|
|
}
|
2023-03-18 19:35:27 +00:00
|
|
|
|
2021-09-24 14:29:26 +00:00
|
|
|
switch status.Code(err) {
|
2024-07-13 23:06:20 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
|
|
|
log.Debug().Err(err).Msgf("grpc error: log(): context canceled")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
log.Error().Err(err).Msgf("grpc error: log(): code: %v", status.Code(err))
|
|
|
|
return err
|
2017-09-12 16:03:32 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: log(): code: %v", status.Code(err))
|
2017-09-12 16:03:32 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: log(): code: %v", status.Code(err))
|
2017-06-29 21:27:06 +00:00
|
|
|
return err
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2017-06-28 17:21:22 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2023-01-28 13:13:04 +00:00
|
|
|
|
2024-10-06 15:13:41 +00:00
|
|
|
func (c *client) RegisterAgent(ctx context.Context, info rpc.AgentInfo) (int64, error) {
|
2023-01-28 13:13:04 +00:00
|
|
|
req := new(proto.RegisterAgentRequest)
|
2024-10-06 15:13:41 +00:00
|
|
|
req.Info = &proto.AgentInfo{
|
|
|
|
Platform: info.Platform,
|
|
|
|
Backend: info.Backend,
|
|
|
|
Version: info.Version,
|
|
|
|
Capacity: int32(info.Capacity),
|
|
|
|
CustomLabels: info.CustomLabels,
|
|
|
|
}
|
2023-01-28 13:13:04 +00:00
|
|
|
|
|
|
|
res, err := c.client.RegisterAgent(ctx, req)
|
|
|
|
return res.GetAgentId(), err
|
|
|
|
}
|
|
|
|
|
2023-11-01 23:53:47 +00:00
|
|
|
func (c *client) UnregisterAgent(ctx context.Context) error {
|
|
|
|
_, err := c.client.UnregisterAgent(ctx, &proto.Empty{})
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-01-28 13:13:04 +00:00
|
|
|
func (c *client) ReportHealth(ctx context.Context) (err error) {
|
2023-08-18 13:13:13 +00:00
|
|
|
retry := c.newBackOff()
|
2023-01-28 13:13:04 +00:00
|
|
|
req := new(proto.ReportHealthRequest)
|
|
|
|
req.Status = "I am alive!"
|
|
|
|
|
|
|
|
for {
|
|
|
|
_, err = c.client.ReportHealth(ctx, req)
|
|
|
|
if err == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
switch status.Code(err) {
|
2024-07-13 23:06:20 +00:00
|
|
|
case codes.Canceled:
|
|
|
|
if ctx.Err() != nil {
|
|
|
|
// expected as context was canceled
|
|
|
|
log.Debug().Err(err).Msgf("grpc error: report_health(): context canceled")
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
log.Error().Err(err).Msgf("grpc error: report_health(): code: %v", status.Code(err))
|
|
|
|
return err
|
2023-01-28 13:13:04 +00:00
|
|
|
case
|
|
|
|
codes.Aborted,
|
|
|
|
codes.DataLoss,
|
|
|
|
codes.DeadlineExceeded,
|
|
|
|
codes.Internal,
|
|
|
|
codes.Unavailable:
|
|
|
|
// non-fatal errors
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Warn().Err(err).Msgf("grpc error: report_health(): code: %v", status.Code(err))
|
2023-01-28 13:13:04 +00:00
|
|
|
default:
|
2024-07-13 23:06:20 +00:00
|
|
|
log.Error().Err(err).Msgf("grpc error: report_health(): code: %v", status.Code(err))
|
2023-01-28 13:13:04 +00:00
|
|
|
return err
|
|
|
|
}
|
2023-08-18 13:13:13 +00:00
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(retry.NextBackOff()):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
}
|
2023-01-28 13:13:04 +00:00
|
|
|
}
|
|
|
|
}
|