From c7467b982895740778a002421b2125eae14bc63f Mon Sep 17 00:00:00 2001 From: Fernando Barbosa Date: Mon, 5 Feb 2024 18:46:14 -0300 Subject: [PATCH] fix: agent panic when node is terminated during step execution (#3331) Fixes https://github.com/woodpecker-ci/woodpecker/issues/3330 This adds error handling on the agent's WaitStep function, on two sections where it could encounter a `panic: runtime error: invalid memory address or nil pointer dereference` in case it could no longer access complete information about a specific pod. This error was found to happen if the node in which the pod was running was terminated during the step's execution, despite active pipelines being executed on the node. Now, instead of a panic on the agent's logs and undefined behavior on the UI, it will display a more helpful error message on the UI. ### Additional context We observed the bug first on v2.1.1, but tested the fix internally on top of 2.3.0. ![image](https://github.com/woodpecker-ci/woodpecker/assets/7269710/dfbcf089-85f7-4b5d-8102-f21af95c5cda) --- pipeline/backend/kubernetes/kubernetes.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/pipeline/backend/kubernetes/kubernetes.go b/pipeline/backend/kubernetes/kubernetes.go index 10c0ec841..cf24c84b3 100644 --- a/pipeline/backend/kubernetes/kubernetes.go +++ b/pipeline/backend/kubernetes/kubernetes.go @@ -263,11 +263,23 @@ func (e *kube) WaitStep(ctx context.Context, step *types.Step, taskUUID string) } if isImagePullBackOffState(pod) { - return nil, fmt.Errorf("could not pull image for pod %s", pod.Name) + return nil, fmt.Errorf("could not pull image for pod %s", podName) + } + + if len(pod.Status.ContainerStatuses) == 0 { + return nil, fmt.Errorf("no container statuses found for pod %s", podName) + } + + cs := pod.Status.ContainerStatuses[0] + + if cs.State.Terminated == nil { + err := fmt.Errorf("no terminated state found for container %s/%s", podName, cs.Name) + log.Error().Str("taskUUID", 
taskUUID).Str("pod", podName).Str("container", cs.Name).Interface("state", cs.State).Msg(err.Error()) + return nil, err } bs := &types.State{ - ExitCode: int(pod.Status.ContainerStatuses[0].State.Terminated.ExitCode), + ExitCode: int(cs.State.Terminated.ExitCode), Exited: true, OOMKilled: false, }