From 61b567205140d049d510c9414aede9c89e28616d Mon Sep 17 00:00:00 2001 From: Kleber Rocha Date: Tue, 22 Aug 2023 17:34:59 -0300 Subject: [PATCH] Add option to configure tolerations in kubernetes backend (#2249) This change adds support for tolerations in the Kubernetes backend. --------- Signed-off-by: Kleber Rocha --- .../22-backends/40-kubernetes.md | 11 ++++++++ pipeline/backend/kubernetes/pod.go | 17 +++++++++++++ pipeline/backend/types/backend_kubernetes.go | 25 +++++++++++++++++++ pipeline/frontend/yaml/compiler/convert.go | 12 +++++++++ .../frontend/yaml/types/backend_options.go | 24 ++++++++++++++++++ 5 files changed, 89 insertions(+) diff --git a/docs/docs/30-administration/22-backends/40-kubernetes.md b/docs/docs/30-administration/22-backends/40-kubernetes.md index 20c84a022..21182e630 100644 --- a/docs/docs/30-administration/22-backends/40-kubernetes.md +++ b/docs/docs/30-administration/22-backends/40-kubernetes.md @@ -84,6 +84,11 @@ Specify the label which is used to select the node where the job should be execu By default the pod will use "kubernetes.io/arch" inferred from top-level "platform" setting which is deducted from the agents' environment variable CI_SYSTEM_PLATFORM. To overwrite this, you need to specify this label in the nodeSelector section. See the [kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector) for more information on using nodeSelector. +### tolerations + +When you use nodeSelector and the node pool is configured with Taints, you need to specify the Tolerations. Tolerations allow the scheduler to schedule pods onto nodes with matching taints. +See the [kubernetes documentation](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) for more information on using tolerations. 
+ Example pipeline configuration: ```yaml @@ -105,6 +110,12 @@ steps: memory: 256Mi nodeSelector: beta.kubernetes.io/instance-type: p3.8xlarge + tolerations: + - key: "key1" + operator: "Equal" + value: "value1" + effect: "NoSchedule" + tolerationSeconds: 3600 ``` ### Volumes diff --git a/pipeline/backend/kubernetes/pod.go b/pipeline/backend/kubernetes/pod.go index da2a3e58a..fd7b42d4b 100644 --- a/pipeline/backend/kubernetes/pod.go +++ b/pipeline/backend/kubernetes/pod.go @@ -125,6 +125,22 @@ func Pod(namespace string, step *types.Step, labels, annotations map[string]stri } } + var tolerations []v1.Toleration + beTolerations := step.BackendOptions.Kubernetes.Tolerations + if len(beTolerations) > 0 { + for _, t := range step.BackendOptions.Kubernetes.Tolerations { + toleration := v1.Toleration{ + Key: t.Key, + Operator: v1.TolerationOperator(t.Operator), + Value: t.Value, + Effect: v1.TaintEffect(t.Effect), + TolerationSeconds: t.TolerationSeconds, + } + tolerations = append(tolerations, toleration) + } + log.Trace().Msgf("Tolerations that will be used in the backend options: %v", beTolerations) + } + pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: podName, @@ -136,6 +152,7 @@ func Pod(namespace string, step *types.Step, labels, annotations map[string]stri RestartPolicy: v1.RestartPolicyNever, HostAliases: hostAliases, NodeSelector: nodeSelector, + Tolerations: tolerations, ServiceAccountName: serviceAccountName, Containers: []v1.Container{{ Name: podName, diff --git a/pipeline/backend/types/backend_kubernetes.go b/pipeline/backend/types/backend_kubernetes.go index b7cf36586..0b5947108 100644 --- a/pipeline/backend/types/backend_kubernetes.go +++ b/pipeline/backend/types/backend_kubernetes.go @@ -19,6 +19,7 @@ type KubernetesBackendOptions struct { Resources Resources `json:"resouces,omitempty"` ServiceAccountName string `json:"serviceAccountName,omitempty"` NodeSelector map[string]string `json:"nodeSelector,omitempty"` + Tolerations []Toleration 
`json:"tolerations,omitempty"` } // Resources defines two maps for kubernetes resource definitions @@ -26,3 +27,27 @@ type Resources struct { Requests map[string]string `json:"requests,omitempty"` Limits map[string]string `json:"limits,omitempty"` } + +// Toleration defines a Kubernetes toleration +type Toleration struct { + Key string `json:"key,omitempty"` + Operator TolerationOperator `json:"operator,omitempty"` + Value string `json:"value,omitempty"` + Effect TaintEffect `json:"effect,omitempty"` + TolerationSeconds *int64 `json:"tolerationSeconds,omitempty"` +} + +type TaintEffect string + +const ( + TaintEffectNoSchedule TaintEffect = "NoSchedule" + TaintEffectPreferNoSchedule TaintEffect = "PreferNoSchedule" + TaintEffectNoExecute TaintEffect = "NoExecute" +) + +type TolerationOperator string + +const ( + TolerationOpExists TolerationOperator = "Exists" + TolerationOpEqual TolerationOperator = "Equal" +) diff --git a/pipeline/frontend/yaml/compiler/convert.go b/pipeline/frontend/yaml/compiler/convert.go index fbf576f5e..4fb10489b 100644 --- a/pipeline/frontend/yaml/compiler/convert.go +++ b/pipeline/frontend/yaml/compiler/convert.go @@ -116,6 +116,17 @@ func (c *Compiler) createProcess(name string, container *yaml_types.Container, s } } + var tolerations []backend_types.Toleration + for _, t := range container.BackendOptions.Kubernetes.Tolerations { + tolerations = append(tolerations, backend_types.Toleration{ + Key: t.Key, + Operator: backend_types.TolerationOperator(t.Operator), + Value: t.Value, + Effect: backend_types.TaintEffect(t.Effect), + TolerationSeconds: t.TolerationSeconds, + }) + } + // Kubernetes advanced settings backendOptions := backend_types.BackendOptions{ Kubernetes: backend_types.KubernetesBackendOptions{ @@ -125,6 +136,7 @@ func (c *Compiler) createProcess(name string, container *yaml_types.Container, s }, ServiceAccountName: container.BackendOptions.Kubernetes.ServiceAccountName, NodeSelector: container.BackendOptions.Kubernetes.NodeSelector, + 
Tolerations: tolerations, }, } diff --git a/pipeline/frontend/yaml/types/backend_options.go b/pipeline/frontend/yaml/types/backend_options.go index b99999798..650b7de16 100644 --- a/pipeline/frontend/yaml/types/backend_options.go +++ b/pipeline/frontend/yaml/types/backend_options.go @@ -23,9 +23,33 @@ type KubernetesBackendOptions struct { Resources Resources `yaml:"resources,omitempty"` ServiceAccountName string `yaml:"serviceAccountName,omitempty"` NodeSelector map[string]string `yaml:"nodeSelector,omitempty"` + Tolerations []Toleration `yaml:"tolerations,omitempty"` } type Resources struct { Requests map[string]string `yaml:"requests,omitempty"` Limits map[string]string `yaml:"limits,omitempty"` } + +type Toleration struct { + Key string `yaml:"key,omitempty"` + Operator TolerationOperator `yaml:"operator,omitempty"` + Value string `yaml:"value,omitempty"` + Effect TaintEffect `yaml:"effect,omitempty"` + TolerationSeconds *int64 `yaml:"tolerationSeconds,omitempty"` +} + +type TaintEffect string + +const ( + TaintEffectNoSchedule TaintEffect = "NoSchedule" + TaintEffectPreferNoSchedule TaintEffect = "PreferNoSchedule" + TaintEffectNoExecute TaintEffect = "NoExecute" +) + +type TolerationOperator string + +const ( + TolerationOpExists TolerationOperator = "Exists" + TolerationOpEqual TolerationOperator = "Equal" +)