From 09078b4e23f17b313a8039ec7a2f9aea1d705f1c Mon Sep 17 00:00:00 2001 From: tnsimon Date: Tue, 26 Nov 2024 22:20:28 +1100 Subject: [PATCH] feat: add node autonomy duration to lifecycle controller (#2201) * feat: add node autonomy duration to lifecycle controller Co-authored-by: Simon Tien --- pkg/projectinfo/projectinfo.go | 5 + .../controller/util/node/controller_utils.go | 16 ++- .../podbinding/pod_binding_controller.go | 55 ++++++-- .../podbinding/pod_binding_controller_test.go | 122 ++++++++++++++++++ 4 files changed, 185 insertions(+), 13 deletions(-) diff --git a/pkg/projectinfo/projectinfo.go b/pkg/projectinfo/projectinfo.go index fa83f5d4127..0142dd7f1d5 100644 --- a/pkg/projectinfo/projectinfo.go +++ b/pkg/projectinfo/projectinfo.go @@ -101,6 +101,11 @@ func GetAutonomyAnnotation() string { return fmt.Sprintf("node.beta.%s/autonomy", labelPrefix) } +// GetNodeAutonomyDurationAnnotation returns annotation key for node autonomy duration +func GetNodeAutonomyDurationAnnotation() string { + return fmt.Sprintf("node.%s/autonomy-duration", labelPrefix) +} + // normalizeGitCommit reserve 7 characters for gitCommit func normalizeGitCommit(commit string) string { if len(commit) > 7 { diff --git a/pkg/yurtmanager/controller/util/node/controller_utils.go b/pkg/yurtmanager/controller/util/node/controller_utils.go index 825db699007..fb9ad0eb48b 100644 --- a/pkg/yurtmanager/controller/util/node/controller_utils.go +++ b/pkg/yurtmanager/controller/util/node/controller_utils.go @@ -447,12 +447,18 @@ func addOrUpdateLabelsOnNode(kubeClient clientset.Interface, nodeName string, la }) } +// IsPodBoundenToNode checks if the pod is bound to the node based on annotations. +// If the pod is bound to the node, it will return true; otherwise, it will return false. 
+// The pod is bound to the node if the node has any of the following annotations: +// - apps.openyurt.io/binding: "true" +// - node.beta.openyurt.io/autonomy: "true" +// - node.openyurt.io/autonomy-duration: "<duration>" (any non-empty value) func IsPodBoundenToNode(node *v1.Node) bool { - if node.Annotations != nil && - (node.Annotations[projectinfo.GetAutonomyAnnotation()] == "true" || - node.Annotations[PodBindingAnnotation] == "true") { - return true + if node.Annotations == nil { + return false } - return false + return node.Annotations[PodBindingAnnotation] == "true" || + node.Annotations[projectinfo.GetAutonomyAnnotation()] == "true" || + node.Annotations[projectinfo.GetNodeAutonomyDurationAnnotation()] != "" } diff --git a/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller.go b/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller.go index 1de42d01328..d8eea09e720 100644 --- a/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller.go +++ b/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller.go @@ -19,6 +19,7 @@ package podbinding import ( "context" "fmt" + "time" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -34,12 +35,16 @@ import ( yurtClient "github.com/openyurtio/openyurt/cmd/yurt-manager/app/client" appconfig "github.com/openyurtio/openyurt/cmd/yurt-manager/app/config" "github.com/openyurtio/openyurt/cmd/yurt-manager/names" + "github.com/openyurtio/openyurt/pkg/projectinfo" nodeutil "github.com/openyurtio/openyurt/pkg/yurtmanager/controller/util/node" ) +const ( + defaultTolerationSeconds int64 = 300 +) + var ( - controllerKind = appsv1.SchemeGroupVersion.WithKind("Node") - defaultTolerationSeconds = 300 + controllerKind = appsv1.SchemeGroupVersion.WithKind("Node") notReadyToleration = corev1.Toleration{ Key: corev1.TaintNodeNotReady, @@ -149,12 +154,8 @@ func (r *ReconcilePodBinding) processNode(node *corev1.Node) error { // pod binding takes precedence against node autonomy if 
nodeutil.IsPodBoundenToNode(node) { - if err := r.configureTolerationForPod(pod, nil); err != nil { - klog.Errorf(Format("could not configure toleration of pod, %v", err)) - } - } else { - tolerationSeconds := int64(defaultTolerationSeconds) - if err := r.configureTolerationForPod(pod, &tolerationSeconds); err != nil { + durationSeconds := getPodTolerationSeconds(node) + if err := r.configureTolerationForPod(pod, durationSeconds); err != nil { klog.Errorf(Format("could not configure toleration of pod, %v", err)) } } @@ -247,3 +248,41 @@ func addOrUpdateTolerationInPodSpec(spec *corev1.PodSpec, toleration *corev1.Tol spec.Tolerations = newTolerations return true } + +// getPodTolerationSeconds returns the tolerationSeconds for the pod on the node. +// The tolerationSeconds is calculated based on the following rules: +// 1. The default tolerationSeconds is 300 if pod binding, node autonomy and autonomy duration are not set. +// 2. If pod binding or node autonomy is set to "true", the tolerationSeconds is nil. +// 3. Otherwise, if the node has the node autonomy duration annotation, the tolerationSeconds is the duration in seconds. +// 4. If the autonomy duration is parsed as 0 or cannot be parsed, the tolerationSeconds is nil which means the pod will not be evicted. 
+func getPodTolerationSeconds(node *corev1.Node) *int64 { + tolerationSeconds := defaultTolerationSeconds + if len(node.Annotations) == 0 { + return &tolerationSeconds + } + + // Pod binding takes precedence against node autonomy + if node.Annotations[nodeutil.PodBindingAnnotation] == "true" || + node.Annotations[projectinfo.GetAutonomyAnnotation()] == "true" { + return nil + } + + // Node autonomy duration has the least precedence + duration, ok := node.Annotations[projectinfo.GetNodeAutonomyDurationAnnotation()] + if !ok { + return &tolerationSeconds + } + + durationTime, err := time.ParseDuration(duration) + if err != nil { + klog.Errorf(Format("could not parse duration %s, %v", duration, err)) + return nil + } + + if durationTime == 0 { + return nil + } + + tolerationSeconds = int64(durationTime.Seconds()) + return &tolerationSeconds +} diff --git a/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller_test.go b/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller_test.go index 42be82c65aa..df1840d4fd6 100644 --- a/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller_test.go +++ b/pkg/yurtmanager/controller/yurtcoordinator/podbinding/pod_binding_controller_test.go @@ -61,6 +61,44 @@ func prepareNodes() []client.Object { }, }, }, + &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node4", + Annotations: map[string]string{ + "node.openyurt.io/autonomy-duration": "0", + }, + }, + }, + &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node5", + Annotations: map[string]string{ + "node.openyurt.io/autonomy-duration": "2h", + }, + }, + }, + &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node6", + Annotations: map[string]string{ + "node.openyurt.io/autonomy-duration": "", + }, + }, + }, + &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node7", + Annotations: map[string]string{}, + }, + }, + &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node8", + Annotations: 
map[string]string{ + "other.annotation": "true", + }, + }, + }, } return nodes } @@ -403,6 +441,31 @@ func TestIsPodBoundenToNode(t *testing.T) { node: nodes[2].(*corev1.Node), want: true, }, + { + name: "node4", + node: nodes[3].(*corev1.Node), + want: true, + }, + { + name: "node5", + node: nodes[4].(*corev1.Node), + want: true, + }, + { + name: "node6", + node: nodes[5].(*corev1.Node), + want: false, + }, + { + name: "node7", + node: nodes[6].(*corev1.Node), + want: false, + }, + { + name: "node8", + node: nodes[7].(*corev1.Node), + want: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -412,3 +475,62 @@ func TestIsPodBoundenToNode(t *testing.T) { }) } } + +func TestGetPodTolerationSeconds(t *testing.T) { + expectedToleration := int64(7200) + defaultTolerationSeconds := int64(300) + nodes := prepareNodes() + tests := []struct { + name string + node *corev1.Node + want *int64 + }{ + { + name: "node1", + node: nodes[0].(*corev1.Node), + want: &defaultTolerationSeconds, + }, + { + name: "node2", + node: nodes[1].(*corev1.Node), + want: nil, + }, + { + name: "node3", + node: nodes[2].(*corev1.Node), + want: nil, + }, + { + name: "node4", + node: nodes[3].(*corev1.Node), + want: nil, + }, + { + name: "node5", + node: nodes[4].(*corev1.Node), + want: &expectedToleration, + }, + { + name: "node6", + node: nodes[5].(*corev1.Node), + want: nil, + }, + { + name: "node7", + node: nodes[6].(*corev1.Node), + want: &defaultTolerationSeconds, + }, + { + name: "node8", + node: nodes[7].(*corev1.Node), + want: &defaultTolerationSeconds, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := getPodTolerationSeconds(tt.node); !reflect.DeepEqual(got, tt.want) { + t.Errorf("getPodTolerationSeconds() = %v, want %v", got, tt.want) + } + }) + } +}