diff --git a/pkg/reboot/calculator.go b/pkg/reboot/calculator.go
index 61890ed1..c969310a 100644
--- a/pkg/reboot/calculator.go
+++ b/pkg/reboot/calculator.go
@@ -60,7 +60,13 @@ func (r *calculator) GetRebootDuration(k8sClient client.Client, ctx context.Cont
 		return 0, errors.New("SelfNodeRemediationConfig not set yet, can't calculate minimum reboot duration")
 	}
 
-	watchdogTimeout := utils.GetWatchdogTimeout(node)
+	watchdogTimeout, err := utils.GetWatchdogTimeout(node)
+	if err != nil {
+		// 60s is the maximum default watchdog timeout according to https://docs.kernel.org/watchdog/watchdog-parameters.html
+		defaultWatchdogTimeout := 60 * time.Second
+		r.log.Error(err, "failed to get watchdog timeout from node annotations, will use the default timeout", "node", node.Name, "default timeout in seconds", defaultWatchdogTimeout.Seconds())
+		watchdogTimeout = defaultWatchdogTimeout
+	}
 	minimumCalculatedRebootDuration, err := r.calculateMinimumRebootDuration(k8sClient, ctx, watchdogTimeout)
 	if err != nil {
 		return 0, errors.Wrap(err, "failed to calculate minimum reboot duration")
diff --git a/pkg/utils/annotations.go b/pkg/utils/annotations.go
index 47db7336..327ebffb 100644
--- a/pkg/utils/annotations.go
+++ b/pkg/utils/annotations.go
@@ -72,15 +72,15 @@ func IsSoftwareRebootEnabled() (bool, error) {
 	return softwareRebootEnabled, nil
 }
 
-func GetWatchdogTimeout(node *v1.Node) time.Duration {
+func GetWatchdogTimeout(node *v1.Node) (time.Duration, error) {
 	if node.Annotations == nil {
-		return 0
+		return 0, errors.New("node has no annotations")
 	}
 
 	timeout, err := strconv.Atoi(node.Annotations[WatchdogTimeoutSecondsAnnotation])
 	if err != nil {
-		return 0
+		return 0, errors.Wrapf(err, "failed to convert watchdog timeout to int. value is: %s", node.Annotations[WatchdogTimeoutSecondsAnnotation])
 	}
 
-	return time.Duration(timeout) * time.Second
+	return time.Duration(timeout) * time.Second, nil
 }