From 43632a2a510cea8ec4f82fbf1bc85fc4b737e569 Mon Sep 17 00:00:00 2001 From: Johann Wagner Date: Wed, 19 Jun 2019 12:28:17 +0200 Subject: [PATCH] update-agent: Added reboot-wait parameter This adds a reboot-wait parameter which, after the last pod has been terminated, waits a fixed amount of time so that finalizing operations can complete before the reboot. This solves some problems with storage provisioners like rook. --- cmd/update-agent/main.go | 6 +++++- pkg/agent/agent.go | 9 +++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cmd/update-agent/main.go b/cmd/update-agent/main.go index 8bf046a6b..e7dc3b371 100644 --- a/cmd/update-agent/main.go +++ b/cmd/update-agent/main.go @@ -19,6 +19,8 @@ var ( reapTimeout = flag.Int("grace-period", 600, "Period of time in seconds given to a pod to terminate when rebooting for an update") + rebootWait = flag.Int("reboot-wait", 0, + "Period of time in seconds waiting after last pod deletion for reboot") ) func main() { @@ -44,8 +46,10 @@ func main() { } rt := time.Duration(*reapTimeout) * time.Second + rw := time.Duration(*rebootWait) * time.Second - a, err := agent.New(*node, rt) + klog.Infof("Waiting %v for reboot", rw) + a, err := agent.New(*node, rt, rw) if err != nil { klog.Fatalf("Failed to initialize %s: %v", os.Args[0], err) } diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 4e3dc1aeb..fca22a564 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -35,6 +35,7 @@ type Klocksmith struct { ue *updateengine.Client lc *login1.Conn reapTimeout time.Duration + rebootWait time.Duration } const ( @@ -48,7 +49,7 @@ var shouldRebootSelector = fields.Set(map[string]string{ }).AsSelector() // New returns initialized Klocksmith. -func New(node string, reapTimeout time.Duration) (*Klocksmith, error) { +func New(node string, reapTimeout time.Duration, rebootWait time.Duration) (*Klocksmith, error) { // Set up kubernetes in-cluster client. 
kc, err := k8sutil.GetClient("") if err != nil { @@ -70,7 +71,7 @@ func New(node string, reapTimeout time.Duration) (*Klocksmith, error) { return nil, fmt.Errorf("error establishing connection to logind dbus: %w", err) } - return &Klocksmith{node, kc, nc, ue, lc, reapTimeout}, nil + return &Klocksmith{node, kc, nc, ue, lc, reapTimeout, rebootWait}, nil } // Run starts the agent to listen for an update_engine reboot signal and react @@ -249,6 +250,10 @@ func (k *Klocksmith) process(stop <-chan struct{}) error { wg.Wait() + // We wait a little bit more time to perform finalizing operations + // This solves problems with some storage provisioners like rook. + klog.Infof("Waiting for finalizing operations, waiting %v", k.rebootWait) + klog.Info("Node drained, rebooting") // Reboot.