Skip to content

Commit

Permalink
update-agent: Added reboot-wait parameter
Browse files Browse the repository at this point in the history
This adds a reboot-wait parameter, which waits a fixed amount of time after the last pod
has been terminated, so that finalizing operations can complete before the reboot. This solves
some problems with storage provisioners like rook.
  • Loading branch information
Johann Wagner authored and Jasper-Ben committed Feb 11, 2021
1 parent eb213a8 commit 43632a2
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
6 changes: 5 additions & 1 deletion cmd/update-agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ var (

reapTimeout = flag.Int("grace-period", 600,
"Period of time in seconds given to a pod to terminate when rebooting for an update")
rebootWait = flag.Int("reboot-wait", 0,
"Period of time in seconds waiting after last pod deletion for reboot")
)

func main() {
Expand All @@ -44,8 +46,10 @@ func main() {
}

rt := time.Duration(*reapTimeout) * time.Second
rw := time.Duration(*rebootWait) * time.Second

a, err := agent.New(*node, rt)
klog.Infof("Waiting %v for reboot", rw)
a, err := agent.New(*node, rt, rw)
if err != nil {
klog.Fatalf("Failed to initialize %s: %v", os.Args[0], err)
}
Expand Down
9 changes: 7 additions & 2 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type Klocksmith struct {
ue *updateengine.Client
lc *login1.Conn
reapTimeout time.Duration
rebootWait time.Duration
}

const (
Expand All @@ -48,7 +49,7 @@ var shouldRebootSelector = fields.Set(map[string]string{
}).AsSelector()

// New returns initialized Klocksmith.
func New(node string, reapTimeout time.Duration) (*Klocksmith, error) {
func New(node string, reapTimeout time.Duration, rebootWait time.Duration) (*Klocksmith, error) {
// Set up kubernetes in-cluster client.
kc, err := k8sutil.GetClient("")
if err != nil {
Expand All @@ -70,7 +71,7 @@ func New(node string, reapTimeout time.Duration) (*Klocksmith, error) {
return nil, fmt.Errorf("error establishing connection to logind dbus: %w", err)
}

return &Klocksmith{node, kc, nc, ue, lc, reapTimeout}, nil
return &Klocksmith{node, kc, nc, ue, lc, reapTimeout, rebootWait}, nil
}

// Run starts the agent to listen for an update_engine reboot signal and react
Expand Down Expand Up @@ -249,6 +250,10 @@ func (k *Klocksmith) process(stop <-chan struct{}) error {

wg.Wait()

// We wait a little bit more time to perform finalizing operations
// This solves problems with some storage provisioners like rook.
klog.Infof("Waiting for finalizing operations, waiting %v", k.rebootWait)

klog.Info("Node drained, rebooting")

// Reboot.
Expand Down

0 comments on commit 43632a2

Please sign in to comment.