Skip to content

Commit

Permalink
Update checkpoint/restore support to match docker/master
Browse files Browse the repository at this point in the history
Docker-DCO-1.1-Signed-off-by: Ross Boucher <[email protected]> (github: boucher)
  • Loading branch information
boucher committed May 26, 2015
1 parent 9053cee commit fc69937
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 143 deletions.
4 changes: 2 additions & 2 deletions daemon/execdriver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ type Driver interface {
Kill(c *Command, sig int) error
Pause(c *Command) error
Unpause(c *Command) error
Checkpoint(c *Command) error
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error)
Checkpoint(c *Command, opts *libcontainer.CriuOpts) error
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (ExitStatus, error)
Name() string // Driver name
Info(id string) Info // "temporary" hack (until we move state from core to plugins)
GetPidsForContainer(id string) ([]int, error) // Returns a list of pids for the given container.
Expand Down
6 changes: 3 additions & 3 deletions daemon/execdriver/lxc/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,12 +547,12 @@ func (d *driver) Unpause(c *execdriver.Command) error {
return err
}

func (d *driver) Checkpoint(c *execdriver.Command) error {
func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error {
return fmt.Errorf("Checkpointing lxc containers not supported yet\n")
}

func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
return 0, fmt.Errorf("Restoring lxc containers not supported yet\n")
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) {
return execdriver.ExitStatus{ExitCode: 0}, fmt.Errorf("Restoring lxc containers not supported yet\n")
}

func (d *driver) Terminate(c *execdriver.Command) error {
Expand Down
19 changes: 0 additions & 19 deletions daemon/execdriver/native/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ package native

import (
"errors"
"encoding/json"
"fmt"
"net"
"strings"
Expand Down Expand Up @@ -89,24 +88,6 @@ func generateIfaceName() (string, error) {
return "", errors.New("Failed to find name for new interface")
}

// createRestoreContainer rebuilds the libcontainer configuration from the
// container.json file found in imageDir (the image saved during checkpoint).
//
// It returns the decoded configuration, or the error from opening the file
// or from decoding its JSON contents. The command argument c is not used by
// this function.
func (d *driver) createRestoreContainer(c *execdriver.Command, imageDir string) (*libcontainer.Config, error) {
	configPath := filepath.Join(imageDir, "container.json")

	configFile, err := os.Open(configPath)
	if err != nil {
		return nil, err
	}
	defer configFile.Close()

	// Decode straight from the open file into a freshly allocated config.
	var config *libcontainer.Config
	if err := json.NewDecoder(configFile).Decode(&config); err != nil {
		return nil, err
	}
	return config, nil
}

func (d *driver) createNetwork(container *configs.Config, c *execdriver.Command) error {
if c.Network.ContainerID != "" {
d.Lock()
Expand Down
183 changes: 64 additions & 119 deletions daemon/execdriver/native/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"github.com/docker/docker/pkg/reexec"
sysinfo "github.com/docker/docker/pkg/system"
"github.com/docker/docker/pkg/term"
"github.com/docker/docker/utils"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups/systemd"
Expand Down Expand Up @@ -275,153 +274,99 @@ func (d *driver) Unpause(c *execdriver.Command) error {
return active.Resume()
}

// XXX It is still an open question where these constants and
// getCheckpointImageDir() should live.
const (
	// containersDir is the directory under which each container's
	// subdirectory is looked up.
	containersDir = "/var/lib/docker/containers"
	// criuImgDir is the name of the checkpoint image subdirectory inside a
	// container's directory.
	criuImgDir = "criu_img"
)

// getCheckpointImageDir returns the checkpoint image directory for the given
// container ID: containersDir, then the ID, then criuImgDir, joined into a
// single path.
func getCheckpointImageDir(containerId string) string {
	segments := []string{containersDir, containerId, criuImgDir}
	return filepath.Join(segments...)
}

func (d *driver) Checkpoint(c *execdriver.Command) error {
func (d *driver) Checkpoint(c *execdriver.Command, opts *libcontainer.CriuOpts) error {
active := d.activeContainers[c.ID]
if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID)
}
container := active.container

// Create an image directory for this container (which
// may already exist from a previous checkpoint).
imageDir := getCheckpointImageDir(c.ID)
err := os.MkdirAll(imageDir, 0700)
if err != nil && !os.IsExist(err) {
return err
}

// Copy container.json and state.json files to the CRIU
// image directory for later use during restore. Do this
// before checkpointing because after checkpoint the container
// will exit and these files will be removed.
log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir)
srcFiles := []string{"container.json", "state.json"}
for _, f := range srcFiles {
srcFile := filepath.Join(d.root, c.ID, f)
dstFile := filepath.Join(imageDir, f)
if _, err := utils.CopyFile(srcFile, dstFile); err != nil {
return err
}
}

d.Lock()
defer d.Unlock()
err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid)
err := active.Checkpoint(opts)
if err != nil {
return err
}

return nil
}

type restoreOutput struct {
exitCode int
err error
}
func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback, opts *libcontainer.CriuOpts, forceRestore bool) (execdriver.ExitStatus, error) {
var (
cont libcontainer.Container
err error
)

func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
imageDir := getCheckpointImageDir(c.ID)
container, err := d.createRestoreContainer(c, imageDir)
cont, err = d.factory.Load(c.ID)
if err != nil {
return 1, err
if forceRestore {
var config *configs.Config
config, err = d.createContainer(c)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
cont, err = d.factory.Create(c.ID, config)
if err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
} else {
return execdriver.ExitStatus{ExitCode: -1}, err
}
}

var term execdriver.Terminal

if c.ProcessConfig.Tty {
term, err = NewTtyConsole(&c.ProcessConfig, pipes)
} else {
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
p := &libcontainer.Process{
Args: append([]string{c.ProcessConfig.Entrypoint}, c.ProcessConfig.Arguments...),
Env: c.ProcessConfig.Env,
Cwd: c.WorkingDir,
User: c.ProcessConfig.User,
}
if err != nil {
return -1, err

config := cont.Config()
if err := setupPipes(&config, &c.ProcessConfig, p, pipes); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}
c.ProcessConfig.Terminal = term

d.Lock()
d.activeContainers[c.ID] = &activeContainer{
container: container,
cmd: &c.ProcessConfig.Cmd,
}
d.activeContainers[c.ID] = cont
d.Unlock()
defer d.cleanContainer(c.ID)
defer func() {
cont.Destroy()
d.cleanContainer(c.ID)
}()

// Since the CRIU binary exits after restoring the container, we
// need to reap its child by setting PR_SET_CHILD_SUBREAPER (36)
// so that it'll be owned by this process (Docker daemon) after restore.
//
// XXX This really belongs to where the Docker daemon starts.
if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 {
return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr)
if err := cont.Restore(p, opts); err != nil {
return execdriver.ExitStatus{ExitCode: -1}, err
}

restoreOutputChan := make(chan restoreOutput, 1)
waitForRestore := make(chan struct{})

go func() {
exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir,
func(child *os.File, args []string) *exec.Cmd {
cmd := new(exec.Cmd)
cmd.Path = d.initPath
cmd.Args = append([]string{
DriverName,
"-restore",
"-pipe", "3",
"--",
}, args...)
cmd.ExtraFiles = []*os.File{child}
return cmd
},
func(restorePid int) error {
log.CRDbg("restorePid=%d", restorePid)
if restorePid == 0 {
restoreCallback(&c.ProcessConfig, 0)
return nil
}

// The container.json file should be written *after* the container
// has started because its StdFds cannot be initialized before.
//
// XXX How do we handle error here?
d.writeContainerFile(container, c.ID)
close(waitForRestore)
if restoreCallback != nil {
c.ProcessConfig.Process, err = os.FindProcess(restorePid)
if err != nil {
log.Debugf("cannot find restored process %d", restorePid)
return err
}
c.ContainerPid = c.ProcessConfig.Process.Pid
restoreCallback(&c.ProcessConfig, c.ContainerPid)
}
return nil
})
restoreOutputChan <- restoreOutput{exitCode, err}
}()
// FIXME: no idea if any of this is needed...
if restoreCallback != nil {
pid, err := p.Pid()
if err != nil {
p.Signal(os.Kill)
p.Wait()
return execdriver.ExitStatus{ExitCode: -1}, err
}
restoreCallback(&c.ProcessConfig, pid)
}

select {
case restoreOutput := <-restoreOutputChan:
// there was an error
return restoreOutput.exitCode, restoreOutput.err
case <-waitForRestore:
// container restored
break
oom := notifyOnOOM(cont)
waitF := p.Wait
if nss := cont.Config().Namespaces; !nss.Contains(configs.NEWPID) {
// we need this hack to track processes with inherited fds,
// because cmd.Wait() waits for all streams to be copied
waitF = waitInPIDHost(p, cont)
}
ps, err := waitF()
if err != nil {
execErr, ok := err.(*exec.ExitError)
if !ok {
return execdriver.ExitStatus{ExitCode: -1}, err
}
ps = execErr.ProcessState
}

// Wait for the container to exit.
restoreOutput := <-restoreOutputChan
return restoreOutput.exitCode, restoreOutput.err
cont.Destroy()
_, oomKill := <-oom
return execdriver.ExitStatus{ExitCode: utils.ExitStatus(ps.Sys().(syscall.WaitStatus)), OOMKilled: oomKill}, nil
}

func (d *driver) Terminate(c *execdriver.Command) error {
Expand Down

0 comments on commit fc69937

Please sign in to comment.