Skip to content

Commit

Permalink
Checkpoint/Restore Support: add exec driver methods
Browse files Browse the repository at this point in the history
Methods for checkpointing and restoring containers were added to the
native driver.  The LXC driver does not implement these methods yet;
its versions simply return an error saying so.

Signed-off-by: Saied Kazemi <[email protected]>

Conflicts:
	daemon/execdriver/native/create.go
	daemon/execdriver/native/driver.go
	daemon/execdriver/native/init.go

Conflicts:
	daemon/execdriver/driver.go
	daemon/execdriver/native/create.go
  • Loading branch information
Saied Kazemi authored and boucher committed Oct 1, 2015
1 parent 99d13e7 commit cdf1ce2
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 1 deletion.
6 changes: 6 additions & 0 deletions daemon/execdriver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ type Hooks struct {
PostStop []DriverCallback
}

// RestoreCallback is the function type a caller hands to Driver.Restore.
// It receives the process configuration of the container being restored
// and a pid. The native driver passes the pid of the restored process, or
// 0 when the restore reported pid 0.
type RestoreCallback func(*ProcessConfig, int)

// Info is driver specific information based on
// processes registered with the driver
type Info interface {
Expand Down Expand Up @@ -84,6 +86,10 @@ type Driver interface {
// Unpause unpauses a container.
Unpause(c *Command) error

// Checkpoint checkpoints a container.
Checkpoint(c *Command) error

// Restore restores a previously checkpointed container. restoreCallback is
// handed the process configuration and a pid by the driver during restore.
// The int result is an exit code (presumably that of the restored
// container; confirm against each driver implementation).
Restore(c *Command, pipes *Pipes, restoreCallback RestoreCallback) (int, error)

// Name returns the name of the driver.
Name() string

Expand Down
9 changes: 8 additions & 1 deletion daemon/execdriver/lxc/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,14 @@ func (d *Driver) Unpause(c *execdriver.Command) error {
return err
}

// Terminate implements the exec driver Driver interface.
func (d *driver) Checkpoint(c *execdriver.Command) error {
return fmt.Errorf("Checkpointing lxc containers not supported yet\n")
}

func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
return 0, fmt.Errorf("Restoring lxc containers not supported yet\n")
}

// Terminate implements the exec driver Driver interface. It asks killLxc
// to deliver signal 9 to the container identified by c.ID and returns
// whatever error killLxc reports.
func (d *Driver) Terminate(c *execdriver.Command) error {
	// 9 is SIGKILL on Linux; presumably killLxc treats it as a signal
	// number (confirm against killLxc).
	const sigKill = 9
	return killLxc(c.ID, sigKill)
}
Expand Down
19 changes: 19 additions & 0 deletions daemon/execdriver/native/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package native

import (
"errors"
"encoding/json"
"fmt"
"net"
"strings"
Expand Down Expand Up @@ -119,6 +120,24 @@ func generateIfaceName() (string, error) {
return "", errors.New("Failed to find name for new interface")
}

// Re-create the container type from the image that was saved during checkpoint.
func (d *driver) createRestoreContainer(c *execdriver.Command, imageDir string) (*libcontainer.Config, error) {
// Read the container.json.
f1, err := os.Open(filepath.Join(imageDir, "container.json"))
if err != nil {
return nil, err
}
defer f1.Close()

var container *libcontainer.Config
err = json.NewDecoder(f1).Decode(&container)
if err != nil {
return nil, err
}

return container, nil
}

func (d *Driver) createNetwork(container *configs.Config, c *execdriver.Command, hooks execdriver.Hooks) error {
if c.Network == nil {
return nil
Expand Down
150 changes: 150 additions & 0 deletions daemon/execdriver/native/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/docker/docker/pkg/reexec"
sysinfo "github.com/docker/docker/pkg/system"
"github.com/docker/docker/pkg/term"
"github.com/docker/docker/utils"
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/apparmor"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
Expand Down Expand Up @@ -299,6 +300,155 @@ func (d *Driver) Unpause(c *execdriver.Command) error {
return active.Resume()
}

// XXX Where is the right place for the following
// const and getCheckpointImageDir() function?
const (
	// containersDir is the directory under which each container ID gets
	// its own subdirectory.
	// NOTE(review): hard-coded path; confirm whether this should follow
	// the daemon's configured root instead.
	containersDir = "/var/lib/docker/containers"
	// criuImgDir is the name of the per-container subdirectory that holds
	// the checkpoint image.
	criuImgDir = "criu_img"
)

// getCheckpointImageDir returns the checkpoint image directory for the
// given container: containersDir/<containerID>/criuImgDir.
func getCheckpointImageDir(containerID string) string {
	return filepath.Join(containersDir, containerID, criuImgDir)
}

func (d *driver) Checkpoint(c *execdriver.Command) error {
active := d.activeContainers[c.ID]
if active == nil {
return fmt.Errorf("active container for %s does not exist", c.ID)
}
container := active.container

// Create an image directory for this container (which
// may already exist from a previous checkpoint).
imageDir := getCheckpointImageDir(c.ID)
err := os.MkdirAll(imageDir, 0700)
if err != nil && !os.IsExist(err) {
return err
}

// Copy container.json and state.json files to the CRIU
// image directory for later use during restore. Do this
// before checkpointing because after checkpoint the container
// will exit and these files will be removed.
log.CRDbg("saving container.json and state.json before calling CRIU in %s", imageDir)
srcFiles := []string{"container.json", "state.json"}
for _, f := range srcFiles {
srcFile := filepath.Join(d.root, c.ID, f)
dstFile := filepath.Join(imageDir, f)
if _, err := utils.CopyFile(srcFile, dstFile); err != nil {
return err
}
}

d.Lock()
defer d.Unlock()
err = namespaces.Checkpoint(container, imageDir, c.ProcessConfig.Process.Pid)
if err != nil {
return err
}

return nil
}

// restoreOutput carries the result of the restore goroutine started by
// Restore back to the caller over a channel. The field order matters:
// Restore builds the value with a positional literal.
type restoreOutput struct {
// exitCode is the exit code returned by namespaces.Restore.
exitCode int
// err is the error returned by namespaces.Restore.
err error
}

func (d *driver) Restore(c *execdriver.Command, pipes *execdriver.Pipes, restoreCallback execdriver.RestoreCallback) (int, error) {
imageDir := getCheckpointImageDir(c.ID)
container, err := d.createRestoreContainer(c, imageDir)
if err != nil {
return 1, err
}

var term execdriver.Terminal

if c.ProcessConfig.Tty {
term, err = NewTtyConsole(&c.ProcessConfig, pipes)
} else {
term, err = execdriver.NewStdConsole(&c.ProcessConfig, pipes)
}
if err != nil {
return -1, err
}
c.ProcessConfig.Terminal = term

d.Lock()
d.activeContainers[c.ID] = &activeContainer{
container: container,
cmd: &c.ProcessConfig.Cmd,
}
d.Unlock()
defer d.cleanContainer(c.ID)

// Since the CRIU binary exits after restoring the container, we
// need to reap its child by setting PR_SET_CHILD_SUBREAPER (36)
// so that it'll be owned by this process (Docker daemon) after restore.
//
// XXX This really belongs to where the Docker daemon starts.
if _, _, syserr := syscall.RawSyscall(syscall.SYS_PRCTL, 36, 1, 0); syserr != 0 {
return -1, fmt.Errorf("Could not set PR_SET_CHILD_SUBREAPER (syserr %d)", syserr)
}

restoreOutputChan := make(chan restoreOutput, 1)
waitForRestore := make(chan struct{})

go func() {
exitCode, err := namespaces.Restore(container, c.ProcessConfig.Stdin, c.ProcessConfig.Stdout, c.ProcessConfig.Stderr, c.ProcessConfig.Console, filepath.Join(d.root, c.ID), imageDir,
func(child *os.File, args []string) *exec.Cmd {
cmd := new(exec.Cmd)
cmd.Path = d.initPath
cmd.Args = append([]string{
DriverName,
"-restore",
"-pipe", "3",
"--",
}, args...)
cmd.ExtraFiles = []*os.File{child}
return cmd
},
func(restorePid int) error {
log.CRDbg("restorePid=%d", restorePid)
if restorePid == 0 {
restoreCallback(&c.ProcessConfig, 0)
return nil
}

// The container.json file should be written *after* the container
// has started because its StdFds cannot be initialized before.
//
// XXX How do we handle error here?
d.writeContainerFile(container, c.ID)
close(waitForRestore)
if restoreCallback != nil {
c.ProcessConfig.Process, err = os.FindProcess(restorePid)
if err != nil {
log.Debugf("cannot find restored process %d", restorePid)
return err
}
c.ContainerPid = c.ProcessConfig.Process.Pid
restoreCallback(&c.ProcessConfig, c.ContainerPid)
}
return nil
})
restoreOutputChan <- restoreOutput{exitCode, err}
}()

select {
case restoreOutput := <-restoreOutputChan:
// there was an error
return restoreOutput.exitCode, restoreOutput.err
case <-waitForRestore:
// container restored
break
}

// Wait for the container to exit.
restoreOutput := <-restoreOutputChan
return restoreOutput.exitCode, restoreOutput.err
}

// Terminate implements the exec driver Driver interface.
func (d *Driver) Terminate(c *execdriver.Command) error {
defer d.cleanContainer(c.ID)
Expand Down

0 comments on commit cdf1ce2

Please sign in to comment.