Skip to content

Commit

Permalink
Update checkpoint/restore support in the daemon.
Browse files Browse the repository at this point in the history
Docker-DCO-1.1-Signed-off-by: Ross Boucher <[email protected]> (github: boucher)
  • Loading branch information
boucher committed May 24, 2015
1 parent 8538052 commit 90a38e3
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 170 deletions.
25 changes: 19 additions & 6 deletions api/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/docker/docker/pkg/version"
"github.com/docker/docker/runconfig"
"github.com/docker/docker/utils"
"github.com/docker/libcontainer"
"github.com/docker/libnetwork/portallocator"
)

Expand Down Expand Up @@ -1286,32 +1287,44 @@ func (s *Server) postContainersCopy(version version.Version, w http.ResponseWrit
return nil
}

func postContainersCheckpoint(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
func (s *Server) postContainersCheckpoint(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if vars == nil {
return fmt.Errorf("Missing parameter")
}
if err := parseForm(r); err != nil {
return err
}
job := eng.Job("checkpoint", vars["name"])
if err := job.Run(); err != nil {

criuOpts := &libcontainer.CriuOpts{}
if err := json.NewDecoder(r.Body).Decode(criuOpts); err != nil {
return err
}

if err := s.daemon.ContainerCheckpoint(vars["name"], criuOpts); err != nil {
return err
}

w.WriteHeader(http.StatusNoContent)
return nil
}

func postContainersRestore(eng *engine.Engine, version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
func (s *Server) postContainersRestore(version version.Version, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
if vars == nil {
return fmt.Errorf("Missing parameter")
}
if err := parseForm(r); err != nil {
return err
}
job := eng.Job("restore", vars["name"])
if err := job.Run(); err != nil {

restoreOpts := runconfig.RestoreConfig{}
if err := json.NewDecoder(r.Body).Decode(&restoreOpts); err != nil {
return err
}

if err := s.daemon.ContainerRestore(vars["name"], &restoreOpts.CriuOpts, restoreOpts.ForceRestore); err != nil {
return err
}

w.WriteHeader(http.StatusNoContent)
return nil
}
Expand Down
91 changes: 46 additions & 45 deletions daemon/checkpoint.go
Original file line number Diff line number Diff line change
@@ -1,55 +1,56 @@
package daemon

import (
"github.com/docker/docker/engine"
"fmt"

"github.com/docker/libcontainer"
)

// Checkpoint a running container.
func (daemon *Daemon) ContainerCheckpoint(job *engine.Job) engine.Status {
if len(job.Args) != 1 {
return job.Errorf("Usage: %s CONTAINER\n", job.Name)
}

name := job.Args[0]
container, err := daemon.Get(name)
if err != nil {
return job.Error(err)
}
if !container.IsRunning() {
return job.Errorf("Container %s not running", name)
}

if err := container.Checkpoint(); err != nil {
return job.Errorf("Cannot checkpoint container %s: %s", name, err)
}

container.LogEvent("checkpoint")
return engine.StatusOK
func (daemon *Daemon) ContainerCheckpoint(name string, opts *libcontainer.CriuOpts) error {
container, err := daemon.Get(name)
if err != nil {
return err
}
if !container.IsRunning() {
return fmt.Errorf("Container %s not running", name)
}
if err := container.Checkpoint(opts); err != nil {
return fmt.Errorf("Cannot checkpoint container %s: %s", name, err)
}

container.LogEvent("checkpoint")
return nil
}

// Restore a checkpointed container.
func (daemon *Daemon) ContainerRestore(job *engine.Job) engine.Status {
if len(job.Args) != 1 {
return job.Errorf("Usage: %s CONTAINER\n", job.Name)
}

name := job.Args[0]
container, err := daemon.Get(name)
if err != nil {
return job.Error(err)
}
if container.IsRunning() {
return job.Errorf("Container %s already running", name)
}
if !container.State.IsCheckpointed() {
return job.Errorf("Container %s is not checkpointed", name)
}

if err := container.Restore(); err != nil {
container.LogEvent("die")
return job.Errorf("Cannot restore container %s: %s", name, err)
}

container.LogEvent("restore")
return engine.StatusOK
func (daemon *Daemon) ContainerRestore(name string, opts *libcontainer.CriuOpts, forceRestore bool) error {
container, err := daemon.Get(name)
if err != nil {
return err
}

if !forceRestore {
// TODO: It's possible we only want to bypass the checkpointed check,
// I'm not sure how this will work if the container is already running
if container.IsRunning() {
return fmt.Errorf("Container %s already running", name)
}

if !container.IsCheckpointed() {
return fmt.Errorf("Container %s is not checkpointed", name)
}
} else {
if !container.HasBeenCheckpointed() && opts.ImagesDirectory == "" {
return fmt.Errorf("You must specify an image directory to restore from %s", name)
}
}

if err = container.Restore(opts, forceRestore); err != nil {
container.LogEvent("die")
return fmt.Errorf("Cannot restore container %s: %s", name, err)
}

container.LogEvent("restore")
return nil
}
87 changes: 44 additions & 43 deletions daemon/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"syscall"
"time"

"github.com/docker/libcontainer"
"github.com/docker/libcontainer/label"

"github.com/Sirupsen/logrus"
Expand Down Expand Up @@ -255,7 +256,7 @@ func (container *Container) Start() (err error) {
if err := container.Mount(); err != nil {
return err
}
if err := container.initializeNetworking(); err != nil {
if err := container.initializeNetworking(false); err != nil {
return err
}
container.verifyDaemonSettings()
Expand Down Expand Up @@ -339,12 +340,11 @@ func (container *Container) isNetworkAllocated() bool {
return container.NetworkSettings.IPAddress != ""
}


// cleanup releases any network resources allocated to the container along with any rules
// around how containers are linked together. It also unmounts the container's root filesystem.
func (container *Container) cleanup() {
if container.IsCheckpointed() {
log.CRDbg("not calling ReleaseNetwork() for checkpointed container %s", container.ID)
logrus.Debugf("not calling ReleaseNetwork() for checkpointed container %s", container.ID)
} else {
container.ReleaseNetwork()
}
Expand Down Expand Up @@ -581,6 +581,43 @@ func (container *Container) Checkpoint(opts *libcontainer.CriuOpts) error {
return nil
}

func (container *Container) Restore(opts *libcontainer.CriuOpts, forceRestore bool) error {
var err error
container.Lock()
defer container.Unlock()

defer func() {
if err != nil {
container.cleanup()
}
}()
if err := container.Mount(); err != nil {
return err
}
if err = container.initializeNetworking(true); err != nil {
return err
}
container.verifyDaemonSettings()

linkedEnv, err := container.setupLinkedContainers()
if err != nil {
return err
}
if err = container.setupWorkingDirectory(); err != nil {
return err
}

env := container.createDaemonEnvironment(linkedEnv)
if err = populateCommand(container, env); err != nil {
return err
}

if err = container.setupMounts(); err != nil {
return err
}

return container.waitForRestore(opts, forceRestore)
}

func (container *Container) Copy(resource string) (io.ReadCloser, error) {
container.Lock()
Expand Down Expand Up @@ -641,41 +678,6 @@ func (container *Container) Copy(resource string) (io.ReadCloser, error) {
nil
}

func (container *Container) Checkpoint() error {
return container.daemon.Checkpoint(container)
}

func (container *Container) Restore() error {
var err error

container.Lock()
defer container.Unlock()

defer func() {
if err != nil {
container.cleanup()
}
}()

if err = container.initializeNetworking(); err != nil {
return err
}

linkedEnv, err := container.setupLinkedContainers()
if err != nil {
return err
}
if err = container.setupWorkingDirectory(); err != nil {
return err
}
env := container.createDaemonEnvironment(linkedEnv)
if err = populateCommandRestore(container, env); err != nil {
return err
}

return container.waitForRestore()
}

// Returns true if the container exposes a certain port
func (container *Container) Exposes(p nat.Port) bool {
_, exists := container.Config.ExposedPorts[p]
Expand Down Expand Up @@ -762,10 +764,7 @@ func (container *Container) waitForStart() error {
return nil
}

// Like waitForStart() but for restoring a container.
//
// XXX Does RestartPolicy apply here?
func (container *Container) waitForRestore() error {
func (container *Container) waitForRestore(opts *libcontainer.CriuOpts, forceRestore bool) error {
container.monitor = newContainerMonitor(container, container.hostConfig.RestartPolicy)

// After calling promise.Go() we'll have two goroutines:
Expand All @@ -778,7 +777,7 @@ func (container *Container) waitForRestore() error {
if container.ExitCode != 0 {
return fmt.Errorf("restore process failed")
}
case err := <-promise.Go(container.monitor.Restore):
case err := <-promise.Go(func() error { return container.monitor.Restore(opts, forceRestore) }):
return err
}

Expand Down Expand Up @@ -962,6 +961,7 @@ func attach(streamConfig *StreamConfig, openStdin, stdinOnce, tty bool, stdin io
wg.Add(1)
}


// Connect stdin of container to the http conn.
go func() {
if stdin == nil || !openStdin {
Expand Down Expand Up @@ -989,6 +989,7 @@ func attach(streamConfig *StreamConfig, openStdin, stdinOnce, tty bool, stdin io
_, err = copyEscapable(cStdin, stdin)
} else {
_, err = io.Copy(cStdin, stdin)

}
if err == io.ErrClosedPipe {
err = nil
Expand Down
Loading

0 comments on commit 90a38e3

Please sign in to comment.