From 98f004182b9a23245e35aa34a42901de3535afb6 Mon Sep 17 00:00:00 2001 From: Deng Guangxing Date: Wed, 24 Aug 2016 17:48:56 +0800 Subject: [PATCH] add pre-dump and parent-path to checkpoint CRIU uses pre-dump to perform iterative migration. pre-dump saves process memory info only, and it needs parent-path to specify the previous memory files. This patch adds pre-dump and parent-path arguments to runc checkpoint Signed-off-by: Deng Guangxing Signed-off-by: Adrian Reber --- checkpoint.go | 2 + libcontainer/container_linux.go | 92 ++++++++++++++------- libcontainer/criu_opts_unix.go | 2 + libcontainer/integration/checkpoint_test.go | 30 ++++++- man/runc-checkpoint.8.md | 2 + restore.go | 2 + tests/integration/checkpoint.bats | 57 +++++++++++++ 7 files changed, 155 insertions(+), 32 deletions(-) diff --git a/checkpoint.go b/checkpoint.go index aefedb8e..dd7704f6 100644 --- a/checkpoint.go +++ b/checkpoint.go @@ -24,12 +24,14 @@ checkpointed.`, Flags: []cli.Flag{ cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"}, cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"}, + cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"}, cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"}, cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"}, cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"}, cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"}, cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"}, cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"}, + cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"}, cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"}, cli.StringSliceFlag{Name: 
"empty-ns", Usage: "create a namespace, but don't restore its properies"}, }, diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 9f0043fb..6347d899 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -652,6 +652,12 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { } } + //pre-dump may need parentImage param to complete iterative migration + if criuOpts.ParentImage != "" { + rpcOpts.ParentImg = proto.String(criuOpts.ParentImage) + rpcOpts.TrackMem = proto.Bool(true) + } + // append optional manage cgroups mode if criuOpts.ManageCgroupsMode != 0 { if err := c.checkCriuVersion("1.7"); err != nil { @@ -661,48 +667,55 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { rpcOpts.ManageCgroupsMode = &mode } - t := criurpc.CriuReqType_DUMP + var t criurpc.CriuReqType + if criuOpts.PreDump { + t = criurpc.CriuReqType_PRE_DUMP + } else { + t = criurpc.CriuReqType_DUMP + } req := &criurpc.CriuReq{ Type: &t, Opts: &rpcOpts, } - for _, m := range c.config.Mounts { - switch m.Device { - case "bind": - c.addCriuDumpMount(req, m) - break - case "cgroup": - binds, err := getCgroupMounts(m) - if err != nil { - return err + //no need to dump these information in pre-dump + if !criuOpts.PreDump { + for _, m := range c.config.Mounts { + switch m.Device { + case "bind": + c.addCriuDumpMount(req, m) + break + case "cgroup": + binds, err := getCgroupMounts(m) + if err != nil { + return err + } + for _, b := range binds { + c.addCriuDumpMount(req, b) + } + break } - for _, b := range binds { - c.addCriuDumpMount(req, b) - } - break } - } - if err := c.addMaskPaths(req); err != nil { - return err - } + if err := c.addMaskPaths(req); err != nil { + return err + } - for _, node := range c.config.Devices { - m := &configs.Mount{Destination: node.Path, Source: node.Path} - c.addCriuDumpMount(req, m) - } + for _, node := range c.config.Devices { + m := &configs.Mount{Destination: node.Path, Source: 
node.Path} + c.addCriuDumpMount(req, m) + } - // Write the FD info to a file in the image directory + // Write the FD info to a file in the image directory + fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors()) + if err != nil { + return err + } - fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors()) - if err != nil { - return err - } - - err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655) - if err != nil { - return err + err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655) + if err != nil { + return err + } } err = c.criuSwrk(nil, req, criuOpts, false) @@ -1015,6 +1028,23 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts * case t == criurpc.CriuReqType_RESTORE: case t == criurpc.CriuReqType_DUMP: break + case t == criurpc.CriuReqType_PRE_DUMP: + // In pre-dump mode CRIU is in a loop and waits for + // the final DUMP command. + // The current runc pre-dump approach, however, is + // start criu in PRE_DUMP once for a single pre-dump + // and not the whole series of pre-dump, pre-dump, ...m, dump + // If we got the message CriuReqType_PRE_DUMP it means + // CRIU was successful and we need to forcefully stop CRIU + logrus.Debugf("PRE_DUMP finished. 
Send close signal to CRIU service") + criuClient.Close() + // Process status won't be success, because one end of sockets is closed + _, err := cmd.Process.Wait() + if err != nil { + logrus.Debugf("After PRE_DUMP CRIU exiting failed") + return err + } + return nil default: return fmt.Errorf("unable to parse the response %s", resp.String()) } diff --git a/libcontainer/criu_opts_unix.go b/libcontainer/criu_opts_unix.go index b163fbbb..9d7d4dc8 100644 --- a/libcontainer/criu_opts_unix.go +++ b/libcontainer/criu_opts_unix.go @@ -25,11 +25,13 @@ type VethPairName struct { type CriuOpts struct { ImagesDirectory string // directory for storing image files WorkDirectory string // directory to cd and write logs/pidfiles/stats to + ParentImage string // direcotry for storing parent image files in pre-dump and dump LeaveRunning bool // leave container in running state after checkpoint TcpEstablished bool // checkpoint/restore established TCP connections ExternalUnixConnections bool // allow external unix connections ShellJob bool // allow to dump and restore shell jobs FileLocks bool // handle file locks, for safety + PreDump bool // call criu predump to perform iterative checkpoint PageServer CriuPageServerInfo // allow to dump to criu page server VethPairs []VethPairName // pass the veth to criu when restore ManageCgroupsMode cgMode // dump or restore cgroup mode diff --git a/libcontainer/integration/checkpoint_test.go b/libcontainer/integration/checkpoint_test.go index 7c5746b4..bc5b0a30 100644 --- a/libcontainer/integration/checkpoint_test.go +++ b/libcontainer/integration/checkpoint_test.go @@ -106,6 +106,33 @@ func TestCheckpoint(t *testing.T) { t.Fatal(err) } + parentDir, err := ioutil.TempDir("", "criu-parent") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(parentDir) + + preDumpOpts := &libcontainer.CriuOpts{ + ImagesDirectory: parentDir, + WorkDirectory: parentDir, + PreDump: true, + } + preDumpLog := filepath.Join(preDumpOpts.WorkDirectory, "dump.log") + 
+ if err := container.Checkpoint(preDumpOpts); err != nil { + showFile(t, preDumpLog) + t.Fatal(err) + } + + state, err := container.Status() + if err != nil { + t.Fatal(err) + } + + if state != libcontainer.Running { + t.Fatal("Unexpected preDump state: ", state) + } + imagesDir, err := ioutil.TempDir("", "criu") if err != nil { t.Fatal(err) @@ -115,6 +142,7 @@ func TestCheckpoint(t *testing.T) { checkpointOpts := &libcontainer.CriuOpts{ ImagesDirectory: imagesDir, WorkDirectory: imagesDir, + ParentImage: "../criu-parent", } dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log") restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log") @@ -124,7 +152,7 @@ func TestCheckpoint(t *testing.T) { t.Fatal(err) } - state, err := container.Status() + state, err = container.Status() if err != nil { t.Fatal(err) } diff --git a/man/runc-checkpoint.8.md b/man/runc-checkpoint.8.md index 632dcabb..b0ce2f3c 100644 --- a/man/runc-checkpoint.8.md +++ b/man/runc-checkpoint.8.md @@ -13,11 +13,13 @@ checkpointed. 
# OPTIONS --image-path value path for saving criu image files --work-path value path for saving work files and logs + --parent-path value path for previous criu image files in pre-dump --leave-running leave the process running after checkpointing --tcp-established allow open tcp connections --ext-unix-sk allow external unix sockets --shell-job allow shell jobs --page-server value ADDRESS:PORT of the page server --file-locks handle file locks, for safety + --pre-dump dump container's memory information only, leave the container running after this --manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict' --empty-ns value create a namespace, but don't restore its properies diff --git a/restore.go b/restore.go index 91b1efe5..c95c34ef 100644 --- a/restore.go +++ b/restore.go @@ -195,10 +195,12 @@ func criuOptions(context *cli.Context) *libcontainer.CriuOpts { return &libcontainer.CriuOpts{ ImagesDirectory: imagePath, WorkDirectory: context.String("work-path"), + ParentImage: context.String("parent-path"), LeaveRunning: context.Bool("leave-running"), TcpEstablished: context.Bool("tcp-established"), ExternalUnixConnections: context.Bool("ext-unix-sk"), ShellJob: context.Bool("shell-job"), FileLocks: context.Bool("file-locks"), + PreDump: context.Bool("pre-dump"), } } diff --git a/tests/integration/checkpoint.bats b/tests/integration/checkpoint.bats index 4a369396..34d1b036 100644 --- a/tests/integration/checkpoint.bats +++ b/tests/integration/checkpoint.bats @@ -57,3 +57,60 @@ function teardown() { [ "$status" -eq 0 ] [[ "${output}" == *"running"* ]] } + +@test "checkpoint(pre-dump) and restore" { + requires criu + + # criu does not work with external terminals so.. 
+ # setting terminal and root:readonly: to false + sed -i 's;"terminal": true;"terminal": false;' config.json + sed -i 's;"readonly": true;"readonly": false;' config.json + sed -i 's/"sh"/"sh","-c","while :; do date; sleep 1; done"/' config.json + + ( + # run busybox (not detached) + runc run test_busybox + [ "$status" -eq 0 ] + ) & + + # check state + wait_for_container 15 1 test_busybox + + runc state test_busybox + [ "$status" -eq 0 ] + [[ "${output}" == *"running"* ]] + + + #test checkpoint pre-dump + mkdir parent-dir + runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox + [ "$status" -eq 0 ] + + # busybox should still be running + runc state test_busybox + [ "$status" -eq 0 ] + [[ "${output}" == *"running"* ]] + + # checkpoint the running container + mkdir image-dir + runc --criu "$CRIU" checkpoint --parent-path ./parent-dir --image-path ./image-dir test_busybox + [ "$status" -eq 0 ] + + # after checkpoint busybox is no longer running + runc state test_busybox + [ "$status" -ne 0 ] + + # restore from checkpoint + ( + runc --criu "$CRIU" restore --image-path ./image-dir test_busybox + [ "$status" -eq 0 ] + ) & + + # check state + wait_for_container 15 1 test_busybox + + # busybox should be back up and running + runc state test_busybox + [ "$status" -eq 0 ] + [[ "${output}" == *"running"* ]] +}