add pre-dump and parent-path to checkpoint
CRIU uses pre-dump to perform iterative migration. A pre-dump saves only the process memory information, and it needs parent-path to specify the previous memory image files. This patch adds the pre-dump and parent-path arguments to runc checkpoint. Signed-off-by: Deng Guangxing <dengguangxing@huawei.com> Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
parent
4f21aea40d
commit
98f004182b
|
@ -24,12 +24,14 @@ checkpointed.`,
|
|||
Flags: []cli.Flag{
|
||||
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
|
||||
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
|
||||
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
|
||||
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
|
||||
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
|
||||
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
||||
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
||||
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
||||
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
||||
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
||||
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
|
||||
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properies"},
|
||||
},
|
||||
|
|
|
@ -652,6 +652,12 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
}
|
||||
}
|
||||
|
||||
//pre-dump may need parentImage param to complete iterative migration
|
||||
if criuOpts.ParentImage != "" {
|
||||
rpcOpts.ParentImg = proto.String(criuOpts.ParentImage)
|
||||
rpcOpts.TrackMem = proto.Bool(true)
|
||||
}
|
||||
|
||||
// append optional manage cgroups mode
|
||||
if criuOpts.ManageCgroupsMode != 0 {
|
||||
if err := c.checkCriuVersion("1.7"); err != nil {
|
||||
|
@ -661,48 +667,55 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
rpcOpts.ManageCgroupsMode = &mode
|
||||
}
|
||||
|
||||
t := criurpc.CriuReqType_DUMP
|
||||
var t criurpc.CriuReqType
|
||||
if criuOpts.PreDump {
|
||||
t = criurpc.CriuReqType_PRE_DUMP
|
||||
} else {
|
||||
t = criurpc.CriuReqType_DUMP
|
||||
}
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
Opts: &rpcOpts,
|
||||
}
|
||||
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
c.addCriuDumpMount(req, m)
|
||||
break
|
||||
case "cgroup":
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
//no need to dump these information in pre-dump
|
||||
if !criuOpts.PreDump {
|
||||
for _, m := range c.config.Mounts {
|
||||
switch m.Device {
|
||||
case "bind":
|
||||
c.addCriuDumpMount(req, m)
|
||||
break
|
||||
case "cgroup":
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, b := range binds {
|
||||
c.addCriuDumpMount(req, b)
|
||||
}
|
||||
break
|
||||
}
|
||||
for _, b := range binds {
|
||||
c.addCriuDumpMount(req, b)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err := c.addMaskPaths(req); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := c.addMaskPaths(req); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, node := range c.config.Devices {
|
||||
m := &configs.Mount{Destination: node.Path, Source: node.Path}
|
||||
c.addCriuDumpMount(req, m)
|
||||
}
|
||||
for _, node := range c.config.Devices {
|
||||
m := &configs.Mount{Destination: node.Path, Source: node.Path}
|
||||
c.addCriuDumpMount(req, m)
|
||||
}
|
||||
|
||||
// Write the FD info to a file in the image directory
|
||||
// Write the FD info to a file in the image directory
|
||||
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
|
||||
if err != nil {
|
||||
return err
|
||||
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0655)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
err = c.criuSwrk(nil, req, criuOpts, false)
|
||||
|
@ -1015,6 +1028,23 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
case t == criurpc.CriuReqType_RESTORE:
|
||||
case t == criurpc.CriuReqType_DUMP:
|
||||
break
|
||||
case t == criurpc.CriuReqType_PRE_DUMP:
|
||||
// In pre-dump mode CRIU is in a loop and waits for
|
||||
// the final DUMP command.
|
||||
// The current runc pre-dump approach, however, is
|
||||
// start criu in PRE_DUMP once for a single pre-dump
|
||||
// and not the whole series of pre-dump, pre-dump, ...m, dump
|
||||
// If we got the message CriuReqType_PRE_DUMP it means
|
||||
// CRIU was successful and we need to forcefully stop CRIU
|
||||
logrus.Debugf("PRE_DUMP finished. Send close signal to CRIU service")
|
||||
criuClient.Close()
|
||||
// Process status won't be success, because one end of sockets is closed
|
||||
_, err := cmd.Process.Wait()
|
||||
if err != nil {
|
||||
logrus.Debugf("After PRE_DUMP CRIU exiting failed")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("unable to parse the response %s", resp.String())
|
||||
}
|
||||
|
|
|
@ -25,11 +25,13 @@ type VethPairName struct {
|
|||
type CriuOpts struct {
|
||||
ImagesDirectory string // directory for storing image files
|
||||
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
||||
ParentImage string // direcotry for storing parent image files in pre-dump and dump
|
||||
LeaveRunning bool // leave container in running state after checkpoint
|
||||
TcpEstablished bool // checkpoint/restore established TCP connections
|
||||
ExternalUnixConnections bool // allow external unix connections
|
||||
ShellJob bool // allow to dump and restore shell jobs
|
||||
FileLocks bool // handle file locks, for safety
|
||||
PreDump bool // call criu predump to perform iterative checkpoint
|
||||
PageServer CriuPageServerInfo // allow to dump to criu page server
|
||||
VethPairs []VethPairName // pass the veth to criu when restore
|
||||
ManageCgroupsMode cgMode // dump or restore cgroup mode
|
||||
|
|
|
@ -106,6 +106,33 @@ func TestCheckpoint(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
parentDir, err := ioutil.TempDir("", "criu-parent")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(parentDir)
|
||||
|
||||
preDumpOpts := &libcontainer.CriuOpts{
|
||||
ImagesDirectory: parentDir,
|
||||
WorkDirectory: parentDir,
|
||||
PreDump: true,
|
||||
}
|
||||
preDumpLog := filepath.Join(preDumpOpts.WorkDirectory, "dump.log")
|
||||
|
||||
if err := container.Checkpoint(preDumpOpts); err != nil {
|
||||
showFile(t, preDumpLog)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
state, err := container.Status()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if state != libcontainer.Running {
|
||||
t.Fatal("Unexpected preDump state: ", state)
|
||||
}
|
||||
|
||||
imagesDir, err := ioutil.TempDir("", "criu")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -115,6 +142,7 @@ func TestCheckpoint(t *testing.T) {
|
|||
checkpointOpts := &libcontainer.CriuOpts{
|
||||
ImagesDirectory: imagesDir,
|
||||
WorkDirectory: imagesDir,
|
||||
ParentImage: "../criu-parent",
|
||||
}
|
||||
dumpLog := filepath.Join(checkpointOpts.WorkDirectory, "dump.log")
|
||||
restoreLog := filepath.Join(checkpointOpts.WorkDirectory, "restore.log")
|
||||
|
@ -124,7 +152,7 @@ func TestCheckpoint(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
state, err := container.Status()
|
||||
state, err = container.Status()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
|
|
@ -13,11 +13,13 @@ checkpointed.
|
|||
# OPTIONS
|
||||
--image-path value path for saving criu image files
|
||||
--work-path value path for saving work files and logs
|
||||
--parent-path value path for previous criu image files in pre-dump
|
||||
--leave-running leave the process running after checkpointing
|
||||
--tcp-established allow open tcp connections
|
||||
--ext-unix-sk allow external unix sockets
|
||||
--shell-job allow shell jobs
|
||||
--page-server value ADDRESS:PORT of the page server
|
||||
--file-locks handle file locks, for safety
|
||||
--pre-dump dump container's memory information only, leave the container running after this
|
||||
--manage-cgroups-mode value cgroups mode: 'soft' (default), 'full' and 'strict'
|
||||
--empty-ns value create a namespace, but don't restore its properties
|
||||
|
|
|
@ -195,10 +195,12 @@ func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
|
|||
return &libcontainer.CriuOpts{
|
||||
ImagesDirectory: imagePath,
|
||||
WorkDirectory: context.String("work-path"),
|
||||
ParentImage: context.String("parent-path"),
|
||||
LeaveRunning: context.Bool("leave-running"),
|
||||
TcpEstablished: context.Bool("tcp-established"),
|
||||
ExternalUnixConnections: context.Bool("ext-unix-sk"),
|
||||
ShellJob: context.Bool("shell-job"),
|
||||
FileLocks: context.Bool("file-locks"),
|
||||
PreDump: context.Bool("pre-dump"),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,3 +57,60 @@ function teardown() {
|
|||
[ "$status" -eq 0 ]
|
||||
[[ "${output}" == *"running"* ]]
|
||||
}
|
||||
|
||||
@test "checkpoint(pre-dump) and restore" {
|
||||
requires criu
|
||||
|
||||
# criu does not work with external terminals so..
|
||||
# setting terminal and root:readonly: to false
|
||||
sed -i 's;"terminal": true;"terminal": false;' config.json
|
||||
sed -i 's;"readonly": true;"readonly": false;' config.json
|
||||
sed -i 's/"sh"/"sh","-c","while :; do date; sleep 1; done"/' config.json
|
||||
|
||||
(
|
||||
# run busybox (not detached)
|
||||
runc run test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
) &
|
||||
|
||||
# check state
|
||||
wait_for_container 15 1 test_busybox
|
||||
|
||||
runc state test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${output}" == *"running"* ]]
|
||||
|
||||
|
||||
#test checkpoint pre-dump
|
||||
mkdir parent-dir
|
||||
runc --criu "$CRIU" checkpoint --pre-dump --image-path ./parent-dir test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# busybox should still be running
|
||||
runc state test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${output}" == *"running"* ]]
|
||||
|
||||
# checkpoint the running container
|
||||
mkdir image-dir
|
||||
runc --criu "$CRIU" checkpoint --parent-path ./parent-dir --image-path ./image-dir test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# after checkpoint busybox is no longer running
|
||||
runc state test_busybox
|
||||
[ "$status" -ne 0 ]
|
||||
|
||||
# restore from checkpoint
|
||||
(
|
||||
runc --criu "$CRIU" restore --image-path ./image-dir test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
) &
|
||||
|
||||
# check state
|
||||
wait_for_container 15 1 test_busybox
|
||||
|
||||
# busybox should be back up and running
|
||||
runc state test_busybox
|
||||
[ "$status" -eq 0 ]
|
||||
[[ "${output}" == *"running"* ]]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue