runc checkpoint: fix --status-fd to accept fd

1. The command `runc checkpoint --lazy-server --status-fd $FD` actually
accepts a file name as an $FD. Make it accept a file descriptor,
like its name implies and the documentation states.

In addition, since runc itself does not use the result of CRIU status
fd, remove the code which relays it, and pass the FD directly to CRIU.

Note 1: runc should close this file descriptor itself after passing it
to criu, otherwise whoever waits on it might wait forever.

Note 2: due to the way criu swrk consumes the fd (it reopens
/proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has
started. There is no good way to know when criu swrk has reopened the
fd, so we assume that as soon as we have received something back, the
fd is already reopened.

2. Since the meaning of --status-fd has changed, the test case using
it needs to be fixed as well.

Modify the lazy migration test to remove "sleep 2", actually waiting
for the the lazy page server to be ready.

While at it,

 - remove the double fork (using shell's background process is
   sufficient here);

 - check the exit code for "runc checkpoint" and "criu lazy-pages";

 - remove the check for no errors in dump.log after restore, as we
   are already checking its exit code.

[v2: properly close status fd after spawning criu]
[v3: move close status fd to after the first read]

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
Kir Kolyshkin 2020-04-21 02:43:24 -07:00
parent 2c8d668eee
commit ca1d135bd4
5 changed files with 56 additions and 64 deletions

View File

@ -34,7 +34,7 @@ checkpointed.`,
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
cli.IntFlag{Name: "status-fd", Value: -1, Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},

View File

@ -881,26 +881,6 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
return nil
}
func waitForCriuLazyServer(r *os.File, status string) error {
data := make([]byte, 1)
_, err := r.Read(data)
if err != nil {
return err
}
fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
if err != nil {
return err
}
_, err = fd.Write(data)
fd.Close()
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
// CRIU will evaluate a configuration starting with release 3.11.
// Settings in the configuration file will overwrite RPC settings.
@ -1079,27 +1059,34 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
} else {
t = criurpc.CriuReqType_DUMP
}
req := &criurpc.CriuReq{
Type: &t,
Opts: &rpcOpts,
}
if criuOpts.LazyPages {
// lazy migration requested; check if criu supports it
feat := criurpc.CriuFeatures{
LazyPages: proto.Bool(true),
}
if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
return err
}
statusRead, statusWrite, err := os.Pipe()
if err != nil {
return err
if fd := criuOpts.StatusFd; fd != -1 {
// check that the FD is valid
flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)
if err != nil {
return fmt.Errorf("invalid --status-fd argument %d: %w", fd, err)
}
// and writable
if flags&unix.O_WRONLY == 0 {
return fmt.Errorf("invalid --status-fd argument %d: not writable", fd)
}
rpcOpts.StatusFd = proto.Int32(int32(fd))
}
rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
}
req := &criurpc.CriuReq{
Type: &t,
Opts: &rpcOpts,
}
// no need to dump all this in pre-dump
@ -1583,6 +1570,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
oob := make([]byte, 4096)
for true {
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
if req.Opts != nil && req.Opts.StatusFd != nil {
// Close status_fd as soon as we got something back from criu,
// assuming it has consumed (reopened) it by this time.
// Otherwise it will might be left open forever and whoever
// is waiting on it will wait forever.
fd := int(*req.Opts.StatusFd)
_ = unix.Close(fd)
req.Opts.StatusFd = nil
}
if err != nil {
return err
}

View File

@ -36,5 +36,5 @@ type CriuOpts struct {
EmptyNs uint32 // don't c/r properties for namespace from this mask
AutoDedup bool // auto deduplication for incremental dumps
LazyPages bool // restore memory pages lazily using userfaultfd
StatusFd string // fd for feedback when lazy server is ready
StatusFd int // fd for feedback when lazy server is ready
}

View File

@ -137,6 +137,6 @@ func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
PreDump: context.Bool("pre-dump"),
AutoDedup: context.Bool("auto-dedup"),
LazyPages: context.Bool("lazy-pages"),
StatusFd: context.String("status-fd"),
StatusFd: context.Int("status-fd"),
}
}

View File

@ -136,11 +136,6 @@ function simple_cr() {
# This should not be necessary: https://github.com/checkpoint-restore/criu/issues/575
sed -i 's;"readonly": true;"readonly": false;' config.json
# For lazy migration we need to know when CRIU is ready to serve
# the memory pages via TCP.
lazy_pipe=`mktemp -u /tmp/lazy-pipe-XXXXXX`
mkfifo $lazy_pipe
# TCP port for lazy migration
port=27277
@ -153,34 +148,31 @@ function simple_cr() {
# checkpoint the running container
mkdir image-dir
mkdir work-dir
# Double fork taken from helpers.bats
# We need to start 'runc checkpoint --lazy-pages' in the background,
# so we double fork in the shell.
(runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_pipe} --work-path ./work-dir --image-path ./image-dir test_busybox & ) &
# Sleeping here. This is ugly, but not sure how else to handle it.
# The return code of the in the background running runc is needed, if
# there is some basic error. If the lazy migration is ready can
# be handled by $lazy_pipe. Which probably will always be ready
# after sleeping two seconds.
sleep 2
# For lazy migration we need to know when CRIU is ready to serve
# the memory pages via TCP.
exec 72<> <(:)
exec 70</proc/self/fd/72 71>/proc/self/fd/72
exec 72>&-
__runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd 71 --work-path ./work-dir --image-path ./image-dir test_busybox &
cpt_pid=$!
# wait for lazy page server to be ready
out=$(timeout 2 dd if=/proc/self/fd/70 bs=1 count=1 2>/dev/null | od)
exec 71>&-
out=$(echo $out) # rm newlines
# show errors if there are any before we fail
grep -B5 Error ./work-dir/dump.log || true
# expecting \0 which od prints as
[ "$out" = "0000000 000000 0000001" ]
# Check if inventory.img was written
[ -e image-dir/inventory.img ]
# If the inventory.img exists criu checkpointed some things, let's see
# if there were other errors in the log file.
run grep -B 5 Error ./work-dir/dump.log -q
[ "$status" -eq 1 ]
# This will block until CRIU is ready to serve memory pages
cat $lazy_pipe
[ "$status" -eq 1 ]
unlink $lazy_pipe
# Double fork taken from helpers.bats
# We need to start 'criu lazy-pages' in the background,
# so we double fork in the shell.
# Start CRIU in lazy-daemon mode
$(${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &) &
${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &
lp_pid=$!
# Restore lazily from checkpoint.
# The restored container needs a different name as the checkpointed
@ -190,8 +182,6 @@ function simple_cr() {
# continue to run if the migration failed at some point.
__runc --criu "$CRIU" restore -d --work-path ./image-dir --image-path ./image-dir --lazy-pages test_busybox_restore <&60 >&51 2>&51
[ $? -eq 0 ]
run grep -B 5 Error ./work-dir/dump.log -q
[ "$status" -eq 1 ]
# busybox should be back up and running
testcontainer test_busybox_restore running
@ -200,6 +190,12 @@ function simple_cr() {
[ "$status" -eq 0 ]
[[ ${output} == "ok" ]]
wait $cpt_pid
[ $? -eq 0 ]
wait $lp_pid
[ $? -eq 0 ]
check_pipes
}