runc checkpoint: fix --status-fd to accept fd
1. The command `runc checkpoint --lazy-server --status-fd $FD` actually accepts a file name as an $FD. Make it accept a file descriptor, like its name implies and the documentation states. In addition, since runc itself does not use the result of CRIU status fd, remove the code which relays it, and pass the FD directly to CRIU. Note 1: runc should close this file descriptor itself after passing it to criu, otherwise whoever waits on it might wait forever. Note 2: due to the way criu swrk consumes the fd (it reopens /proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has started. There is no good way to know when criu swrk has reopened the fd, so we assume that as soon as we have received something back, the fd is already reopened. 2. Since the meaning of --status-fd has changed, the test case using it needs to be fixed as well. Modify the lazy migration test to remove "sleep 2", actually waiting for the the lazy page server to be ready. While at it, - remove the double fork (using shell's background process is sufficient here); - check the exit code for "runc checkpoint" and "criu lazy-pages"; - remove the check for no errors in dump.log after restore, as we are already checking its exit code. [v2: properly close status fd after spawning criu] [v3: move close status fd to after the first read] Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
This commit is contained in:
parent
2c8d668eee
commit
ca1d135bd4
|
@ -34,7 +34,7 @@ checkpointed.`,
|
|||
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
||||
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
||||
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
|
||||
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
||||
cli.IntFlag{Name: "status-fd", Value: -1, Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
||||
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
||||
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
||||
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
||||
|
|
|
@ -881,26 +881,6 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func waitForCriuLazyServer(r *os.File, status string) error {
|
||||
|
||||
data := make([]byte, 1)
|
||||
_, err := r.Read(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = fd.Write(data)
|
||||
fd.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) {
|
||||
// CRIU will evaluate a configuration starting with release 3.11.
|
||||
// Settings in the configuration file will overwrite RPC settings.
|
||||
|
@ -1079,27 +1059,34 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
|
|||
} else {
|
||||
t = criurpc.CriuReqType_DUMP
|
||||
}
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
Opts: &rpcOpts,
|
||||
}
|
||||
|
||||
if criuOpts.LazyPages {
|
||||
// lazy migration requested; check if criu supports it
|
||||
feat := criurpc.CriuFeatures{
|
||||
LazyPages: proto.Bool(true),
|
||||
}
|
||||
|
||||
if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
statusRead, statusWrite, err := os.Pipe()
|
||||
if err != nil {
|
||||
return err
|
||||
if fd := criuOpts.StatusFd; fd != -1 {
|
||||
// check that the FD is valid
|
||||
flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid --status-fd argument %d: %w", fd, err)
|
||||
}
|
||||
// and writable
|
||||
if flags&unix.O_WRONLY == 0 {
|
||||
return fmt.Errorf("invalid --status-fd argument %d: not writable", fd)
|
||||
}
|
||||
|
||||
rpcOpts.StatusFd = proto.Int32(int32(fd))
|
||||
}
|
||||
rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd()))
|
||||
go waitForCriuLazyServer(statusRead, criuOpts.StatusFd)
|
||||
}
|
||||
|
||||
req := &criurpc.CriuReq{
|
||||
Type: &t,
|
||||
Opts: &rpcOpts,
|
||||
}
|
||||
|
||||
// no need to dump all this in pre-dump
|
||||
|
@ -1583,6 +1570,15 @@ func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *
|
|||
oob := make([]byte, 4096)
|
||||
for true {
|
||||
n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob)
|
||||
if req.Opts != nil && req.Opts.StatusFd != nil {
|
||||
// Close status_fd as soon as we got something back from criu,
|
||||
// assuming it has consumed (reopened) it by this time.
|
||||
// Otherwise it will might be left open forever and whoever
|
||||
// is waiting on it will wait forever.
|
||||
fd := int(*req.Opts.StatusFd)
|
||||
_ = unix.Close(fd)
|
||||
req.Opts.StatusFd = nil
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -36,5 +36,5 @@ type CriuOpts struct {
|
|||
EmptyNs uint32 // don't c/r properties for namespace from this mask
|
||||
AutoDedup bool // auto deduplication for incremental dumps
|
||||
LazyPages bool // restore memory pages lazily using userfaultfd
|
||||
StatusFd string // fd for feedback when lazy server is ready
|
||||
StatusFd int // fd for feedback when lazy server is ready
|
||||
}
|
||||
|
|
|
@ -137,6 +137,6 @@ func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
|
|||
PreDump: context.Bool("pre-dump"),
|
||||
AutoDedup: context.Bool("auto-dedup"),
|
||||
LazyPages: context.Bool("lazy-pages"),
|
||||
StatusFd: context.String("status-fd"),
|
||||
StatusFd: context.Int("status-fd"),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -136,11 +136,6 @@ function simple_cr() {
|
|||
# This should not be necessary: https://github.com/checkpoint-restore/criu/issues/575
|
||||
sed -i 's;"readonly": true;"readonly": false;' config.json
|
||||
|
||||
# For lazy migration we need to know when CRIU is ready to serve
|
||||
# the memory pages via TCP.
|
||||
lazy_pipe=`mktemp -u /tmp/lazy-pipe-XXXXXX`
|
||||
mkfifo $lazy_pipe
|
||||
|
||||
# TCP port for lazy migration
|
||||
port=27277
|
||||
|
||||
|
@ -153,34 +148,31 @@ function simple_cr() {
|
|||
# checkpoint the running container
|
||||
mkdir image-dir
|
||||
mkdir work-dir
|
||||
# Double fork taken from helpers.bats
|
||||
# We need to start 'runc checkpoint --lazy-pages' in the background,
|
||||
# so we double fork in the shell.
|
||||
(runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_pipe} --work-path ./work-dir --image-path ./image-dir test_busybox & ) &
|
||||
# Sleeping here. This is ugly, but not sure how else to handle it.
|
||||
# The return code of the in the background running runc is needed, if
|
||||
# there is some basic error. If the lazy migration is ready can
|
||||
# be handled by $lazy_pipe. Which probably will always be ready
|
||||
# after sleeping two seconds.
|
||||
sleep 2
|
||||
|
||||
# For lazy migration we need to know when CRIU is ready to serve
|
||||
# the memory pages via TCP.
|
||||
exec 72<> <(:)
|
||||
exec 70</proc/self/fd/72 71>/proc/self/fd/72
|
||||
exec 72>&-
|
||||
|
||||
__runc --criu "$CRIU" checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd 71 --work-path ./work-dir --image-path ./image-dir test_busybox &
|
||||
cpt_pid=$!
|
||||
|
||||
# wait for lazy page server to be ready
|
||||
out=$(timeout 2 dd if=/proc/self/fd/70 bs=1 count=1 2>/dev/null | od)
|
||||
exec 71>&-
|
||||
out=$(echo $out) # rm newlines
|
||||
# show errors if there are any before we fail
|
||||
grep -B5 Error ./work-dir/dump.log || true
|
||||
# expecting \0 which od prints as
|
||||
[ "$out" = "0000000 000000 0000001" ]
|
||||
|
||||
# Check if inventory.img was written
|
||||
[ -e image-dir/inventory.img ]
|
||||
# If the inventory.img exists criu checkpointed some things, let's see
|
||||
# if there were other errors in the log file.
|
||||
run grep -B 5 Error ./work-dir/dump.log -q
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
# This will block until CRIU is ready to serve memory pages
|
||||
cat $lazy_pipe
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
unlink $lazy_pipe
|
||||
|
||||
# Double fork taken from helpers.bats
|
||||
# We need to start 'criu lazy-pages' in the background,
|
||||
# so we double fork in the shell.
|
||||
# Start CRIU in lazy-daemon mode
|
||||
$(${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &) &
|
||||
${CRIU} lazy-pages --page-server --address 127.0.0.1 --port ${port} -D image-dir &
|
||||
lp_pid=$!
|
||||
|
||||
# Restore lazily from checkpoint.
|
||||
# The restored container needs a different name as the checkpointed
|
||||
|
@ -190,8 +182,6 @@ function simple_cr() {
|
|||
# continue to run if the migration failed at some point.
|
||||
__runc --criu "$CRIU" restore -d --work-path ./image-dir --image-path ./image-dir --lazy-pages test_busybox_restore <&60 >&51 2>&51
|
||||
[ $? -eq 0 ]
|
||||
run grep -B 5 Error ./work-dir/dump.log -q
|
||||
[ "$status" -eq 1 ]
|
||||
|
||||
# busybox should be back up and running
|
||||
testcontainer test_busybox_restore running
|
||||
|
@ -200,6 +190,12 @@ function simple_cr() {
|
|||
[ "$status" -eq 0 ]
|
||||
[[ ${output} == "ok" ]]
|
||||
|
||||
wait $cpt_pid
|
||||
[ $? -eq 0 ]
|
||||
|
||||
wait $lp_pid
|
||||
[ $? -eq 0 ]
|
||||
|
||||
check_pipes
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue