2015-06-30 07:49:13 +08:00
|
|
|
// +build linux
|
|
|
|
|
2015-06-22 10:31:12 +08:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2020-05-17 08:20:44 +08:00
|
|
|
"errors"
|
2015-06-22 10:31:12 +08:00
|
|
|
"fmt"
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in an user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in an user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
"os"
|
2015-06-22 10:31:12 +08:00
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
2020-08-10 10:25:53 +08:00
|
|
|
criu "github.com/checkpoint-restore/go-criu/v4/rpc"
|
2015-06-22 10:31:12 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer"
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in an user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in an user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
2016-05-26 02:27:34 +08:00
|
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in an user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in an user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
"github.com/sirupsen/logrus"
|
2016-06-07 02:45:46 +08:00
|
|
|
"github.com/urfave/cli"
|
2017-05-11 23:06:37 +08:00
|
|
|
|
|
|
|
"golang.org/x/sys/unix"
|
2015-06-22 10:31:12 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
var checkpointCommand = cli.Command{
|
|
|
|
Name: "checkpoint",
|
|
|
|
Usage: "checkpoint a running container",
|
2016-02-11 01:30:06 +08:00
|
|
|
ArgsUsage: `<container-id>
|
|
|
|
|
|
|
|
Where "<container-id>" is the name for the instance of the container to be
|
|
|
|
checkpointed.`,
|
|
|
|
Description: `The checkpoint command saves the state of the container instance.`,
|
2015-06-22 10:31:12 +08:00
|
|
|
Flags: []cli.Flag{
|
|
|
|
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
|
|
|
|
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
|
2016-08-24 17:48:56 +08:00
|
|
|
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
|
2015-06-22 10:31:12 +08:00
|
|
|
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
|
|
|
|
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
|
|
|
|
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
|
|
|
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
2017-07-24 23:43:14 +08:00
|
|
|
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
|
runc checkpoint: fix --status-fd to accept fd
1. The command `runc checkpoint --lazy-server --status-fd $FD` actually
accepts a file name as an $FD. Make it accept a file descriptor,
like its name implies and the documentation states.
In addition, since runc itself does not use the result of CRIU status
fd, remove the code which relays it, and pass the FD directly to CRIU.
Note 1: runc should close this file descriptor itself after passing it
to criu, otherwise whoever waits on it might wait forever.
Note 2: due to the way criu swrk consumes the fd (it reopens
/proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has
started. There is no good way to know when criu swrk has reopened the
fd, so we assume that as soon as we have received something back, the
fd is already reopened.
2. Since the meaning of --status-fd has changed, the test case using
it needs to be fixed as well.
Modify the lazy migration test to remove "sleep 2", actually waiting
for the the lazy page server to be ready.
While at it,
- remove the double fork (using shell's background process is
sufficient here);
- check the exit code for "runc checkpoint" and "criu lazy-pages";
- remove the check for no errors in dump.log after restore, as we
are already checking its exit code.
[v2: properly close status fd after spawning criu]
[v3: move close status fd to after the first read]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-21 17:43:24 +08:00
|
|
|
cli.IntFlag{Name: "status-fd", Value: -1, Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
2015-06-22 10:31:12 +08:00
|
|
|
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
2015-06-27 17:56:24 +08:00
|
|
|
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
2016-08-24 17:48:56 +08:00
|
|
|
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
2016-05-28 13:33:57 +08:00
|
|
|
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
|
2017-04-21 10:41:02 +08:00
|
|
|
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"},
|
2017-08-18 22:19:21 +08:00
|
|
|
cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"},
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
2016-05-10 13:58:09 +08:00
|
|
|
Action: func(context *cli.Context) error {
|
2016-10-28 23:43:10 +08:00
|
|
|
if err := checkArgs(context, 1, exactArgs); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-04-23 21:39:42 +08:00
|
|
|
// XXX: Currently this is untested with rootless containers.
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in an user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in an user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to allow execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
if os.Geteuid() != 0 || system.RunningInUserNS() {
|
|
|
|
logrus.Warn("runc checkpoint is untested with rootless containers")
|
2016-04-23 21:39:42 +08:00
|
|
|
}
|
|
|
|
|
2015-06-22 10:31:12 +08:00
|
|
|
container, err := getContainer(context)
|
|
|
|
if err != nil {
|
2016-05-10 13:58:09 +08:00
|
|
|
return err
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
2016-09-25 23:39:23 +08:00
|
|
|
status, err := container.Status()
|
|
|
|
if err != nil {
|
2016-10-19 02:37:18 +08:00
|
|
|
return err
|
2016-09-25 23:39:23 +08:00
|
|
|
}
|
2017-12-07 15:43:56 +08:00
|
|
|
if status == libcontainer.Created || status == libcontainer.Stopped {
|
2020-05-03 07:18:30 +08:00
|
|
|
fatal(fmt.Errorf("Container cannot be checkpointed in %s state", status.String()))
|
2016-09-25 23:39:23 +08:00
|
|
|
}
|
2015-06-22 10:31:12 +08:00
|
|
|
options := criuOptions(context)
|
2020-04-19 09:20:34 +08:00
|
|
|
if !(options.LeaveRunning || options.PreDump) {
|
|
|
|
// destroy container unless we tell CRIU to keep it
|
2020-03-18 01:17:07 +08:00
|
|
|
defer destroy(container)
|
|
|
|
}
|
2015-06-22 10:31:12 +08:00
|
|
|
// these are the mandatory criu options for a container
|
|
|
|
setPageServer(context, options)
|
2015-08-06 23:14:59 +08:00
|
|
|
setManageCgroupsMode(context, options)
|
2016-05-26 02:27:34 +08:00
|
|
|
if err := setEmptyNsMask(context, options); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-10-14 03:14:03 +08:00
|
|
|
return container.Checkpoint(options)
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
func getCheckpointImagePath(context *cli.Context) string {
|
|
|
|
imagePath := context.String("image-path")
|
|
|
|
if imagePath == "" {
|
|
|
|
imagePath = getDefaultImagePath(context)
|
|
|
|
}
|
|
|
|
return imagePath
|
|
|
|
}
|
|
|
|
|
|
|
|
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
|
|
|
|
// xxx following criu opts are optional
|
|
|
|
// The dump image can be sent to a criu page server
|
|
|
|
if psOpt := context.String("page-server"); psOpt != "" {
|
|
|
|
addressPort := strings.Split(psOpt, ":")
|
|
|
|
if len(addressPort) != 2 {
|
2020-05-17 08:20:44 +08:00
|
|
|
fatal(errors.New("Use --page-server ADDRESS:PORT to specify page server"))
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
2015-08-05 05:44:45 +08:00
|
|
|
portInt, err := strconv.Atoi(addressPort[1])
|
2015-06-22 10:31:12 +08:00
|
|
|
if err != nil {
|
2020-05-17 08:20:44 +08:00
|
|
|
fatal(errors.New("Invalid port number"))
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
|
|
|
options.PageServer = libcontainer.CriuPageServerInfo{
|
|
|
|
Address: addressPort[0],
|
2015-08-05 05:44:45 +08:00
|
|
|
Port: int32(portInt),
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-08-06 23:14:59 +08:00
|
|
|
|
|
|
|
func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) {
|
|
|
|
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
|
|
|
|
switch cgOpt {
|
|
|
|
case "soft":
|
2020-08-10 10:25:53 +08:00
|
|
|
options.ManageCgroupsMode = criu.CriuCgMode_SOFT
|
2015-08-06 23:14:59 +08:00
|
|
|
case "full":
|
2020-08-10 10:25:53 +08:00
|
|
|
options.ManageCgroupsMode = criu.CriuCgMode_FULL
|
2015-08-06 23:14:59 +08:00
|
|
|
case "strict":
|
2020-08-10 10:25:53 +08:00
|
|
|
options.ManageCgroupsMode = criu.CriuCgMode_STRICT
|
2015-08-06 23:14:59 +08:00
|
|
|
default:
|
2020-05-17 08:20:44 +08:00
|
|
|
fatal(errors.New("Invalid manage cgroups mode"))
|
2015-08-06 23:14:59 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-05-26 02:27:34 +08:00
|
|
|
|
2016-12-17 13:01:53 +08:00
|
|
|
var namespaceMapping = map[specs.LinuxNamespaceType]int{
|
2017-05-11 23:06:37 +08:00
|
|
|
specs.NetworkNamespace: unix.CLONE_NEWNET,
|
2016-05-26 02:27:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error {
|
2018-07-11 00:53:34 +08:00
|
|
|
/* Runc doesn't manage network devices and their configuration */
|
|
|
|
nsmask := unix.CLONE_NEWNET
|
2016-05-26 02:27:34 +08:00
|
|
|
|
|
|
|
for _, ns := range context.StringSlice("empty-ns") {
|
2016-12-17 13:01:53 +08:00
|
|
|
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
|
2016-05-26 02:27:34 +08:00
|
|
|
if !exists {
|
|
|
|
return fmt.Errorf("namespace %q is not supported", ns)
|
|
|
|
}
|
|
|
|
nsmask |= f
|
|
|
|
}
|
|
|
|
|
|
|
|
options.EmptyNs = uint32(nsmask)
|
|
|
|
return nil
|
|
|
|
}
|