2015-06-30 07:49:13 +08:00
|
|
|
// +build linux
|
|
|
|
|
2015-06-22 10:31:12 +08:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"os"
|
|
|
|
|
|
|
|
"github.com/opencontainers/runc/libcontainer"
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in a user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost the same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in a user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to execute runc as the root in userns, without mounting a writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
"github.com/opencontainers/runc/libcontainer/system"
|
|
|
|
"github.com/sirupsen/logrus"
|
2016-06-07 02:45:46 +08:00
|
|
|
"github.com/urfave/cli"
|
2015-06-22 10:31:12 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
var restoreCommand = cli.Command{
|
|
|
|
Name: "restore",
|
|
|
|
Usage: "restore a container from a previous checkpoint",
|
2016-02-11 01:30:06 +08:00
|
|
|
ArgsUsage: `<container-id>
|
|
|
|
|
|
|
|
Where "<container-id>" is the name for the instance of the container to be
|
|
|
|
restored.`,
|
|
|
|
Description: `Restores the saved state of the container instance that was previously saved
|
|
|
|
using the runc checkpoint command.`,
|
2015-06-22 10:31:12 +08:00
|
|
|
Flags: []cli.Flag{
|
2017-03-02 16:02:15 +08:00
|
|
|
cli.StringFlag{
|
|
|
|
Name: "console-socket",
|
|
|
|
Value: "",
|
|
|
|
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
|
|
|
|
},
|
2015-09-02 00:32:29 +08:00
|
|
|
cli.StringFlag{
|
|
|
|
Name: "image-path",
|
|
|
|
Value: "",
|
|
|
|
Usage: "path to criu image files for restoring",
|
|
|
|
},
|
|
|
|
cli.StringFlag{
|
|
|
|
Name: "work-path",
|
|
|
|
Value: "",
|
|
|
|
Usage: "path for saving work files and logs",
|
|
|
|
},
|
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "tcp-established",
|
|
|
|
Usage: "allow open tcp connections",
|
|
|
|
},
|
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "ext-unix-sk",
|
|
|
|
Usage: "allow external unix sockets",
|
|
|
|
},
|
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "shell-job",
|
|
|
|
Usage: "allow shell jobs",
|
|
|
|
},
|
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "file-locks",
|
|
|
|
Usage: "handle file locks, for safety",
|
|
|
|
},
|
2015-08-06 23:14:59 +08:00
|
|
|
cli.StringFlag{
|
|
|
|
Name: "manage-cgroups-mode",
|
|
|
|
Value: "",
|
2016-05-28 13:33:57 +08:00
|
|
|
Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'",
|
2015-08-06 23:14:59 +08:00
|
|
|
},
|
2015-09-02 00:32:29 +08:00
|
|
|
cli.StringFlag{
|
2015-10-28 03:23:44 +08:00
|
|
|
Name: "bundle, b",
|
|
|
|
Value: "",
|
|
|
|
Usage: "path to the root of the bundle directory",
|
2015-09-02 00:32:29 +08:00
|
|
|
},
|
2016-02-02 07:00:09 +08:00
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "detach,d",
|
|
|
|
Usage: "detach from the container's process",
|
|
|
|
},
|
|
|
|
cli.StringFlag{
|
|
|
|
Name: "pid-file",
|
|
|
|
Value: "",
|
|
|
|
Usage: "specify the file to write the process id to",
|
|
|
|
},
|
2016-03-15 06:46:08 +08:00
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "no-subreaper",
|
|
|
|
Usage: "disable the use of the subreaper used to reap reparented processes",
|
|
|
|
},
|
2016-03-31 02:12:03 +08:00
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "no-pivot",
|
|
|
|
Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk",
|
|
|
|
},
|
2016-06-08 01:22:40 +08:00
|
|
|
cli.StringSliceFlag{
|
|
|
|
Name: "empty-ns",
|
2017-04-21 10:41:02 +08:00
|
|
|
Usage: "create a namespace, but don't restore its properties",
|
2016-06-08 01:22:40 +08:00
|
|
|
},
|
2017-08-18 22:19:21 +08:00
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "auto-dedup",
|
|
|
|
Usage: "enable auto deduplication of memory images",
|
|
|
|
},
|
2017-07-24 23:43:14 +08:00
|
|
|
cli.BoolFlag{
|
|
|
|
Name: "lazy-pages",
|
|
|
|
Usage: "use userfaultfd to lazily restore memory pages",
|
|
|
|
},
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
2016-05-10 13:58:09 +08:00
|
|
|
Action: func(context *cli.Context) error {
|
2016-10-28 23:43:10 +08:00
|
|
|
if err := checkArgs(context, 1, exactArgs); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-04-23 21:39:42 +08:00
|
|
|
// XXX: Currently this is untested with rootless containers.
|
Disable rootless mode except RootlessCgMgr when executed as the root in userns
This PR decomposes `libcontainer/configs.Config.Rootless bool` into `RootlessEUID bool` and
`RootlessCgroups bool`, so as to make "runc-in-userns" to be more compatible with "rootful" runc.
`RootlessEUID` denotes that runc is being executed as a non-root user (euid != 0) in
the current user namespace. `RootlessEUID` is almost identical to the former `Rootless`
except cgroups stuff.
`RootlessCgroups` denotes that runc is unlikely to have the full access to cgroups.
`RootlessCgroups` is set to false if runc is executed as the root (euid == 0) in the initial namespace.
Otherwise `RootlessCgroups` is set to true.
(Hint: if `RootlessEUID` is true, `RootlessCgroups` becomes true as well)
When runc is executed as the root (euid == 0) in an user namespace (e.g. by Docker-in-LXD, Podman, Usernetes),
`RootlessEUID` is set to false but `RootlessCgroups` is set to true.
So, "runc-in-userns" behaves almost same as "rootful" runc except that cgroups errors are ignored.
This PR does not have any impact on CLI flags and `state.json`.
Note about CLI:
* Now `runc --rootless=(auto|true|false)` CLI flag is only used for setting `RootlessCgroups`.
* Now `runc spec --rootless` is only required when `RootlessEUID` is set to true.
For runc-in-userns, `runc spec` without `--rootless` should work, when sufficient numbers of
UID/GID are mapped.
Note about `$XDG_RUNTIME_DIR` (e.g. `/run/user/1000`):
* `$XDG_RUNTIME_DIR` is ignored if runc is being executed as the root (euid == 0) in the initial namespace, for backward compatibility.
(`/run/runc` is used)
* If runc is executed as the root (euid == 0) in an user namespace, `$XDG_RUNTIME_DIR` is honored if `$USER != "" && $USER != "root"`.
This allows unprivileged users to allow execute runc as the root in userns, without mounting writable `/run/runc`.
Note about `state.json`:
* `rootless` is set to true when `RootlessEUID == true && RootlessCgroups == true`.
Signed-off-by: Akihiro Suda <suda.akihiro@lab.ntt.co.jp>
2018-07-05 14:28:21 +08:00
|
|
|
if os.Geteuid() != 0 || system.RunningInUserNS() {
|
|
|
|
logrus.Warn("runc checkpoint is untested with rootless containers")
|
2016-04-23 21:39:42 +08:00
|
|
|
}
|
|
|
|
|
2017-03-02 15:48:00 +08:00
|
|
|
spec, err := setupSpec(context)
|
2015-06-22 10:31:12 +08:00
|
|
|
if err != nil {
|
2016-05-10 13:58:09 +08:00
|
|
|
return err
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
2017-03-02 15:48:00 +08:00
|
|
|
options := criuOptions(context)
|
2018-08-17 16:03:21 +08:00
|
|
|
if err := setEmptyNsMask(context, options); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-03-02 15:48:00 +08:00
|
|
|
status, err := startContainer(context, spec, CT_ACT_RESTORE, options)
|
2015-06-22 10:31:12 +08:00
|
|
|
if err != nil {
|
2016-05-10 13:58:09 +08:00
|
|
|
return err
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
2017-03-02 15:48:00 +08:00
|
|
|
// exit with the container's exit status so any external supervisor is
|
|
|
|
// notified of the exit with the correct exit status.
|
|
|
|
os.Exit(status)
|
|
|
|
return nil
|
2015-06-22 10:31:12 +08:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
func criuOptions(context *cli.Context) *libcontainer.CriuOpts {
|
|
|
|
imagePath := getCheckpointImagePath(context)
|
2019-03-11 23:49:52 +08:00
|
|
|
if err := os.MkdirAll(imagePath, 0755); err != nil {
|
2015-06-22 10:31:12 +08:00
|
|
|
fatal(err)
|
|
|
|
}
|
|
|
|
return &libcontainer.CriuOpts{
|
|
|
|
ImagesDirectory: imagePath,
|
|
|
|
WorkDirectory: context.String("work-path"),
|
2016-08-24 17:48:56 +08:00
|
|
|
ParentImage: context.String("parent-path"),
|
2015-06-22 10:31:12 +08:00
|
|
|
LeaveRunning: context.Bool("leave-running"),
|
2015-08-22 06:59:43 +08:00
|
|
|
TcpEstablished: context.Bool("tcp-established"),
|
2015-06-22 10:31:12 +08:00
|
|
|
ExternalUnixConnections: context.Bool("ext-unix-sk"),
|
|
|
|
ShellJob: context.Bool("shell-job"),
|
2015-06-27 17:56:24 +08:00
|
|
|
FileLocks: context.Bool("file-locks"),
|
2016-08-24 17:48:56 +08:00
|
|
|
PreDump: context.Bool("pre-dump"),
|
2017-08-18 22:19:21 +08:00
|
|
|
AutoDedup: context.Bool("auto-dedup"),
|
2017-07-24 23:43:14 +08:00
|
|
|
LazyPages: context.Bool("lazy-pages"),
|
runc checkpoint: fix --status-fd to accept fd
1. The command `runc checkpoint --lazy-server --status-fd $FD` actually
accepts a file name as an $FD. Make it accept a file descriptor,
like its name implies and the documentation states.
In addition, since runc itself does not use the result of CRIU status
fd, remove the code which relays it, and pass the FD directly to CRIU.
Note 1: runc should close this file descriptor itself after passing it
to criu, otherwise whoever waits on it might wait forever.
Note 2: due to the way criu swrk consumes the fd (it reopens
/proc/$SENDER_PID/fd/$FD), runc can't close it as soon as criu swrk has
started. There is no good way to know when criu swrk has reopened the
fd, so we assume that as soon as we have received something back, the
fd is already reopened.
2. Since the meaning of --status-fd has changed, the test case using
it needs to be fixed as well.
Modify the lazy migration test to remove "sleep 2", actually waiting
for the the lazy page server to be ready.
While at it,
- remove the double fork (using shell's background process is
sufficient here);
- check the exit code for "runc checkpoint" and "criu lazy-pages";
- remove the check for no errors in dump.log after restore, as we
are already checking its exit code.
[v2: properly close status fd after spawning criu]
[v3: move close status fd to after the first read]
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2020-04-21 17:43:24 +08:00
|
|
|
StatusFd: context.Int("status-fd"),
|
2015-06-22 10:31:12 +08:00
|
|
|
}
|
|
|
|
}
|