new-api: execute a process inside an existing container
A new constructor function (like nsenter) is added in this patch. This function gets its arguments from environment variables, and its behaviour doesn't depend on command-line arguments. A program which calls factory.StartInitialization() must import the nsenter package. It looks ugly, but I don't know of another way to enter a container (CT) from Go code. Signed-off-by: Andrey Vagin <avagin@openvz.org>
This commit is contained in:
parent
11ce56a9e0
commit
d572094b75
|
@ -41,12 +41,7 @@ func (c *linuxContainer) RunState() (configs.RunState, error) {
|
||||||
// return Running if the init process is alive
|
// return Running if the init process is alive
|
||||||
err := syscall.Kill(c.state.InitPid, 0)
|
err := syscall.Kill(c.state.InitPid, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errn, y := err.(syscall.Errno)
|
if err == syscall.ESRCH {
|
||||||
if !y {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if errn == syscall.ESRCH {
|
|
||||||
return configs.Destroyed, nil
|
return configs.Destroyed, nil
|
||||||
}
|
}
|
||||||
return 0, err
|
return 0, err
|
||||||
|
@ -82,18 +77,32 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) {
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) StartProcess(pconfig *ProcessConfig) (int, error) {
|
func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) {
|
||||||
state, err := c.RunState()
|
state, err := c.RunState()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return -1, err
|
return -1, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if state != configs.Destroyed {
|
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
|
||||||
glog.Info("start new container process")
|
cmd.Stdin = config.Stdin
|
||||||
panic("not implemented")
|
cmd.Stdout = config.Stdout
|
||||||
|
cmd.Stderr = config.Stderr
|
||||||
|
|
||||||
|
cmd.Env = config.Env
|
||||||
|
cmd.Dir = c.config.RootFs
|
||||||
|
|
||||||
|
if cmd.SysProcAttr == nil {
|
||||||
|
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := c.startInitProcess(pconfig); err != nil {
|
cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
|
||||||
|
|
||||||
|
if state != configs.Destroyed {
|
||||||
|
glog.Info("start new container process")
|
||||||
|
return namespaces.ExecIn(config.Args, config.Env, cmd, c.config, c.state)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := c.startInitProcess(cmd, config); err != nil {
|
||||||
return -1, err
|
return -1, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,21 +132,8 @@ func (c *linuxContainer) updateStateFile() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *linuxContainer) startInitProcess(config *ProcessConfig) error {
|
func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) error {
|
||||||
cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...)
|
|
||||||
cmd.Stdin = config.Stdin
|
|
||||||
cmd.Stdout = config.Stdout
|
|
||||||
cmd.Stderr = config.Stderr
|
|
||||||
|
|
||||||
cmd.Env = config.Env
|
|
||||||
cmd.Dir = c.config.RootFs
|
|
||||||
|
|
||||||
if cmd.SysProcAttr == nil {
|
|
||||||
cmd.SysProcAttr = &syscall.SysProcAttr{}
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd.SysProcAttr.Cloneflags = uintptr(namespaces.GetNamespaceFlags(c.config.Namespaces))
|
cmd.SysProcAttr.Cloneflags = uintptr(namespaces.GetNamespaceFlags(c.config.Namespaces))
|
||||||
cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL
|
|
||||||
|
|
||||||
err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state)
|
err := namespaces.Exec(config.Args, config.Env, cmd, c.config, c.state)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -167,5 +167,10 @@ func (l *linuxFactory) loadContainerState(root string) (*configs.State, error) {
|
||||||
func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
func (f *linuxFactory) StartInitialization(pipefd uintptr) (err error) {
|
||||||
pipe := os.NewFile(uintptr(pipefd), "pipe")
|
pipe := os.NewFile(uintptr(pipefd), "pipe")
|
||||||
|
|
||||||
|
pid := os.Getenv("_LIBCONTAINER_INITPID")
|
||||||
|
if pid != "" {
|
||||||
|
return namespaces.InitIn(pipe)
|
||||||
|
}
|
||||||
|
|
||||||
return namespaces.Init(pipe)
|
return namespaces.Init(pipe)
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,12 +5,9 @@ package namespaces
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"github.com/docker/libcontainer/apparmor"
|
"github.com/docker/libcontainer/apparmor"
|
||||||
"github.com/docker/libcontainer/cgroups"
|
"github.com/docker/libcontainer/cgroups"
|
||||||
|
@ -19,27 +16,10 @@ import (
|
||||||
"github.com/docker/libcontainer/system"
|
"github.com/docker/libcontainer/system"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ExecIn reexec's the initPath with the argv 0 rewrite to "nsenter" so that it is able to run the
|
// ExecIn reexec's cmd with _LIBCONTAINER_INITPID=PID so that it is able to run the
|
||||||
// setns code in a single-threaded environment, joining the existing container's namespaces.
|
// setns code in a single-threaded environment, joining the existing container's namespaces.
|
||||||
func ExecIn(container *configs.Config, state *configs.State, userArgs []string, initPath, action string,
|
func ExecIn(args []string, env []string, cmd *exec.Cmd, container *configs.Config, state *configs.State) (int, error) {
|
||||||
stdin io.Reader, stdout, stderr io.Writer, console string, startCallback func(*exec.Cmd)) (int, error) {
|
var err error
|
||||||
|
|
||||||
args := []string{fmt.Sprintf("nsenter-%s", action), "--nspid", strconv.Itoa(state.InitPid)}
|
|
||||||
|
|
||||||
if console != "" {
|
|
||||||
args = append(args, "--console", console)
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd := &exec.Cmd{
|
|
||||||
Path: initPath,
|
|
||||||
Args: append(args, append([]string{"--"}, userArgs...)...),
|
|
||||||
}
|
|
||||||
|
|
||||||
if filepath.Base(initPath) == initPath {
|
|
||||||
if lp, err := exec.LookPath(initPath); err == nil {
|
|
||||||
cmd.Path = lp
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
parent, child, err := newInitPipe()
|
parent, child, err := newInitPipe()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -47,13 +27,8 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string,
|
||||||
}
|
}
|
||||||
defer parent.Close()
|
defer parent.Close()
|
||||||
|
|
||||||
// Note: these are only used in non-tty mode
|
|
||||||
// if there is a tty for the container it will be opened within the namespace and the
|
|
||||||
// fds will be duped to stdin, stdout, and stderr
|
|
||||||
cmd.Stdin = stdin
|
|
||||||
cmd.Stdout = stdout
|
|
||||||
cmd.Stderr = stderr
|
|
||||||
cmd.ExtraFiles = []*os.File{child}
|
cmd.ExtraFiles = []*os.File{child}
|
||||||
|
cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", state.InitPid))
|
||||||
|
|
||||||
if err := cmd.Start(); err != nil {
|
if err := cmd.Start(); err != nil {
|
||||||
child.Close()
|
child.Close()
|
||||||
|
@ -68,6 +43,20 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string,
|
||||||
return -1, terr
|
return -1, terr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
encoder := json.NewEncoder(parent)
|
||||||
|
|
||||||
|
if err := encoder.Encode(container); err != nil {
|
||||||
|
return terminate(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
process := processArgs{
|
||||||
|
Env: append(env[0:], container.Env...),
|
||||||
|
Args: args,
|
||||||
|
}
|
||||||
|
if err := encoder.Encode(process); err != nil {
|
||||||
|
return terminate(err)
|
||||||
|
}
|
||||||
|
|
||||||
// Enter cgroups.
|
// Enter cgroups.
|
||||||
if err := EnterCgroups(state, cmd.Process.Pid); err != nil {
|
if err := EnterCgroups(state, cmd.Process.Pid); err != nil {
|
||||||
return terminate(err)
|
return terminate(err)
|
||||||
|
@ -77,21 +66,54 @@ func ExecIn(container *configs.Config, state *configs.State, userArgs []string,
|
||||||
return terminate(err)
|
return terminate(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if startCallback != nil {
|
return cmd.Process.Pid, nil
|
||||||
startCallback(cmd)
|
}
|
||||||
|
|
||||||
|
// InitIn finalizes entering a container and executes the specified command.
|
||||||
|
func InitIn(pipe *os.File) (err error) {
|
||||||
|
defer func() {
|
||||||
|
// if we have an error during the initialization of the container's init then send it back to the
|
||||||
|
// parent process in the form of an initError.
|
||||||
|
if err != nil {
|
||||||
|
// ensure that any data sent from the parent is consumed so it doesn't
|
||||||
|
// receive ECONNRESET when the child writes to the pipe.
|
||||||
|
ioutil.ReadAll(pipe)
|
||||||
|
if err := json.NewEncoder(pipe).Encode(initError{
|
||||||
|
Message: err.Error(),
|
||||||
|
}); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// ensure that this pipe is always closed
|
||||||
|
pipe.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
decoder := json.NewDecoder(pipe)
|
||||||
|
|
||||||
|
var container *configs.Config
|
||||||
|
if err := decoder.Decode(&container); err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := cmd.Wait(); err != nil {
|
var process *processArgs
|
||||||
if _, ok := err.(*exec.ExitError); !ok {
|
if err := decoder.Decode(&process); err != nil {
|
||||||
return -1, err
|
return err
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
|
|
||||||
|
if err := FinalizeSetns(container); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := system.Execv(process.Args[0], process.Args[0:], process.Env); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
panic("unreachable")
|
||||||
}
|
}
|
||||||
|
|
||||||
// FinalizeSetns expects that the setns calls have been set up and that it has joined an
|
// FinalizeSetns expects that the setns calls have been set up and that it has joined an
|
||||||
// existing namespace
|
// existing namespace
|
||||||
func FinalizeSetns(container *configs.Config, args []string) error {
|
func FinalizeSetns(container *configs.Config) error {
|
||||||
// clear the current processes env and replace it with the environment defined on the container
|
// clear the current processes env and replace it with the environment defined on the container
|
||||||
if err := LoadContainerEnvironment(container); err != nil {
|
if err := LoadContainerEnvironment(container); err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -111,11 +133,7 @@ func FinalizeSetns(container *configs.Config, args []string) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
|
return nil
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
panic("unreachable")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func EnterCgroups(state *configs.State, pid int) error {
|
func EnterCgroups(state *configs.State, pid int) error {
|
||||||
|
|
|
@ -5,6 +5,7 @@ package nsenter
|
||||||
/*
|
/*
|
||||||
__attribute__((constructor)) init() {
|
__attribute__((constructor)) init() {
|
||||||
nsenter();
|
nsenter();
|
||||||
|
nsexec();
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
import "C"
|
import "C"
|
||||||
|
|
|
@ -0,0 +1,114 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <linux/limits.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <signal.h>
|
||||||
|
|
||||||
|
// Use raw setns syscall for versions of glibc that don't include it (namely glibc-2.12)
|
||||||
|
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <sched.h>
|
||||||
|
#include "syscall.h"
|
||||||
|
#ifdef SYS_setns
|
||||||
|
int setns(int fd, int nstype)
|
||||||
|
{
|
||||||
|
return syscall(SYS_setns, fd, nstype);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * nsexec joins the namespaces of an already-running container init process.
 *
 * The target is taken from the _LIBCONTAINER_INITPID environment variable:
 *   - if the variable is not set, the function returns immediately and the
 *     program continues as usual;
 *   - otherwise the value must be a canonical decimal PID; the process then
 *     joins the ipc, uts, net, pid and mnt namespaces found under
 *     /proc/<pid>/ns (entries that do not exist are skipped) and forks.
 *
 * After the fork the child returns to the caller, while the parent only waits
 * for the child and mirrors its exit status (or re-raises its fatal signal).
 *
 * Any failure is reported on stderr and terminates the process with status 1.
 */
void nsexec(void)
{
	static const char *const namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
	const int num = sizeof(namespaces) / sizeof(namespaces[0]);
	char buf[PATH_MAX], *val;
	int child, i, tfd;
	pid_t pid;

	val = getenv("_LIBCONTAINER_INITPID");
	if (val == NULL)
		return;

	/*
	 * Round-trip the value through atoi/snprintf: anything that is not a
	 * plain decimal integer (trailing junk, leading spaces, ...) will not
	 * compare equal and is rejected.
	 */
	pid = atoi(val);
	snprintf(buf, sizeof(buf), "%d", (int)pid);
	if (strcmp(val, buf)) {
		fprintf(stderr, "Unable to parse _LIBCONTAINER_INITPID\n");
		exit(1);
	}

	/* Check that the specified process exists */
	snprintf(buf, sizeof(buf), "/proc/%d/ns", (int)pid);
	tfd = open(buf, O_DIRECTORY | O_RDONLY);
	if (tfd == -1) {
		fprintf(stderr,
			"nsenter: Failed to open \"%s\" with error: \"%s\"\n",
			buf, strerror(errno));
		exit(1);
	}

	for (i = 0; i < num; i++) {
		struct stat st;
		int fd;

		/* Symlinks on all namespaces exist for dead processes, but they can't be opened */
		if (fstatat(tfd, namespaces[i], &st, AT_SYMLINK_NOFOLLOW) == -1) {
			/* Ignore nonexistent namespaces. */
			if (errno == ENOENT)
				continue;
			/* Any other stat error is left for openat() below to report. */
		}

		fd = openat(tfd, namespaces[i], O_RDONLY);
		if (fd == -1) {
			fprintf(stderr,
				"nsenter: Failed to open ns file \"%s\" for ns \"%s\" with error: \"%s\"\n",
				buf, namespaces[i], strerror(errno));
			exit(1);
		}

		/* Set the namespace. */
		if (setns(fd, 0) == -1) {
			fprintf(stderr,
				"nsenter: Failed to setns for \"%s\" with error: \"%s\"\n",
				namespaces[i], strerror(errno));
			exit(1);
		}
		close(fd);
	}

	/*
	 * The namespace directory is no longer needed. Close it so the
	 * descriptor is not inherited by the forked child (and by whatever
	 * that child goes on to run).
	 */
	close(tfd);

	/* We must fork to actually enter the PID namespace. */
	child = fork();
	if (child < 0) {
		fprintf(stderr, "Unable to fork: %s\n", strerror(errno));
		exit(1);
	}

	if (child == 0) {
		/* Finish executing, let the Go runtime take over. */
		return;
	} else {
		/* Parent, wait for the child. */
		int status = 0;

		if (waitpid(child, &status, 0) == -1) {
			fprintf(stderr,
				"nsenter: Failed to waitpid with error: \"%s\"\n",
				strerror(errno));
			exit(1);
		}

		/* Forward the child's exit code or re-send its death signal. */
		if (WIFEXITED(status)) {
			exit(WEXITSTATUS(status));
		} else if (WIFSIGNALED(status)) {
			kill(getpid(), WTERMSIG(status));
		}

		/* Reached only if the re-sent signal did not terminate us. */
		exit(1);
	}

	return;
}
|
|
@ -5,6 +5,7 @@ import (
|
||||||
|
|
||||||
"github.com/codegangsta/cli"
|
"github.com/codegangsta/cli"
|
||||||
"github.com/docker/libcontainer"
|
"github.com/docker/libcontainer"
|
||||||
|
_ "github.com/docker/libcontainer/namespaces/nsenter"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
Loading…
Reference in New Issue