From d914bf73473868501bb744023c76be798cb6461d Mon Sep 17 00:00:00 2001 From: "Daniel, Dao Quang Minh" Date: Sat, 17 Oct 2015 15:14:26 +0000 Subject: [PATCH 1/2] setns: add bootstrap data Add bootstrap data to the setns process. If we have any bootstrap data, copy it to the bootstrap process (i.e. nsexec) using the sync pipe. This will allow us to eventually replace environment variable usage with more structured data to set up namespaces, write uid/gid maps, setgroups, etc. Signed-off-by: Daniel, Dao Quang Minh --- libcontainer/container_linux.go | 6 +++--- libcontainer/process_linux.go | 30 ++++++++++++++++++------------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 912673a3..0b7fdeb3 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -218,7 +218,7 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces return nil, newSystemError(err) } if !doInit { - return c.newSetnsProcess(p, cmd, parentPipe, childPipe), nil + return c.newSetnsProcess(p, cmd, parentPipe, childPipe) } return c.newInitProcess(p, cmd, parentPipe, childPipe) } @@ -273,7 +273,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c }, nil } -func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) *setnsProcess { +func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) { cmd.Env = append(cmd.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()), "_LIBCONTAINER_INITTYPE=setns", @@ -289,7 +289,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, parentPipe: parentPipe, config: c.newInitConfig(p), process: p, - } + }, nil } func (c *linuxContainer) newInitConfig(process *Process) *initConfig { diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 
4d17cbc5..f27b6cf4 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -41,13 +41,14 @@ type parentProcess interface { } type setnsProcess struct { - cmd *exec.Cmd - parentPipe *os.File - childPipe *os.File - cgroupPaths map[string]string - config *initConfig - fds []string - process *Process + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + cgroupPaths map[string]string + config *initConfig + fds []string + process *Process + bootstrapData io.Reader } func (p *setnsProcess) startTime() (string, error) { @@ -64,6 +65,16 @@ func (p *setnsProcess) signal(sig os.Signal) error { func (p *setnsProcess) start() (err error) { defer p.parentPipe.Close() + err = p.cmd.Start() + p.childPipe.Close() + if err != nil { + return newSystemError(err) + } + if p.bootstrapData != nil { + if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { + return newSystemError(err) + } + } if err = p.execSetns(); err != nil { return newSystemError(err) } @@ -96,11 +107,6 @@ func (p *setnsProcess) start() (err error) { // before the go runtime boots, we wait on the process to die and receive the child's pid // over the provided pipe. func (p *setnsProcess) execSetns() error { - err := p.cmd.Start() - p.childPipe.Close() - if err != nil { - return newSystemError(err) - } status, err := p.cmd.Process.Wait() if err != nil { p.cmd.Wait() From 7d423cb7a11ac85427dfe740f2b7cddbfbb68f95 Mon Sep 17 00:00:00 2001 From: "Daniel, Dao Quang Minh" Date: Sat, 17 Oct 2015 15:35:36 +0000 Subject: [PATCH 2/2] setns: replace env with netlink for bootstrap data Replace passing of the pid and console path via environment variables with passing them in a netlink message over an established pipe. This change requires us to set _LIBCONTAINER_INITTYPE and _LIBCONTAINER_INITPIPE in the environment of the bootstrap process, as we only send the bootstrap data for the setns process right now. 
When the init and setns bootstrap processes are unified (i.e., init uses nsexec instead of Go to clone the new process), we can remove _LIBCONTAINER_INITTYPE. Note: - we read the nlmsghdr first, before reading the content, so we can get the total length of the payload and allocate the buffer properly instead of allocating one large buffer. - we check the number of bytes read against the wanted number. It's an error if we fail to read the desired number of bytes from the pipe into the buffer. Signed-off-by: Daniel, Dao Quang Minh --- libcontainer/container_linux.go | 50 ++++++++++---- libcontainer/message_linux.go | 60 +++++++++++++++++ libcontainer/nsenter/nsenter_test.go | 98 +++++++++++++++++++++------- libcontainer/nsenter/nsexec.c | 96 +++++++++++++++++++++------ 4 files changed, 248 insertions(+), 56 deletions(-) create mode 100644 libcontainer/message_linux.go diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 0b7fdeb3..82476ed9 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -3,8 +3,10 @@ package libcontainer import ( + "bytes" "encoding/json" "fmt" + "io" "io/ioutil" "os" "os/exec" @@ -19,6 +21,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/criurpc" + "github.com/vishvananda/netlink/nl" ) const stdioFdCount = 3 @@ -274,21 +277,22 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c } func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe *os.File) (*setnsProcess, error) { - cmd.Env = append(cmd.Env, - fmt.Sprintf("_LIBCONTAINER_INITPID=%d", c.initProcess.pid()), - "_LIBCONTAINER_INITTYPE=setns", - ) - if p.consolePath != "" { - cmd.Env = append(cmd.Env, "_LIBCONTAINER_CONSOLE_PATH="+p.consolePath) + cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE=setns") + // for setns process, we dont have to set cloneflags as the process namespaces + // 
will only be set via setns syscall + data, err := c.bootstrapData(0, c.initProcess.pid(), p.consolePath) + if err != nil { + return nil, err } // TODO: set on container for process management return &setnsProcess{ - cmd: cmd, - cgroupPaths: c.cgroupManager.GetPaths(), - childPipe: childPipe, - parentPipe: parentPipe, - config: c.newInitConfig(p), - process: p, + cmd: cmd, + cgroupPaths: c.cgroupManager.GetPaths(), + childPipe: childPipe, + parentPipe: parentPipe, + config: c.newInitConfig(p), + process: p, + bootstrapData: data, }, nil } @@ -1021,3 +1025,25 @@ func (c *linuxContainer) currentState() (*State, error) { } return state, nil } + +// bootstrapData encodes the necessary data in netlink binary format as a io.Reader. +// Consumer can write the data to a bootstrap program such as one that uses +// nsenter package to bootstrap the container's init process correctly, i.e. with +// correct namespaces, uid/gid mapping etc. +func (c *linuxContainer) bootstrapData(cloneFlags uintptr, pid int, consolePath string) (io.Reader, error) { + // create the netlink message + r := nl.NewNetlinkRequest(int(InitMsg), 0) + // write pid + r.AddData(&Int32msg{ + Type: PidAttr, + Value: uint32(pid), + }) + // write console path + if consolePath != "" { + r.AddData(&Bytemsg{ + Type: ConsolePathAttr, + Value: []byte(consolePath), + }) + } + return bytes.NewReader(r.Serialize()), nil +} diff --git a/libcontainer/message_linux.go b/libcontainer/message_linux.go new file mode 100644 index 00000000..0e95e3b0 --- /dev/null +++ b/libcontainer/message_linux.go @@ -0,0 +1,60 @@ +// +build linux + +package libcontainer + +import ( + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// list of known message types we want to send to bootstrap program +// The number is randomly chosen to not conflict with known netlink types +const ( + InitMsg uint16 = 62000 + PidAttr uint16 = 27281 + ConsolePathAttr uint16 = 27282 +) + +type Int32msg struct { + Type uint16 + Value uint32 +} + +// 
int32msg has the following representation +// | nlattr len | nlattr type | +// | uint32 value | +func (msg *Int32msg) Serialize() []byte { + buf := make([]byte, msg.Len()) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(msg.Len())) + native.PutUint16(buf[2:4], msg.Type) + native.PutUint32(buf[4:8], msg.Value) + return buf +} + +func (msg *Int32msg) Len() int { + return syscall.NLA_HDRLEN + 4 +} + +// bytemsg has the following representation +// | nlattr len | nlattr type | +// | value | pad | +type Bytemsg struct { + Type uint16 + Value []byte +} + +func (msg *Bytemsg) Serialize() []byte { + l := msg.Len() + buf := make([]byte, (l+syscall.NLA_ALIGNTO-1) & ^(syscall.NLA_ALIGNTO-1)) + native := nl.NativeEndian() + native.PutUint16(buf[0:2], uint16(l)) + native.PutUint16(buf[2:4], msg.Type) + copy(buf[4:], msg.Value) + return buf +} + +func (msg *Bytemsg) Len() int { + return syscall.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated +} diff --git a/libcontainer/nsenter/nsenter_test.go b/libcontainer/nsenter/nsenter_test.go index db27b8a4..976ae6bb 100644 --- a/libcontainer/nsenter/nsenter_test.go +++ b/libcontainer/nsenter/nsenter_test.go @@ -1,12 +1,17 @@ package nsenter import ( + "bytes" "encoding/json" - "fmt" + "io" "os" "os/exec" "strings" + "syscall" "testing" + + "github.com/opencontainers/runc/libcontainer" + "github.com/vishvananda/netlink/nl" ) type pid struct { @@ -15,7 +20,7 @@ type pid struct { func TestNsenterAlivePid(t *testing.T) { args := []string{"nsenter-exec"} - r, w, err := os.Pipe() + parent, child, err := newPipe() if err != nil { t.Fatalf("failed to create pipe %v", err) } @@ -23,16 +28,22 @@ func TestNsenterAlivePid(t *testing.T) { cmd := &exec.Cmd{ Path: os.Args[0], Args: args, - ExtraFiles: []*os.File{w}, - Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", os.Getpid()), "_LIBCONTAINER_INITPIPE=3"}, + ExtraFiles: []*os.File{child}, + Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"}, } if err := 
cmd.Start(); err != nil { t.Fatalf("nsenter failed to start %v", err) } - w.Close() - - decoder := json.NewDecoder(r) + r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) + r.AddData(&libcontainer.Int32msg{ + Type: libcontainer.PidAttr, + Value: uint32(os.Getpid()), + }) + if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { + t.Fatal(err) + } + decoder := json.NewDecoder(parent) var pid *pid if err := decoder.Decode(&pid); err != nil { @@ -51,34 +62,67 @@ func TestNsenterAlivePid(t *testing.T) { func TestNsenterInvalidPid(t *testing.T) { args := []string{"nsenter-exec"} - - cmd := &exec.Cmd{ - Path: os.Args[0], - Args: args, - Env: []string{"_LIBCONTAINER_INITPID=-1"}, + parent, child, err := newPipe() + if err != nil { + t.Fatalf("failed to create pipe %v", err) } - err := cmd.Run() - if err == nil { + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + ExtraFiles: []*os.File{child}, + Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"}, + } + + if err := cmd.Start(); err != nil { + t.Fatal("nsenter exits with a zero exit status") + } + r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) + r.AddData(&libcontainer.Int32msg{ + Type: libcontainer.PidAttr, + Value: 0, + }) + if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { + t.Fatal(err) + } + + if err := cmd.Wait(); err == nil { t.Fatal("nsenter exits with a zero exit status") } } func TestNsenterDeadPid(t *testing.T) { - dead_cmd := exec.Command("true") - if err := dead_cmd.Run(); err != nil { + deadCmd := exec.Command("true") + if err := deadCmd.Run(); err != nil { t.Fatal(err) } args := []string{"nsenter-exec"} - - cmd := &exec.Cmd{ - Path: os.Args[0], - Args: args, - Env: []string{fmt.Sprintf("_LIBCONTAINER_INITPID=%d", dead_cmd.Process.Pid)}, + parent, child, err := newPipe() + if err != nil { + t.Fatalf("failed to create pipe %v", err) } - err := cmd.Run() - if err == nil { + cmd := &exec.Cmd{ + Path: os.Args[0], + Args: args, + 
ExtraFiles: []*os.File{child}, + Env: []string{"_LIBCONTAINER_INITTYPE=setns", "_LIBCONTAINER_INITPIPE=3"}, + } + + if err := cmd.Start(); err != nil { + t.Fatal("nsenter exits with a zero exit status") + } + + r := nl.NewNetlinkRequest(int(libcontainer.InitMsg), 0) + r.AddData(&libcontainer.Int32msg{ + Type: libcontainer.PidAttr, + Value: uint32(deadCmd.Process.Pid), + }) + if _, err := io.Copy(parent, bytes.NewReader(r.Serialize())); err != nil { + t.Fatal(err) + } + + if err := cmd.Wait(); err == nil { t.Fatal("nsenter exits with a zero exit status") } } @@ -89,3 +133,11 @@ func init() { } return } + +func newPipe() (parent *os.File, child *os.File, err error) { + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + if err != nil { + return nil, nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 01450a90..27e6e53d 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -17,6 +17,11 @@ #include #include +#include +#include +#include +#include + /* All arguments should be above stack, because it grows down */ struct clone_arg { /* @@ -63,24 +68,33 @@ static int clone_parent(jmp_buf * env) return child; } +static uint32_t readint32(char *buf) +{ + return *(uint32_t *) buf; +} + +// list of known message types we want to send to bootstrap program +// These are defined in libcontainer/message_linux.go +#define INIT_MSG 62000 +#define PID_ATTR 27281 +#define CONSOLE_PATH_ATTR 27282 + void nsexec() { char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt", "user" }; const int num = sizeof(namespaces) / sizeof(char *); jmp_buf env; char buf[PATH_MAX], *val; - int i, tfd, self_tfd, child, len, pipenum, consolefd = -1; - pid_t pid; - char *console; + int i, tfd, self_tfd, child, n, len, pipenum, consolefd = -1; + pid_t pid = 0; - val = getenv("_LIBCONTAINER_INITPID"); - 
if (val == NULL) + // if we dont have INITTYPE or this is the init process, skip the bootstrap process + val = getenv("_LIBCONTAINER_INITTYPE"); + if (val == NULL || strcmp(val, "standard") == 0) { return; - - pid = atoi(val); - snprintf(buf, sizeof(buf), "%d", pid); - if (strcmp(val, buf)) { - pr_perror("Unable to parse _LIBCONTAINER_INITPID"); + } + if (strcmp(val, "setns") != 0) { + pr_perror("Invalid inittype %s", val); exit(1); } @@ -89,7 +103,6 @@ void nsexec() pr_perror("Child pipe not found"); exit(1); } - pipenum = atoi(val); snprintf(buf, sizeof(buf), "%d", pipenum); if (strcmp(val, buf)) { @@ -97,13 +110,56 @@ void nsexec() exit(1); } - console = getenv("_LIBCONTAINER_CONSOLE_PATH"); - if (console != NULL) { - consolefd = open(console, O_RDWR); - if (consolefd < 0) { - pr_perror("Failed to open console %s", console); - exit(1); + char nlbuf[NLMSG_HDRLEN]; + struct nlmsghdr *nh; + if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) { + pr_perror("Failed to read netlink header, got %d", n); + exit(1); + } + + nh = (struct nlmsghdr *)nlbuf; + if (nh->nlmsg_type == NLMSG_ERROR) { + pr_perror("Invalid netlink header message"); + exit(1); + } + if (nh->nlmsg_type != INIT_MSG) { + pr_perror("Unexpected netlink message type %d", nh->nlmsg_type); + exit(1); + } + // read the netlink payload + len = NLMSG_PAYLOAD(nh, 0); + char data[len]; + if ((n = read(pipenum, data, len)) != len) { + pr_perror("Failed to read netlink payload, got %d", n); + exit(1); + } + + int start = 0; + struct nlattr *attr; + while (start < len) { + int payload_len; + attr = (struct nlattr *)((void *)data + start); + start += NLA_HDRLEN; + payload_len = attr->nla_len - NLA_HDRLEN; + switch (attr->nla_type) { + case PID_ATTR: + pid = (pid_t) readint32(data + start); + break; + case CONSOLE_PATH_ATTR: + consolefd = open((char *)data + start, O_RDWR); + if (consolefd < 0) { + pr_perror("Failed to open console %s", (char *)data + start); + exit(1); + } + break; } + start += 
NLA_ALIGN(payload_len); + } + + // required pid to be passed + if (pid == 0) { + pr_perror("missing pid"); + exit(1); } /* Check that the specified process exists */ @@ -133,15 +189,13 @@ void nsexec() } /* Skip namespaces we're already part of */ - if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && - st.st_ino == self_st.st_ino) { + if (fstatat(self_tfd, namespaces[i], &self_st, 0) != -1 && st.st_ino == self_st.st_ino) { continue; } fd = openat(tfd, namespaces[i], O_RDONLY); if (fd == -1) { - pr_perror("Failed to open ns file %s for ns %s", buf, - namespaces[i]); + pr_perror("Failed to open ns file %s for ns %s", buf, namespaces[i]); exit(1); } // Set the namespace.