diff --git a/cgroups/cgroups.go b/cgroups/cgroups.go index 60b1135a..7ed9be81 100644 --- a/cgroups/cgroups.go +++ b/cgroups/cgroups.go @@ -3,7 +3,7 @@ package cgroups import ( "fmt" - "github.com/docker/libcontainer/devices" + "github.com/docker/libcontainer/configs" ) type Manager interface { @@ -17,7 +17,7 @@ type Manager interface { GetStats() (*Stats, error) // Toggles the freezer cgroup according with specified state - Freeze(state FreezerState) error + Freeze(state configs.FreezerState) error // Destroys the cgroup set Destroy() error @@ -33,14 +33,6 @@ type Manager interface { GetPaths() map[string]string } -type FreezerState string - -const ( - Undefined FreezerState = "" - Frozen FreezerState = "FROZEN" - Thawed FreezerState = "THAWED" -) - type NotFoundError struct { Subsystem string } @@ -59,26 +51,6 @@ func IsNotFound(err error) bool { if err == nil { return false } - _, ok := err.(*NotFoundError) return ok } - -type Cgroup struct { - Name string `json:"name,omitempty"` - Parent string `json:"parent,omitempty"` // name of parent cgroup or slice - - AllowAllDevices bool `json:"allow_all_devices,omitempty"` // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - AllowedDevices []*devices.Device `json:"allowed_devices,omitempty"` - Memory int64 `json:"memory,omitempty"` // Memory limit (in bytes) - MemoryReservation int64 `json:"memory_reservation,omitempty"` // Memory reservation or soft_limit (in bytes) - MemorySwap int64 `json:"memory_swap,omitempty"` // Total memory usage (memory + swap); set `-1' to disable swap - CpuShares int64 `json:"cpu_shares,omitempty"` // CPU shares (relative weight vs. other containers) - CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. - CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 
0 to use system default. - CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use - CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use - BlkioWeight int64 `json:"blkio_weight,omitempty"` // Specifies per cgroup weight, range is from 10 to 1000. - Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process - Slice string `json:"slice,omitempty"` // Parent slice to use for systemd -} diff --git a/cgroups/fs/apply_raw.go b/cgroups/fs/apply_raw.go index 11d35d7a..4a3a8864 100644 --- a/cgroups/fs/apply_raw.go +++ b/cgroups/fs/apply_raw.go @@ -8,6 +8,7 @@ import ( "strconv" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) var ( @@ -24,8 +25,17 @@ var ( CgroupProcesses = "cgroup.procs" ) +type subsystem interface { + // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. + GetStats(path string, stats *cgroups.Stats) error + // Removes the cgroup represented by 'data'. + Remove(*data) error + // Creates and joins the cgroup represented by data. + Set(*data) error +} + type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -46,19 +56,10 @@ func init() { } } -type subsystem interface { - // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. - GetStats(path string, stats *cgroups.Stats) error - // Removes the cgroup represented by 'data'. - Remove(*data) error - // Creates and joins the cgroup represented by data. - Set(*data) error -} - type data struct { root string cgroup string - c *cgroups.Cgroup + c *configs.Cgroup pid int } @@ -109,7 +110,7 @@ func (m *Manager) GetPaths() map[string]string { // Symmetrical public function to update device based cgroups. Also available // in the systemd implementation. 
-func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { d, err := getCgroupData(c, pid) if err != nil { return err @@ -137,7 +138,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { // Freeze toggles the container's freezer cgroup depending on the state // provided -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { d, err := getCgroupData(m.Cgroups, 0) if err != nil { return err @@ -170,7 +171,7 @@ func (m *Manager) GetPids() ([]int, error) { return cgroups.ReadProcsFile(dir) } -func getCgroupData(c *cgroups.Cgroup, pid int) (*data, error) { +func getCgroupData(c *configs.Cgroup, pid int) (*data, error) { if cgroupRoot == "" { return nil, fmt.Errorf("failed to find the cgroup root") } diff --git a/cgroups/fs/devices.go b/cgroups/fs/devices.go index 98d5d2d7..e904e10c 100644 --- a/cgroups/fs/devices.go +++ b/cgroups/fs/devices.go @@ -17,7 +17,7 @@ func (s *DevicesGroup) Set(d *data) error { } for _, dev := range d.c.AllowedDevices { - if err := writeFile(dir, "devices.allow", dev.GetCgroupAllowString()); err != nil { + if err := writeFile(dir, "devices.allow", dev.CgroupString()); err != nil { return err } } diff --git a/cgroups/fs/freezer.go b/cgroups/fs/freezer.go index c6b677fa..b881d0d4 100644 --- a/cgroups/fs/freezer.go +++ b/cgroups/fs/freezer.go @@ -5,6 +5,7 @@ import ( "time" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) type FreezerGroup struct { @@ -12,7 +13,7 @@ type FreezerGroup struct { func (s *FreezerGroup) Set(d *data) error { switch d.c.Freezer { - case cgroups.Frozen, cgroups.Thawed: + case configs.Frozen, configs.Thawed: dir, err := d.path("freezer") if err != nil { return err diff --git a/cgroups/manager/manager.go b/cgroups/manager/manager.go index bd5fd48a..b8e2010e 100644 --- a/cgroups/manager/manager.go +++ b/cgroups/manager/manager.go @@ -4,13 +4,14 @@ import ( 
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" + "github.com/docker/libcontainer/configs" ) // Create a new cgroup manager with specified configuration // TODO this object is not really initialized until Apply() is called. // Maybe make this to the equivalent of Apply() at some point? // @vmarmol -func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { +func NewCgroupManager(cgroups *configs.Cgroup) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ Cgroups: cgroups, @@ -23,7 +24,7 @@ func NewCgroupManager(cgroups *cgroups.Cgroup) cgroups.Manager { } // Restore a cgroup manager with specified configuration and state -func LoadCgroupManager(cgroups *cgroups.Cgroup, paths map[string]string) cgroups.Manager { +func LoadCgroupManager(cgroups *configs.Cgroup, paths map[string]string) cgroups.Manager { if systemd.UseSystemd() { return &systemd.Manager{ Cgroups: cgroups, diff --git a/cgroups/systemd/apply_nosystemd.go b/cgroups/systemd/apply_nosystemd.go index 62928b82..8a46ea8b 100644 --- a/cgroups/systemd/apply_nosystemd.go +++ b/cgroups/systemd/apply_nosystemd.go @@ -6,10 +6,11 @@ import ( "fmt" "github.com/docker/libcontainer/cgroups" + "github.com/docker/libcontainer/configs" ) type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -37,14 +38,14 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { return nil, fmt.Errorf("Systemd not supported") } -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { return fmt.Errorf("Systemd not supported") } -func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { return fmt.Errorf("Systemd not supported") } -func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { +func Freeze(c *configs.Cgroup, state configs.FreezerState) error { 
return fmt.Errorf("Systemd not supported") } diff --git a/cgroups/systemd/apply_systemd.go b/cgroups/systemd/apply_systemd.go index 7143a595..f46067b4 100644 --- a/cgroups/systemd/apply_systemd.go +++ b/cgroups/systemd/apply_systemd.go @@ -16,11 +16,12 @@ import ( systemd "github.com/coreos/go-systemd/dbus" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups/fs" + "github.com/docker/libcontainer/configs" "github.com/godbus/dbus" ) type Manager struct { - Cgroups *cgroups.Cgroup + Cgroups *configs.Cgroup Paths map[string]string } @@ -190,7 +191,7 @@ func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } -func joinFreezer(c *cgroups.Cgroup, pid int) error { +func joinFreezer(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "freezer") if err != nil { return err @@ -203,7 +204,7 @@ func joinFreezer(c *cgroups.Cgroup, pid int) error { return ioutil.WriteFile(filepath.Join(path, "cgroup.procs"), []byte(strconv.Itoa(pid)), 0700) } -func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { +func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { mountpoint, err := cgroups.FindCgroupMountpoint(subsystem) if err != nil { return "", err @@ -222,7 +223,7 @@ func getSubsystemPath(c *cgroups.Cgroup, subsystem string) (string, error) { return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil } -func (m *Manager) Freeze(state cgroups.FreezerState) error { +func (m *Manager) Freeze(state configs.FreezerState) error { path, err := getSubsystemPath(m.Cgroups, "freezer") if err != nil { return err @@ -260,7 +261,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { panic("not implemented") } -func getUnitName(c *cgroups.Cgroup) string { +func getUnitName(c *configs.Cgroup) string { return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) } @@ -275,7 +276,7 @@ func getUnitName(c *cgroups.Cgroup) string { // Note: we 
can't use systemd to set up the initial limits, and then change the cgroup // because systemd will re-write the device settings if it needs to re-apply the cgroup context. // This happens at least for v208 when any sibling unit is started. -func joinDevices(c *cgroups.Cgroup, pid int) error { +func joinDevices(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "devices") if err != nil { return err @@ -294,7 +295,7 @@ func joinDevices(c *cgroups.Cgroup, pid int) error { } for _, dev := range c.AllowedDevices { - if err := writeFile(path, "devices.allow", dev.GetCgroupAllowString()); err != nil { + if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil { return err } } @@ -304,11 +305,11 @@ func joinDevices(c *cgroups.Cgroup, pid int) error { // Symmetrical public function to update device based cgroups. Also available // in the fs implementation. -func ApplyDevices(c *cgroups.Cgroup, pid int) error { +func ApplyDevices(c *configs.Cgroup, pid int) error { return joinDevices(c, pid) } -func joinMemory(c *cgroups.Cgroup, pid int) error { +func joinMemory(c *configs.Cgroup, pid int) error { memorySwap := c.MemorySwap if memorySwap == 0 { @@ -327,7 +328,7 @@ func joinMemory(c *cgroups.Cgroup, pid int) error { // systemd does not atm set up the cpuset controller, so we must manually // join it. 
Additionally that is a very finicky controller where each // level must have a full setup as the default for a new directory is "no cpus" -func joinCpuset(c *cgroups.Cgroup, pid int) error { +func joinCpuset(c *configs.Cgroup, pid int) error { path, err := getSubsystemPath(c, "cpuset") if err != nil { return err diff --git a/configs/cgroup.go b/configs/cgroup.go new file mode 100644 index 00000000..0dffc640 --- /dev/null +++ b/configs/cgroup.go @@ -0,0 +1,54 @@ +package configs + +type FreezerState string + +const ( + Undefined FreezerState = "" + Frozen FreezerState = "FROZEN" + Thawed FreezerState = "THAWED" +) + +type Cgroup struct { + Name string `json:"name,omitempty"` + + // name of parent cgroup or slice + Parent string `json:"parent,omitempty"` + + // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. + AllowAllDevices bool `json:"allow_all_devices,omitempty"` + + AllowedDevices []*Device `json:"allowed_devices,omitempty"` + + // Memory limit (in bytes) + Memory int64 `json:"memory,omitempty"` + + // Memory reservation or soft_limit (in bytes) + MemoryReservation int64 `json:"memory_reservation,omitempty"` + + // Total memory usage (memory + swap); set `-1' to disable swap + MemorySwap int64 `json:"memory_swap,omitempty"` + + // CPU shares (relative weight vs. other containers) + CpuShares int64 `json:"cpu_shares,omitempty"` + + // CPU hardcap limit (in usecs). Allowed cpu time in a given period. + CpuQuota int64 `json:"cpu_quota,omitempty"` + + // CPU period to be used for hardcapping (in usecs). 0 to use system default. + CpuPeriod int64 `json:"cpu_period,omitempty"` + + // CPU to use + CpusetCpus string `json:"cpuset_cpus,omitempty"` + + // MEM to use + CpusetMems string `json:"cpuset_mems,omitempty"` + + // Specifies per cgroup weight, range is from 10 to 1000. 
+ BlkioWeight int64 `json:"blkio_weight,omitempty"` + + // set the freeze value for the process + Freezer FreezerState `json:"freezer,omitempty"` + + // Parent slice to use for systemd TODO: remove in favor of parent + Slice string `json:"slice,omitempty"` +} diff --git a/configs/config.go b/configs/config.go index d1e03f61..844a9cad 100644 --- a/configs/config.go +++ b/configs/config.go @@ -1,70 +1,43 @@ package configs -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/mount" - "github.com/docker/libcontainer/network" -) +import "fmt" -type MountConfig mount.MountConfig - -type Network network.Network - -type NamespaceType string - -const ( - NEWNET NamespaceType = "NEWNET" - NEWPID NamespaceType = "NEWPID" - NEWNS NamespaceType = "NEWNS" - NEWUTS NamespaceType = "NEWUTS" - NEWIPC NamespaceType = "NEWIPC" - NEWUSER NamespaceType = "NEWUSER" -) - -// Namespace defines configuration for each namespace. It specifies an -// alternate path that is able to be joined via setns. -type Namespace struct { - Type NamespaceType `json:"type"` - Path string `json:"path,omitempty"` +type Rlimit struct { + Type int `json:"type,omitempty"` + Hard uint64 `json:"hard,omitempty"` + Soft uint64 `json:"soft,omitempty"` } -type Namespaces []Namespace - -func (n *Namespaces) Remove(t NamespaceType) bool { - i := n.index(t) - if i == -1 { - return false - } - *n = append((*n)[:i], (*n)[i+1:]...) - return true -} - -func (n *Namespaces) Add(t NamespaceType, path string) { - i := n.index(t) - if i == -1 { - *n = append(*n, Namespace{Type: t, Path: path}) - return - } - (*n)[i].Path = path -} - -func (n *Namespaces) index(t NamespaceType) int { - for i, ns := range *n { - if ns.Type == t { - return i - } - } - return -1 -} - -func (n *Namespaces) Contains(t NamespaceType) bool { - return n.index(t) != -1 +// IDMap represents UID/GID Mappings for User Namespaces. 
+type IDMap struct { + ContainerID int `json:"container_id,omitempty"` + HostID int `json:"host_id,omitempty"` + Size int `json:"size,omitempty"` } // Config defines configuration options for executing a process inside a contained environment. type Config struct { - // Mount specific options. - MountConfig *MountConfig `json:"mount_config,omitempty"` + // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs + // This is a common option when the container is running in ramdisk + NoPivotRoot bool `json:"no_pivot_root,omitempty"` + + // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. + // When a custom PivotDir is not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. + // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. + PivotDir string `json:"pivot_dir,omitempty"` + + // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted + // bind mounts are writable + ReadonlyFs bool `json:"readonly_fs,omitempty"` + + // Mounts specify additional source and destination paths that will be mounted inside the container's + // rootfs and mount namespace if specified + Mounts []*Mount `json:"mounts,omitempty"` + + // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
+ DeviceNodes []*Device `json:"device_nodes,omitempty"` + + MountLabel string `json:"mount_label,omitempty"` // Pathname to container's root filesystem RootFs string `json:"root_fs,omitempty"` @@ -83,9 +56,8 @@ type Config struct { // provided in Env are provided to the process Env []string `json:"environment,omitempty"` - // Tty when true will allocate a pty slave on the host for access by the container's process - // and ensure that it is mounted inside the container's rootfs - Tty bool `json:"tty,omitempty"` + // Console is the path to the console allocated to the container. + Console string `json:"console,omitempty"` // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process @@ -103,7 +75,7 @@ type Config struct { // Cgroups specifies specific cgroup settings for the various subsystems that the container is // placed into to limit the resources the container has available - Cgroups *cgroups.Cgroup `json:"cgroups,omitempty"` + Cgroups *Cgroup `json:"cgroups,omitempty"` // AppArmorProfile specifies the profile to apply to the process running in the container and is // change at the time the process is execed @@ -124,6 +96,7 @@ type Config struct { // AdditionalGroups specifies the gids that should be added to supplementary groups // in addition to those that the user belongs to. AdditionalGroups []int `json:"additional_groups,omitempty"` + // UidMappings is an array of User ID mappings for User Namespaces UidMappings []IDMap `json:"uid_mappings,omitempty"` @@ -131,36 +104,48 @@ type Config struct { GidMappings []IDMap `json:"gid_mappings,omitempty"` } -// Routes can be specified to create entries in the route table as the container is started -// -// All of destination, source, and gateway should be either IPv4 or IPv6. 
-// One of the three options must be present, and ommitted entries will use their -// IP family default for the route table. For IPv4 for example, setting the -// gateway to 1.2.3.4 and the interface to eth0 will set up a standard -// destination of 0.0.0.0(or *) when viewed in the route table. -type Route struct { - // Sets the destination and mask, should be a CIDR. Accepts IPv4 and IPv6 - Destination string `json:"destination,omitempty"` - - // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 - Source string `json:"source,omitempty"` - - // Sets the gateway. Accepts IPv4 and IPv6 - Gateway string `json:"gateway,omitempty"` - - // The device to set this route up for, for example: eth0 - InterfaceName string `json:"interface_name,omitempty"` +// Gets the root uid for the process on host which could be non-zero +// when user namespaces are enabled. +func (c *Config) HostUID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.UidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") + } + id, found := c.hostIDFromMapping(0, c.UidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root uid 0 + return 0, nil } -type Rlimit struct { - Type int `json:"type,omitempty"` - Hard uint64 `json:"hard,omitempty"` - Soft uint64 `json:"soft,omitempty"` +// Gets the root gid for the process on host which could be non-zero +// when user namespaces are enabled. 
+func (c *Config) HostGID() (int, error) { + if c.Namespaces.Contains(NEWUSER) { + if c.GidMappings == nil { + return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") + } + id, found := c.hostIDFromMapping(0, c.GidMappings) + if !found { + return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") + } + return id, nil + } + // Return default root gid 0 + return 0, nil } -// IDMap represents UID/GID Mappings for User Namespaces. -type IDMap struct { - ContainerID int `json:"container_id,omitempty"` - HostID int `json:"host_id,omitempty"` - Size int `json:"size,omitempty"` +// Utility function that gets a host ID for a container ID from user namespace map +// if that ID is present in the map. +func (c *Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { + for _, m := range uMap { + if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { + hostID := m.HostID + (containerID - m.ContainerID) + return hostID, true + } + } + return -1, false } diff --git a/configs/device.go b/configs/device.go new file mode 100644 index 00000000..18d73232 --- /dev/null +++ b/configs/device.go @@ -0,0 +1,42 @@ +package configs + +import ( + "fmt" + "os" +) + +const ( + Wildcard = -1 +) + +type Device struct { + Type rune `json:"type,omitempty"` + // It is fine if this is an empty string in the case that you are using Wildcards + Path string `json:"path,omitempty"` + // Use the wildcard constant for wildcards. + Major int64 `json:"major,omitempty"` + // Use the wildcard constant for wildcards. 
+ Minor int64 `json:"minor,omitempty"` + // Typically just "rwm" + Permissions string `json:"permissions,omitempty"` + // The permission bits of the file's mode + FileMode os.FileMode `json:"file_mode,omitempty"` + Uid uint32 `json:"uid,omitempty"` + Gid uint32 `json:"gid,omitempty"` +} + +func (d *Device) CgroupString() string { + return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions) +} + +func (d *Device) Mkdev() int { + return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12)) +} + +// deviceNumberString converts the device number to a string return result. +func deviceNumberString(number int64) string { + if number == Wildcard { + return "*" + } + return fmt.Sprint(number) +} diff --git a/mount/mount.go b/configs/mount.go similarity index 82% rename from mount/mount.go rename to configs/mount.go index c1b42421..f6f39992 100644 --- a/mount/mount.go +++ b/configs/mount.go @@ -1,4 +1,4 @@ -package mount +package configs import ( "fmt" @@ -10,6 +10,8 @@ import ( "github.com/docker/libcontainer/label" ) +const DefaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV + type Mount struct { Type string `json:"type,omitempty"` Source string `json:"source,omitempty"` // Source path, in the host namespace @@ -101,9 +103,31 @@ func (m *Mount) tmpfsMount(rootfs, mountLabel string) error { return fmt.Errorf("creating new tmpfs mount target %s", err) } - if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(defaultMountFlags), l); err != nil { + if err := syscall.Mount("tmpfs", dest, "tmpfs", uintptr(DefaultMountFlags), l); err != nil { return fmt.Errorf("%s mounting %s in tmpfs", err, dest) } return nil } + +func createIfNotExists(path string, isDir bool) error { + if _, err := os.Stat(path); err != nil { + if os.IsNotExist(err) { + if isDir { + if err := os.MkdirAll(path, 0755); err != nil { + return err + } + } else { + if err := os.MkdirAll(filepath.Dir(path), 0755); err 
!= nil { + return err + } + f, err := os.OpenFile(path, os.O_CREATE, 0755) + if err != nil { + return err + } + f.Close() + } + } + } + return nil +} diff --git a/configs/namespaces.go b/configs/namespaces.go new file mode 100644 index 00000000..5e891eab --- /dev/null +++ b/configs/namespaces.go @@ -0,0 +1,52 @@ +package configs + +type NamespaceType string + +const ( + NEWNET NamespaceType = "NEWNET" + NEWPID NamespaceType = "NEWPID" + NEWNS NamespaceType = "NEWNS" + NEWUTS NamespaceType = "NEWUTS" + NEWIPC NamespaceType = "NEWIPC" + NEWUSER NamespaceType = "NEWUSER" +) + +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Type NamespaceType `json:"type"` + Path string `json:"path,omitempty"` +} + +type Namespaces []Namespace + +func (n *Namespaces) Remove(t NamespaceType) bool { + i := n.index(t) + if i == -1 { + return false + } + *n = append((*n)[:i], (*n)[i+1:]...) + return true +} + +func (n *Namespaces) Add(t NamespaceType, path string) { + i := n.index(t) + if i == -1 { + *n = append(*n, Namespace{Type: t, Path: path}) + return + } + (*n)[i].Path = path +} + +func (n *Namespaces) index(t NamespaceType) int { + for i, ns := range *n { + if ns.Type == t { + return i + } + } + return -1 +} + +func (n *Namespaces) Contains(t NamespaceType) bool { + return n.index(t) != -1 +} diff --git a/configs/network.go b/configs/network.go new file mode 100644 index 00000000..54218363 --- /dev/null +++ b/configs/network.go @@ -0,0 +1,62 @@ +package configs + +// Network defines configuration for a container's networking stack +// +// The network configuration can be omitted from a container causing the +// container to be setup with the host's networking stack +type Network struct { + // Type sets the networks type, commonly veth and loopback + Type string `json:"type,omitempty"` + + // The bridge to use. 
+ Bridge string `json:"bridge,omitempty"` + + // Prefix for the veth interfaces. + VethPrefix string `json:"veth_prefix,omitempty"` + + // MacAddress contains the MAC address to set on the network interface + MacAddress string `json:"mac_address,omitempty"` + + // Address contains the IPv4 and mask to set on the network interface + Address string `json:"address,omitempty"` + + // IPv6Address contains the IPv6 and mask to set on the network interface + IPv6Address string `json:"ipv6_address,omitempty"` + + // Gateway sets the gateway address that is used as the default for the interface + Gateway string `json:"gateway,omitempty"` + + // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface + IPv6Gateway string `json:"ipv6_gateway,omitempty"` + + // Mtu sets the mtu value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + Mtu int `json:"mtu,omitempty"` + + // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and + // container's interfaces if a pair is created, specifically in the case of type veth + // Note: This does not apply to loopback interfaces. + TxQueueLen int `json:"txqueuelen,omitempty"` +} + +// Routes can be specified to create entries in the route table as the container is started +// +// All of destination, source, and gateway should be either IPv4 or IPv6. +// One of the three options must be present, and omitted entries will use their +// IP family default for the route table. For IPv4 for example, setting the +// gateway to 1.2.3.4 and the interface to eth0 will set up a standard +// destination of 0.0.0.0(or *) when viewed in the route table. +type Route struct { + // Sets the destination and mask, should be a CIDR. 
Accepts IPv4 and IPv6 + Destination string `json:"destination,omitempty"` + + // Sets the source and mask, should be a CIDR. Accepts IPv4 and IPv6 + Source string `json:"source,omitempty"` + + // Sets the gateway. Accepts IPv4 and IPv6 + Gateway string `json:"gateway,omitempty"` + + // The device to set this route up for, for example: eth0 + InterfaceName string `json:"interface_name,omitempty"` +} diff --git a/configs/state.go b/configs/state.go index 9dc77006..27122c44 100644 --- a/configs/state.go +++ b/configs/state.go @@ -1,13 +1,5 @@ package configs -import ( - "encoding/json" - "os" - "path/filepath" - - "github.com/docker/libcontainer/network" -) - // State represents a running container's state type State struct { // InitPid is the init process id in the parent namespace @@ -17,21 +9,30 @@ type State struct { InitStartTime string `json:"init_start_time,omitempty"` // Network runtime state. - NetworkState network.NetworkState `json:"network_state,omitempty"` + NetworkState NetworkState `json:"network_state,omitempty"` // Path to all the cgroups setup for a container. Key is cgroup subsystem name. CgroupPaths map[string]string `json:"cgroup_paths,omitempty"` + + Status Status `json:"status,omitempty"` } -// The running state of the container. -type RunState int +// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers +// Do not depend on it outside of libcontainer. +// TODO: move veth names to config time +type NetworkState struct { + // The name of the veth interface on the Host. + VethHost string `json:"veth_host,omitempty"` + // The name of the veth interface created inside the container for the child. + VethChild string `json:"veth_child,omitempty"` +} + +// The status of a container. +type Status int const ( - // The name of the runtime state file - stateFile = "state.json" - // The container exists and is running. 
- Running RunState = iota + 1 + Running Status = iota + 1 // The container exists, it is in the process of being paused. Pausing @@ -42,36 +43,3 @@ const ( // The container does not exist. Destroyed ) - -// SaveState writes the container's runtime state to a state.json file -// in the specified path -func SaveState(basePath string, state *State) error { - f, err := os.Create(filepath.Join(basePath, stateFile)) - if err != nil { - return err - } - defer f.Close() - - return json.NewEncoder(f).Encode(state) -} - -// GetState reads the state.json file for a running container -func GetState(basePath string) (*State, error) { - f, err := os.Open(filepath.Join(basePath, stateFile)) - if err != nil { - return nil, err - } - defer f.Close() - - var state *State - if err := json.NewDecoder(f).Decode(&state); err != nil { - return nil, err - } - - return state, nil -} - -// DeleteState deletes the state.json file -func DeleteState(basePath string) error { - return os.Remove(filepath.Join(basePath, stateFile)) -} diff --git a/container.go b/container.go index e04a43df..4348e962 100644 --- a/container.go +++ b/container.go @@ -4,9 +4,16 @@ NOTE: The API is in flux and mainly not implemented. Proceed with caution until package libcontainer import ( + "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/configs" + "github.com/docker/libcontainer/network" ) +type Stats struct { + NetworkStats *network.NetworkStats `json:"network_stats,omitempty"` + CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"` +} + // A libcontainer container object. // // Each container is thread-safe within the same process. Since a container can @@ -16,12 +23,11 @@ type Container interface { // Returns the ID of the container ID() string - // Returns the current run state of the container. + // Returns the current status of the container. // // errors: - // ContainerDestroyed - Container no longer exists, // Systemerror - System error. 
- RunState() (configs.RunState, error) + Status() (configs.Status, error) // Returns the current config of the container. Config() *configs.Config @@ -41,7 +47,7 @@ type Container interface { // errors: // ContainerDestroyed - Container no longer exists, // Systemerror - System error. - Stats() (*ContainerStats, error) + Stats() (*Stats, error) // Start a process inside the container. Returns the PID of the new process (in the caller process's namespace) and a channel that will return the exit status of the process whenever it dies. // @@ -101,4 +107,10 @@ type Container interface { // ContainerDestroyed - Container no longer exists, // Systemerror - System error. WaitProcess(pid int) (exitStatus int, err error) + + // OOM returns a read-only channel signaling when the container receives an OOM notification. + // + // errors: + // Systemerror - System error. + OOM() (<-chan struct{}, error) } diff --git a/devices/defaults.go b/devices/defaults.go index e0ad0b08..3923ccd5 100644 --- a/devices/defaults.go +++ b/devices/defaults.go @@ -1,147 +1,127 @@ package devices +import "github.com/docker/libcontainer/configs" + var ( // These are devices that are to be both allowed and created. 
- - DefaultSimpleDevices = []*Device{ + DefaultSimpleDevices = []*configs.Device{ // /dev/null and zero { - Path: "/dev/null", - Type: 'c', - MajorNumber: 1, - MinorNumber: 3, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/null", + Type: 'c', + Major: 1, + Minor: 3, + Permissions: "rwm", + FileMode: 0666, }, { - Path: "/dev/zero", - Type: 'c', - MajorNumber: 1, - MinorNumber: 5, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/zero", + Type: 'c', + Major: 1, + Minor: 5, + Permissions: "rwm", + FileMode: 0666, }, { - Path: "/dev/full", - Type: 'c', - MajorNumber: 1, - MinorNumber: 7, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/full", + Type: 'c', + Major: 1, + Minor: 7, + Permissions: "rwm", + FileMode: 0666, }, // consoles and ttys { - Path: "/dev/tty", - Type: 'c', - MajorNumber: 5, - MinorNumber: 0, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/tty", + Type: 'c', + Major: 5, + Minor: 0, + Permissions: "rwm", + FileMode: 0666, }, // /dev/urandom,/dev/random { - Path: "/dev/urandom", - Type: 'c', - MajorNumber: 1, - MinorNumber: 9, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/urandom", + Type: 'c', + Major: 1, + Minor: 9, + Permissions: "rwm", + FileMode: 0666, }, { - Path: "/dev/random", - Type: 'c', - MajorNumber: 1, - MinorNumber: 8, - CgroupPermissions: "rwm", - FileMode: 0666, + Path: "/dev/random", + Type: 'c', + Major: 1, + Minor: 8, + Permissions: "rwm", + FileMode: 0666, }, } - - DefaultAllowedDevices = append([]*Device{ + DefaultAllowedDevices = append([]*configs.Device{ // allow mknod for any device { - Type: 'c', - MajorNumber: Wildcard, - MinorNumber: Wildcard, - CgroupPermissions: "m", + Type: 'c', + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: "m", }, { - Type: 'b', - MajorNumber: Wildcard, - MinorNumber: Wildcard, - CgroupPermissions: "m", + Type: 'b', + Major: configs.Wildcard, + Minor: configs.Wildcard, + Permissions: "m", }, { - Path: "/dev/console", - 
Type: 'c', - MajorNumber: 5, - MinorNumber: 1, - CgroupPermissions: "rwm", + Path: "/dev/console", + Type: 'c', + Major: 5, + Minor: 1, + Permissions: "rwm", }, { - Path: "/dev/tty0", - Type: 'c', - MajorNumber: 4, - MinorNumber: 0, - CgroupPermissions: "rwm", + Path: "/dev/tty0", + Type: 'c', + Major: 4, + Minor: 0, + Permissions: "rwm", }, { - Path: "/dev/tty1", - Type: 'c', - MajorNumber: 4, - MinorNumber: 1, - CgroupPermissions: "rwm", + Path: "/dev/tty1", + Type: 'c', + Major: 4, + Minor: 1, + Permissions: "rwm", }, // /dev/pts/ - pts namespaces are "coming soon" { - Path: "", - Type: 'c', - MajorNumber: 136, - MinorNumber: Wildcard, - CgroupPermissions: "rwm", + Path: "", + Type: 'c', + Major: 136, + Minor: configs.Wildcard, + Permissions: "rwm", }, { - Path: "", - Type: 'c', - MajorNumber: 5, - MinorNumber: 2, - CgroupPermissions: "rwm", + Path: "", + Type: 'c', + Major: 5, + Minor: 2, + Permissions: "rwm", }, // tuntap { - Path: "", - Type: 'c', - MajorNumber: 10, - MinorNumber: 200, - CgroupPermissions: "rwm", + Path: "", + Type: 'c', + Major: 10, + Minor: 200, + Permissions: "rwm", }, - - /*// fuse - { - Path: "", - Type: 'c', - MajorNumber: 10, - MinorNumber: 229, - CgroupPermissions: "rwm", - }, - - // rtc - { - Path: "", - Type: 'c', - MajorNumber: 254, - MinorNumber: 0, - CgroupPermissions: "rwm", - }, - */ }, DefaultSimpleDevices...) - - DefaultAutoCreatedDevices = append([]*Device{ + DefaultAutoCreatedDevices = append([]*configs.Device{ { // /dev/fuse is created but not allowed. // This is to allow java to work. Because java @@ -149,11 +129,11 @@ var ( // https://github.com/docker/docker/issues/514 // https://github.com/docker/docker/issues/2393 // - Path: "/dev/fuse", - Type: 'c', - MajorNumber: 10, - MinorNumber: 229, - CgroupPermissions: "rwm", + Path: "/dev/fuse", + Type: 'c', + Major: 10, + Minor: 229, + Permissions: "rwm", }, }, DefaultSimpleDevices...) 
) diff --git a/devices/devices.go b/devices/devices.go index 8e86d952..b3f67aa3 100644 --- a/devices/devices.go +++ b/devices/devices.go @@ -7,14 +7,12 @@ import ( "os" "path/filepath" "syscall" -) -const ( - Wildcard = -1 + "github.com/docker/libcontainer/configs" ) var ( - ErrNotADeviceNode = errors.New("not a device node") + ErrNotADevice = errors.New("not a device node") ) // Testing dependencies @@ -23,45 +21,20 @@ var ( ioutilReadDir = ioutil.ReadDir ) -type Device struct { - Type rune `json:"type,omitempty"` - Path string `json:"path,omitempty"` // It is fine if this is an empty string in the case that you are using Wildcards - MajorNumber int64 `json:"major_number,omitempty"` // Use the wildcard constant for wildcards. - MinorNumber int64 `json:"minor_number,omitempty"` // Use the wildcard constant for wildcards. - CgroupPermissions string `json:"cgroup_permissions,omitempty"` // Typically just "rwm" - FileMode os.FileMode `json:"file_mode,omitempty"` // The permission bits of the file's mode - Uid uint32 `json:"uid,omitempty"` - Gid uint32 `json:"gid,omitempty"` -} - -func GetDeviceNumberString(deviceNumber int64) string { - if deviceNumber == Wildcard { - return "*" - } else { - return fmt.Sprintf("%d", deviceNumber) - } -} - -func (device *Device) GetCgroupAllowString() string { - return fmt.Sprintf("%c %s:%s %s", device.Type, GetDeviceNumberString(device.MajorNumber), GetDeviceNumberString(device.MinorNumber), device.CgroupPermissions) -} - // Given the path to a device and it's cgroup_permissions(which cannot be easilly queried) look up the information about a linux device and return that information as a Device struct. 
-func GetDevice(path, cgroupPermissions string) (*Device, error) { +func DeviceFromPath(path, permissions string) (*configs.Device, error) { fileInfo, err := osLstat(path) if err != nil { return nil, err } - var ( devType rune mode = fileInfo.Mode() fileModePermissionBits = os.FileMode.Perm(mode) ) - switch { case mode&os.ModeDevice == 0: - return nil, ErrNotADeviceNode + return nil, ErrNotADevice case mode&os.ModeCharDevice != 0: fileModePermissionBits |= syscall.S_IFCHR devType = 'c' @@ -69,36 +42,33 @@ func GetDevice(path, cgroupPermissions string) (*Device, error) { fileModePermissionBits |= syscall.S_IFBLK devType = 'b' } - stat_t, ok := fileInfo.Sys().(*syscall.Stat_t) if !ok { return nil, fmt.Errorf("cannot determine the device number for device %s", path) } devNumber := int(stat_t.Rdev) - - return &Device{ - Type: devType, - Path: path, - MajorNumber: Major(devNumber), - MinorNumber: Minor(devNumber), - CgroupPermissions: cgroupPermissions, - FileMode: fileModePermissionBits, - Uid: stat_t.Uid, - Gid: stat_t.Gid, + return &configs.Device{ + Type: devType, + Path: path, + Major: Major(devNumber), + Minor: Minor(devNumber), + Permissions: permissions, + FileMode: fileModePermissionBits, + Uid: stat_t.Uid, + Gid: stat_t.Gid, }, nil } -func GetHostDeviceNodes() ([]*Device, error) { +func HostDevices() ([]*configs.Device, error) { return getDeviceNodes("/dev") } -func getDeviceNodes(path string) ([]*Device, error) { +func getDeviceNodes(path string) ([]*configs.Device, error) { files, err := ioutilReadDir(path) if err != nil { return nil, err } - - out := []*Device{} + out := []*configs.Device{} for _, f := range files { switch { case f.IsDir(): @@ -117,16 +87,14 @@ func getDeviceNodes(path string) ([]*Device, error) { case f.Name() == "console": continue } - - device, err := GetDevice(filepath.Join(path, f.Name()), "rwm") + device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm") if err != nil { - if err == ErrNotADeviceNode { + if err == 
ErrNotADevice { continue } return nil, err } out = append(out, device) } - return out, nil } diff --git a/devices/number.go b/devices/number.go index 3aae380b..9e8feb83 100644 --- a/devices/number.go +++ b/devices/number.go @@ -20,7 +20,3 @@ func Major(devNumber int) int64 { func Minor(devNumber int) int64 { return int64((devNumber & 0xff) | ((devNumber >> 12) & 0xfff00)) } - -func Mkdev(majorNumber int64, minorNumber int64) int { - return int((majorNumber << 8) | (minorNumber & 0xff) | ((minorNumber & 0xfff00) << 12)) -} diff --git a/linux_container.go b/linux_container.go index e5c6826a..c3b43c6c 100644 --- a/linux_container.go +++ b/linux_container.go @@ -34,11 +34,10 @@ func (c *linuxContainer) Config() *configs.Config { return c.config } -func (c *linuxContainer) RunState() (configs.RunState, error) { +func (c *linuxContainer) Status() (configs.Status, error) { if c.state.InitPid <= 0 { return configs.Destroyed, nil } - // return Running if the init process is alive err := syscall.Kill(c.state.InitPid, 0) if err != nil { @@ -47,14 +46,10 @@ func (c *linuxContainer) RunState() (configs.RunState, error) { } return 0, err } - if c.config.Cgroups != nil && - c.config.Cgroups.Freezer == cgroups.Frozen { + c.config.Cgroups.Freezer == configs.Frozen { return configs.Paused, nil } - - //FIXME get a cgroup state to check other states - return configs.Running, nil } @@ -67,13 +62,12 @@ func (c *linuxContainer) Processes() ([]int, error) { return pids, nil } -func (c *linuxContainer) Stats() (*ContainerStats, error) { +func (c *linuxContainer) Stats() (*Stats, error) { glog.Info("fetch container stats") var ( err error - stats = &ContainerStats{} + stats = &Stats{} ) - if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newGenericError(err, SystemError) } @@ -84,7 +78,7 @@ func (c *linuxContainer) Stats() (*ContainerStats, error) { } func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { - state, err := c.RunState() + 
status, err := c.Status() if err != nil { return -1, err } @@ -103,15 +97,13 @@ func (c *linuxContainer) StartProcess(config *ProcessConfig) (int, error) { cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL - if state != configs.Destroyed { + if status != configs.Destroyed { glog.Info("start new container process") return namespaces.ExecIn(config.Args, config.Env, config.Console, cmd, c.config, c.state) } - if err := c.startInitProcess(cmd, config); err != nil { return -1, err } - return c.state.InitPid, nil } @@ -154,25 +146,22 @@ func (c *linuxContainer) startInitProcess(cmd *exec.Cmd, config *ProcessConfig) } func (c *linuxContainer) Destroy() error { - state, err := c.RunState() + status, err := c.Status() if err != nil { return err } - - if state != configs.Destroyed { + if status != configs.Destroyed { return newGenericError(nil, ContainerNotStopped) } - - os.RemoveAll(c.root) - return nil + return os.RemoveAll(c.root) } func (c *linuxContainer) Pause() error { - return c.cgroupManager.Freeze(cgroups.Frozen) + return c.cgroupManager.Freeze(configs.Frozen) } func (c *linuxContainer) Resume() error { - return c.cgroupManager.Freeze(cgroups.Thawed) + return c.cgroupManager.Freeze(configs.Thawed) } func (c *linuxContainer) Signal(pid, signal int) error { @@ -194,3 +183,7 @@ func (c *linuxContainer) WaitProcess(pid int) (int, error) { return int(status), err } + +func (c *linuxContainer) OOM() (<-chan struct{}, error) { + return NotifyOnOOM(c.state) +} diff --git a/linux_factory.go b/linux_factory.go index b88a66de..9f23b154 100644 --- a/linux_factory.go +++ b/linux_factory.go @@ -33,7 +33,6 @@ func New(root string, initArgs []string) (Factory, error) { return nil, newGenericError(err, SystemError) } } - return &linuxFactory{ root: root, initArgs: initArgs, diff --git a/mount/init.go b/mount/init.go index 91a27294..4f994333 100644 --- a/mount/init.go +++ b/mount/init.go @@ -8,11 +8,10 @@ import ( "path/filepath" "syscall" + "github.com/docker/libcontainer/configs" 
"github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/mount/nodes" ) -// default mount point flags const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV type mount struct { @@ -25,85 +24,60 @@ type mount struct { // InitializeMountNamespace sets up the devices, mount points, and filesystems for use inside a // new mount namespace. -func InitializeMountNamespace(rootfs, console string, sysReadonly bool, hostRootUid, hostRootGid int, mountConfig *MountConfig) error { - var ( - err error - flag = syscall.MS_PRIVATE - ) - - if mountConfig.NoPivotRoot { - flag = syscall.MS_SLAVE +func InitializeMountNamespace(config *configs.Config) (err error) { + if err := prepareRoot(config); err != nil { + return err } - - if err := syscall.Mount("", "/", "", uintptr(flag|syscall.MS_REC), ""); err != nil { - return fmt.Errorf("mounting / with flags %X %s", (flag | syscall.MS_REC), err) + if err := mountSystem(config); err != nil { + return err } - - if err := syscall.Mount(rootfs, rootfs, "bind", syscall.MS_BIND|syscall.MS_REC, ""); err != nil { - return fmt.Errorf("mouting %s as bind %s", rootfs, err) - } - - if err := mountSystem(rootfs, sysReadonly, mountConfig); err != nil { - return fmt.Errorf("mount system %s", err) - } - // apply any user specified mounts within the new mount namespace - for _, m := range mountConfig.Mounts { - if err := m.Mount(rootfs, mountConfig.MountLabel); err != nil { + for _, m := range config.Mounts { + if err := m.Mount(config.RootFs, config.MountLabel); err != nil { return err } } - - if err := nodes.CreateDeviceNodes(rootfs, mountConfig.DeviceNodes); err != nil { - return fmt.Errorf("create device nodes %s", err) - } - - if err := SetupPtmx(rootfs, console, mountConfig.MountLabel, hostRootUid, hostRootGid); err != nil { + if err := createDeviceNodes(config); err != nil { + return err + } + if err := setupPtmx(config); err != nil { return err } - // stdin, stdout and stderr could be pointing to 
/dev/null from parent namespace. // Re-open them inside this namespace. // FIXME: Need to fix this for user namespaces. - if hostRootUid == 0 { - if err := reOpenDevNull(rootfs); err != nil { - return fmt.Errorf("Failed to reopen /dev/null %s", err) + if 0 == 0 { + if err := reOpenDevNull(config.RootFs); err != nil { + return err } } - - if err := setupDevSymlinks(rootfs); err != nil { - return fmt.Errorf("dev symlinks %s", err) + if err := setupDevSymlinks(config.RootFs); err != nil { + return err } - - if err := syscall.Chdir(rootfs); err != nil { - return fmt.Errorf("chdir into %s %s", rootfs, err) + if err := syscall.Chdir(config.RootFs); err != nil { + return err } - - if mountConfig.NoPivotRoot { - err = MsMoveRoot(rootfs) + if config.NoPivotRoot { + err = msMoveRoot(config.RootFs) } else { - err = PivotRoot(rootfs, mountConfig.PivotDir) + err = pivotRoot(config.RootFs, config.PivotDir) } - if err != nil { return err } - - if mountConfig.ReadonlyFs { - if err := SetReadonly(); err != nil { + if config.ReadonlyFs { + if err := setReadonly(); err != nil { return fmt.Errorf("set readonly %s", err) } } - syscall.Umask(0022) - return nil } // mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts // inside the mount namespace -func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error { - for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) { +func mountSystem(config *configs.Config) error { + for _, m := range newSystemMounts(config.RootFs, config.MountLabel, config.RestrictSys) { if err := os.MkdirAll(m.path, 0755); err != nil && !os.IsExist(err) { return fmt.Errorf("mkdirall %s %s", m.path, err) } @@ -114,28 +88,6 @@ func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) erro return nil } -func createIfNotExists(path string, isDir bool) error { - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - if isDir { - if err := os.MkdirAll(path, 
0755); err != nil { - return err - } - } else { - if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { - return err - } - f, err := os.OpenFile(path, os.O_CREATE, 0755) - if err != nil { - return err - } - f.Close() - } - } - } - return nil -} - func setupDevSymlinks(rootfs string) error { var links = [][2]string{ {"/proc/self/fd", "/dev/fd"}, @@ -210,3 +162,54 @@ func reOpenDevNull(rootfs string) error { } return nil } + +// Create the device nodes in the container. +func createDeviceNodes(config *configs.Config) error { + oldMask := syscall.Umask(0000) + for _, node := range config.DeviceNodes { + if err := createDeviceNode(config.RootFs, node); err != nil { + syscall.Umask(oldMask) + return err + } + } + syscall.Umask(oldMask) + return nil +} + +// Creates the device node in the rootfs of the container. +func createDeviceNode(rootfs string, node *configs.Device) error { + var ( + dest = filepath.Join(rootfs, node.Path) + parent = filepath.Dir(dest) + ) + if err := os.MkdirAll(parent, 0755); err != nil { + return err + } + fileMode := node.FileMode + switch node.Type { + case 'c': + fileMode |= syscall.S_IFCHR + case 'b': + fileMode |= syscall.S_IFBLK + default: + return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) + } + if err := syscall.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil && !os.IsExist(err) { + return fmt.Errorf("mknod %s %s", node.Path, err) + } + if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { + return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) + } + return nil +} + +func prepareRoot(config *configs.Config) error { + flag := syscall.MS_PRIVATE | syscall.MS_REC + if config.NoPivotRoot { + flag = syscall.MS_SLAVE | syscall.MS_REC + } + if err := syscall.Mount("", "/", "", uintptr(flag), ""); err != nil { + return err + } + return syscall.Mount(config.RootFs, config.RootFs, "bind", syscall.MS_BIND|syscall.MS_REC, "") +} diff --git 
a/mount/mount_config.go b/mount/mount_config.go deleted file mode 100644 index f19465e6..00000000 --- a/mount/mount_config.go +++ /dev/null @@ -1,33 +0,0 @@ -package mount - -import ( - "errors" - - "github.com/docker/libcontainer/devices" -) - -var ErrUnsupported = errors.New("Unsupported method") - -type MountConfig struct { - // NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs - // This is a common option when the container is running in ramdisk - NoPivotRoot bool `json:"no_pivot_root,omitempty"` - - // PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set. - // When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable. - // This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot. - PivotDir string `json:"pivot_dir,omitempty"` - - // ReadonlyFs will remount the container's rootfs as readonly where only externally mounted - // bind mounts are writtable - ReadonlyFs bool `json:"readonly_fs,omitempty"` - - // Mounts specify additional source and destination paths that will be mounted inside the container's - // rootfs and mount namespace if specified - Mounts []*Mount `json:"mounts,omitempty"` - - // The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well! 
- DeviceNodes []*devices.Device `json:"device_nodes,omitempty"` - - MountLabel string `json:"mount_label,omitempty"` -} diff --git a/mount/msmoveroot.go b/mount/msmoveroot.go index 94afd3a9..17b73293 100644 --- a/mount/msmoveroot.go +++ b/mount/msmoveroot.go @@ -2,19 +2,14 @@ package mount -import ( - "fmt" - "syscall" -) +import "syscall" -func MsMoveRoot(rootfs string) error { +func msMoveRoot(rootfs string) error { if err := syscall.Mount(rootfs, "/", "", syscall.MS_MOVE, ""); err != nil { - return fmt.Errorf("mount move %s into / %s", rootfs, err) + return err } - if err := syscall.Chroot("."); err != nil { - return fmt.Errorf("chroot . %s", err) + return err } - return syscall.Chdir("/") } diff --git a/mount/nodes/nodes.go b/mount/nodes/nodes.go deleted file mode 100644 index 322c0c0e..00000000 --- a/mount/nodes/nodes.go +++ /dev/null @@ -1,57 +0,0 @@ -// +build linux - -package nodes - -import ( - "fmt" - "os" - "path/filepath" - "syscall" - - "github.com/docker/libcontainer/devices" -) - -// Create the device nodes in the container. -func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error { - oldMask := syscall.Umask(0000) - defer syscall.Umask(oldMask) - - for _, node := range nodesToCreate { - if err := CreateDeviceNode(rootfs, node); err != nil { - return err - } - } - return nil -} - -// Creates the device node in the rootfs of the container. 
-func CreateDeviceNode(rootfs string, node *devices.Device) error { - var ( - dest = filepath.Join(rootfs, node.Path) - parent = filepath.Dir(dest) - ) - - if err := os.MkdirAll(parent, 0755); err != nil { - return err - } - - fileMode := node.FileMode - switch node.Type { - case 'c': - fileMode |= syscall.S_IFCHR - case 'b': - fileMode |= syscall.S_IFBLK - default: - return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) - } - - if err := syscall.Mknod(dest, uint32(fileMode), devices.Mkdev(node.MajorNumber, node.MinorNumber)); err != nil && !os.IsExist(err) { - return fmt.Errorf("mknod %s %s", node.Path, err) - } - - if err := syscall.Chown(dest, int(node.Uid), int(node.Gid)); err != nil { - return fmt.Errorf("chown %s to %d:%d", node.Path, node.Uid, node.Gid) - } - - return nil -} diff --git a/mount/nodes/nodes_unsupported.go b/mount/nodes/nodes_unsupported.go deleted file mode 100644 index 83660715..00000000 --- a/mount/nodes/nodes_unsupported.go +++ /dev/null @@ -1,13 +0,0 @@ -// +build !linux - -package nodes - -import ( - "errors" - - "github.com/docker/libcontainer/devices" -) - -func CreateDeviceNodes(rootfs string, nodesToCreate []*devices.Device) error { - return errors.New("Unsupported method") -} diff --git a/mount/pivotroot.go b/mount/pivotroot.go index acc3be24..3d422774 100644 --- a/mount/pivotroot.go +++ b/mount/pivotroot.go @@ -10,7 +10,7 @@ import ( "syscall" ) -func PivotRoot(rootfs, pivotBaseDir string) error { +func pivotRoot(rootfs, pivotBaseDir string) error { if pivotBaseDir == "" { pivotBaseDir = "/" } @@ -22,20 +22,16 @@ func PivotRoot(rootfs, pivotBaseDir string) error { if err != nil { return fmt.Errorf("can't create pivot_root dir %s, error %v", pivotDir, err) } - if err := syscall.PivotRoot(rootfs, pivotDir); err != nil { return fmt.Errorf("pivot_root %s", err) } - if err := syscall.Chdir("/"); err != nil { return fmt.Errorf("chdir / %s", err) } - // path to pivot dir now changed, update pivotDir = 
filepath.Join(pivotBaseDir, filepath.Base(pivotDir)) if err := syscall.Unmount(pivotDir, syscall.MNT_DETACH); err != nil { return fmt.Errorf("unmount pivot_root dir %s", err) } - return os.Remove(pivotDir) } diff --git a/mount/ptmx.go b/mount/ptmx.go index 5b558775..25de75b1 100644 --- a/mount/ptmx.go +++ b/mount/ptmx.go @@ -7,24 +7,29 @@ import ( "os" "path/filepath" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/console" ) -func SetupPtmx(rootfs, consolePath, mountLabel string, hostRootUid, hostRootGid int) error { - ptmx := filepath.Join(rootfs, "dev/ptmx") +func setupPtmx(config *configs.Config) error { + ptmx := filepath.Join(config.RootFs, "dev/ptmx") if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { return err } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { return fmt.Errorf("symlink dev ptmx %s", err) } - - if consolePath != "" { - if err := console.Setup(rootfs, consolePath, mountLabel, hostRootUid, hostRootGid); err != nil { + if config.Console != "" { + uid, err := config.HostUID() + if err != nil { return err } + gid, err := config.HostGID() + if err != nil { + return err + } + // TODO: (crosbymichael) get uid/gid + return console.Setup(config.RootFs, config.Console, config.MountLabel, uid, gid) } - return nil } diff --git a/mount/readonly.go b/mount/readonly.go index 9b4a6f70..855c9f99 100644 --- a/mount/readonly.go +++ b/mount/readonly.go @@ -6,6 +6,6 @@ import ( "syscall" ) -func SetReadonly() error { +func setReadonly() error { return syscall.Mount("/", "/", "bind", syscall.MS_BIND|syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_REC, "") } diff --git a/namespaces/exec.go b/namespaces/exec.go index 1c157e3a..ec462cbc 100644 --- a/namespaces/exec.go +++ b/namespaces/exec.go @@ -20,27 +20,23 @@ const ( EXIT_SIGNAL_OFFSET = 128 ) -func executeSetupCmd(args []string, ppid int, container *configs.Config, process *processArgs, networkState *network.NetworkState) error { +func executeSetupCmd(args []string, 
ppid int, container *configs.Config, process *processArgs, networkState *configs.NetworkState) error { command := exec.Command(args[0], args[1:]...) - parent, child, err := newInitPipe() if err != nil { return err } defer parent.Close() command.ExtraFiles = []*os.File{child} - command.Dir = container.RootFs command.Env = append(command.Env, fmt.Sprintf("_LIBCONTAINER_INITPID=%d", ppid), fmt.Sprintf("_LIBCONTAINER_USERNS=1")) - err = command.Start() child.Close() if err != nil { return err } - s, err := command.Process.Wait() if err != nil { return err @@ -48,36 +44,29 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process if !s.Success() { return &exec.ExitError{s} } - decoder := json.NewDecoder(parent) var pid *pid - if err := decoder.Decode(&pid); err != nil { return err } - p, err := os.FindProcess(pid.Pid) if err != nil { return err } - terminate := func(terr error) error { // TODO: log the errors for kill and wait p.Kill() p.Wait() return terr } - // send the state to the container's init process then shutdown writes for the parent if err := json.NewEncoder(parent).Encode(process); err != nil { return terminate(err) } - // shutdown writes for the parent side of the pipe if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { return terminate(err) } - // wait for the child process to fully complete and receive an error message // if one was encoutered var ierr *initError @@ -87,7 +76,6 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process if ierr != nil { return ierr } - s, err = p.Wait() if err != nil { return err @@ -95,7 +83,6 @@ func executeSetupCmd(args []string, ppid int, container *configs.Config, process if !s.Success() { return &exec.ExitError{s} } - return nil } @@ -165,7 +152,7 @@ func Exec(args []string, env []string, console string, command *exec.Cmd, contai } }() - var networkState network.NetworkState + var networkState configs.NetworkState if err := 
InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { return terminate(err) } @@ -218,7 +205,7 @@ func killAllPids(m cgroups.Manager) error { var ( procs []*os.Process ) - m.Freeze(cgroups.Frozen) + m.Freeze(configs.Frozen) pids, err := m.GetPids() if err != nil { return err @@ -231,61 +218,13 @@ func killAllPids(m cgroups.Manager) error { p.Kill() } } - m.Freeze(cgroups.Thawed) + m.Freeze(configs.Thawed) for _, p := range procs { p.Wait() } return err } -// Utility function that gets a host ID for a container ID from user namespace map -// if that ID is present in the map. -func hostIDFromMapping(containerID int, uMap []configs.IDMap) (int, bool) { - for _, m := range uMap { - if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { - hostID := m.HostID + (containerID - m.ContainerID) - return hostID, true - } - } - return -1, false -} - -// Gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. -func GetHostRootGid(container *configs.Config) (int, error) { - if container.Namespaces.Contains(configs.NEWUSER) { - if container.GidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.") - } - hostRootGid, found := hostIDFromMapping(0, container.GidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") - } - return hostRootGid, nil - } - - // Return default root uid 0 - return 0, nil -} - -// Gets the root uid for the process on host which could be non-zero -// when user namespaces are enabled. 
-func GetHostRootUid(container *configs.Config) (int, error) { - if container.Namespaces.Contains(configs.NEWUSER) { - if container.UidMappings == nil { - return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.") - } - hostRootUid, found := hostIDFromMapping(0, container.UidMappings) - if !found { - return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.") - } - return hostRootUid, nil - } - - // Return default root uid 0 - return 0, nil -} - // Converts IDMap to SysProcIDMap array and adds it to SysProcAttr. func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { if container.UidMappings != nil { @@ -309,13 +248,13 @@ func AddUidGidMappings(sys *syscall.SysProcAttr, container *configs.Config) { // InitializeNetworking creates the container's network stack outside of the namespace and moves // interfaces into the container's net namespaces if necessary -func InitializeNetworking(container *configs.Config, nspid int, networkState *network.NetworkState) error { +func InitializeNetworking(container *configs.Config, nspid int, networkState *configs.NetworkState) error { for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err != nil { return err } - if err := strategy.Create((*network.Network)(config), nspid, networkState); err != nil { + if err := strategy.Create(config, nspid, networkState); err != nil { return err } } diff --git a/namespaces/execin.go b/namespaces/execin.go index 3b51a587..75e70a06 100644 --- a/namespaces/execin.go +++ b/namespaces/execin.go @@ -16,7 +16,6 @@ import ( "github.com/docker/libcontainer/label" "github.com/docker/libcontainer/mount" "github.com/docker/libcontainer/system" - "github.com/docker/libcontainer/utils" ) type pid struct { @@ -173,12 +172,14 @@ func FinalizeSetns(container *configs.Config) error { func SetupContainer(process *processArgs) error { container := process.Config networkState := process.NetworkState - 
consolePath := process.ConsolePath - rootfs, err := utils.ResolveRootfs(container.RootFs) - if err != nil { - return err - } + // TODO : move to validation + /* + rootfs, err := utils.ResolveRootfs(container.RootFs) + if err != nil { + return err + } + */ // clear the current processes env and replace it with the environment // defined on the container @@ -203,30 +204,12 @@ func SetupContainer(process *processArgs) error { label.Init() - hostRootUid, err := GetHostRootUid(container) - if err != nil { - return fmt.Errorf("failed to get hostRootUid %s", err) - } - - hostRootGid, err := GetHostRootGid(container) - if err != nil { - return fmt.Errorf("failed to get hostRootGid %s", err) - } - // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - if container.MountConfig != nil { - return fmt.Errorf("mount config is set without mount namespace") + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(container); err != nil { + return fmt.Errorf("setup mount namespace %s", err) } - } else if err := mount.InitializeMountNamespace(rootfs, - consolePath, - container.RestrictSys, - hostRootUid, - hostRootGid, - (*mount.MountConfig)(container.MountConfig)); err != nil { - return fmt.Errorf("setup mount namespace %s", err) } - return nil } diff --git a/namespaces/init.go b/namespaces/init.go index 58254514..d651352a 100644 --- a/namespaces/init.go +++ b/namespaces/init.go @@ -30,7 +30,7 @@ type processArgs struct { Env []string `json:"environment,omitempty"` ConsolePath string `json:"console_path,omitempty"` Config *configs.Config `json:"config,omitempty"` - NetworkState *network.NetworkState `json:"network_state,omitempty"` + NetworkState *configs.NetworkState `json:"network_state,omitempty"` } // TODO(vishh): This is part of the libcontainer API and it does much more than just namespaces related work. 
@@ -88,10 +88,13 @@ func initDefault(uncleanRootfs string, process *processArgs) (err error) { container := process.Config networkState := process.NetworkState - rootfs, err := utils.ResolveRootfs(uncleanRootfs) - if err != nil { - return err - } + // TODO: move to validation + /* + rootfs, err := utils.ResolveRootfs(uncleanRootfs) + if err != nil { + return err + } + */ // clear the current processes env and replace it with the environment // defined on the container @@ -139,20 +142,14 @@ func initDefault(uncleanRootfs string, process *processArgs) (err error) { label.Init() // InitializeMountNamespace() can be executed only for a new mount namespace - if (cloneFlags & syscall.CLONE_NEWNS) == 0 { - if container.MountConfig != nil { - return fmt.Errorf("mount config is set without mount namespace") + if (cloneFlags & syscall.CLONE_NEWNS) != 0 { + if err := mount.InitializeMountNamespace(container); err != nil { + return err } - } else if err := mount.InitializeMountNamespace(rootfs, - process.ConsolePath, - container.RestrictSys, - 0, // Default Root Uid - 0, // Default Root Gid - (*mount.MountConfig)(container.MountConfig)); err != nil { - return fmt.Errorf("setup mount namespace %s", err) } if container.Hostname != "" { + // TODO: (crosbymichael) move this to pre spawn validation if (cloneFlags & syscall.CLONE_NEWUTS) == 0 { return fmt.Errorf("unable to set the hostname without UTS namespace") } @@ -357,14 +354,14 @@ func SetupUser(container *configs.Config) error { // setupVethNetwork uses the Network config if it is not nil to initialize // the new veth interface inside the container for use by changing the name to eth0 // setting the MTU and IP address along with the default gateway -func setupNetwork(container *configs.Config, networkState *network.NetworkState) error { +func setupNetwork(container *configs.Config, networkState *configs.NetworkState) error { for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err 
!= nil { return err } - err1 := strategy.Initialize((*network.Network)(config), networkState) + err1 := strategy.Initialize(config, networkState) if err1 != nil { return err1 } diff --git a/network/loopback.go b/network/loopback.go index 1667b4d8..b208dfc6 100644 --- a/network/loopback.go +++ b/network/loopback.go @@ -4,17 +4,19 @@ package network import ( "fmt" + + "github.com/docker/libcontainer/configs" ) // Loopback is a network strategy that provides a basic loopback device type Loopback struct { } -func (l *Loopback) Create(n *Network, nspid int, networkState *NetworkState) error { +func (l *Loopback) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { return nil } -func (l *Loopback) Initialize(config *Network, networkState *NetworkState) error { +func (l *Loopback) Initialize(config *configs.Network, networkState *configs.NetworkState) error { // Do not set the MTU on the loopback interface - use the default. if err := InterfaceUp("lo"); err != nil { return fmt.Errorf("lo up %s", err) diff --git a/network/stats.go b/network/stats.go index e2156c74..329a16bf 100644 --- a/network/stats.go +++ b/network/stats.go @@ -5,6 +5,8 @@ import ( "path/filepath" "strconv" "strings" + + "github.com/docker/libcontainer/configs" ) type NetworkStats struct { @@ -19,7 +21,7 @@ type NetworkStats struct { } // Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. -func GetStats(networkState *NetworkState) (*NetworkStats, error) { +func GetStats(networkState *configs.NetworkState) (*NetworkStats, error) { // This can happen if the network runtime information is missing - possible if the container was created by an old version of libcontainer. 
if networkState.VethHost == "" { return &NetworkStats{}, nil diff --git a/network/strategy.go b/network/strategy.go index 019fe62f..bc4a023b 100644 --- a/network/strategy.go +++ b/network/strategy.go @@ -4,6 +4,8 @@ package network import ( "errors" + + "github.com/docker/libcontainer/configs" ) var ( @@ -18,8 +20,8 @@ var strategies = map[string]NetworkStrategy{ // NetworkStrategy represents a specific network configuration for // a container's networking stack type NetworkStrategy interface { - Create(*Network, int, *NetworkState) error - Initialize(*Network, *NetworkState) error + Create(*configs.Network, int, *configs.NetworkState) error + Initialize(*configs.Network, *configs.NetworkState) error } // GetStrategy returns the specific network strategy for the diff --git a/network/types.go b/network/types.go index dcf00420..1ae2e9d5 100644 --- a/network/types.go +++ b/network/types.go @@ -1,50 +1 @@ package network - -// Network defines configuration for a container's networking stack -// -// The network configuration can be omited from a container causing the -// container to be setup with the host's networking stack -type Network struct { - // Type sets the networks type, commonly veth and loopback - Type string `json:"type,omitempty"` - - // The bridge to use. - Bridge string `json:"bridge,omitempty"` - - // Prefix for the veth interfaces. 
- VethPrefix string `json:"veth_prefix,omitempty"` - - // MacAddress contains the MAC address to set on the network interface - MacAddress string `json:"mac_address,omitempty"` - - // Address contains the IPv4 and mask to set on the network interface - Address string `json:"address,omitempty"` - - // IPv6Address contains the IPv6 and mask to set on the network interface - IPv6Address string `json:"ipv6_address,omitempty"` - - // Gateway sets the gateway address that is used as the default for the interface - Gateway string `json:"gateway,omitempty"` - - // IPv6Gateway sets the ipv6 gateway address that is used as the default for the interface - IPv6Gateway string `json:"ipv6_gateway,omitempty"` - - // Mtu sets the mtu value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - Mtu int `json:"mtu,omitempty"` - - // TxQueueLen sets the tx_queuelen value for the interface and will be mirrored on both the host and - // container's interfaces if a pair is created, specifically in the case of type veth - // Note: This does not apply to loopback interfaces. - TxQueueLen int `json:"txqueuelen,omitempty"` -} - -// Struct describing the network specific runtime state that will be maintained by libcontainer for all running containers -// Do not depend on it outside of libcontainer. -type NetworkState struct { - // The name of the veth interface on the Host. - VethHost string `json:"veth_host,omitempty"` - // The name of the veth interface created inside the container for the child. 
- VethChild string `json:"veth_child,omitempty"` -} diff --git a/network/veth.go b/network/veth.go index 3d7dc872..7bcc3910 100644 --- a/network/veth.go +++ b/network/veth.go @@ -5,6 +5,7 @@ package network import ( "fmt" + "github.com/docker/libcontainer/configs" "github.com/docker/libcontainer/netlink" "github.com/docker/libcontainer/utils" ) @@ -17,7 +18,7 @@ type Veth struct { const defaultDevice = "eth0" -func (v *Veth) Create(n *Network, nspid int, networkState *NetworkState) error { +func (v *Veth) Create(n *configs.Network, nspid int, networkState *configs.NetworkState) error { var ( bridge = n.Bridge prefix = n.VethPrefix @@ -51,7 +52,7 @@ func (v *Veth) Create(n *Network, nspid int, networkState *NetworkState) error { return nil } -func (v *Veth) Initialize(config *Network, networkState *NetworkState) error { +func (v *Veth) Initialize(config *configs.Network, networkState *configs.NetworkState) error { var vethChild = networkState.VethChild if vethChild == "" { return fmt.Errorf("vethChild is not specified") diff --git a/nsinit/exec.go b/nsinit/exec.go index 525991d3..ef7762ed 100644 --- a/nsinit/exec.go +++ b/nsinit/exec.go @@ -26,17 +26,15 @@ var execCommand = cli.Command{ Usage: "execute a new command inside a container", Action: execAction, Flags: []cli.Flag{ - cli.BoolFlag{Name: "list", Usage: "list all registered exec functions"}, - cli.StringFlag{Name: "func", Value: "exec", Usage: "function name to exec inside a container"}, + cli.BoolFlag{Name: "tty", Usage: "allocate a TTY to the container"}, }, } func getContainer(context *cli.Context) (libcontainer.Container, error) { - factory, err := libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) + factory, err := loadFactory(context) if err != nil { log.Fatal(err) } - id := fmt.Sprintf("%x", md5.Sum([]byte(dataPath))) container, err := factory.Load(id) if err != nil && !os.IsNotExist(err) { @@ -72,7 +70,7 @@ func execAction(context *cli.Context) { 
log.Fatal(err) } - if container.Config().Tty { + if context.Bool("tty") { stdin = nil stdout = nil stderr = nil diff --git a/nsinit/main.go b/nsinit/main.go index 2de7bc3e..034afb6f 100644 --- a/nsinit/main.go +++ b/nsinit/main.go @@ -7,24 +7,16 @@ import ( "github.com/codegangsta/cli" ) -var ( - logPath = os.Getenv("log") -) - func main() { app := cli.NewApp() - app.Name = "nsinit" - app.Version = "0.1" + app.Version = "1" app.Author = "libcontainer maintainers" app.Flags = []cli.Flag{ cli.StringFlag{Name: "nspid"}, cli.StringFlag{Name: "console"}, cli.StringFlag{Name: "root", Value: ".", Usage: "root directory for containers"}, } - - app.Before = preload - app.Commands = []cli.Command{ configCommand, execCommand, @@ -34,7 +26,6 @@ func main() { statsCommand, unpauseCommand, } - if err := app.Run(os.Args); err != nil { log.Fatal(err) } diff --git a/nsinit/oom.go b/nsinit/oom.go index f7a333d4..194c45cb 100644 --- a/nsinit/oom.go +++ b/nsinit/oom.go @@ -4,8 +4,6 @@ import ( "log" "github.com/codegangsta/cli" - "github.com/docker/libcontainer" - "github.com/docker/libcontainer/configs" ) var oomCommand = cli.Command{ @@ -15,11 +13,15 @@ var oomCommand = cli.Command{ } func oomAction(context *cli.Context) { - state, err := configs.GetState(dataPath) + factory, err := loadFactory(context) if err != nil { log.Fatal(err) } - n, err := libcontainer.NotifyOnOOM(state) + container, err := factory.Load("nsinit") + if err != nil { + log.Fatal(err) + } + n, err := container.OOM() if err != nil { log.Fatal(err) } diff --git a/nsinit/utils.go b/nsinit/utils.go index e02a1b3a..c46d98b1 100644 --- a/nsinit/utils.go +++ b/nsinit/utils.go @@ -2,59 +2,27 @@ package main import ( "encoding/json" - "log" "os" "path/filepath" "github.com/codegangsta/cli" + "github.com/docker/libcontainer" "github.com/docker/libcontainer/configs" ) -// rFunc is a function registration for calling after an execin -type rFunc struct { - Usage string - Action func(*configs.Config, []string) -} - func 
loadConfig() (*configs.Config, error) { f, err := os.Open(filepath.Join(dataPath, "container.json")) if err != nil { return nil, err } defer f.Close() - var container *configs.Config if err := json.NewDecoder(f).Decode(&container); err != nil { return nil, err } - return container, nil } -func openLog(name string) error { - f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755) - if err != nil { - return err - } - - log.SetOutput(f) - - return nil -} - -func findUserArgs() []string { - i := 0 - for _, a := range os.Args { - i++ - - if a == "--" { - break - } - } - - return os.Args[i:] -} - // loadConfigFromFd loads a container's config from the sync pipe that is provided by // fd 3 when running a process func loadConfigFromFd() (*configs.Config, error) { @@ -68,23 +36,6 @@ func loadConfigFromFd() (*configs.Config, error) { return config, nil } -func preload(context *cli.Context) error { - if logPath != "" { - if err := openLog(logPath); err != nil { - return err - } - } - - return nil -} - -func runFunc(f *rFunc) { - userArgs := findUserArgs() - - config, err := loadConfigFromFd() - if err != nil { - log.Fatalf("unable to receive config from sync pipe: %s", err) - } - - f.Action(config, userArgs) +func loadFactory(context *cli.Context) (libcontainer.Factory, error) { + return libcontainer.New(context.GlobalString("root"), []string{os.Args[0], "init", "--fd", "3", "--"}) } diff --git a/types.go b/types.go deleted file mode 100644 index c341137e..00000000 --- a/types.go +++ /dev/null @@ -1,11 +0,0 @@ -package libcontainer - -import ( - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/network" -) - -type ContainerStats struct { - NetworkStats *network.NetworkStats `json:"network_stats,omitempty"` - CgroupStats *cgroups.Stats `json:"cgroup_stats,omitempty"` -}