rootless: add rootless cgroup manager
The rootless cgroup manager acts as a noop for all set and apply operations. It is just used for rootless setups. Currently this is far too simple (we need to add opportunistic cgroup management), but is good enough as a first-pass at a noop cgroup manager. Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
parent
d2f49696b0
commit
baeef29858
|
@ -267,25 +267,8 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
|||
}, nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
|
||||
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
initPath, err := cgroups.GetThisCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
relDir, err := filepath.Rel(root, initPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(mountpoint, relDir), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
|
||||
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -297,7 +280,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
|
|||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
parentPath, err := raw.parentPath(subsystem, mnt, root)
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
// +build linux
|
||||
|
||||
package rootless
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
)
|
||||
|
||||
// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code
|
||||
// needlessly. We should probably export this list.
|
||||
|
||||
var subsystems = []subsystem{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
&fs.FreezerGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
}
|
||||
|
||||
// The noop cgroup manager is used for rootless containers, because we currently
|
||||
// cannot manage cgroups if we are in a rootless setup. This manager is chosen
|
||||
// by factory if we are in rootless mode. We error out if any cgroup options are
|
||||
// set in the config -- this may change in the future with upcoming kernel features
|
||||
// like the cgroup namespace.
|
||||
|
||||
type Manager struct {
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
// If there are no cgroup settings, there's nothing to do.
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// We can't set paths.
|
||||
// TODO(cyphar): Implement the case where the runner of a rootless container
|
||||
// owns their own cgroup, which would allow us to set up a
|
||||
// cgroup for each path.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return fmt.Errorf("cannot change cgroup path in rootless container")
|
||||
}
|
||||
|
||||
// We load the paths into the manager.
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
|
||||
path, err := cgroups.GetOwnCgroupPath(name)
|
||||
if err != nil {
|
||||
// Ignore paths we couldn't resolve.
|
||||
continue
|
||||
}
|
||||
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
return m.Paths
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
// We have to re-do the validation here, since someone might decide to
|
||||
// update a rootless container.
|
||||
return validate.New().Validate(container)
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
dir, err := cgroups.GetOwnCgroupPath("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
dir, err := cgroups.GetOwnCgroupPath("devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(dir)
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
// TODO(cyphar): We can make this work if we figure out a way to allow usage
|
||||
// of cgroups with a rootless container. While this doesn't
|
||||
// actually require write access to a cgroup directory, the
|
||||
// statistics are not useful if they can be affected by
|
||||
// non-container processes.
|
||||
return nil, fmt.Errorf("cannot get cgroup stats in rootless container")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
// TODO(cyphar): We can make this work if we figure out a way to allow usage
|
||||
// of cgroups with a rootless container.
|
||||
return fmt.Errorf("cannot use freezer cgroup in rootless container")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
// We don't have to do anything here because we didn't do any setup.
|
||||
return nil
|
||||
}
|
|
@ -426,7 +426,7 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
|||
return "", err
|
||||
}
|
||||
|
||||
initPath, err := cgroups.GetInitCgroupDir(subsystem)
|
||||
initPath, err := cgroups.GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ type Mount struct {
|
|||
Subsystems []string
|
||||
}
|
||||
|
||||
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", fmt.Errorf("no subsystem for mount")
|
||||
}
|
||||
|
@ -203,8 +203,8 @@ func GetAllSubsystems() ([]string, error) {
|
|||
return subsystems, nil
|
||||
}
|
||||
|
||||
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
|
||||
func GetThisCgroupDir(subsystem string) (string, error) {
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -213,8 +213,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
|
|||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupDir(subsystem string) (string, error) {
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func GetInitCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
@ -223,6 +231,31 @@ func GetInitCgroupDir(subsystem string) (string, error) {
|
|||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
|
||||
if err != nil {
|
||||
|
|
|
@ -520,10 +520,18 @@ func (c *linuxContainer) Resume() error {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
|
||||
// XXX(cyphar): This requires cgroups.
|
||||
if c.config.Rootless {
|
||||
return nil, fmt.Errorf("cannot get OOM notifications from rootless container")
|
||||
}
|
||||
return notifyOnOOM(c.cgroupManager.GetPaths())
|
||||
}
|
||||
|
||||
func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) {
|
||||
// XXX(cyphar): This requires cgroups.
|
||||
if c.config.Rootless {
|
||||
return nil, fmt.Errorf("cannot get memory pressure notifications from rootless container")
|
||||
}
|
||||
return notifyMemoryPressure(c.cgroupManager.GetPaths(), level)
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/rootless"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
|
@ -73,6 +74,20 @@ func Cgroupfs(l *LinuxFactory) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// RootlessCgroups is an options func to configure a LinuxFactory to
|
||||
// return containers that use the "rootless" cgroup manager, which will
|
||||
// fail to do any operations not possible to do with an unprivileged user.
|
||||
// It should only be used in conjunction with rootless containers.
|
||||
func RootlessCgroups(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &rootless.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
|
||||
func TmpfsRoot(l *LinuxFactory) error {
|
||||
mounted, err := mount.Mounted(l.Root)
|
||||
|
@ -169,6 +184,9 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
|
|||
if err := os.Chown(containerRoot, uid, gid); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if config.Rootless {
|
||||
RootlessCgroups(l)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
|
@ -195,6 +213,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
|
|||
processStartTime: state.InitProcessStartTime,
|
||||
fds: state.ExternalDescriptors,
|
||||
}
|
||||
// We have to use the RootlessManager.
|
||||
if state.Rootless {
|
||||
RootlessCgroups(l)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
initProcess: r,
|
||||
initProcessStartTime: state.InitProcessStartTime,
|
||||
|
|
|
@ -254,15 +254,14 @@ func (p *initProcess) start() error {
|
|||
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
if !p.container.config.Rootless {
|
||||
// Do this before syncing with child so that no children can escape the
|
||||
// cgroup. We can't do this if we're not running as root.
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||
}
|
||||
// Do this before syncing with child so that no children can escape the
|
||||
// cgroup. We don't need to worry about not doing this and not being root
|
||||
// because we'd be using the rootless cgroup manager in that case.
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||
}
|
||||
defer func() {
|
||||
if err != nil && !p.container.config.Rootless {
|
||||
if err != nil {
|
||||
// TODO: should not be the responsibility to call here
|
||||
p.manager.Destroy()
|
||||
}
|
||||
|
@ -281,11 +280,8 @@ func (p *initProcess) start() error {
|
|||
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
|
||||
switch sync.Type {
|
||||
case procReady:
|
||||
// We can't set cgroups if we're in a rootless container.
|
||||
if !p.container.config.Rootless {
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
}
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
}
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
|
|
|
@ -348,7 +348,7 @@ func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
|
|||
var binds []*configs.Mount
|
||||
|
||||
for _, mm := range mounts {
|
||||
dir, err := mm.GetThisCgroupDir(cgroupPaths)
|
||||
dir, err := mm.GetOwnCgroup(cgroupPaths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue