From b2bec9806f999f55c0d0f9ce1b1064ffbc62f4a9 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Mon, 4 May 2020 22:39:37 +1000 Subject: [PATCH] cgroup: devices: eradicate the Allow/Deny lists These lists have been in the codebase for a very long time, and have been unused for a large portion of that time -- specconv doesn't generate them and the only user of these flags has been tests (which doesn't inspire much confidence). In addition, we had an incorrect implementation of a white-list policy. This wasn't exploitable because all of our users explicitly specify "deny all" as the first rule, but it was a pretty glaring issue that came from the "feature" that users can select whether they prefer a white- or black- list. Fix this by always writing a deny-all rule (which is what our users were doing anyway, to work around this bug). This is one of many changes needed to clean up the devices cgroup code. Signed-off-by: Aleksa Sarai --- libcontainer/README.md | 5 +- libcontainer/cgroups/fs/devices.go | 46 +++------ libcontainer/cgroups/fs/devices_test.go | 88 ++++------------- libcontainer/cgroups/fs2/devices.go | 33 +++---- libcontainer/configs/cgroup_linux.go | 9 +- libcontainer/configs/device_defaults.go | 111 ---------------------- libcontainer/integration/template_test.go | 7 +- libcontainer/rootfs_linux.go | 5 +- libcontainer/specconv/spec_linux.go | 17 +++- 9 files changed, 73 insertions(+), 248 deletions(-) delete mode 100644 libcontainer/configs/device_defaults.go diff --git a/libcontainer/README.md b/libcontainer/README.md index a791ca2d..6803ef56 100644 --- a/libcontainer/README.md +++ b/libcontainer/README.md @@ -155,8 +155,7 @@ config := &configs.Config{ Parent: "system", Resources: &configs.Resources{ MemorySwappiness: nil, - AllowAllDevices: nil, - AllowedDevices: configs.DefaultAllowedDevices, + Devices: specconv.AllowedDevices, }, }, MaskPaths: []string{ @@ -166,7 +165,7 @@ config := &configs.Config{ ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, - Devices: configs.DefaultAutoCreatedDevices, + Devices: specconv.AllowedDevices, Hostname: "testing", Mounts: []*configs.Mount{ { diff --git a/libcontainer/cgroups/fs/devices.go b/libcontainer/cgroups/fs/devices.go index 036c8db7..164bf326 100644 --- a/libcontainer/cgroups/fs/devices.go +++ b/libcontainer/cgroups/fs/devices.go @@ -31,44 +31,24 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error { return nil } + // The devices list is a whitelist, so we must first deny all devices. + // XXX: This is incorrect for device list updates as it will result in + // spurrious errors in the container, but we will solve that + // separately. + if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil { + return err + } + devices := cgroup.Resources.Devices - if len(devices) > 0 { - for _, dev := range devices { - file := "devices.deny" - if dev.Allow { - file = "devices.allow" - } - if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil { - return err - } + for _, dev := range devices { + file := "devices.deny" + if dev.Allow { + file = "devices.allow" } - return nil - } - if cgroup.Resources.AllowAllDevices != nil { - if *cgroup.Resources.AllowAllDevices == false { - if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil { - return err - } - - for _, dev := range cgroup.Resources.AllowedDevices { - if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil { - return err - } - } - return nil - } - - if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil { + if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil { return err } } - - for _, dev := range cgroup.Resources.DeniedDevices { - if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil { - return err - } - } - return nil } diff --git a/libcontainer/cgroups/fs/devices_test.go b/libcontainer/cgroups/fs/devices_test.go index 648f4a2f..90b9f084 100644 --- a/libcontainer/cgroups/fs/devices_test.go +++ b/libcontainer/cgroups/fs/devices_test.go @@ -9,8 +9,16 @@ import ( "github.com/opencontainers/runc/libcontainer/configs" ) -var ( - allowedDevices = []*configs.Device{ +func TestDevicesSetAllow(t *testing.T) { + helper := NewCgroupTestUtil("devices", t) + defer helper.cleanup() + + helper.writeFileContents(map[string]string{ + "devices.allow": "", + "devices.deny": "", + }) + + helper.CgroupData.config.Resources.Devices = []*configs.Device{ { Path: "/dev/zero", Type: 'c', @@ -18,82 +26,28 @@ var ( Minor: 5, Permissions: "rwm", FileMode: 0666, + Allow: true, }, } - allowedList = "c 1:5 rwm" - deniedDevices = []*configs.Device{ - { - Path: "/dev/null", - Type: 'c', - Major: 1, - Minor: 3, - Permissions: "rwm", - FileMode: 0666, - }, - } - deniedList = "c 1:3 rwm" -) -func TestDevicesSetAllow(t *testing.T) { - helper := NewCgroupTestUtil("devices", t) - defer helper.cleanup() - - helper.writeFileContents(map[string]string{ - "devices.deny": "a", - }) - allowAllDevices := false - helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices - helper.CgroupData.config.Resources.AllowedDevices = allowedDevices - devices := &DevicesGroup{} - if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { - t.Fatal(err) - } - - value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow") - if err != nil { - t.Fatalf("Failed to parse devices.allow - %s", err) - } - - if value != allowedList { - t.Fatal("Got the wrong value, set devices.allow failed.") - } - - // When AllowAllDevices is nil, devices.allow file should not be modified. - helper.CgroupData.config.Resources.AllowAllDevices = nil - if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { - t.Fatal(err) - } - value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow") - if err != nil { - t.Fatalf("Failed to parse devices.allow - %s", err) - } - if value != allowedList { - t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.") - } -} - -func TestDevicesSetDeny(t *testing.T) { - helper := NewCgroupTestUtil("devices", t) - defer helper.cleanup() - - helper.writeFileContents(map[string]string{ - "devices.allow": "a", - }) - - allowAllDevices := true - helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices - helper.CgroupData.config.Resources.DeniedDevices = deniedDevices devices := &DevicesGroup{} if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil { t.Fatal(err) } + // The default deny rule must be written. value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny") if err != nil { - t.Fatalf("Failed to parse devices.deny - %s", err) + t.Fatalf("Failed to parse devices.deny: %s", err) + } + if value[0] != 'a' { + t.Errorf("Got the wrong value (%q), set devices.deny failed.", value) } - if value != deniedList { - t.Fatal("Got the wrong value, set devices.deny failed.") + // Permitted rule must be written. + if value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow"); err != nil { + t.Fatalf("Failed to parse devices.allow: %s", err) + } else if value != "c 1:5 rwm" { + t.Errorf("Got the wrong value (%q), set devices.allow failed.", value) } } diff --git a/libcontainer/cgroups/fs2/devices.go b/libcontainer/cgroups/fs2/devices.go index 6e45c787..96d6daeb 100644 --- a/libcontainer/cgroups/fs2/devices.go +++ b/libcontainer/cgroups/fs2/devices.go @@ -39,26 +39,10 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool { } func setDevices(dirPath string, cgroup *configs.Cgroup) error { + // XXX: This is currently a white-list (but all callers pass a blacklist of + // devices). This is bad for a whole variety of reasons, but will need + // to be fixed with co-ordinated effort with downstreams. devices := cgroup.Devices - // never set by OCI specconv - if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil { - if *allowAllDevices == true { - if len(cgroup.Resources.DeniedDevices) != 0 { - return errors.New("libcontainer: can't use DeniedDevices together with AllowAllDevices") - } - return nil - } - // *allowAllDevices=false is still used by the integration test - for _, ad := range cgroup.Resources.AllowedDevices { - d := *ad - d.Allow = true - devices = append(devices, &d) - } - } - if len(cgroup.Resources.DeniedDevices) != 0 { - // never set by OCI specconv - return errors.New("libcontainer DeniedDevices is not supported, use Devices") - } insts, license, err := devicefilter.DeviceFilter(devices) if err != nil { return err @@ -68,6 +52,17 @@ func setDevices(dirPath string, cgroup *configs.Cgroup) error { return errors.Errorf("cannot get dir FD for %s", dirPath) } defer unix.Close(dirFD) + // XXX: This code is currently incorrect when it comes to updating an + // existing cgroup with new rules (new rulesets are just appended to + // the program list because this uses BPF_F_ALLOW_MULTI). If we didn't + // use BPF_F_ALLOW_MULTI we could actually atomically swap the + // programs. + // + // The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a + // race-free blacklist because it acts as a whitelist by default, and + // having a deny-everything program cannot be overriden by other + // programs. You could temporarily insert a deny-everything program + // but that would result in spurrious failures during updates. if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { if !canSkipEBPFError(cgroup) { return err diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go index d1cb7318..1d2dca43 100644 --- a/libcontainer/configs/cgroup_linux.go +++ b/libcontainer/configs/cgroup_linux.go @@ -41,14 +41,7 @@ type Cgroup struct { } type Resources struct { - // If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list. - // Deprecated - AllowAllDevices *bool `json:"allow_all_devices,omitempty"` - // Deprecated - AllowedDevices []*Device `json:"allowed_devices,omitempty"` - // Deprecated - DeniedDevices []*Device `json:"denied_devices,omitempty"` - + // Devices is the set of access rules for devices in the container. Devices []*Device `json:"devices"` // Memory limit (in bytes) diff --git a/libcontainer/configs/device_defaults.go b/libcontainer/configs/device_defaults.go deleted file mode 100644 index e4f423c5..00000000 --- a/libcontainer/configs/device_defaults.go +++ /dev/null @@ -1,111 +0,0 @@ -// +build linux - -package configs - -var ( - // DefaultSimpleDevices are devices that are to be both allowed and created. - DefaultSimpleDevices = []*Device{ - // /dev/null and zero - { - Path: "/dev/null", - Type: 'c', - Major: 1, - Minor: 3, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/zero", - Type: 'c', - Major: 1, - Minor: 5, - Permissions: "rwm", - FileMode: 0666, - }, - - { - Path: "/dev/full", - Type: 'c', - Major: 1, - Minor: 7, - Permissions: "rwm", - FileMode: 0666, - }, - - // consoles and ttys - { - Path: "/dev/tty", - Type: 'c', - Major: 5, - Minor: 0, - Permissions: "rwm", - FileMode: 0666, - }, - - // /dev/urandom,/dev/random - { - Path: "/dev/urandom", - Type: 'c', - Major: 1, - Minor: 9, - Permissions: "rwm", - FileMode: 0666, - }, - { - Path: "/dev/random", - Type: 'c', - Major: 1, - Minor: 8, - Permissions: "rwm", - FileMode: 0666, - }, - } - DefaultAllowedDevices = append([]*Device{ - // allow mknod for any device - { - Type: 'c', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - { - Type: 'b', - Major: Wildcard, - Minor: Wildcard, - Permissions: "m", - }, - - { - Path: "/dev/console", - Type: 'c', - Major: 5, - Minor: 1, - Permissions: "rwm", - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Path: "", - Type: 'c', - Major: 136, - Minor: Wildcard, - Permissions: "rwm", - }, - { - Path: "", - Type: 'c', - Major: 5, - Minor: 2, - Permissions: "rwm", - }, - - // tuntap - { - Path: "", - Type: 'c', - Major: 10, - Minor: 200, - Permissions: "rwm", - }, - }, DefaultSimpleDevices...) - DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...) -) diff --git a/libcontainer/integration/template_test.go b/libcontainer/integration/template_test.go index 5f7cab53..3f3a0811 100644 --- a/libcontainer/integration/template_test.go +++ b/libcontainer/integration/template_test.go @@ -2,6 +2,7 @@ package integration import ( "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/specconv" "golang.org/x/sys/unix" ) @@ -20,7 +21,6 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV // it uses a network strategy of just setting a loopback interface // and the default setup for devices func newTemplateConfig(rootfs string) *configs.Config { - allowAllDevices := false return &configs.Config{ Rootfs: rootfs, Capabilities: &configs.Capabilities{ @@ -116,8 +116,7 @@ func newTemplateConfig(rootfs string) *configs.Config { Path: "integration/test", Resources: &configs.Resources{ MemorySwappiness: nil, - AllowAllDevices: &allowAllDevices, - AllowedDevices: configs.DefaultAllowedDevices, + Devices: specconv.AllowedDevices, }, }, MaskPaths: []string{ @@ -127,7 +126,7 @@ func newTemplateConfig(rootfs string) *configs.Config { ReadonlyPaths: []string{ "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", }, - Devices: configs.DefaultAutoCreatedDevices, + Devices: specconv.AllowedDevices, Hostname: "integration", Mounts: []*configs.Mount{ { diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index 62fad7ed..40a5686d 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -606,11 +606,14 @@ func bindMountDeviceNode(dest string, node *configs.Device) error { // Creates the device node in the rootfs of the container. func createDeviceNode(rootfs string, node *configs.Device, bind bool) error { + if node.Path == "" { + // The node only exists for cgroup reasons, ignore it here. + return nil + } dest := filepath.Join(rootfs, node.Path) if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { return err } - if bind { return bindMountDeviceNode(dest, node) } diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index f6212b83..055d5ad4 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -48,7 +48,21 @@ var mountPropagationMapping = map[string]int{ "": 0, } -// AllowedDevices is exposed for devicefilter_test.go +// AllowedDevices is the set of devices which are automatically included for +// all containers. +// +// XXX (cyphar) +// This behaviour is at the very least "questionable" (if not outright +// wrong) according to the runtime-spec. +// +// Yes, we have to include certain devices other than the ones the user +// specifies, but several devices listed here are not part of the spec +// (including "mknod for any device"?!). In addition, these rules are +// appended to the user-provided set which means that users *cannot disable +// this behaviour*. +// +// ... unfortunately I'm too scared to change this now because who knows how +// many people depend on this (incorrect and arguably insecure) behaviour. var AllowedDevices = []*configs.Device{ // allow mknod for any device { @@ -420,7 +434,6 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) { // In rootless containers, any attempt to make cgroup changes is likely to fail. // libcontainer will validate this but ignores the error. - c.Resources.AllowedDevices = AllowedDevices if spec.Linux != nil { r := spec.Linux.Resources if r != nil {