cgroup: devices: eradicate the Allow/Deny lists

These lists have been in the codebase for a very long time, and have
been unused for a large portion of that time -- specconv doesn't
generate them and the only user of these flags has been tests (which
doesn't inspire much confidence).

In addition, we had an incorrect implementation of a white-list policy.
This wasn't exploitable because all of our users explicitly specify
"deny all" as the first rule, but it was a pretty glaring issue that
came from the "feature" that users can select whether they prefer a
white- or black- list. Fix this by always writing a deny-all rule (which
is what our users were doing anyway, to work around this bug).

This is one of many changes needed to clean up the devices cgroup code.

Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
Aleksa Sarai 2020-05-04 22:39:37 +10:00
parent 859a780d6f
commit b2bec9806f
No known key found for this signature in database
GPG Key ID: 9E18AA267DDB8DB4
9 changed files with 73 additions and 248 deletions

View File

@ -155,8 +155,7 @@ config := &configs.Config{
Parent: "system",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: nil,
AllowedDevices: configs.DefaultAllowedDevices,
Devices: specconv.AllowedDevices,
},
},
MaskPaths: []string{
@ -166,7 +165,7 @@ config := &configs.Config{
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Devices: specconv.AllowedDevices,
Hostname: "testing",
Mounts: []*configs.Mount{
{

View File

@ -31,44 +31,24 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
return nil
}
// The devices list is a whitelist, so we must first deny all devices.
// XXX: This is incorrect for device list updates as it will result in
// spurrious errors in the container, but we will solve that
// separately.
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
return err
}
devices := cgroup.Resources.Devices
if len(devices) > 0 {
for _, dev := range devices {
file := "devices.deny"
if dev.Allow {
file = "devices.allow"
}
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
return err
}
for _, dev := range devices {
file := "devices.deny"
if dev.Allow {
file = "devices.allow"
}
return nil
}
if cgroup.Resources.AllowAllDevices != nil {
if *cgroup.Resources.AllowAllDevices == false {
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
return err
}
for _, dev := range cgroup.Resources.AllowedDevices {
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
return err
}
}
for _, dev := range cgroup.Resources.DeniedDevices {
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
return err
}
}
return nil
}

View File

@ -9,8 +9,16 @@ import (
"github.com/opencontainers/runc/libcontainer/configs"
)
var (
allowedDevices = []*configs.Device{
func TestDevicesSetAllow(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.allow": "",
"devices.deny": "",
})
helper.CgroupData.config.Resources.Devices = []*configs.Device{
{
Path: "/dev/zero",
Type: 'c',
@ -18,82 +26,28 @@ var (
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
Allow: true,
},
}
allowedList = "c 1:5 rwm"
deniedDevices = []*configs.Device{
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
}
deniedList = "c 1:3 rwm"
)
func TestDevicesSetAllow(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.deny": "a",
})
allowAllDevices := false
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
if err != nil {
t.Fatalf("Failed to parse devices.allow - %s", err)
}
if value != allowedList {
t.Fatal("Got the wrong value, set devices.allow failed.")
}
// When AllowAllDevices is nil, devices.allow file should not be modified.
helper.CgroupData.config.Resources.AllowAllDevices = nil
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
if err != nil {
t.Fatalf("Failed to parse devices.allow - %s", err)
}
if value != allowedList {
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
}
}
func TestDevicesSetDeny(t *testing.T) {
helper := NewCgroupTestUtil("devices", t)
defer helper.cleanup()
helper.writeFileContents(map[string]string{
"devices.allow": "a",
})
allowAllDevices := true
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
devices := &DevicesGroup{}
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
t.Fatal(err)
}
// The default deny rule must be written.
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny")
if err != nil {
t.Fatalf("Failed to parse devices.deny - %s", err)
t.Fatalf("Failed to parse devices.deny: %s", err)
}
if value[0] != 'a' {
t.Errorf("Got the wrong value (%q), set devices.deny failed.", value)
}
if value != deniedList {
t.Fatal("Got the wrong value, set devices.deny failed.")
// Permitted rule must be written.
if value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow"); err != nil {
t.Fatalf("Failed to parse devices.allow: %s", err)
} else if value != "c 1:5 rwm" {
t.Errorf("Got the wrong value (%q), set devices.allow failed.", value)
}
}

View File

@ -39,26 +39,10 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool {
}
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
// XXX: This is currently a white-list (but all callers pass a blacklist of
// devices). This is bad for a whole variety of reasons, but will need
// to be fixed with co-ordinated effort with downstreams.
devices := cgroup.Devices
// never set by OCI specconv
if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil {
if *allowAllDevices == true {
if len(cgroup.Resources.DeniedDevices) != 0 {
return errors.New("libcontainer: can't use DeniedDevices together with AllowAllDevices")
}
return nil
}
// *allowAllDevices=false is still used by the integration test
for _, ad := range cgroup.Resources.AllowedDevices {
d := *ad
d.Allow = true
devices = append(devices, &d)
}
}
if len(cgroup.Resources.DeniedDevices) != 0 {
// never set by OCI specconv
return errors.New("libcontainer DeniedDevices is not supported, use Devices")
}
insts, license, err := devicefilter.DeviceFilter(devices)
if err != nil {
return err
@ -68,6 +52,17 @@ func setDevices(dirPath string, cgroup *configs.Cgroup) error {
return errors.Errorf("cannot get dir FD for %s", dirPath)
}
defer unix.Close(dirFD)
// XXX: This code is currently incorrect when it comes to updating an
// existing cgroup with new rules (new rulesets are just appended to
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
// programs.
//
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
// race-free blacklist because it acts as a whitelist by default, and
// having a deny-everything program cannot be overriden by other
// programs. You could temporarily insert a deny-everything program
// but that would result in spurrious failures during updates.
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
if !canSkipEBPFError(cgroup) {
return err

View File

@ -41,14 +41,7 @@ type Cgroup struct {
}
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
// Deprecated
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
DeniedDevices []*Device `json:"denied_devices,omitempty"`
// Devices is the set of access rules for devices in the container.
Devices []*Device `json:"devices"`
// Memory limit (in bytes)

View File

@ -1,111 +0,0 @@
// +build linux
package configs
var (
// DefaultSimpleDevices are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{
Path: "/dev/null",
Type: 'c',
Major: 1,
Minor: 3,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/zero",
Type: 'c',
Major: 1,
Minor: 5,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/full",
Type: 'c',
Major: 1,
Minor: 7,
Permissions: "rwm",
FileMode: 0666,
},
// consoles and ttys
{
Path: "/dev/tty",
Type: 'c',
Major: 5,
Minor: 0,
Permissions: "rwm",
FileMode: 0666,
},
// /dev/urandom,/dev/random
{
Path: "/dev/urandom",
Type: 'c',
Major: 1,
Minor: 9,
Permissions: "rwm",
FileMode: 0666,
},
{
Path: "/dev/random",
Type: 'c',
Major: 1,
Minor: 8,
Permissions: "rwm",
FileMode: 0666,
},
}
DefaultAllowedDevices = append([]*Device{
// allow mknod for any device
{
Type: 'c',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Type: 'b',
Major: Wildcard,
Minor: Wildcard,
Permissions: "m",
},
{
Path: "/dev/console",
Type: 'c',
Major: 5,
Minor: 1,
Permissions: "rwm",
},
// /dev/pts/ - pts namespaces are "coming soon"
{
Path: "",
Type: 'c',
Major: 136,
Minor: Wildcard,
Permissions: "rwm",
},
{
Path: "",
Type: 'c',
Major: 5,
Minor: 2,
Permissions: "rwm",
},
// tuntap
{
Path: "",
Type: 'c',
Major: 10,
Minor: 200,
Permissions: "rwm",
},
}, DefaultSimpleDevices...)
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
)

View File

@ -2,6 +2,7 @@ package integration
import (
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/specconv"
"golang.org/x/sys/unix"
)
@ -20,7 +21,6 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
// it uses a network strategy of just setting a loopback interface
// and the default setup for devices
func newTemplateConfig(rootfs string) *configs.Config {
allowAllDevices := false
return &configs.Config{
Rootfs: rootfs,
Capabilities: &configs.Capabilities{
@ -116,8 +116,7 @@ func newTemplateConfig(rootfs string) *configs.Config {
Path: "integration/test",
Resources: &configs.Resources{
MemorySwappiness: nil,
AllowAllDevices: &allowAllDevices,
AllowedDevices: configs.DefaultAllowedDevices,
Devices: specconv.AllowedDevices,
},
},
MaskPaths: []string{
@ -127,7 +126,7 @@ func newTemplateConfig(rootfs string) *configs.Config {
ReadonlyPaths: []string{
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
},
Devices: configs.DefaultAutoCreatedDevices,
Devices: specconv.AllowedDevices,
Hostname: "integration",
Mounts: []*configs.Mount{
{

View File

@ -606,11 +606,14 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
// Creates the device node in the rootfs of the container.
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
if node.Path == "" {
// The node only exists for cgroup reasons, ignore it here.
return nil
}
dest := filepath.Join(rootfs, node.Path)
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
return err
}
if bind {
return bindMountDeviceNode(dest, node)
}

View File

@ -48,7 +48,21 @@ var mountPropagationMapping = map[string]int{
"": 0,
}
// AllowedDevices is exposed for devicefilter_test.go
// AllowedDevices is the set of devices which are automatically included for
// all containers.
//
// XXX (cyphar)
// This behaviour is at the very least "questionable" (if not outright
// wrong) according to the runtime-spec.
//
// Yes, we have to include certain devices other than the ones the user
// specifies, but several devices listed here are not part of the spec
// (including "mknod for any device"?!). In addition, these rules are
// appended to the user-provided set which means that users *cannot disable
// this behaviour*.
//
// ... unfortunately I'm too scared to change this now because who knows how
// many people depend on this (incorrect and arguably insecure) behaviour.
var AllowedDevices = []*configs.Device{
// allow mknod for any device
{
@ -420,7 +434,6 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
// In rootless containers, any attempt to make cgroup changes is likely to fail.
// libcontainer will validate this but ignores the error.
c.Resources.AllowedDevices = AllowedDevices
if spec.Linux != nil {
r := spec.Linux.Resources
if r != nil {