cgroup: devices: eradicate the Allow/Deny lists
These lists have been in the codebase for a very long time, and have been unused for a large portion of that time -- specconv doesn't generate them and the only user of these flags has been tests (which doesn't inspire much confidence). In addition, we had an incorrect implementation of a white-list policy. This wasn't exploitable because all of our users explicitly specify "deny all" as the first rule, but it was a pretty glaring issue that came from the "feature" that users can select whether they prefer a white- or black- list. Fix this by always writing a deny-all rule (which is what our users were doing anyway, to work around this bug). This is one of many changes needed to clean up the devices cgroup code. Signed-off-by: Aleksa Sarai <asarai@suse.de>
This commit is contained in:
parent
859a780d6f
commit
b2bec9806f
|
@ -155,8 +155,7 @@ config := &configs.Config{
|
|||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: nil,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
|
@ -166,7 +165,7 @@ config := &configs.Config{
|
|||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
Hostname: "testing",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
|
|
|
@ -31,44 +31,24 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// The devices list is a whitelist, so we must first deny all devices.
|
||||
// XXX: This is incorrect for device list updates as it will result in
|
||||
// spurrious errors in the container, but we will solve that
|
||||
// separately.
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -9,8 +9,16 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
allowedDevices = []*configs.Device{
|
||||
func TestDevicesSetAllow(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.allow": "",
|
||||
"devices.deny": "",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Devices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
|
@ -18,82 +26,28 @@ var (
|
|||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
Allow: true,
|
||||
},
|
||||
}
|
||||
allowedList = "c 1:5 rwm"
|
||||
deniedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
deniedList = "c 1:3 rwm"
|
||||
)
|
||||
|
||||
func TestDevicesSetAllow(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.deny": "a",
|
||||
})
|
||||
allowAllDevices := false
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
|
||||
if value != allowedList {
|
||||
t.Fatal("Got the wrong value, set devices.allow failed.")
|
||||
}
|
||||
|
||||
// When AllowAllDevices is nil, devices.allow file should not be modified.
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = nil
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
if value != allowedList {
|
||||
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDevicesSetDeny(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.allow": "a",
|
||||
})
|
||||
|
||||
allowAllDevices := true
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// The default deny rule must be written.
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.deny - %s", err)
|
||||
t.Fatalf("Failed to parse devices.deny: %s", err)
|
||||
}
|
||||
if value[0] != 'a' {
|
||||
t.Errorf("Got the wrong value (%q), set devices.deny failed.", value)
|
||||
}
|
||||
|
||||
if value != deniedList {
|
||||
t.Fatal("Got the wrong value, set devices.deny failed.")
|
||||
// Permitted rule must be written.
|
||||
if value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow"); err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow: %s", err)
|
||||
} else if value != "c 1:5 rwm" {
|
||||
t.Errorf("Got the wrong value (%q), set devices.allow failed.", value)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,26 +39,10 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool {
|
|||
}
|
||||
|
||||
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
// XXX: This is currently a white-list (but all callers pass a blacklist of
|
||||
// devices). This is bad for a whole variety of reasons, but will need
|
||||
// to be fixed with co-ordinated effort with downstreams.
|
||||
devices := cgroup.Devices
|
||||
// never set by OCI specconv
|
||||
if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil {
|
||||
if *allowAllDevices == true {
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
return errors.New("libcontainer: can't use DeniedDevices together with AllowAllDevices")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// *allowAllDevices=false is still used by the integration test
|
||||
for _, ad := range cgroup.Resources.AllowedDevices {
|
||||
d := *ad
|
||||
d.Allow = true
|
||||
devices = append(devices, &d)
|
||||
}
|
||||
}
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
// never set by OCI specconv
|
||||
return errors.New("libcontainer DeniedDevices is not supported, use Devices")
|
||||
}
|
||||
insts, license, err := devicefilter.DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -68,6 +52,17 @@ func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
|||
return errors.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
// XXX: This code is currently incorrect when it comes to updating an
|
||||
// existing cgroup with new rules (new rulesets are just appended to
|
||||
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
|
||||
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
|
||||
// programs.
|
||||
//
|
||||
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
|
||||
// race-free blacklist because it acts as a whitelist by default, and
|
||||
// having a deny-everything program cannot be overriden by other
|
||||
// programs. You could temporarily insert a deny-everything program
|
||||
// but that would result in spurrious failures during updates.
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(cgroup) {
|
||||
return err
|
||||
|
|
|
@ -41,14 +41,7 @@ type Cgroup struct {
|
|||
}
|
||||
|
||||
type Resources struct {
|
||||
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
|
||||
// Deprecated
|
||||
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
|
||||
// Deprecated
|
||||
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
|
||||
// Deprecated
|
||||
DeniedDevices []*Device `json:"denied_devices,omitempty"`
|
||||
|
||||
// Devices is the set of access rules for devices in the container.
|
||||
Devices []*Device `json:"devices"`
|
||||
|
||||
// Memory limit (in bytes)
|
||||
|
|
|
@ -1,111 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package configs
|
||||
|
||||
var (
|
||||
// DefaultSimpleDevices are devices that are to be both allowed and created.
|
||||
DefaultSimpleDevices = []*Device{
|
||||
// /dev/null and zero
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/full",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// consoles and ttys
|
||||
{
|
||||
Path: "/dev/tty",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// /dev/urandom,/dev/random
|
||||
{
|
||||
Path: "/dev/urandom",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/random",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
DefaultAllowedDevices = append([]*Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/console",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 136,
|
||||
Minor: Wildcard,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
|
||||
// tuntap
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
}, DefaultSimpleDevices...)
|
||||
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
|
||||
)
|
|
@ -2,6 +2,7 @@ package integration
|
|||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -20,7 +21,6 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
|||
// it uses a network strategy of just setting a loopback interface
|
||||
// and the default setup for devices
|
||||
func newTemplateConfig(rootfs string) *configs.Config {
|
||||
allowAllDevices := false
|
||||
return &configs.Config{
|
||||
Rootfs: rootfs,
|
||||
Capabilities: &configs.Capabilities{
|
||||
|
@ -116,8 +116,7 @@ func newTemplateConfig(rootfs string) *configs.Config {
|
|||
Path: "integration/test",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: &allowAllDevices,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
|
@ -127,7 +126,7 @@ func newTemplateConfig(rootfs string) *configs.Config {
|
|||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
Hostname: "integration",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
|
|
|
@ -606,11 +606,14 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
|
|||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
if node.Path == "" {
|
||||
// The node only exists for cgroup reasons, ignore it here.
|
||||
return nil
|
||||
}
|
||||
dest := filepath.Join(rootfs, node.Path)
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bind {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
|
|
|
@ -48,7 +48,21 @@ var mountPropagationMapping = map[string]int{
|
|||
"": 0,
|
||||
}
|
||||
|
||||
// AllowedDevices is exposed for devicefilter_test.go
|
||||
// AllowedDevices is the set of devices which are automatically included for
|
||||
// all containers.
|
||||
//
|
||||
// XXX (cyphar)
|
||||
// This behaviour is at the very least "questionable" (if not outright
|
||||
// wrong) according to the runtime-spec.
|
||||
//
|
||||
// Yes, we have to include certain devices other than the ones the user
|
||||
// specifies, but several devices listed here are not part of the spec
|
||||
// (including "mknod for any device"?!). In addition, these rules are
|
||||
// appended to the user-provided set which means that users *cannot disable
|
||||
// this behaviour*.
|
||||
//
|
||||
// ... unfortunately I'm too scared to change this now because who knows how
|
||||
// many people depend on this (incorrect and arguably insecure) behaviour.
|
||||
var AllowedDevices = []*configs.Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
|
@ -420,7 +434,6 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
|||
|
||||
// In rootless containers, any attempt to make cgroup changes is likely to fail.
|
||||
// libcontainer will validate this but ignores the error.
|
||||
c.Resources.AllowedDevices = AllowedDevices
|
||||
if spec.Linux != nil {
|
||||
r := spec.Linux.Resources
|
||||
if r != nil {
|
||||
|
|
Loading…
Reference in New Issue