Merge pull request #2391 from cyphar/devices-cgroup
cgroup: devices: major cleanups and minimal transition rules
This commit is contained in:
commit
3f1e886991
|
@ -65,7 +65,7 @@ func bail(err error) {
|
|||
os.Exit(1)
|
||||
}
|
||||
|
||||
func handleSingle(path string) error {
|
||||
func handleSingle(path string, noStdin bool) error {
|
||||
// Open a socket.
|
||||
ln, err := net.Listen("unix", path)
|
||||
if err != nil {
|
||||
|
@ -113,10 +113,12 @@ func handleSingle(path string) error {
|
|||
io.Copy(os.Stdout, c)
|
||||
quitChan <- struct{}{}
|
||||
}()
|
||||
go func() {
|
||||
io.Copy(c, os.Stdin)
|
||||
quitChan <- struct{}{}
|
||||
}()
|
||||
if !noStdin {
|
||||
go func() {
|
||||
io.Copy(c, os.Stdin)
|
||||
quitChan <- struct{}{}
|
||||
}()
|
||||
}
|
||||
|
||||
// Only close the master fd once we've stopped copying.
|
||||
<-quitChan
|
||||
|
@ -201,6 +203,10 @@ func main() {
|
|||
Value: "",
|
||||
Usage: "Path to write daemon process ID to",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-stdin",
|
||||
Usage: "Disable stdin handling (no-op for null mode)",
|
||||
},
|
||||
}
|
||||
|
||||
app.Action = func(ctx *cli.Context) error {
|
||||
|
@ -218,9 +224,10 @@ func main() {
|
|||
}
|
||||
}
|
||||
|
||||
noStdin := ctx.Bool("no-stdin")
|
||||
switch ctx.String("mode") {
|
||||
case "single":
|
||||
if err := handleSingle(path); err != nil {
|
||||
if err := handleSingle(path, noStdin); err != nil {
|
||||
return err
|
||||
}
|
||||
case "null":
|
||||
|
|
|
@ -155,8 +155,7 @@ config := &configs.Config{
|
|||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: nil,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
|
@ -166,7 +165,7 @@ config := &configs.Config{
|
|||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
Hostname: "testing",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
|
|
|
@ -44,6 +44,9 @@ type Manager interface {
|
|||
|
||||
// GetCgroups returns the cgroup data as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
|
||||
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||
GetFreezerState() (configs.FreezerState, error)
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
|
|
|
@ -0,0 +1,355 @@
|
|||
// +build linux
|
||||
|
||||
package devices
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// deviceMeta is a DeviceRule without the Allow or Permissions fields, and no
|
||||
// wildcard-type support. It's effectively the "match" portion of a metadata
|
||||
// rule, for the purposes of our emulation.
|
||||
type deviceMeta struct {
|
||||
// node is the device type the entry matches (block or character).
node configs.DeviceType
|
||||
// major is the device major number, or configs.Wildcard for "*".
major int64
|
||||
// minor is the device minor number, or configs.Wildcard for "*".
minor int64
|
||||
}
|
||||
|
||||
// deviceRule is effectively the tuple (deviceMeta, DevicePermissions).
|
||||
type deviceRule struct {
|
||||
// meta identifies which device(s) the rule applies to.
meta deviceMeta
|
||||
// perms is the set of access modes attached to meta.
perms configs.DevicePermissions
|
||||
}
|
||||
|
||||
// deviceRules is a mapping of device metadata rules to the associated
|
||||
// permissions in the ruleset.
|
||||
// A nil deviceRules can be read from and ranged over; addRule allocates the
// map before the first insertion.
type deviceRules map[deviceMeta]configs.DevicePermissions
|
||||
|
||||
func (r deviceRules) orderedEntries() []deviceRule {
|
||||
var rules []deviceRule
|
||||
for meta, perms := range r {
|
||||
rules = append(rules, deviceRule{meta: meta, perms: perms})
|
||||
}
|
||||
sort.Slice(rules, func(i, j int) bool {
|
||||
// Sort by (major, minor, type).
|
||||
a, b := rules[i].meta, rules[j].meta
|
||||
return a.major < b.major ||
|
||||
(a.major == b.major && a.minor < b.minor) ||
|
||||
(a.major == b.major && a.minor == b.minor && a.node < b.node)
|
||||
})
|
||||
return rules
|
||||
}
|
||||
|
||||
// Emulator is an in-memory model of a devices cgroup: a default policy plus
// the set of exceptions to that policy.
type Emulator struct {
|
||||
// defaultAllow is true when the cgroup allows devices by default
// (black-list mode) and false when it denies by default (white-list mode).
defaultAllow bool
|
||||
// rules holds the exceptions to the default policy, keyed by device.
rules deviceRules
|
||||
}
|
||||
|
||||
// IsBlacklist reports whether the emulated cgroup allows devices by default,
// i.e. whether its rules are to be read as deny exceptions.
func (e *Emulator) IsBlacklist() bool {
|
||||
return e.defaultAllow
|
||||
}
|
||||
|
||||
func (e *Emulator) IsAllowAll() bool {
|
||||
return e.IsBlacklist() && len(e.rules) == 0
|
||||
}
|
||||
|
||||
// devicesListRegexp matches a single "devices.list"-style line. Its capture
// groups are: the device type (a, b or c), the major and the minor (each a
// decimal number or "*"), and a non-empty run of r/w/m access letters.
var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`)
|
||||
|
||||
func parseLine(line string) (*deviceRule, error) {
|
||||
matches := devicesListRegexp.FindStringSubmatch(line)
|
||||
if matches == nil {
|
||||
return nil, errors.Errorf("line doesn't match devices.list format")
|
||||
}
|
||||
var (
|
||||
rule deviceRule
|
||||
node = matches[1]
|
||||
major = matches[2]
|
||||
minor = matches[3]
|
||||
perms = matches[4]
|
||||
)
|
||||
|
||||
// Parse the node type.
|
||||
switch node {
|
||||
case "a":
|
||||
// Super-special case -- "a" always means every device with every
|
||||
// access mode. In fact, for devices.list this actually indicates that
|
||||
// the cgroup is in black-list mode.
|
||||
// TODO: Double-check that the entire file is "a *:* rwm".
|
||||
return nil, nil
|
||||
case "b":
|
||||
rule.meta.node = configs.BlockDevice
|
||||
case "c":
|
||||
rule.meta.node = configs.CharDevice
|
||||
default:
|
||||
// Should never happen!
|
||||
return nil, errors.Errorf("unknown device type %q", node)
|
||||
}
|
||||
|
||||
// Parse the major number.
|
||||
if major == "*" {
|
||||
rule.meta.major = configs.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(major, 10, 32)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse major number")
|
||||
}
|
||||
rule.meta.major = int64(val)
|
||||
}
|
||||
|
||||
// Parse the minor number.
|
||||
if minor == "*" {
|
||||
rule.meta.minor = configs.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(minor, 10, 32)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "parse minor number")
|
||||
}
|
||||
rule.meta.minor = int64(val)
|
||||
}
|
||||
|
||||
// Parse the access permissions.
|
||||
rule.perms = configs.DevicePermissions(perms)
|
||||
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
|
||||
// Should never happen!
|
||||
return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||
}
|
||||
return &rule, nil
|
||||
}
|
||||
|
||||
func (e *Emulator) addRule(rule deviceRule) error {
|
||||
if e.rules == nil {
|
||||
e.rules = make(map[deviceMeta]configs.DevicePermissions)
|
||||
}
|
||||
|
||||
// Merge with any pre-existing permissions.
|
||||
oldPerms := e.rules[rule.meta]
|
||||
newPerms := rule.perms.Union(oldPerms)
|
||||
e.rules[rule.meta] = newPerms
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) rmRule(rule deviceRule) error {
|
||||
// Give an error if any of the permissions requested to be removed are
|
||||
// present in a partially-matching wildcard rule, because such rules will
|
||||
// be ignored by cgroupv1.
|
||||
//
|
||||
// This is a diversion from cgroupv1, but is necessary to avoid leading
|
||||
// users into a false sense of security. cgroupv1 will silently(!) ignore
|
||||
// requests to remove partial exceptions, but we really shouldn't do that.
|
||||
//
|
||||
// It may seem like we could just "split" wildcard rules which hit this
|
||||
// issue, but unfortunately there are 2^32 possible major and minor
|
||||
// numbers, which would exhaust kernel memory quickly if we did this. Not
|
||||
// to mention it'd be really slow (the kernel side is implemented as a
|
||||
// linked-list of exceptions).
|
||||
for _, partialMeta := range []deviceMeta{
|
||||
{node: rule.meta.node, major: configs.Wildcard, minor: rule.meta.minor},
|
||||
{node: rule.meta.node, major: rule.meta.major, minor: configs.Wildcard},
|
||||
{node: rule.meta.node, major: configs.Wildcard, minor: configs.Wildcard},
|
||||
} {
|
||||
// This wildcard rule is equivalent to the requested rule, so skip it.
|
||||
if rule.meta == partialMeta {
|
||||
continue
|
||||
}
|
||||
// Only give an error if the set of permissions overlap.
|
||||
partialPerms := e.rules[partialMeta]
|
||||
if !partialPerms.Intersection(rule.perms).IsEmpty() {
|
||||
return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||
}
|
||||
}
|
||||
|
||||
// Subtract all of the permissions listed from the full match rule. If the
|
||||
// rule didn't exist, all of this is a no-op.
|
||||
newPerms := e.rules[rule.meta].Difference(rule.perms)
|
||||
if newPerms.IsEmpty() {
|
||||
delete(e.rules, rule.meta)
|
||||
} else {
|
||||
e.rules[rule.meta] = newPerms
|
||||
}
|
||||
// TODO: The actual cgroup code doesn't care if an exception didn't exist
|
||||
// during removal, so not erroring out here is /accurate/ but quite
|
||||
// worrying. Maybe we should do additional validation, but again we
|
||||
// have to worry about backwards-compatibility.
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) allow(rule *deviceRule) error {
|
||||
// This cgroup is configured as a black-list. Reset the entire emulator,
|
||||
// and put is into black-list mode.
|
||||
if rule == nil || rule.meta.node == configs.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: true,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception")
|
||||
} else {
|
||||
err = errors.Wrap(e.addRule(*rule), "add 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) deny(rule *deviceRule) error {
|
||||
// This cgroup is configured as a white-list. Reset the entire emulator,
|
||||
// and put is into white-list mode.
|
||||
if rule == nil || rule.meta.node == configs.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: false,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = errors.Wrap(e.addRule(*rule), "add 'deny' exception")
|
||||
} else {
|
||||
err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) Apply(rule configs.DeviceRule) error {
|
||||
if !rule.Type.CanCgroup() {
|
||||
return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||
}
|
||||
|
||||
innerRule := &deviceRule{
|
||||
meta: deviceMeta{
|
||||
node: rule.Type,
|
||||
major: rule.Major,
|
||||
minor: rule.Minor,
|
||||
},
|
||||
perms: rule.Permissions,
|
||||
}
|
||||
if innerRule.meta.node == configs.WildcardDevice {
|
||||
innerRule = nil
|
||||
}
|
||||
|
||||
if rule.Allow {
|
||||
return e.allow(innerRule)
|
||||
} else {
|
||||
return e.deny(innerRule)
|
||||
}
|
||||
}
|
||||
|
||||
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
|
||||
// a new Emulator that represents the state of the devices cgroup. Note that
|
||||
// black-list devices cgroups cannot be fully reconstructed, due to limitations
|
||||
// in the devices cgroup API. Instead, such cgroups are always treated as
|
||||
// "allow all" cgroups.
|
||||
func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||
// Normally cgroups are in black-list mode by default, but the way we
|
||||
// figure out the current mode is whether or not devices.list has an
|
||||
// allow-all rule. So we default to a white-list, and the existence of an
|
||||
// "a *:* rwm" entry will tell us otherwise.
|
||||
e := &Emulator{
|
||||
defaultAllow: false,
|
||||
}
|
||||
|
||||
// Parse the "devices.list".
|
||||
s := bufio.NewScanner(list)
|
||||
for s.Scan() {
|
||||
line := s.Text()
|
||||
deviceRule, err := parseLine(line)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "parsing line %q", line)
|
||||
}
|
||||
// "devices.list" is an allow list. Note that this means that in
|
||||
// black-list mode, we have no idea what rules are in play. As a
|
||||
// result, we need to be very careful in Transition().
|
||||
if err := e.allow(deviceRule); err != nil {
|
||||
return nil, errors.Wrapf(err, "adding devices.list rule")
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, errors.Wrap(err, "reading devices.list lines")
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// Transition calculates what is the minimally-disruptive set of rules need to
|
||||
// be applied to a devices cgroup in order to transition to the given target.
|
||||
// This means that any already-existing rules will not be applied, and
|
||||
// disruptive rules (like denying all device access) will only be applied if
|
||||
// necessary.
|
||||
//
|
||||
// This function is the sole reason for all of Emulator -- to allow us
|
||||
// to figure out how to update a containers' cgroups without causing spurrious
|
||||
// device errors (if possible).
|
||||
func (source *Emulator) Transition(target *Emulator) ([]*configs.DeviceRule, error) {
|
||||
var transitionRules []*configs.DeviceRule
|
||||
oldRules := source.rules
|
||||
|
||||
// If the default policy doesn't match, we need to include a "disruptive"
|
||||
// rule (either allow-all or deny-all) in order to switch the cgroup to the
|
||||
// correct default policy.
|
||||
//
|
||||
// However, due to a limitation in "devices.list" we cannot be sure what
|
||||
// deny rules are in place in a black-list cgroup. Thus if the source is a
|
||||
// black-list we also have to include a disruptive rule.
|
||||
if source.IsBlacklist() || source.defaultAllow != target.defaultAllow {
|
||||
transitionRules = append(transitionRules, &configs.DeviceRule{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
Minor: -1,
|
||||
Permissions: configs.DevicePermissions("rwm"),
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
// The old rules are only relevant if we aren't starting out with a
|
||||
// disruptive rule.
|
||||
oldRules = nil
|
||||
}
|
||||
|
||||
// NOTE: We traverse through the rules in a sorted order so we always write
|
||||
// the same set of rules (this is to aid testing).
|
||||
|
||||
// First, we create inverse rules for any old rules not in the new set.
|
||||
// This includes partial-inverse rules for specific permissions. This is a
|
||||
// no-op if we added a disruptive rule, since oldRules will be empty.
|
||||
for _, rule := range oldRules.orderedEntries() {
|
||||
meta, oldPerms := rule.meta, rule.perms
|
||||
newPerms := target.rules[meta]
|
||||
droppedPerms := oldPerms.Difference(newPerms)
|
||||
if !droppedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &configs.DeviceRule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: droppedPerms,
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Add any additional rules which weren't in the old set. We happen to
|
||||
// filter out rules which are present in both sets, though this isn't
|
||||
// strictly necessary.
|
||||
for _, rule := range target.rules.orderedEntries() {
|
||||
meta, newPerms := rule.meta, rule.perms
|
||||
oldPerms := oldRules[meta]
|
||||
gainedPerms := newPerms.Difference(oldPerms)
|
||||
if !gainedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &configs.DeviceRule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: gainedPerms,
|
||||
Allow: !target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
return transitionRules, nil
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -22,7 +22,7 @@ const (
|
|||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
|
||||
func DeviceFilter(devices []*configs.DeviceRule) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
|
@ -68,7 +68,7 @@ func (p *program) init() {
|
|||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev *configs.Device) error {
|
||||
func (p *program) appendDevice(dev *configs.DeviceRule) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ func hash(s, comm string) string {
|
|||
return strings.Join(res, "\n")
|
||||
}
|
||||
|
||||
func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr string) {
|
||||
func testDeviceFilter(t testing.TB, devices []*configs.DeviceRule, expectedStr string) {
|
||||
insts, _, err := DeviceFilter(devices)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices)
|
||||
|
@ -83,71 +83,69 @@ block-2:
|
|||
19: Exit
|
||||
block-3:
|
||||
20: JNEImm dst: r2 off: -1 imm: 2 <block-4>
|
||||
21: JNEImm dst: r4 off: -1 imm: 5 <block-4>
|
||||
22: JNEImm dst: r5 off: -1 imm: 1 <block-4>
|
||||
21: JNEImm dst: r4 off: -1 imm: 1 <block-4>
|
||||
22: JNEImm dst: r5 off: -1 imm: 9 <block-4>
|
||||
23: Mov32Imm dst: r0 imm: 1
|
||||
24: Exit
|
||||
block-4:
|
||||
25: JNEImm dst: r2 off: -1 imm: 2 <block-5>
|
||||
26: JNEImm dst: r4 off: -1 imm: 1 <block-5>
|
||||
27: JNEImm dst: r5 off: -1 imm: 9 <block-5>
|
||||
27: JNEImm dst: r5 off: -1 imm: 5 <block-5>
|
||||
28: Mov32Imm dst: r0 imm: 1
|
||||
29: Exit
|
||||
block-5:
|
||||
30: JNEImm dst: r2 off: -1 imm: 2 <block-6>
|
||||
31: JNEImm dst: r4 off: -1 imm: 1 <block-6>
|
||||
32: JNEImm dst: r5 off: -1 imm: 5 <block-6>
|
||||
31: JNEImm dst: r4 off: -1 imm: 5 <block-6>
|
||||
32: JNEImm dst: r5 off: -1 imm: 0 <block-6>
|
||||
33: Mov32Imm dst: r0 imm: 1
|
||||
34: Exit
|
||||
block-6:
|
||||
35: JNEImm dst: r2 off: -1 imm: 2 <block-7>
|
||||
36: JNEImm dst: r4 off: -1 imm: 5 <block-7>
|
||||
37: JNEImm dst: r5 off: -1 imm: 0 <block-7>
|
||||
36: JNEImm dst: r4 off: -1 imm: 1 <block-7>
|
||||
37: JNEImm dst: r5 off: -1 imm: 7 <block-7>
|
||||
38: Mov32Imm dst: r0 imm: 1
|
||||
39: Exit
|
||||
block-7:
|
||||
40: JNEImm dst: r2 off: -1 imm: 2 <block-8>
|
||||
41: JNEImm dst: r4 off: -1 imm: 1 <block-8>
|
||||
42: JNEImm dst: r5 off: -1 imm: 7 <block-8>
|
||||
42: JNEImm dst: r5 off: -1 imm: 8 <block-8>
|
||||
43: Mov32Imm dst: r0 imm: 1
|
||||
44: Exit
|
||||
block-8:
|
||||
45: JNEImm dst: r2 off: -1 imm: 2 <block-9>
|
||||
46: JNEImm dst: r4 off: -1 imm: 1 <block-9>
|
||||
47: JNEImm dst: r5 off: -1 imm: 8 <block-9>
|
||||
47: JNEImm dst: r5 off: -1 imm: 3 <block-9>
|
||||
48: Mov32Imm dst: r0 imm: 1
|
||||
49: Exit
|
||||
block-9:
|
||||
50: JNEImm dst: r2 off: -1 imm: 2 <block-10>
|
||||
51: JNEImm dst: r4 off: -1 imm: 1 <block-10>
|
||||
52: JNEImm dst: r5 off: -1 imm: 3 <block-10>
|
||||
53: Mov32Imm dst: r0 imm: 1
|
||||
54: Exit
|
||||
block-10:
|
||||
// (b, wildcard, wildcard, m, true)
|
||||
55: JNEImm dst: r2 off: -1 imm: 1 <block-11>
|
||||
56: Mov32Reg dst: r1 src: r3
|
||||
57: And32Imm dst: r1 imm: 1
|
||||
58: JEqImm dst: r1 off: -1 imm: 0 <block-11>
|
||||
59: Mov32Imm dst: r0 imm: 1
|
||||
60: Exit
|
||||
block-11:
|
||||
50: JNEImm dst: r2 off: -1 imm: 1 <block-10>
|
||||
51: Mov32Reg dst: r1 src: r3
|
||||
52: And32Imm dst: r1 imm: 1
|
||||
53: JEqImm dst: r1 off: -1 imm: 0 <block-10>
|
||||
54: Mov32Imm dst: r0 imm: 1
|
||||
55: Exit
|
||||
block-10:
|
||||
// (c, wildcard, wildcard, m, true)
|
||||
61: JNEImm dst: r2 off: -1 imm: 2 <block-12>
|
||||
62: Mov32Reg dst: r1 src: r3
|
||||
63: And32Imm dst: r1 imm: 1
|
||||
64: JEqImm dst: r1 off: -1 imm: 0 <block-12>
|
||||
65: Mov32Imm dst: r0 imm: 1
|
||||
66: Exit
|
||||
block-12:
|
||||
67: Mov32Imm dst: r0 imm: 0
|
||||
68: Exit
|
||||
56: JNEImm dst: r2 off: -1 imm: 2 <block-11>
|
||||
57: Mov32Reg dst: r1 src: r3
|
||||
58: And32Imm dst: r1 imm: 1
|
||||
59: JEqImm dst: r1 off: -1 imm: 0 <block-11>
|
||||
60: Mov32Imm dst: r0 imm: 1
|
||||
61: Exit
|
||||
block-11:
|
||||
62: Mov32Imm dst: r0 imm: 0
|
||||
63: Exit
|
||||
`
|
||||
testDeviceFilter(t, specconv.AllowedDevices, expected)
|
||||
var devices []*configs.DeviceRule
|
||||
for _, device := range specconv.AllowedDevices {
|
||||
devices = append(devices, &device.DeviceRule)
|
||||
}
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_Privileged(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
devices := []*configs.DeviceRule{
|
||||
{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
|
@ -174,7 +172,7 @@ block-0:
|
|||
}
|
||||
|
||||
func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
devices := []*configs.DeviceRule{
|
||||
{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
|
@ -214,7 +212,7 @@ block-1:
|
|||
}
|
||||
|
||||
func TestDeviceFilter_Weird(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
devices := []*configs.DeviceRule{
|
||||
{
|
||||
Type: 'b',
|
||||
Major: 8,
|
||||
|
|
|
@ -166,9 +166,6 @@ func (m *manager) Apply(pid int) (err error) {
|
|||
}
|
||||
|
||||
for _, sys := range m.getSubsystems() {
|
||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// create and join phase so that the cgroup hierarchy for a container can be
|
||||
// created then join consists of writing the process pids to cgroup.procs
|
||||
p, err := d.path(sys.Name())
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem is
|
||||
|
@ -181,10 +178,10 @@ func (m *manager) Apply(pid int) (err error) {
|
|||
m.paths[sys.Name()] = p
|
||||
|
||||
if err := sys.Apply(d); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
|
||||
// been set, we don't bail on error in case of permission problems.
|
||||
// Cases where limits have been set (and we couldn't create our own
|
||||
// cgroup) are handled by Set.
|
||||
// In the case of rootless (including euid=0 in userns), where an
|
||||
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||
// case of permission problems. Cases where limits have been set
|
||||
// (and we couldn't create our own cgroup) are handled by Set.
|
||||
if isIgnorableError(m.rootless, err) && m.cgroups.Path == "" {
|
||||
delete(m.paths, sys.Name())
|
||||
continue
|
||||
|
@ -272,22 +269,25 @@ func (m *manager) Set(container *configs.Config) error {
|
|||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
if m.cgroups == nil {
|
||||
func (m *manager) Freeze(state configs.FreezerState) (Err error) {
|
||||
path := m.GetPaths()["freezer"]
|
||||
if m.cgroups == nil || path == "" {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
dir := paths["freezer"]
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
}
|
||||
}()
|
||||
|
||||
freezer, err := m.getSubsystems().Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(dir, m.cgroups)
|
||||
if err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
if err := freezer.Set(path, m.cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
|
@ -413,3 +413,15 @@ func (m *manager) GetPaths() map[string]string {
|
|||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
paths := m.GetPaths()
|
||||
dir := paths["freezer"]
|
||||
freezer, err := m.getSubsystems().Get("freezer")
|
||||
|
||||
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||
if err != nil || dir == "" {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
return freezer.(*FreezerGroup).GetState(dir)
|
||||
}
|
||||
|
|
|
@ -3,13 +3,19 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
testingSkipFinalCheck bool
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
|
@ -26,49 +32,74 @@ func (s *DevicesGroup) Apply(d *cgroupData) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func loadEmulator(path string) (*devices.Emulator, error) {
|
||||
list, err := fscommon.ReadFile(path, "devices.list")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return devices.EmulatorFromList(bytes.NewBufferString(list))
|
||||
}
|
||||
|
||||
func buildEmulator(rules []*configs.DeviceRule) (*devices.Emulator, error) {
|
||||
// This defaults to a white-list -- which is what we want!
|
||||
emu := &devices.Emulator{}
|
||||
for _, rule := range rules {
|
||||
if err := emu.Apply(*rule); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return emu, nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if system.RunningInUserNS() {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
// Generate two emulators, one for the current state of the cgroup and one
|
||||
// for the requested state by the user.
|
||||
current, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
target, err := buildEmulator(cgroup.Resources.Devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
// Compute the minimal set of transition rules needed to achieve the
|
||||
// requested state.
|
||||
transitionRules, err := current.Transition(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, rule := range transitionRules {
|
||||
file := "devices.deny"
|
||||
if rule.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
||||
if err := fscommon.WriteFile(path, file, rule.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
// Final safety check -- ensure that the resulting state is what was
|
||||
// requested. This is only really correct for white-lists, but for
|
||||
// black-lists we can at least check that the cgroup is in the right mode.
|
||||
//
|
||||
// This safety-check is skipped for the unit tests because we cannot
|
||||
// currently mock devices.list correctly.
|
||||
if !s.testingSkipFinalCheck {
|
||||
currentAfter, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
|
||||
return fmt.Errorf("resulting devices cgroup doesn't precisely match target")
|
||||
} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
|
||||
return fmt.Errorf("resulting devices cgroup doesn't match target mode")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -9,91 +9,44 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
allowedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
allowedList = "c 1:5 rwm"
|
||||
deniedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
deniedList = "c 1:3 rwm"
|
||||
)
|
||||
|
||||
func TestDevicesSetAllow(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.deny": "a",
|
||||
})
|
||||
allowAllDevices := false
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
|
||||
if value != allowedList {
|
||||
t.Fatal("Got the wrong value, set devices.allow failed.")
|
||||
}
|
||||
|
||||
// When AllowAllDevices is nil, devices.allow file should not be modified.
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = nil
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
if value != allowedList {
|
||||
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDevicesSetDeny(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.allow": "a",
|
||||
"devices.allow": "",
|
||||
"devices.deny": "",
|
||||
"devices.list": "a *:* rwm",
|
||||
})
|
||||
|
||||
allowAllDevices := true
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
|
||||
devices := &DevicesGroup{}
|
||||
helper.CgroupData.config.Resources.Devices = []*configs.DeviceRule{
|
||||
{
|
||||
Type: configs.CharDevice,
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: configs.DevicePermissions("rwm"),
|
||||
Allow: true,
|
||||
},
|
||||
}
|
||||
|
||||
devices := &DevicesGroup{testingSkipFinalCheck: true}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// The default deny rule must be written.
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.deny - %s", err)
|
||||
t.Fatalf("Failed to parse devices.deny: %s", err)
|
||||
}
|
||||
if value[0] != 'a' {
|
||||
t.Errorf("Got the wrong value (%q), set devices.deny failed.", value)
|
||||
}
|
||||
|
||||
if value != deniedList {
|
||||
t.Fatal("Got the wrong value, set devices.deny failed.")
|
||||
// Permitted rule must be written.
|
||||
if value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow"); err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow: %s", err)
|
||||
} else if value != "c 1:5 rwm" {
|
||||
t.Errorf("Got the wrong value (%q), set devices.allow failed.", value)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,13 +3,16 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type FreezerGroup struct {
|
||||
|
@ -39,11 +42,11 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return err
|
||||
}
|
||||
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
state, err := s.GetState(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
|
||||
if state == cgroup.Resources.Freezer {
|
||||
break
|
||||
}
|
||||
|
||||
|
@ -65,3 +68,30 @@ func (s *FreezerGroup) Remove(d *cgroupData) error {
|
|||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||
for {
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as
|
||||
// being in an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch strings.TrimSpace(state) {
|
||||
case "THAWED":
|
||||
return configs.Thawed, nil
|
||||
case "FROZEN":
|
||||
return configs.Frozen, nil
|
||||
case "FREEZING":
|
||||
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||
// is still undergoing freezing. This should be a temporary delay.
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,12 +10,10 @@ import (
|
|||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isRWM(cgroupPermissions string) bool {
|
||||
r := false
|
||||
w := false
|
||||
m := false
|
||||
for _, rn := range cgroupPermissions {
|
||||
switch rn {
|
||||
func isRWM(perms configs.DevicePermissions) bool {
|
||||
var r, w, m bool
|
||||
for _, perm := range perms {
|
||||
switch perm {
|
||||
case 'r':
|
||||
r = true
|
||||
case 'w':
|
||||
|
@ -39,26 +37,10 @@ func canSkipEBPFError(cgroup *configs.Cgroup) bool {
|
|||
}
|
||||
|
||||
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
// XXX: This is currently a white-list (but all callers pass a blacklist of
|
||||
// devices). This is bad for a whole variety of reasons, but will need
|
||||
// to be fixed with co-ordinated effort with downstreams.
|
||||
devices := cgroup.Devices
|
||||
// never set by OCI specconv
|
||||
if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil {
|
||||
if *allowAllDevices == true {
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
return errors.New("libcontainer: can't use DeniedDevices together with AllowAllDevices")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// *allowAllDevices=false is still used by the integration test
|
||||
for _, ad := range cgroup.Resources.AllowedDevices {
|
||||
d := *ad
|
||||
d.Allow = true
|
||||
devices = append(devices, &d)
|
||||
}
|
||||
}
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
// never set by OCI specconv
|
||||
return errors.New("libcontainer DeniedDevices is not supported, use Devices")
|
||||
}
|
||||
insts, license, err := devicefilter.DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -68,6 +50,17 @@ func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
|||
return errors.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
// XXX: This code is currently incorrect when it comes to updating an
|
||||
// existing cgroup with new rules (new rulesets are just appended to
|
||||
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
|
||||
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
|
||||
// programs.
|
||||
//
|
||||
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
|
||||
// race-free blacklist because it acts as a whitelist by default, and
|
||||
// having a deny-everything program cannot be overridden by other
|
||||
// programs. You could temporarily insert a deny-everything program
|
||||
// but that would result in spurious failures during updates.
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(cgroup) {
|
||||
return err
|
||||
|
|
|
@ -3,32 +3,49 @@
|
|||
package fs2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
stdErrors "errors"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var desired int
|
||||
if err := supportsFreezer(dirPath); err != nil {
|
||||
// We can ignore this request as long as the user didn't ask us to
|
||||
// freeze the container (since without the freezer cgroup, that's a
|
||||
// no-op).
|
||||
if state == configs.Undefined || state == configs.Thawed {
|
||||
err = nil
|
||||
}
|
||||
return errors.Wrap(err, "freezer not supported")
|
||||
}
|
||||
|
||||
var stateStr string
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
desired = 1
|
||||
stateStr = "1"
|
||||
case configs.Thawed:
|
||||
desired = 0
|
||||
stateStr = "0"
|
||||
default:
|
||||
return errors.Errorf("unknown freezer state %+v", state)
|
||||
return errors.Errorf("invalid freezer state %q requested", state)
|
||||
}
|
||||
supportedErr := supportsFreezer(dirPath)
|
||||
if supportedErr != nil && desired != 0 {
|
||||
// can ignore error if desired == 1
|
||||
return errors.Wrap(supportedErr, "freezer not supported")
|
||||
|
||||
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", stateStr); err != nil {
|
||||
return err
|
||||
}
|
||||
return freezeWithInt(dirPath, desired)
|
||||
// Confirm that the cgroup did actually change states.
|
||||
if actualState, err := getFreezer(dirPath); err != nil {
|
||||
return err
|
||||
} else if actualState != state {
|
||||
return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func supportsFreezer(dirPath string) error {
|
||||
|
@ -36,18 +53,22 @@ func supportsFreezer(dirPath string) error {
|
|||
return err
|
||||
}
|
||||
|
||||
// freeze writes desired int to "cgroup.freeze".
|
||||
func freezeWithInt(dirPath string, desired int) error {
|
||||
desiredS := strconv.Itoa(desired)
|
||||
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", desiredS); err != nil {
|
||||
return err
|
||||
}
|
||||
got, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
func getFreezer(dirPath string) (configs.FreezerState, error) {
|
||||
state, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
if err != nil {
|
||||
return err
|
||||
// If the kernel is too old, then we just treat the freezer as being in
|
||||
// an "undefined" state.
|
||||
if os.IsNotExist(err) || stdErrors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
if gotS := strings.TrimSpace(string(got)); gotS != desiredS {
|
||||
return errors.Errorf("expected \"cgroup.freeze\" in %q to be %q, got %q", dirPath, desiredS, gotS)
|
||||
switch strings.TrimSpace(state) {
|
||||
case "0":
|
||||
return configs.Thawed, nil
|
||||
case "1":
|
||||
return configs.Frozen, nil
|
||||
default:
|
||||
return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -240,3 +240,7 @@ func (m *manager) GetPaths() map[string]string {
|
|||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
||||
|
||||
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return getFreezer(m.dirPath)
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package systemd
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
@ -9,6 +10,7 @@ import (
|
|||
|
||||
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
|
||||
dbus "github.com/godbus/dbus/v5"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
|
@ -69,6 +71,209 @@ func ExpandSlice(slice string) (string, error) {
|
|||
return path, nil
|
||||
}
|
||||
|
||||
func groupPrefix(ruleType configs.DeviceType) (string, error) {
|
||||
switch ruleType {
|
||||
case configs.BlockDevice:
|
||||
return "block-", nil
|
||||
case configs.CharDevice:
|
||||
return "char-", nil
|
||||
default:
|
||||
return "", errors.Errorf("device type %v has no group prefix", ruleType)
|
||||
}
|
||||
}
|
||||
|
||||
// findDeviceGroup tries to find the device group name (as listed in
|
||||
// /proc/devices) with the type prefixed as requried for DeviceAllow, for a
|
||||
// given (type, major) combination. If more than one device group exists, an
|
||||
// arbitrary one is chosen.
|
||||
func findDeviceGroup(ruleType configs.DeviceType, ruleMajor int64) (string, error) {
|
||||
fh, err := os.Open("/proc/devices")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
prefix, err := groupPrefix(ruleType)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(fh)
|
||||
var currentType configs.DeviceType
|
||||
for scanner.Scan() {
|
||||
// We need to strip spaces because the first number is column-aligned.
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
// Handle the "header" lines.
|
||||
switch line {
|
||||
case "Block devices:":
|
||||
currentType = configs.BlockDevice
|
||||
continue
|
||||
case "Character devices:":
|
||||
currentType = configs.CharDevice
|
||||
continue
|
||||
case "":
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip lines unrelated to our type.
|
||||
if currentType != ruleType {
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse out the (major, name).
|
||||
var (
|
||||
currMajor int64
|
||||
currName string
|
||||
)
|
||||
if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 {
|
||||
if err == nil {
|
||||
err = errors.Errorf("wrong number of fields")
|
||||
}
|
||||
return "", errors.Wrapf(err, "scan /proc/devices line %q", line)
|
||||
}
|
||||
|
||||
if currMajor == ruleMajor {
|
||||
return prefix + currName, nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", errors.Wrap(err, "reading /proc/devices")
|
||||
}
|
||||
// Couldn't find the device group.
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// generateDeviceProperties takes the configured device rules and generates a
|
||||
// corresponding set of systemd properties to configure the devices correctly.
|
||||
func generateDeviceProperties(rules []*configs.DeviceRule) ([]systemdDbus.Property, error) {
|
||||
// DeviceAllow is the type "a(ss)" which means we need a temporary struct
|
||||
// to represent it in Go.
|
||||
type deviceAllowEntry struct {
|
||||
Path string
|
||||
Perms string
|
||||
}
|
||||
|
||||
properties := []systemdDbus.Property{
|
||||
// Always run in the strictest white-list mode.
|
||||
newProp("DevicePolicy", "strict"),
|
||||
// Empty the DeviceAllow array before filling it.
|
||||
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||
}
|
||||
|
||||
// Figure out the set of rules.
|
||||
configEmu := &devices.Emulator{}
|
||||
for _, rule := range rules {
|
||||
if err := configEmu.Apply(*rule); err != nil {
|
||||
return nil, errors.Wrap(err, "apply rule for systemd")
|
||||
}
|
||||
}
|
||||
// systemd doesn't support blacklists. So we log a warning, and tell
|
||||
// systemd to act as a deny-all whitelist. This ruleset will be replaced
|
||||
// with our normal fallback code. This may result in spurrious errors, but
|
||||
// the only other option is to error out here.
|
||||
if configEmu.IsBlacklist() {
|
||||
// However, if we're dealing with an allow-all rule then we can do it.
|
||||
if configEmu.IsAllowAll() {
|
||||
return []systemdDbus.Property{
|
||||
// Run in white-list mode by setting to "auto" and removing all
|
||||
// DeviceAllow rules.
|
||||
newProp("DevicePolicy", "auto"),
|
||||
newProp("DeviceAllow", []deviceAllowEntry{}),
|
||||
}, nil
|
||||
}
|
||||
logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule")
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// Now generate the set of rules we actually need to apply. Unlike the
|
||||
// normal devices cgroup, in "strict" mode systemd defaults to a deny-all
|
||||
// whitelist which is the default for devices.Emulator.
|
||||
baseEmu := &devices.Emulator{}
|
||||
finalRules, err := baseEmu.Transition(configEmu)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "get simplified rules for systemd")
|
||||
}
|
||||
var deviceAllowList []deviceAllowEntry
|
||||
for _, rule := range finalRules {
|
||||
if !rule.Allow {
|
||||
// Should never happen.
|
||||
return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule)
|
||||
}
|
||||
switch rule.Type {
|
||||
case configs.BlockDevice, configs.CharDevice:
|
||||
default:
|
||||
// Should never happen.
|
||||
return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type)
|
||||
}
|
||||
|
||||
entry := deviceAllowEntry{
|
||||
Perms: string(rule.Permissions),
|
||||
}
|
||||
|
||||
// systemd has a fairly odd (though understandable) syntax here, and
|
||||
// because of the OCI configuration format we have to do quite a bit of
|
||||
// trickery to convert things:
|
||||
//
|
||||
// * Concrete rules with non-wildcard major/minor numbers have to use
|
||||
// /dev/{block,char} paths. This is slightly odd because it means
|
||||
// that we cannot add whitelist rules for devices that don't exist,
|
||||
// but there's not too much we can do about that.
|
||||
//
|
||||
// However, path globbing is not support for path-based rules so we
|
||||
// need to handle wildcards in some other manner.
|
||||
//
|
||||
// * Wildcard-minor rules have to specify a "device group name" (the
|
||||
// second column in /proc/devices).
|
||||
//
|
||||
// * Wildcard (major and minor) rules can just specify a glob with the
|
||||
// type ("char-*" or "block-*").
|
||||
//
|
||||
// The only type of rule we can't handle is wildcard-major rules, and
|
||||
// so we'll give a warning in that case (note that the fallback code
|
||||
// will insert any rules systemd couldn't handle). What amazing fun.
|
||||
|
||||
if rule.Major == configs.Wildcard {
|
||||
// "_ *:n _" rules aren't supported by systemd.
|
||||
if rule.Minor != configs.Wildcard {
|
||||
logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule)
|
||||
continue
|
||||
}
|
||||
|
||||
// "_ *:* _" rules just wildcard everything.
|
||||
prefix, err := groupPrefix(rule.Type)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
entry.Path = prefix + "*"
|
||||
} else if rule.Minor == configs.Wildcard {
|
||||
// "_ n:* _" rules require a device group from /proc/devices.
|
||||
group, err := findDeviceGroup(rule.Type, rule.Major)
|
||||
if err != nil {
|
||||
return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major)
|
||||
}
|
||||
if group == "" {
|
||||
// Couldn't find a group.
|
||||
logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule)
|
||||
continue
|
||||
}
|
||||
entry.Path = group
|
||||
} else {
|
||||
// "_ n:m _" rules are just a path in /dev/{block,char}/.
|
||||
switch rule.Type {
|
||||
case configs.BlockDevice:
|
||||
entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor)
|
||||
case configs.CharDevice:
|
||||
entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor)
|
||||
}
|
||||
}
|
||||
deviceAllowList = append(deviceAllowList, entry)
|
||||
}
|
||||
|
||||
properties = append(properties, newProp("DeviceAllow", deviceAllowList))
|
||||
return properties, nil
|
||||
}
|
||||
|
||||
// getDbusConnection lazy initializes systemd dbus connection
|
||||
// and returns it
|
||||
func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) {
|
||||
|
|
|
@ -15,6 +15,7 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type legacyManager struct {
|
||||
|
@ -70,6 +71,13 @@ var legacySubsystems = subsystemSet{
|
|||
|
||||
func genV1ResourcesProperties(c *configs.Cgroup) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
deviceProperties, err := generateDeviceProperties(c.Resources.Devices)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
|
@ -381,13 +389,40 @@ func (m *legacyManager) Set(container *configs.Config) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
|
||||
// Figure out the current freezer state, so we can revert to it after we
|
||||
// temporarily freeze the container.
|
||||
targetFreezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil {
|
||||
if targetFreezerState == configs.Undefined {
|
||||
targetFreezerState = configs.Thawed
|
||||
}
|
||||
|
||||
// We have to freeze the container while systemd sets the cgroup settings.
|
||||
// The reason for this is that systemd's application of DeviceAllow rules
|
||||
// is done disruptively, resulting in spurious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we freeze the container to avoid them hitting the cgroup
|
||||
// error. But if the freezer cgroup isn't supported, we just warn about it.
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
}
|
||||
|
||||
dbusConnection, err := getDbusConnection(false)
|
||||
if err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return err
|
||||
}
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(container.Cgroups), true, properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return err
|
||||
}
|
||||
|
||||
// Reset freezer state before we apply the configuration, to avoid clashing
|
||||
// with the freezer setting in the configuration.
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
|
@ -395,7 +430,6 @@ func (m *legacyManager) Set(container *configs.Config) error {
|
|||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -439,3 +473,15 @@ func (m *legacyManager) GetPaths() map[string]string {
|
|||
func (m *legacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
path, err := getSubsystemPath(m.cgroups, "freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
freezer, err := legacySubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
return freezer.(*fs.FreezerGroup).GetState(path)
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type unifiedManager struct {
|
||||
|
@ -37,6 +38,17 @@ func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgrou
|
|||
func genV2ResourcesProperties(c *configs.Cgroup) ([]systemdDbus.Property, error) {
|
||||
var properties []systemdDbus.Property
|
||||
|
||||
// NOTE: This is of questionable correctness because we insert our own
|
||||
// devices eBPF program later. Two programs with identical rules
|
||||
// aren't the end of the world, but it is a bit concerning. However
|
||||
// it's unclear if systemd removes all eBPF programs attached when
|
||||
// doing SetUnitProperties...
|
||||
deviceProperties, err := generateDeviceProperties(c.Resources.Devices)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
properties = append(properties, deviceProperties...)
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryMax", uint64(c.Resources.Memory)))
|
||||
|
@ -295,14 +307,41 @@ func (m *unifiedManager) Set(container *configs.Config) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
|
||||
// Figure out the current freezer state, so we can revert to it after we
|
||||
// temporarily freeze the container.
|
||||
targetFreezerState, err := m.GetFreezerState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if targetFreezerState == configs.Undefined {
|
||||
targetFreezerState = configs.Thawed
|
||||
}
|
||||
|
||||
// We have to freeze the container while systemd sets the cgroup settings.
|
||||
// The reason for this is that systemd's application of DeviceAllow rules
|
||||
// is done disruptively, resulting in spurious errors to common devices
|
||||
// (unlike our fs driver, they will happily write deny-all rules to running
|
||||
// containers). So we freeze the container to avoid them hitting the cgroup
|
||||
// error. But if the freezer cgroup isn't supported, we just warn about it.
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Infof("freeze container before SetUnitProperties failed: %v", err)
|
||||
}
|
||||
|
||||
dbusConnection, err := getDbusConnection(m.rootless)
|
||||
if err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return err
|
||||
}
|
||||
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
return errors.Wrap(err, "error while setting unit properties")
|
||||
}
|
||||
|
||||
// Reset freezer state before we apply the configuration, to avoid clashing
|
||||
// with the freezer setting in the configuration.
|
||||
_ = m.Freeze(targetFreezerState)
|
||||
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -319,3 +358,11 @@ func (m *unifiedManager) GetPaths() map[string]string {
|
|||
func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return configs.Undefined, err
|
||||
}
|
||||
return fsMgr.GetFreezerState()
|
||||
}
|
||||
|
|
|
@ -41,15 +41,8 @@ type Cgroup struct {
|
|||
}
|
||||
|
||||
type Resources struct {
|
||||
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
|
||||
// Deprecated
|
||||
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
|
||||
// Deprecated
|
||||
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
|
||||
// Deprecated
|
||||
DeniedDevices []*Device `json:"denied_devices,omitempty"`
|
||||
|
||||
Devices []*Device `json:"devices"`
|
||||
// Devices is the set of access rules for devices in the container.
|
||||
Devices []*DeviceRule `json:"devices"`
|
||||
|
||||
// Memory limit (in bytes)
|
||||
Memory int64 `json:"memory"`
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
package configs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -12,21 +16,11 @@ const (
|
|||
// TODO Windows: This can be factored out in the future
|
||||
|
||||
type Device struct {
|
||||
// Device type, block, char, etc.
|
||||
Type rune `json:"type"`
|
||||
DeviceRule
|
||||
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
|
||||
// Cgroup permissions format, rwm.
|
||||
Permissions string `json:"permissions"`
|
||||
|
||||
// FileMode permission bits for the device.
|
||||
FileMode os.FileMode `json:"file_mode"`
|
||||
|
||||
|
@ -35,23 +29,154 @@ type Device struct {
|
|||
|
||||
// Gid of the device.
|
||||
Gid uint32 `json:"gid"`
|
||||
}
|
||||
|
||||
// Write the file to the allowed list
|
||||
// DevicePermissions is a cgroupv1-style permission string describing device
// access (some combination of 'r', 'w' and 'm'). It stays a string for
// backward compatibility, which is why set operations are provided as
// methods instead of using a bitmask type directly.
type DevicePermissions string

// Bit flags used for the internal set representation of DevicePermissions.
const (
	deviceRead uint = (1 << iota)
	deviceWrite
	deviceMknod
)

// toSet converts the permission string into its bitmask form. Characters
// other than 'r', 'w' and 'm' contribute nothing.
func (p DevicePermissions) toSet() uint {
	var bits uint
	for _, c := range p {
		if c == 'r' {
			bits |= deviceRead
		} else if c == 'w' {
			bits |= deviceWrite
		} else if c == 'm' {
			bits |= deviceMknod
		}
	}
	return bits
}

// fromSet converts a bitmask back into a permission string, always emitting
// the letters in the order "r", "w", "m".
func fromSet(set uint) DevicePermissions {
	flags := [...]struct {
		bit    uint
		letter byte
	}{
		{deviceRead, 'r'},
		{deviceWrite, 'w'},
		{deviceMknod, 'm'},
	}
	letters := make([]byte, 0, len(flags))
	for _, f := range flags {
		if set&f.bit != 0 {
			letters = append(letters, f.letter)
		}
	}
	return DevicePermissions(letters)
}

// Union returns the permissions present in either p or o.
func (p DevicePermissions) Union(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() | o.toSet())
}

// Difference returns the permissions present in p but not in o (the set
// difference p\o).
func (p DevicePermissions) Difference(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() &^ o.toSet())
}

// Intersection returns the permissions present in both p and o.
func (p DevicePermissions) Intersection(o DevicePermissions) DevicePermissions {
	return fromSet(p.toSet() & o.toSet())
}

// IsEmpty reports whether p is the empty permission string.
func (p DevicePermissions) IsEmpty() bool {
	return len(p) == 0
}

// IsValid reports whether p is exactly the string fromSet produces for p's
// own permission set. That means p contains only 'r', 'w' and 'm', each at
// most once, in that order.
func (p DevicePermissions) IsValid() bool {
	canonical := fromSet(p.toSet())
	return canonical == p
}
|
||||
|
||||
// DeviceType is the single-character code identifying a kind of device.
type DeviceType rune

// Recognised device type codes.
const (
	WildcardDevice DeviceType = 'a'
	BlockDevice    DeviceType = 'b'
	CharDevice     DeviceType = 'c' // or 'u'
	FifoDevice     DeviceType = 'p'
)

// IsValid reports whether t is one of the recognised device type codes.
func (t DeviceType) IsValid() bool {
	return t == WildcardDevice || t == BlockDevice || t == CharDevice || t == FifoDevice
}

// CanMknod reports whether t is a block, character or FIFO device type.
func (t DeviceType) CanMknod() bool {
	return t == BlockDevice || t == CharDevice || t == FifoDevice
}

// CanCgroup reports whether t is a wildcard, block or character device type.
func (t DeviceType) CanCgroup() bool {
	return t == WildcardDevice || t == BlockDevice || t == CharDevice
}
|
||||
|
||||
// DeviceRule is a single access rule for a device, or for a wildcard set of
// devices: it names the device by type and major/minor number and states
// which permissions are allowed or denied for it.
type DeviceRule struct {
|
||||
// Type of device ('c' for char, 'b' for block). If set to 'a', this rule
|
||||
// acts as a wildcard and all fields other than Allow are ignored.
|
||||
Type DeviceType `json:"type"`
|
||||
|
||||
// Major is the device's major number, or Wildcard to match any major.
|
||||
Major int64 `json:"major"`
|
||||
|
||||
// Minor is the device's minor number, or Wildcard to match any minor.
|
||||
Minor int64 `json:"minor"`
|
||||
|
||||
// Permissions is the set of permissions that this rule applies to (in the
|
||||
// cgroupv1 format -- any combination of "rwm").
|
||||
Permissions DevicePermissions `json:"permissions"`
|
||||
|
||||
// Allow specifies whether this rule allows (true) or denies (false) the
|
||||
// access it describes.
Allow bool `json:"allow"`
|
||||
}
|
||||
|
||||
func (d *Device) CgroupString() string {
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
|
||||
}
|
||||
|
||||
func (d *Device) Mkdev() int {
|
||||
return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
|
||||
}
|
||||
|
||||
// deviceNumberString converts the device number to a string return result.
|
||||
func deviceNumberString(number int64) string {
|
||||
if number == Wildcard {
|
||||
return "*"
|
||||
func (d *DeviceRule) CgroupString() string {
|
||||
var (
|
||||
major = strconv.FormatInt(d.Major, 10)
|
||||
minor = strconv.FormatInt(d.Minor, 10)
|
||||
)
|
||||
if d.Major == Wildcard {
|
||||
major = "*"
|
||||
}
|
||||
return fmt.Sprint(number)
|
||||
if d.Minor == Wildcard {
|
||||
minor = "*"
|
||||
}
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, major, minor, d.Permissions)
|
||||
}
|
||||
|
||||
func (d *DeviceRule) Mkdev() (uint64, error) {
|
||||
if d.Major == Wildcard || d.Minor == Wildcard {
|
||||
return 0, errors.New("cannot mkdev() device with wildcards")
|
||||
}
|
||||
return unix.Mkdev(uint32(d.Major), uint32(d.Minor)), nil
|
||||
}
|
||||
|
|
|
@ -1,111 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package configs
|
||||
|
||||
var (
|
||||
// DefaultSimpleDevices are devices that are to be both allowed and created.
|
||||
DefaultSimpleDevices = []*Device{
|
||||
// /dev/null and zero
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/full",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// consoles and ttys
|
||||
{
|
||||
Path: "/dev/tty",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// /dev/urandom,/dev/random
|
||||
{
|
||||
Path: "/dev/urandom",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/random",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
DefaultAllowedDevices = append([]*Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/console",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 136,
|
||||
Minor: Wildcard,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
|
||||
// tuntap
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
}, DefaultSimpleDevices...)
|
||||
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
|
||||
)
|
|
@ -1847,30 +1847,11 @@ func (c *linuxContainer) runType() Status {
|
|||
}
|
||||
|
||||
func (c *linuxContainer) isPaused() (bool, error) {
|
||||
var filename, pausedState string
|
||||
|
||||
fcg := c.cgroupManager.Path("freezer")
|
||||
if !cgroups.IsCgroup2UnifiedMode() {
|
||||
if fcg == "" {
|
||||
// container doesn't have a freezer cgroup
|
||||
return false, nil
|
||||
}
|
||||
filename = "freezer.state"
|
||||
pausedState = "FROZEN"
|
||||
} else {
|
||||
filename = "cgroup.freeze"
|
||||
pausedState = "1"
|
||||
}
|
||||
|
||||
data, err := ioutil.ReadFile(filepath.Join(fcg, filename))
|
||||
state, err := c.cgroupManager.GetFreezerState()
|
||||
if err != nil {
|
||||
// If freezer cgroup is not mounted, the container would just be not paused.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
return false, nil
|
||||
}
|
||||
return false, newSystemErrorWithCause(err, "checking if container is paused")
|
||||
return false, err
|
||||
}
|
||||
return bytes.Equal(bytes.TrimSpace(data), []byte(pausedState)), nil
|
||||
return state == configs.Frozen, nil
|
||||
}
|
||||
|
||||
func (c *linuxContainer) currentState() (*State, error) {
|
||||
|
|
|
@ -61,10 +61,15 @@ func (m *mockCgroupManager) Path(subsys string) string {
|
|||
func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mockCgroupManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *mockCgroupManager) GetFreezerState() (configs.FreezerState, error) {
|
||||
return configs.Thawed, nil
|
||||
}
|
||||
|
||||
func (m *mockIntelRdtManager) Apply(pid int) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -31,33 +31,33 @@ func DeviceFromPath(path, permissions string) (*configs.Device, error) {
|
|||
}
|
||||
|
||||
var (
|
||||
devType configs.DeviceType
|
||||
mode = stat.Mode
|
||||
devNumber = uint64(stat.Rdev)
|
||||
major = unix.Major(devNumber)
|
||||
minor = unix.Minor(devNumber)
|
||||
)
|
||||
if major == 0 {
|
||||
return nil, ErrNotADevice
|
||||
}
|
||||
|
||||
var (
|
||||
devType rune
|
||||
mode = stat.Mode
|
||||
)
|
||||
switch {
|
||||
case mode&unix.S_IFBLK == unix.S_IFBLK:
|
||||
devType = 'b'
|
||||
devType = configs.BlockDevice
|
||||
case mode&unix.S_IFCHR == unix.S_IFCHR:
|
||||
devType = 'c'
|
||||
devType = configs.CharDevice
|
||||
case mode&unix.S_IFIFO == unix.S_IFIFO:
|
||||
devType = configs.FifoDevice
|
||||
default:
|
||||
return nil, ErrNotADevice
|
||||
}
|
||||
return &configs.Device{
|
||||
Type: devType,
|
||||
Path: path,
|
||||
Major: int64(major),
|
||||
Minor: int64(minor),
|
||||
Permissions: permissions,
|
||||
FileMode: os.FileMode(mode),
|
||||
Uid: stat.Uid,
|
||||
Gid: stat.Gid,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: devType,
|
||||
Major: int64(major),
|
||||
Minor: int64(minor),
|
||||
Permissions: configs.DevicePermissions(permissions),
|
||||
},
|
||||
Path: path,
|
||||
FileMode: os.FileMode(mode),
|
||||
Uid: stat.Uid,
|
||||
Gid: stat.Gid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ package integration
|
|||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
@ -20,7 +21,10 @@ const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
|||
// it uses a network strategy of just setting a loopback interface
|
||||
// and the default setup for devices
|
||||
func newTemplateConfig(rootfs string) *configs.Config {
|
||||
allowAllDevices := false
|
||||
var allowedDevices []*configs.DeviceRule
|
||||
for _, device := range specconv.AllowedDevices {
|
||||
allowedDevices = append(allowedDevices, &device.DeviceRule)
|
||||
}
|
||||
return &configs.Config{
|
||||
Rootfs: rootfs,
|
||||
Capabilities: &configs.Capabilities{
|
||||
|
@ -116,8 +120,7 @@ func newTemplateConfig(rootfs string) *configs.Config {
|
|||
Path: "integration/test",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: &allowAllDevices,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
Devices: allowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
|
@ -127,7 +130,7 @@ func newTemplateConfig(rootfs string) *configs.Config {
|
|||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
Hostname: "integration",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
|
|
|
@ -606,11 +606,14 @@ func bindMountDeviceNode(dest string, node *configs.Device) error {
|
|||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
if node.Path == "" {
|
||||
// The node only exists for cgroup reasons, ignore it here.
|
||||
return nil
|
||||
}
|
||||
dest := filepath.Join(rootfs, node.Path)
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bind {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
|
@ -628,16 +631,20 @@ func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
|||
func mknodDevice(dest string, node *configs.Device) error {
|
||||
fileMode := node.FileMode
|
||||
switch node.Type {
|
||||
case 'c', 'u':
|
||||
fileMode |= unix.S_IFCHR
|
||||
case 'b':
|
||||
case configs.BlockDevice:
|
||||
fileMode |= unix.S_IFBLK
|
||||
case 'p':
|
||||
case configs.CharDevice:
|
||||
fileMode |= unix.S_IFCHR
|
||||
case configs.FifoDevice:
|
||||
fileMode |= unix.S_IFIFO
|
||||
default:
|
||||
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
|
||||
}
|
||||
if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
|
||||
dev, err := node.Mkdev()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
|
|
|
@ -48,104 +48,147 @@ var mountPropagationMapping = map[string]int{
|
|||
"": 0,
|
||||
}
|
||||
|
||||
// AllowedDevices is exposed for devicefilter_test.go
|
||||
// AllowedDevices is the set of devices which are automatically included for
|
||||
// all containers.
|
||||
//
|
||||
// XXX (cyphar)
|
||||
// This behaviour is at the very least "questionable" (if not outright
|
||||
// wrong) according to the runtime-spec.
|
||||
//
|
||||
// Yes, we have to include certain devices other than the ones the user
|
||||
// specifies, but several devices listed here are not part of the spec
|
||||
// (including "mknod for any device"?!). In addition, these rules are
|
||||
// appended to the user-provided set which means that users *cannot disable
|
||||
// this behaviour*.
|
||||
//
|
||||
// ... unfortunately I'm too scared to change this now because who knows how
|
||||
// many people depend on this (incorrect and arguably insecure) behaviour.
|
||||
var AllowedDevices = []*configs.Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
Major: wildcard,
|
||||
Minor: wildcard,
|
||||
Permissions: "m",
|
||||
Allow: true,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: configs.Wildcard,
|
||||
Minor: configs.Wildcard,
|
||||
Permissions: "m",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: wildcard,
|
||||
Minor: wildcard,
|
||||
Permissions: "m",
|
||||
Allow: true,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.BlockDevice,
|
||||
Major: configs.Wildcard,
|
||||
Minor: configs.Wildcard,
|
||||
Permissions: "m",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/null",
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
Path: "/dev/null",
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/random",
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
Path: "/dev/random",
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/full",
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
Path: "/dev/full",
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/tty",
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
Path: "/dev/tty",
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/zero",
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
Path: "/dev/zero",
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/urandom",
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Path: "/dev/console",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
Path: "/dev/urandom",
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 136,
|
||||
Minor: wildcard,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 136,
|
||||
Minor: configs.Wildcard,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
// tuntap
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: configs.CharDevice,
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -420,7 +463,6 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
|||
|
||||
// In rootless containers, any attempt to make cgroup changes is likely to fail.
|
||||
// libcontainer will validate this but ignores the error.
|
||||
c.Resources.AllowedDevices = AllowedDevices
|
||||
if spec.Linux != nil {
|
||||
r := spec.Linux.Resources
|
||||
if r != nil {
|
||||
|
@ -446,14 +488,13 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dd := &configs.Device{
|
||||
c.Resources.Devices = append(c.Resources.Devices, &configs.DeviceRule{
|
||||
Type: dt,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Permissions: d.Access,
|
||||
Permissions: configs.DevicePermissions(d.Access),
|
||||
Allow: d.Allow,
|
||||
}
|
||||
c.Resources.Devices = append(c.Resources.Devices, dd)
|
||||
})
|
||||
}
|
||||
if r.Memory != nil {
|
||||
if r.Memory.Limit != nil {
|
||||
|
@ -578,98 +619,48 @@ func CreateCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
|||
}
|
||||
}
|
||||
}
|
||||
// append the default allowed devices to the end of the list
|
||||
c.Resources.Devices = append(c.Resources.Devices, AllowedDevices...)
|
||||
// Append the default allowed devices to the end of the list.
|
||||
// XXX: Really this should be prefixed...
|
||||
for _, device := range AllowedDevices {
|
||||
c.Resources.Devices = append(c.Resources.Devices, &device.DeviceRule)
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func stringToCgroupDeviceRune(s string) (rune, error) {
|
||||
func stringToCgroupDeviceRune(s string) (configs.DeviceType, error) {
|
||||
switch s {
|
||||
case "a":
|
||||
return 'a', nil
|
||||
return configs.WildcardDevice, nil
|
||||
case "b":
|
||||
return 'b', nil
|
||||
return configs.BlockDevice, nil
|
||||
case "c":
|
||||
return 'c', nil
|
||||
return configs.CharDevice, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("invalid cgroup device type %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
func stringToDeviceRune(s string) (rune, error) {
|
||||
func stringToDeviceRune(s string) (configs.DeviceType, error) {
|
||||
switch s {
|
||||
case "p":
|
||||
return 'p', nil
|
||||
case "u":
|
||||
return 'u', nil
|
||||
return configs.FifoDevice, nil
|
||||
case "u", "c":
|
||||
return configs.CharDevice, nil
|
||||
case "b":
|
||||
return 'b', nil
|
||||
case "c":
|
||||
return 'c', nil
|
||||
return configs.BlockDevice, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("invalid device type %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
func createDevices(spec *specs.Spec, config *configs.Config) error {
|
||||
// add whitelisted devices
|
||||
config.Devices = []*configs.Device{
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/null",
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/random",
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/full",
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/tty",
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/zero",
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/urandom",
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
// Add default set of devices.
|
||||
for _, device := range AllowedDevices {
|
||||
if device.Path != "" {
|
||||
config.Devices = append(config.Devices, device)
|
||||
}
|
||||
}
|
||||
// merge in additional devices from the spec
|
||||
// Merge in additional devices from the spec.
|
||||
if spec.Linux != nil {
|
||||
for _, d := range spec.Linux.Devices {
|
||||
var uid, gid uint32
|
||||
|
@ -689,10 +680,12 @@ func createDevices(spec *specs.Spec, config *configs.Config) error {
|
|||
filemode = *d.FileMode
|
||||
}
|
||||
device := &configs.Device{
|
||||
Type: dt,
|
||||
DeviceRule: configs.DeviceRule{
|
||||
Type: dt,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
},
|
||||
Path: d.Path,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
FileMode: filemode,
|
||||
Uid: uid,
|
||||
Gid: gid,
|
||||
|
|
|
@ -126,12 +126,19 @@ function init_cgroup_paths() {
|
|||
if stat -f -c %t /sys/fs/cgroup | grep -qFw 63677270; then
|
||||
CGROUP_UNIFIED=yes
|
||||
# "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||
# - devices (since kernel 4.15)
|
||||
# - freezer (since kernel 5.2)
|
||||
# Assume these are always available, as it is hard to detect
|
||||
CGROUP_SUBSYSTEMS=$(cat /sys/fs/cgroup/cgroup.controllers; echo devices freezer)
|
||||
# - devices (since kernel 4.15) we must assume to be supported because
|
||||
# it's quite hard to test.
|
||||
# - freezer (since kernel 5.2) we can auto-detect by looking for the
|
||||
# "cgroup.freeze" file in a *non-root* cgroup.
|
||||
CGROUP_SUBSYSTEMS=$(cat /sys/fs/cgroup/cgroup.controllers; echo devices)
|
||||
CGROUP_BASE_PATH=/sys/fs/cgroup
|
||||
CGROUP_PATH=${CGROUP_BASE_PATH}${REL_CGROUPS_PATH}
|
||||
|
||||
# Find any cgroup.freeze files...
|
||||
if [ -n "$(find "$CGROUP_BASE_PATH" -type f -name "cgroup.freeze" -print -quit)" ]
|
||||
then
|
||||
CGROUP_SUBSYSTEMS+=" freezer"
|
||||
fi
|
||||
else
|
||||
CGROUP_UNIFIED=no
|
||||
CGROUP_SUBSYSTEMS=$(awk '!/^#/ {print $1}' /proc/cgroups)
|
||||
|
@ -174,7 +181,7 @@ function check_systemd_value() {
|
|||
unitname=$1
|
||||
source=$2
|
||||
expected=$3
|
||||
|
||||
|
||||
if [ $(id -u) = "0" ]; then
|
||||
current=$(systemctl show $unitname | grep $source)
|
||||
else
|
||||
|
@ -200,75 +207,85 @@ function fail() {
|
|||
# support it, the test is skipped with a message.
|
||||
function requires() {
|
||||
for var in "$@"; do
|
||||
local skip_me
|
||||
case $var in
|
||||
criu)
|
||||
if [ ! -e "$CRIU" ]; then
|
||||
skip "test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
root)
|
||||
if [ "$ROOTLESS" -ne 0 ]; then
|
||||
skip "test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
rootless)
|
||||
if [ "$ROOTLESS" -eq 0 ]; then
|
||||
skip "test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
rootless_idmap)
|
||||
if [[ "$ROOTLESS_FEATURES" != *"idmap"* ]]; then
|
||||
skip "test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
rootless_cgroup)
|
||||
if [[ "$ROOTLESS_FEATURES" != *"cgroup"* ]]; then
|
||||
skip "test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
rootless_no_cgroup)
|
||||
if [[ "$ROOTLESS_FEATURES" == *"cgroup"* ]]; then
|
||||
skip "test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
cgroups_freezer)
|
||||
init_cgroup_paths
|
||||
if [[ "$CGROUP_SUBSYSTEMS" != *"freezer"* ]]; then
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
cgroups_kmem)
|
||||
init_cgroup_paths
|
||||
if [ ! -e "${CGROUP_MEMORY_BASE_PATH}/memory.kmem.limit_in_bytes" ]; then
|
||||
skip "Test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
cgroups_rt)
|
||||
init_cgroup_paths
|
||||
if [ ! -e "${CGROUP_CPU_BASE_PATH}/cpu.rt_period_us" ]; then
|
||||
skip "Test requires ${var}"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
cgroups_v1)
|
||||
init_cgroup_paths
|
||||
if [ "$CGROUP_UNIFIED" != "no" ]; then
|
||||
skip "Test requires cgroups v1"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
cgroups_v2)
|
||||
init_cgroup_paths
|
||||
if [ "$CGROUP_UNIFIED" != "yes" ]; then
|
||||
skip "Test requires cgroups v2 (unified)"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
systemd)
|
||||
if [ -z "${RUNC_USE_SYSTEMD}" ]; then
|
||||
skip "Test requires systemd"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
no_systemd)
|
||||
if [ -n "${RUNC_USE_SYSTEMD}" ]; then
|
||||
skip "Test requires no systemd"
|
||||
skip_me=1
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
fail "BUG: Invalid requires ${var}."
|
||||
fail "BUG: Invalid requires $var."
|
||||
;;
|
||||
esac
|
||||
if [ -n "$skip_me" ]; then
|
||||
skip "test requires $var"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
|
|
|
@ -12,8 +12,12 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc pause and resume" {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
if [[ "$ROOTLESS" -ne 0 ]]
|
||||
then
|
||||
requires rootless_cgroup
|
||||
set_cgroups_path "$BUSYBOX_BUNDLE"
|
||||
fi
|
||||
requires cgroups_freezer
|
||||
|
||||
# run busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
|
@ -37,8 +41,12 @@ function teardown() {
|
|||
}
|
||||
|
||||
@test "runc pause and resume with nonexist container" {
|
||||
# XXX: currently cgroups require root containers.
|
||||
requires root
|
||||
if [[ "$ROOTLESS" -ne 0 ]]
|
||||
then
|
||||
requires rootless_cgroup
|
||||
set_cgroups_path "$BUSYBOX_BUNDLE"
|
||||
fi
|
||||
requires cgroups_freezer
|
||||
|
||||
# run test_busybox detached
|
||||
runc run -d --console-socket $CONSOLE_SOCKET test_busybox
|
||||
|
|
|
@ -312,3 +312,70 @@ EOF
|
|||
check_cgroup_value "cpu.rt_period_us" 900001
|
||||
check_cgroup_value "cpu.rt_runtime_us" 600001
|
||||
}
|
||||
|
||||
@test "update devices [minimal transition rules]" {
|
||||
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
|
||||
# This test currently only makes sense on cgroupv1.
|
||||
requires cgroups_v1
|
||||
|
||||
# Run a basic shell script that tries to write to /dev/null. If "runc
|
||||
# update" makes use of minimal transition rules, updates should not cause
|
||||
# writes to fail at any point.
|
||||
jq '.process.args = ["sh", "-c", "while true; do echo >/dev/null; done"]' config.json > config.json.tmp
|
||||
mv config.json{.tmp,}
|
||||
|
||||
# Set up a temporary console socket and recvtty so we can get the stdio.
|
||||
TMP_RECVTTY_DIR="$(mktemp -d "$BATS_TMPDIR/runc-tmp-recvtty.XXXXXX")"
|
||||
TMP_RECVTTY_PID="$TMP_RECVTTY_DIR/recvtty.pid"
|
||||
TMP_CONSOLE_SOCKET="$TMP_RECVTTY_DIR/console.sock"
|
||||
CONTAINER_OUTPUT="$TMP_RECVTTY_DIR/output"
|
||||
("$RECVTTY" --no-stdin --pid-file "$TMP_RECVTTY_PID" \
|
||||
--mode single "$TMP_CONSOLE_SOCKET" &>"$CONTAINER_OUTPUT" ) &
|
||||
retry 10 0.1 [ -e "$TMP_CONSOLE_SOCKET" ]
|
||||
|
||||
# Run the container in the background.
|
||||
runc run -d --console-socket "$TMP_CONSOLE_SOCKET" test_update
|
||||
cat "$CONTAINER_OUTPUT"
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# Trigger an update. This update doesn't actually change the device rules,
|
||||
# but it will trigger the devices cgroup code to reapply the current rules.
|
||||
# We trigger the update a few times to make sure we hit the race.
|
||||
for _ in {1..12}
|
||||
do
|
||||
# TODO: Update "runc update" so we can change the device rules.
|
||||
runc update --pids-limit 30 test_update
|
||||
[ "$status" -eq 0 ]
|
||||
done
|
||||
|
||||
# Kill recvtty.
|
||||
kill -9 "$(<"$TMP_RECVTTY_PID")"
|
||||
|
||||
# There should've been no output from the container.
|
||||
cat "$CONTAINER_OUTPUT"
|
||||
[ -z "$(<"$CONTAINER_OUTPUT")" ]
|
||||
}
|
||||
|
||||
@test "update paused container" {
|
||||
[[ "$ROOTLESS" -ne 0 ]] && requires rootless_cgroup
|
||||
requires cgroups_freezer
|
||||
|
||||
# Run the container in the background.
|
||||
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# Pause the container.
|
||||
runc pause test_update
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# Trigger an unrelated update.
|
||||
runc update --pids-limit 30 test_update
|
||||
[ "$status" -eq 0 ]
|
||||
|
||||
# The container should still be paused.
|
||||
testcontainer test_update paused
|
||||
|
||||
# Resume the container.
|
||||
runc resume test_update
|
||||
[ "$status" -eq 0 ]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue