Merge pull request #1279 from xiaochenshen/rdt-cat-resource-manager-v1

libcontainer: add support for Intel RDT/CAT in runc
This commit is contained in:
Mrunal Patel 2017-09-06 14:36:02 -07:00 committed by GitHub
commit 5274430fee
18 changed files with 1039 additions and 30 deletions

View File

@ -11,6 +11,7 @@ import (
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
@ -25,11 +26,12 @@ type event struct {
// stats is the runc specific stats structure for stability when encoding and decoding stats.
type stats struct {
CPU cpu `json:"cpu"`
Memory memory `json:"memory"`
Pids pids `json:"pids"`
Blkio blkio `json:"blkio"`
Hugetlb map[string]hugetlb `json:"hugetlb"`
CPU cpu `json:"cpu"`
Memory memory `json:"memory"`
Pids pids `json:"pids"`
Blkio blkio `json:"blkio"`
Hugetlb map[string]hugetlb `json:"hugetlb"`
IntelRdt intelRdt `json:"intel_rdt"`
}
type hugetlb struct {
@ -96,6 +98,23 @@ type memory struct {
Raw map[string]uint64 `json:"raw,omitempty"`
}
type l3CacheInfo struct {
CbmMask string `json:"cbm_mask,omitempty"`
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
NumClosids uint64 `json:"num_closids,omitempty"`
}
type intelRdt struct {
// The read-only L3 cache information
L3CacheInfo *l3CacheInfo `json:"l3_cache_info,omitempty"`
// The read-only L3 cache schema in root
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
// The L3 cache schema in 'container_id' group
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}
var eventsCommand = cli.Command{
Name: "events",
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
@ -227,6 +246,13 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *stats {
for k, v := range cg.HugetlbStats {
s.Hugetlb[k] = convertHugtlb(v)
}
if is := ls.IntelRdtStats; is != nil {
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
}
return &s
}
@ -259,3 +285,11 @@ func convertBlkioEntry(c []cgroups.BlkioStatEntry) []blkioEntry {
}
return out
}
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *l3CacheInfo {
return &l3CacheInfo{
CbmMask: i.CbmMask,
MinCbmBits: i.MinCbmBits,
NumClosids: i.NumClosids,
}
}

View File

@ -154,6 +154,90 @@ that no processes or threads escape the cgroups. This sync is
done via a pipe ( specified in the runtime section below ) that the container's
init process will block waiting for the parent to finish setup.
### IntelRdt
Intel platforms with new Xeon CPU support Intel Resource Director Technology
(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which
currently supports L3 cache resource allocation.
This feature provides a way for the software to restrict cache allocation to a
defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
The different subsets are identified by class of service (CLOS) and each CLOS
has a capacity bitmask (CBM).
It can be used to handle L3 cache resource allocation for containers if
hardware and kernel support Intel RDT/CAT.
In Linux 4.10 kernel or newer, the interface is defined and exposed via
"resource control" filesystem, which is a "cgroup-like" interface.
Comparing with cgroups, it has similar process management lifecycle and
interfaces in a container. But unlike cgroups' hierarchy, it has single level
filesystem layout.
Intel RDT "resource control" filesystem hierarchy:
```
mount -t resctrl resctrl /sys/fs/resctrl
tree /sys/fs/resctrl
/sys/fs/resctrl/
|-- info
| |-- L3
| |-- cbm_mask
| |-- min_cbm_bits
| |-- num_closids
|-- cpus
|-- schemata
|-- tasks
|-- <container_id>
|-- cpus
|-- schemata
|-- tasks
```
For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
resource constraints.
The file `tasks` has a list of tasks that belongs to this group (e.g.,
<container_id>" group). Tasks can be added to a group by writing the task ID
to the "tasks" file (which will automatically remove them from the previous
group to which they belonged). New tasks created by fork(2) and clone(2) are
added to the same group as their parent. If a pid is not in any sub group, it
is in root group.
The file `schemata` has allocation masks/values for L3 cache on each socket,
which contains L3 cache id and capacity bitmask (CBM).
```
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
```
For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
be set is less than the max bit. The max bits in the CBM is varied among
supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
layout, the CBM in a group should be a subset of the CBM in root. Kernel will
check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
For more information about Intel RDT/CAT kernel interface:
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
An example for runc:
```
Consider a two-socket machine with two L3 caches where the default CBM is
0xfffff and the max CBM length is 20 bits. With this configuration, tasks
inside the container only have access to the "upper" 80% of L3 cache id 0 and
the "lower" 50% L3 cache id 1:
"linux": {
"intelRdt": {
"l3CacheSchema": "L3:0=ffff0;1=3ff"
}
}
```
### Security
The standard set of Linux capabilities that are set in a container

View File

@ -187,6 +187,10 @@ type Config struct {
// Rootless specifies whether the container is a rootless container.
Rootless bool `json:"rootless"`
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
// to limit the resources (e.g., L3 cache) the container has available
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
}
type Hooks struct {

View File

@ -0,0 +1,7 @@
package configs
type IntelRdt struct {
// The schema for L3 cache id and capacity bitmask (CBM)
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}

View File

@ -7,6 +7,7 @@ import (
"strings"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
selinux "github.com/opencontainers/selinux/go-selinux"
)
@ -40,6 +41,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error {
if err := v.sysctl(config); err != nil {
return err
}
if err := v.intelrdt(config); err != nil {
return err
}
if config.Rootless {
if err := v.rootless(config); err != nil {
return err
@ -153,6 +157,19 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
return nil
}
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
if config.IntelRdt != nil {
if !intelrdt.IsIntelRdtEnabled() {
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
}
if config.IntelRdt.L3CacheSchema == "" {
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
}
}
return nil
}
func isSymbolicLink(path string) (bool, error) {
fi, err := os.Lstat(path)
if err != nil {

View File

@ -21,6 +21,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
@ -38,6 +39,7 @@ type linuxContainer struct {
root string
config *configs.Config
cgroupManager cgroups.Manager
intelRdtManager intelrdt.Manager
initArgs []string
initProcess parentProcess
initProcessStartTime uint64
@ -67,6 +69,9 @@ type State struct {
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
// Intel RDT "resource control" filesystem path
IntelRdtPath string `json:"intel_rdt_path"`
}
// Container is a libcontainer container object.
@ -163,6 +168,11 @@ func (c *linuxContainer) Stats() (*Stats, error) {
if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container stats from cgroups")
}
if c.intelRdtManager != nil {
if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil {
return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats")
}
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
@ -193,6 +203,15 @@ func (c *linuxContainer) Set(config configs.Config) error {
}
return err
}
if c.intelRdtManager != nil {
if err := c.intelRdtManager.Set(&config); err != nil {
// Set configs back
if err2 := c.intelRdtManager.Set(c.config); err2 != nil {
logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2)
}
return err
}
}
// After config setting succeed, update config and states
c.config = &config
_, err = c.updateState(nil)
@ -434,15 +453,16 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c
return nil, err
}
return &initProcess{
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
cmd: cmd,
childPipe: childPipe,
parentPipe: parentPipe,
manager: c.cgroupManager,
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
}, nil
}
@ -461,6 +481,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe,
return &setnsProcess{
cmd: cmd,
cgroupPaths: c.cgroupManager.GetPaths(),
intelRdtPath: state.IntelRdtPath,
childPipe: childPipe,
parentPipe: parentPipe,
config: c.newInitConfig(p),
@ -1519,6 +1540,10 @@ func (c *linuxContainer) currentState() (*State, error) {
startTime, _ = c.initProcess.startTime()
externalDescriptors = c.initProcess.externalDescriptors()
}
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
if err != nil {
intelRdtPath = ""
}
state := &State{
BaseState: BaseState{
ID: c.ID(),
@ -1529,6 +1554,7 @@ func (c *linuxContainer) currentState() (*State, error) {
},
Rootless: c.config.Rootless,
CgroupPaths: c.cgroupManager.GetPaths(),
IntelRdtPath: intelRdtPath,
NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: externalDescriptors,
}

View File

@ -9,6 +9,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
)
@ -19,6 +20,11 @@ type mockCgroupManager struct {
paths map[string]string
}
type mockIntelRdtManager struct {
stats *intelrdt.Stats
path string
}
func (m *mockCgroupManager) GetPids() ([]int, error) {
return m.pids, nil
}
@ -51,6 +57,26 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error {
return nil
}
func (m *mockIntelRdtManager) Apply(pid int) error {
return nil
}
func (m *mockIntelRdtManager) GetStats() (*intelrdt.Stats, error) {
return m.stats, nil
}
func (m *mockIntelRdtManager) Destroy() error {
return nil
}
func (m *mockIntelRdtManager) GetPath() string {
return m.path
}
func (m *mockIntelRdtManager) Set(container *configs.Config) error {
return nil
}
type mockProcess struct {
_pid int
started uint64
@ -118,6 +144,11 @@ func TestGetContainerStats(t *testing.T) {
},
},
},
intelRdtManager: &mockIntelRdtManager{
stats: &intelrdt.Stats{
L3CacheSchema: "L3:0=f;1=f0",
},
},
}
stats, err := container.Stats()
if err != nil {
@ -129,13 +160,22 @@ func TestGetContainerStats(t *testing.T) {
if stats.CgroupStats.MemoryStats.Usage.Usage != 1024 {
t.Fatalf("expected memory usage 1024 but recevied %d", stats.CgroupStats.MemoryStats.Usage.Usage)
}
if intelrdt.IsIntelRdtEnabled() {
if stats.IntelRdtStats == nil {
t.Fatal("intel rdt stats are nil")
}
if stats.IntelRdtStats.L3CacheSchema != "L3:0=f;1=f0" {
t.Fatalf("expected L3CacheSchema L3:0=f;1=f0 but recevied %s", stats.IntelRdtStats.L3CacheSchema)
}
}
}
func TestGetContainerState(t *testing.T) {
var (
pid = os.Getpid()
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
expectedNetworkPath = fmt.Sprintf("/proc/%d/ns/net", pid)
pid = os.Getpid()
expectedMemoryPath = "/sys/fs/cgroup/memory/myid"
expectedNetworkPath = fmt.Sprintf("/proc/%d/ns/net", pid)
expectedIntelRdtPath = "/sys/fs/resctrl/myid"
)
container := &linuxContainer{
id: "myid",
@ -166,6 +206,12 @@ func TestGetContainerState(t *testing.T) {
"memory": expectedMemoryPath,
},
},
intelRdtManager: &mockIntelRdtManager{
stats: &intelrdt.Stats{
L3CacheSchema: "L3:0=f0;1=f",
},
path: expectedIntelRdtPath,
},
}
container.state = &createdState{c: container}
state, err := container.State()
@ -185,6 +231,15 @@ func TestGetContainerState(t *testing.T) {
if memPath := paths["memory"]; memPath != expectedMemoryPath {
t.Fatalf("expected memory path %q but received %q", expectedMemoryPath, memPath)
}
if intelrdt.IsIntelRdtEnabled() {
intelRdtPath := state.IntelRdtPath
if intelRdtPath == "" {
t.Fatal("intel rdt path should not be empty")
}
if intelRdtPath != expectedIntelRdtPath {
t.Fatalf("expected intel rdt path %q but received %q", expectedIntelRdtPath, intelRdtPath)
}
}
for _, ns := range container.config.Namespaces {
path := state.NamespacePaths[ns.Type]
if path == "" {

View File

@ -18,6 +18,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
@ -86,6 +87,20 @@ func RootlessCgroups(l *LinuxFactory) error {
return nil
}
// IntelRdtfs is an options func to configure a LinuxFactory to return
// containers that use the Intel RDT "resource control" filesystem to
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
func IntelRdtFs(l *LinuxFactory) error {
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
return &intelrdt.IntelRdtManager{
Config: config,
Id: id,
Path: path,
}
}
return nil
}
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
func TmpfsRoot(l *LinuxFactory) error {
mounted, err := mount.Mounted(l.Root)
@ -150,6 +165,9 @@ type LinuxFactory struct {
// NewCgroupsManager returns an initialized cgroups manager for a single container.
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
}
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
@ -185,6 +203,10 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}
c.intelRdtManager = nil
if intelrdt.IsIntelRdtEnabled() && c.config.IntelRdt != nil {
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
}
c.state = &stoppedState{c: c}
return c, nil
}
@ -222,6 +244,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if err := c.refreshState(); err != nil {
return nil, err
}
c.intelRdtManager = nil
if intelrdt.IsIntelRdtEnabled() && c.config.IntelRdt != nil {
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
}
return c, nil
}

View File

@ -50,6 +50,32 @@ func TestFactoryNew(t *testing.T) {
}
}
func TestFactoryNewIntelRdt(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
t.Fatal(rerr)
}
defer os.RemoveAll(root)
factory, err := New(root, Cgroupfs, IntelRdtFs)
if err != nil {
t.Fatal(err)
}
if factory == nil {
t.Fatal("factory should not be nil")
}
lfactory, ok := factory.(*LinuxFactory)
if !ok {
t.Fatal("expected linux factory returned on linux based systems")
}
if lfactory.Root != root {
t.Fatalf("expected factory root to be %q but received %q", root, lfactory.Root)
}
if factory.Type() != "libcontainer" {
t.Fatalf("unexpected factory type: %q, expected %q", factory.Type(), "libcontainer")
}
}
func TestFactoryNewTmpfs(t *testing.T) {
root, rerr := newTestRoot()
if rerr != nil {
@ -164,7 +190,7 @@ func TestFactoryLoadContainer(t *testing.T) {
if err := marshal(filepath.Join(root, id, stateFilename), expectedState); err != nil {
t.Fatal(err)
}
factory, err := New(root, Cgroupfs)
factory, err := New(root, Cgroupfs, IntelRdtFs)
if err != nil {
t.Fatal(err)
}

View File

@ -0,0 +1,545 @@
// +build linux
package intelrdt
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"github.com/opencontainers/runc/libcontainer/configs"
)
/*
* About Intel RDT/CAT feature:
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
* Cache is the only resource that is supported in RDT.
*
* This feature provides a way for the software to restrict cache allocation to a
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
* The different subsets are identified by class of service (CLOS) and each CLOS
* has a capacity bitmask (CBM).
*
* For more information about Intel RDT/CAT can be found in the section 17.17
* of Intel Software Developer Manual.
*
* About Intel RDT/CAT kernel interface:
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
* "resource control" filesystem, which is a "cgroup-like" interface.
*
* Comparing with cgroups, it has similar process management lifecycle and
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
* filesystem layout.
*
* Intel RDT "resource control" filesystem hierarchy:
* mount -t resctrl resctrl /sys/fs/resctrl
* tree /sys/fs/resctrl
* /sys/fs/resctrl/
* |-- info
* | |-- L3
* | |-- cbm_mask
* | |-- min_cbm_bits
* | |-- num_closids
* |-- cpus
* |-- schemata
* |-- tasks
* |-- <container_id>
* |-- cpus
* |-- schemata
* |-- tasks
*
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
* resource constraints.
*
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
* <container_id>" group). Tasks can be added to a group by writing the task ID
* to the "tasks" file (which will automatically remove them from the previous
* group to which they belonged). New tasks created by fork(2) and clone(2) are
* added to the same group as their parent. If a pid is not in any sub group, it is
* in root group.
*
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
* which contains L3 cache id and capacity bitmask (CBM).
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
*
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
* be set is less than the max bit. The max bits in the CBM is varied among
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
*
* For more information about Intel RDT/CAT kernel interface:
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
*
* An example for runc:
* Consider a two-socket machine with two L3 caches where the default CBM is
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
* the "lower" 50% L3 cache id 1:
*
* "linux": {
* "intelRdt": {
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
* }
* }
*/
type Manager interface {
// Applies Intel RDT configuration to the process with the specified pid
Apply(pid int) error
// Returns statistics for Intel RDT
GetStats() (*Stats, error)
// Destroys the Intel RDT 'container_id' group
Destroy() error
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
GetPath() string
// Set Intel RDT "resource control" filesystem as configured.
Set(container *configs.Config) error
}
// This implements interface Manager
type IntelRdtManager struct {
mu sync.Mutex
Config *configs.Config
Id string
Path string
}
const (
IntelRdtTasks = "tasks"
)
var (
// The absolute root path of the Intel RDT "resource control" filesystem
intelRdtRoot string
intelRdtRootLock sync.Mutex
// The flag to indicate if Intel RDT is supported
isIntelRdtEnabled bool
)
type intelRdtData struct {
root string
config *configs.Config
pid int
}
// Return the mount point path of Intel RDT "resource control" filesysem
func findIntelRdtMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
text := s.Text()
fields := strings.Split(text, " ")
// Safe as mountinfo encodes mountpoints with spaces as \040.
index := strings.Index(text, " - ")
postSeparatorFields := strings.Fields(text[index+3:])
numPostFields := len(postSeparatorFields)
// This is an error as we can't detect if the mount is for "Intel RDT"
if numPostFields == 0 {
return "", fmt.Errorf("Found no fields post '-' in %q", text)
}
if postSeparatorFields[0] == "resctrl" {
// Check that the mount is properly formated.
if numPostFields < 3 {
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
}
return fields[4], nil
}
}
if err := s.Err(); err != nil {
return "", err
}
return "", NewNotFoundError("Intel RDT")
}
// Gets the root path of Intel RDT "resource control" filesystem
func getIntelRdtRoot() (string, error) {
intelRdtRootLock.Lock()
defer intelRdtRootLock.Unlock()
if intelRdtRoot != "" {
return intelRdtRoot, nil
}
root, err := findIntelRdtMountpointDir()
if err != nil {
return "", err
}
if _, err := os.Stat(root); err != nil {
return "", err
}
intelRdtRoot = root
return intelRdtRoot, nil
}
func isIntelRdtMounted() bool {
_, err := getIntelRdtRoot()
if err != nil {
return false
}
return true
}
func parseCpuInfoFile(path string) (bool, error) {
f, err := os.Open(path)
if err != nil {
return false, err
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
if err := s.Err(); err != nil {
return false, err
}
text := s.Text()
flags := strings.Split(text, " ")
// "cat_l3" flag is set if Intel RDT/CAT is supported
for _, flag := range flags {
if flag == "cat_l3" {
return true, nil
}
}
}
return false, nil
}
func parseUint(s string, base, bitSize int) (uint64, error) {
value, err := strconv.ParseUint(s, base, bitSize)
if err != nil {
intValue, intErr := strconv.ParseInt(s, base, bitSize)
// 1. Handle negative values greater than MinInt64 (and)
// 2. Handle negative values lesser than MinInt64
if intErr == nil && intValue < 0 {
return 0, nil
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
return 0, nil
}
return value, err
}
return value, nil
}
// Gets a single uint64 value from the specified file.
func getIntelRdtParamUint(path, file string) (uint64, error) {
fileName := filepath.Join(path, file)
contents, err := ioutil.ReadFile(fileName)
if err != nil {
return 0, err
}
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
if err != nil {
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
}
return res, nil
}
// Gets a string value from the specified file
func getIntelRdtParamString(path, file string) (string, error) {
contents, err := ioutil.ReadFile(filepath.Join(path, file))
if err != nil {
return "", err
}
return strings.TrimSpace(string(contents)), nil
}
func readTasksFile(dir string) ([]int, error) {
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
if err != nil {
return nil, err
}
defer f.Close()
var (
s = bufio.NewScanner(f)
out = []int{}
)
for s.Scan() {
if t := s.Text(); t != "" {
pid, err := strconv.Atoi(t)
if err != nil {
return nil, err
}
out = append(out, pid)
}
}
return out, nil
}
func writeFile(dir, file, data string) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", file)
}
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
}
return nil
}
func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
return &intelRdtData{
root: rootPath,
config: c,
pid: pid,
}, nil
}
// Get the read-only L3 cache information
func getL3CacheInfo() (*L3CacheInfo, error) {
l3CacheInfo := &L3CacheInfo{}
rootPath, err := getIntelRdtRoot()
if err != nil {
return l3CacheInfo, err
}
path := filepath.Join(rootPath, "info", "L3")
cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
if err != nil {
return l3CacheInfo, err
}
minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
if err != nil {
return l3CacheInfo, err
}
numClosids, err := getIntelRdtParamUint(path, "num_closids")
if err != nil {
return l3CacheInfo, err
}
l3CacheInfo.CbmMask = cbmMask
l3CacheInfo.MinCbmBits = minCbmBits
l3CacheInfo.NumClosids = numClosids
return l3CacheInfo, nil
}
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
func WriteIntelRdtTasks(dir string, pid int) error {
if dir == "" {
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
}
// Dont attach any pid if -1 is specified as a pid
if pid != -1 {
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
}
}
return nil
}
// Check if Intel RDT is enabled
func IsIntelRdtEnabled() bool {
// We have checked the flag before
if isIntelRdtEnabled {
return true
}
// 1. Check if hardware and kernel support Intel RDT/CAT feature
// "cat_l3" flag is set if supported
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
if !isFlagSet || err != nil {
isIntelRdtEnabled = false
return false
}
// 2. Check if Intel RDT "resource control" filesystem is mounted
// The user guarantees to mount the filesystem
isIntelRdtEnabled = isIntelRdtMounted()
return isIntelRdtEnabled
}
// Get the 'container_id' path in Intel RDT "resource control" filesystem
func GetIntelRdtPath(id string) (string, error) {
rootPath, err := getIntelRdtRoot()
if err != nil {
return "", err
}
path := filepath.Join(rootPath, id)
return path, nil
}
// Applies Intel RDT configuration to the process with the specified pid
func (m *IntelRdtManager) Apply(pid int) (err error) {
d, err := getIntelRdtData(m.Config, pid)
if err != nil && !IsNotFound(err) {
return err
}
m.mu.Lock()
defer m.mu.Unlock()
path, err := d.join(m.Id)
if err != nil {
return err
}
m.Path = path
return nil
}
// Destroys the Intel RDT 'container_id' group
func (m *IntelRdtManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()
if err := os.RemoveAll(m.Path); err != nil {
return err
}
m.Path = ""
return nil
}
// Returns Intel RDT path to save in a state file and to be able to
// restore the object later
func (m *IntelRdtManager) GetPath() string {
if m.Path == "" {
m.Path, _ = GetIntelRdtPath(m.Id)
}
return m.Path
}
// Returns statistics for Intel RDT
func (m *IntelRdtManager) GetStats() (*Stats, error) {
m.mu.Lock()
defer m.mu.Unlock()
stats := NewStats()
// The read-only L3 cache information
l3CacheInfo, err := getL3CacheInfo()
if err != nil {
return nil, err
}
stats.L3CacheInfo = l3CacheInfo
// The read-only L3 cache schema in root
rootPath, err := getIntelRdtRoot()
if err != nil {
return nil, err
}
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
stats.L3CacheSchemaRoot = schemaRootStrings[0]
// The L3 cache schema in 'container_id' group
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
if err != nil {
return nil, err
}
// L3 cache schema is in the first line
schemaStrings := strings.Split(tmpStrings, "\n")
stats.L3CacheSchema = schemaStrings[0]
return stats, nil
}
// Set Intel RDT "resource control" filesystem as configured.
func (m *IntelRdtManager) Set(container *configs.Config) error {
path := m.GetPath()
// About L3 cache schema file:
// The schema has allocation masks/values for L3 cache on each socket,
// which contains L3 cache id and capacity bitmask (CBM).
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
// For example, on a two-socket machine, L3's schema line could be:
// L3:0=ff;1=c0
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
//
// About L3 cache CBM validity:
// The valid L3 cache CBM is a *contiguous bits set* and number of
// bits that can be set is less than the max bit. The max bits in the
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
// "resource control" filesystem layout, the CBM in a group should
// be a subset of the CBM in root. Kernel will check if it is valid
// when writing.
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
// which mapping to entire L3 cache capacity. Some valid CBM values
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
if container.IntelRdt != nil {
l3CacheSchema := container.IntelRdt.L3CacheSchema
if l3CacheSchema != "" {
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
return err
}
}
}
return nil
}
func (raw *intelRdtData) join(id string) (string, error) {
path := filepath.Join(raw.root, id)
if err := os.MkdirAll(path, 0755); err != nil {
return "", err
}
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
return "", err
}
return path, nil
}
type NotFoundError struct {
ResourceControl string
}
func (e *NotFoundError) Error() string {
return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
}
func NewNotFoundError(res string) error {
return &NotFoundError{
ResourceControl: res,
}
}
func IsNotFound(err error) bool {
if err == nil {
return false
}
_, ok := err.(*NotFoundError)
return ok
}

View File

@ -0,0 +1,46 @@
// +build linux
package intelrdt
import (
"strings"
"testing"
)
func TestIntelRdtSetL3CacheSchema(t *testing.T) {
if !IsIntelRdtEnabled() {
return
}
helper := NewIntelRdtTestUtil(t)
defer helper.cleanup()
const (
l3CacheSchemaBefore = "L3:0=f;1=f0"
l3CacheSchemeAfter = "L3:0=f0;1=f"
)
helper.writeFileContents(map[string]string{
"schemata": l3CacheSchemaBefore + "\n",
})
helper.IntelRdtData.config.IntelRdt.L3CacheSchema = l3CacheSchemeAfter
intelrdt := &IntelRdtManager{
Config: helper.IntelRdtData.config,
Path: helper.IntelRdtPath,
}
if err := intelrdt.Set(helper.IntelRdtData.config); err != nil {
t.Fatal(err)
}
tmpStrings, err := getIntelRdtParamString(helper.IntelRdtPath, "schemata")
if err != nil {
t.Fatalf("Failed to parse file 'schemata' - %s", err)
}
values := strings.Split(tmpStrings, "\n")
value := values[0]
if value != l3CacheSchemeAfter {
t.Fatal("Got the wrong value, set 'schemata' failed.")
}
}

View File

@ -0,0 +1,24 @@
// +build linux
package intelrdt
type L3CacheInfo struct {
CbmMask string `json:"cbm_mask,omitempty"`
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
NumClosids uint64 `json:"num_closids,omitempty"`
}
type Stats struct {
// The read-only L3 cache information
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
// The read-only L3 cache schema in root
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
// The L3 cache schema in 'container_id' group
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
}
func NewStats() *Stats {
return &Stats{}
}

View File

@ -0,0 +1,67 @@
// +build linux
/*
* Utility for testing Intel RDT operations.
* Creates a mock of the Intel RDT "resource control" filesystem for the duration of the test.
*/
package intelrdt
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"github.com/opencontainers/runc/libcontainer/configs"
)
type intelRdtTestUtil struct {
// intelRdt data to use in tests
IntelRdtData *intelRdtData
// Path to the mock Intel RDT "resource control" filesystem directory
IntelRdtPath string
// Temporary directory to store mock Intel RDT "resource control" filesystem
tempDir string
t *testing.T
}
// Creates a new test util
func NewIntelRdtTestUtil(t *testing.T) *intelRdtTestUtil {
d := &intelRdtData{
config: &configs.Config{
IntelRdt: &configs.IntelRdt{},
},
}
tempDir, err := ioutil.TempDir("", "intelrdt_test")
if err != nil {
t.Fatal(err)
}
d.root = tempDir
testIntelRdtPath := filepath.Join(d.root, "resctrl")
if err != nil {
t.Fatal(err)
}
// Ensure the full mock Intel RDT "resource control" filesystem path exists
err = os.MkdirAll(testIntelRdtPath, 0755)
if err != nil {
t.Fatal(err)
}
return &intelRdtTestUtil{IntelRdtData: d, IntelRdtPath: testIntelRdtPath, tempDir: tempDir, t: t}
}
func (c *intelRdtTestUtil) cleanup() {
os.RemoveAll(c.tempDir)
}
// Write the specified contents on the mock of the specified Intel RDT "resource control" files
func (c *intelRdtTestUtil) writeFileContents(fileContents map[string]string) {
for file, contents := range fileContents {
err := writeFile(c.IntelRdtPath, file, contents)
if err != nil {
c.t.Fatal(err)
}
}
}

View File

@ -15,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
@ -49,6 +50,7 @@ type setnsProcess struct {
parentPipe *os.File
childPipe *os.File
cgroupPaths map[string]string
intelRdtPath string
config *initConfig
fds []string
process *Process
@ -89,6 +91,15 @@ func (p *setnsProcess) start() (err error) {
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
}
}
if p.intelRdtPath != "" {
// if Intel RDT "resource control" filesystem path exists
_, err := os.Stat(p.intelRdtPath)
if err == nil {
if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
}
}
}
// set rlimits, this has to be done here because we lose permissions
// to raise the limits once we enter a user-namespace
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
@ -193,16 +204,17 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) {
}
type initProcess struct {
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
config *initConfig
manager cgroups.Manager
container *linuxContainer
fds []string
process *Process
bootstrapData io.Reader
sharePidns bool
cmd *exec.Cmd
parentPipe *os.File
childPipe *os.File
config *initConfig
manager cgroups.Manager
intelRdtManager intelrdt.Manager
container *linuxContainer
fds []string
process *Process
bootstrapData io.Reader
sharePidns bool
}
func (p *initProcess) pid() int {
@ -281,10 +293,18 @@ func (p *initProcess) start() error {
if err := p.manager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Apply(p.pid()); err != nil {
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
}
}
defer func() {
if err != nil {
// TODO: should not be the responsibility to call here
p.manager.Destroy()
if p.intelRdtManager != nil {
p.intelRdtManager.Destroy()
}
}
}()
if err := p.createNetworkInterfaces(); err != nil {
@ -312,6 +332,11 @@ func (p *initProcess) start() error {
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting Intel RDT config for ready process")
}
}
if p.config.Config.Hooks != nil {
s := configs.HookState{
@ -337,6 +362,11 @@ func (p *initProcess) start() error {
if err := p.manager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
}
if p.intelRdtManager != nil {
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process")
}
}
if p.config.Config.Hooks != nil {
s := configs.HookState{
Version: p.container.config.Version,

View File

@ -250,6 +250,12 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
}
createHooks(spec, config)
config.Version = specs.Version
if spec.Linux.IntelRdt != nil {
config.IntelRdt = &configs.IntelRdt{}
if spec.Linux.IntelRdt.L3CacheSchema != "" {
config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema
}
}
return config, nil
}

View File

@ -45,6 +45,11 @@ func destroy(c *linuxContainer) error {
}
}
err := c.cgroupManager.Destroy()
if c.intelRdtManager != nil {
if ierr := c.intelRdtManager.Destroy(); err == nil {
err = ierr
}
}
if rerr := os.RemoveAll(c.root); err == nil {
err = rerr
}

View File

@ -1,8 +1,10 @@
package libcontainer
import "github.com/opencontainers/runc/libcontainer/cgroups"
import "github.com/opencontainers/runc/libcontainer/intelrdt"
type Stats struct {
Interfaces []*NetworkInterface
CgroupStats *cgroups.Stats
Interfaces []*NetworkInterface
CgroupStats *cgroups.Stats
IntelRdtStats *intelrdt.Stats
}

View File

@ -13,6 +13,7 @@ import (
"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/intelrdt"
"github.com/opencontainers/runc/libcontainer/specconv"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/opencontainers/runtime-spec/specs-go"
@ -42,6 +43,10 @@ func loadFactory(context *cli.Context) (libcontainer.Factory, error) {
return nil, fmt.Errorf("systemd cgroup flag passed, but systemd support for managing cgroups is not available")
}
}
if intelrdt.IsIntelRdtEnabled() {
intelRdtManager := libcontainer.IntelRdtFs
return libcontainer.New(abs, cgroupManager, intelRdtManager, libcontainer.CriuPath(context.GlobalString("criu")))
}
return libcontainer.New(abs, cgroupManager, libcontainer.CriuPath(context.GlobalString("criu")))
}