type Manager interface { // Applies cgroup configuration to the process with the specified pid Apply(pid int) error
// Returns the PIDs inside the cgroup set GetPids() ([]int, error)
// Returns the PIDs inside the cgroup set & all sub-cgroups GetAllPids() ([]int, error)
// Returns statistics for the cgroup set GetStats() (*Stats, error)
// Toggles the freezer cgroup according with specified state Freeze(state configs.FreezerState) error
// Destroys the cgroup set Destroy() error
// Path returns a cgroup path to the specified controller/subsystem. // For cgroupv2, the argument is unused and can be empty. Path(string) string
// Sets the cgroup as configured. Set(container *configs.Config) error
// GetPaths returns cgroup path(s) to save in a state file in order to restore later. // // For cgroup v1, a key is cgroup subsystem name, and the value is the path // to the cgroup for this subsystem. // // For cgroup v2 unified hierarchy, a key is "", and the value is the unified path. GetPaths() map[string]string
// GetCgroups returns the cgroup data as configured. GetCgroups() (*configs.Cgroup, error)
// GetFreezerState retrieves the current FreezerState of the cgroup. GetFreezerState() (configs.FreezerState, error)
// Whether the cgroup path exists or not Exists() bool }
cgroup v1
cgroup v1 支持直接管理和通过 systemd 管理两种方式,两种方式的大体结构是一致的:
graph TB
O(manager) --> A[subsystemSet]
A --> CpusetGroup -.-> B(subsystem)
A --> DevicesGroup -.-> B
A --> MemoryGroup -.-> B
A --> CpuGroup -.-> B
A --> CpuacctGroup -.-> B
A --> PidsGroup -.-> B
A --> BlkioGroup -.-> B
A --> HugetlbGroup -.-> B
A --> NetClsGroup -.-> B
A --> NetPrioGroup -.-> B
A --> PerfEventGroup -.-> B
A --> FreezerGroup -.-> B
A --> NameGroup -.-> B
直接管理
manager实现统一Manager的接口,具体定义如下:
1 2 3 4 5 6
type manager struct { mu sync.Mutex cgroups *configs.Cgroup // cgroup的配置 rootless bool// ignore permission-related errors paths map[string]string// 存储cgroup各子系统的路径,以子系统名为key(Apply是初始化) }
type subsystem interface { // Name returns the name of the subsystem. Name() string // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Removes the cgroup represented by 'cgroupData'. Remove(*cgroupData) error // Creates and joins the cgroup represented by 'cgroupData'. Apply(*cgroupData) error // Set the cgroup represented by cgroup. Set(path string, cgroup *configs.Cgroup) error }
func(m *manager)Apply(pid int)(err error) { if m.cgroups == nil { returnnil } m.mu.Lock() defer m.mu.Unlock() var c = m.cgroups d, err := getCgroupData(m.cgroups, pid) if err != nil { return err } m.paths = make(map[string]string) if c.Paths != nil { // 容器已配置各子系统所在的路径 for name, path := range c.Paths { _, err := d.path(name) if err != nil { if cgroups.IsNotFound(err) { continue } return err } m.paths[name] = path } // 把pid加入到配置的cgroup的子系统 return cgroups.EnterPid(m.paths, pid) } // 依次把pid加入到系统支持的cgroup子系统 for _, sys := range m.getSubsystems() { p, err := d.path(sys.Name()) if err != nil { // The non-presence of the devices subsystem is // considered fatal for security reasons. if cgroups.IsNotFound(err) && sys.Name() != "devices" { continue } return err } m.paths[sys.Name()] = p
// 调用subsystem的Apply接口,依赖各子系统的实现(cpuset为例分析) if err := sys.Apply(d); err != nil { // In the case of rootless (including euid=0 in userns), where an // explicit cgroup path hasn't been set, we don't bail on error in // case of permission problems. Cases where limits have been set // (and we couldn't create our own cgroup) are handled by Set. if isIgnorableError(m.rootless, err) && m.cgroups.Path == "" { delete(m.paths, sys.Name()) continue } return err }
func(s *CpusetGroup)ApplyDir(dir string, cgroup *configs.Cgroup, pid int)error { // This might happen if we have no cpuset cgroup mounted. // Just do nothing and don't fail. if dir == "" { returnnil } //获取挂载点路径 root, err := getMount(dir) if err != nil { return err } root = filepath.Dir(root) // 'ensureParent' start with parent because we don't want to // explicitly inherit from parent, it could conflict with // 'cpuset.cpu_exclusive'. if err := s.ensureParent(filepath.Dir(dir), root); err != nil { return err } if err := os.MkdirAll(dir, 0755); err != nil { return err } // We didn't inherit cpuset configs from parent, but we have // to ensure cpuset configs are set before moving task into the // cgroup. // The logic is, if user specified cpuset configs, use these // specified configs, otherwise, inherit from parent. This makes // cpuset configs work correctly with 'cpuset.cpu_exclusive', and // keep backward compatibility. if err := s.ensureCpusAndMems(dir, cgroup); err != nil { return err } // because we are not using d.join we need to place the pid into the procs file // unlike the other subsystems // 把pid加入cgroup的子系统 return cgroups.WriteCgroupProc(dir, pid) }
type legacyManager struct { mu sync.Mutex cgroups *configs.Cgroup paths map[string]string }
subsystem接口
1 2 3 4 5 6 7 8
type subsystem interface { // Name returns the name of the subsystem. Name() string // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Set the cgroup represented by cgroup. Set(path string, cgroup *configs.Cgroup) error }
// if we create a slice, the parent is defined via a Wants= if strings.HasSuffix(unitName, ".slice") { properties = append(properties, systemdDbus.PropWants(slice)) } else { // otherwise, we use Slice= properties = append(properties, systemdDbus.PropSlice(slice)) }
// only add pid if its valid, -1 is used w/ general slice creation. if pid != -1 { properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) }
// Check if we can delegate. This is only supported on systemd versions 218 and above. if !strings.HasSuffix(unitName, ".slice") { // Assume scopes always support delegation. properties = append(properties, newProp("Delegate", true)) }
// Always enable accounting, this gets us the same behaviour as the fs implementation, // plus the kernel has some problems with joining the memory cgroup at a later time. properties = append(properties, newProp("MemoryAccounting", true), newProp("CPUAccounting", true), newProp("BlockIOAccounting", true))
// Assume DefaultDependencies= will always work (the check for it was previously broken.) properties = append(properties, newProp("DefaultDependencies", false))
// We have to set kernel memory here, as we can't change it once // processes have been attached to the cgroup. if c.Resources.KernelMemory != 0 { if err := enableKmem(c); err != nil { return err } }
// 记录cgroup子系统的路径 paths := make(map[string]string) for _, s := range legacySubsystems { subsystemPath, err := getSubsystemPath(m.cgroups, s.Name()) if err != nil { // Don't fail if a cgroup hierarchy was not found, just skip this subsystem if cgroups.IsNotFound(err) { continue } return err } paths[s.Name()] = subsystemPath } m.paths = paths returnnil }