forked from mirrors/gotosocial
acc333c40b
When GTS is running in a container runtime which has configured CPU or memory limits, or under an init system that uses cgroups to impose CPU and memory limits, the values the Go runtime sees for GOMAXPROCS and GOMEMLIMIT are still based on the host resources, not the cgroup. At least for the throttling middlewares, which use GOMAXPROCS to configure their queue size, this can result in GTS running with values too big compared to the resources that will actually be available to it. This introduces 2 dependencies which can pick up resource constraints from the current cgroup and tune the Go runtime accordingly. This should result in the different queues being appropriately sized and in general more predictable performance. These dependencies are a no-op on non-Linux systems or if running in a cgroup that doesn't set a limit on CPU or memory. The automatic tuning of GOMEMLIMIT can be disabled by either explicitly setting GOMEMLIMIT yourself or by setting AUTOMEMLIMIT=off. The automatic tuning of GOMAXPROCS can similarly be counteracted by setting GOMAXPROCS yourself.
436 lines
10 KiB
Go
436 lines
10 KiB
Go
/*
|
|
Copyright The containerd Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package v2
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"math"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/containerd/cgroups/v2/stats"
|
|
|
|
"github.com/godbus/dbus/v5"
|
|
"github.com/opencontainers/runtime-spec/specs-go"
|
|
"github.com/sirupsen/logrus"
|
|
)
|
|
|
|
// cgroupProcs is the name of the per-cgroup file that lists member
// processes (see parseCgroupProcsFile).
const cgroupProcs = "cgroup.procs"

// defaultDirPerm is a directory permission mode (rwxr-xr-x).
// NOTE(review): presumably applied when creating cgroup directories — the
// call sites are outside this part of the file.
const defaultDirPerm = 0755
|
|
|
|
// defaultFilePerm is the file mode used for files created by this package.
// It is a variable rather than a constant so that the test framework can
// override it: on a real cgroup filesystem files such as "cgroup.procs"
// already exist when they are written, whereas in the tests they do not
// exist beforehand and have to be created. It stays at its zero value here
// and is set to a non-zero mode by the test code.
var defaultFilePerm os.FileMode
|
|
|
|
// remove deletes the cgroup directory tree rooted at path. A failed removal
// (for example EAGAIN or EBUSY while the kernel is still tearing the group
// down) is retried up to four more times; the pause before the first retry
// is 10ms and doubles before every following one. If all five attempts fail,
// the last error is returned wrapped together with the offending path.
func remove(path string) error {
	const maxAttempts = 5

	var lastErr error
	backoff := 10 * time.Millisecond
	for attempt := 0; attempt < maxAttempts; attempt++ {
		// Only wait between attempts, never before the first one.
		if attempt > 0 {
			time.Sleep(backoff)
			backoff *= 2
		}
		lastErr = os.RemoveAll(path)
		if lastErr == nil {
			return nil
		}
	}
	return fmt.Errorf("cgroups: unable to remove path %q: %w", path, lastErr)
}
|
|
|
|
// parseCgroupProcsFile reads the cgroup.procs file at path
// (/sys/fs/cgroup/$GROUPPATH/cgroup.procs) and returns the process IDs it
// lists, one per non-empty line, in file order.
//
// An error is returned when the file cannot be opened or read, or when a
// non-empty line is not a base-10 unsigned integer; no partial result is
// returned in that case.
func parseCgroupProcsFile(path string) ([]uint64, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var pids []uint64
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := scanner.Text()
		if line == "" {
			// Blank lines carry no PID.
			continue
		}
		pid, parseErr := strconv.ParseUint(line, 10, 0)
		if parseErr != nil {
			return nil, parseErr
		}
		pids = append(pids, pid)
	}
	if scanErr := scanner.Err(); scanErr != nil {
		return nil, scanErr
	}
	return pids, nil
}
|
|
|
|
func parseKV(raw string) (string, interface{}, error) {
|
|
parts := strings.Fields(raw)
|
|
switch len(parts) {
|
|
case 2:
|
|
v, err := parseUint(parts[1], 10, 64)
|
|
if err != nil {
|
|
// if we cannot parse as a uint, parse as a string
|
|
return parts[0], parts[1], nil
|
|
}
|
|
return parts[0], v, nil
|
|
default:
|
|
return "", 0, ErrInvalidFormat
|
|
}
|
|
}
|
|
|
|
// parseUint behaves like strconv.ParseUint, except that negative numbers are
// clamped to 0 instead of being rejected. This covers both negatives that
// fit the signed type of the given bitSize and negatives that fall below
// its minimum. Any other failure returns 0 together with the error reported
// by strconv.ParseUint.
func parseUint(s string, base, bitSize int) (uint64, error) {
	v, err := strconv.ParseUint(s, base, bitSize)
	if err == nil {
		return v, nil
	}

	// Retry as a signed integer to tell "negative" apart from "malformed".
	intValue, intErr := strconv.ParseInt(s, base, bitSize)
	switch {
	case intErr == nil && intValue < 0:
		// Representable negative value.
		return 0, nil
	case intErr != nil &&
		intErr.(*strconv.NumError).Err == strconv.ErrRange &&
		intValue < 0:
		// Negative value beyond the signed minimum: ParseInt reports a
		// range error and returns the most negative representable value.
		return 0, nil
	}
	return 0, err
}
|
|
|
|
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
|
func parseCgroupFile(path string) (string, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer f.Close()
|
|
return parseCgroupFromReader(f)
|
|
}
|
|
|
|
// parseCgroupFromReader scans /proc/PID/cgroup formatted content from r and
// returns the path of the first entry whose hierarchy ID is "0" and whose
// controller list is empty, e.g. the path part of
// "0::/user.slice/user-1001.slice/session-1.scope".
//
// Every line must have the form "ID:CONTROLLERS:PATH"; a line with fewer
// than three colon-separated fields aborts parsing with an error. An error
// is also returned when reading fails or when no matching entry exists.
func parseCgroupFromReader(r io.Reader) (string, error) {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := scanner.Text()
		// Split at most twice so that colons inside the path are preserved.
		fields := strings.SplitN(line, ":", 3)
		if len(fields) < 3 {
			return "", fmt.Errorf("invalid cgroup entry: %q", line)
		}

		hierarchyID, controllers, cgroupPath := fields[0], fields[1], fields[2]
		if hierarchyID != "0" || controllers != "" {
			continue
		}
		return cgroupPath, nil
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return "", fmt.Errorf("cgroup path not found")
}
|
|
|
|
// ToResources converts the oci LinuxResources struct into a
|
|
// v2 Resources type for use with this package.
|
|
//
|
|
// converting cgroups configuration from v1 to v2
|
|
// ref: https://github.com/containers/crun/blob/master/crun.1.md#cgroup-v2
|
|
func ToResources(spec *specs.LinuxResources) *Resources {
|
|
var resources Resources
|
|
if cpu := spec.CPU; cpu != nil {
|
|
resources.CPU = &CPU{
|
|
Cpus: cpu.Cpus,
|
|
Mems: cpu.Mems,
|
|
}
|
|
if shares := cpu.Shares; shares != nil {
|
|
convertedWeight := 1 + ((*shares-2)*9999)/262142
|
|
resources.CPU.Weight = &convertedWeight
|
|
}
|
|
if period := cpu.Period; period != nil {
|
|
resources.CPU.Max = NewCPUMax(cpu.Quota, period)
|
|
}
|
|
}
|
|
if mem := spec.Memory; mem != nil {
|
|
resources.Memory = &Memory{}
|
|
if swap := mem.Swap; swap != nil {
|
|
resources.Memory.Swap = swap
|
|
}
|
|
if l := mem.Limit; l != nil {
|
|
resources.Memory.Max = l
|
|
}
|
|
if l := mem.Reservation; l != nil {
|
|
resources.Memory.Low = l
|
|
}
|
|
}
|
|
if hugetlbs := spec.HugepageLimits; hugetlbs != nil {
|
|
hugeTlbUsage := HugeTlb{}
|
|
for _, hugetlb := range hugetlbs {
|
|
hugeTlbUsage = append(hugeTlbUsage, HugeTlbEntry{
|
|
HugePageSize: hugetlb.Pagesize,
|
|
Limit: hugetlb.Limit,
|
|
})
|
|
}
|
|
resources.HugeTlb = &hugeTlbUsage
|
|
}
|
|
if pids := spec.Pids; pids != nil {
|
|
resources.Pids = &Pids{
|
|
Max: pids.Limit,
|
|
}
|
|
}
|
|
if i := spec.BlockIO; i != nil {
|
|
resources.IO = &IO{}
|
|
if i.Weight != nil {
|
|
resources.IO.BFQ.Weight = 1 + (*i.Weight-10)*9999/990
|
|
}
|
|
for t, devices := range map[IOType][]specs.LinuxThrottleDevice{
|
|
ReadBPS: i.ThrottleReadBpsDevice,
|
|
WriteBPS: i.ThrottleWriteBpsDevice,
|
|
ReadIOPS: i.ThrottleReadIOPSDevice,
|
|
WriteIOPS: i.ThrottleWriteIOPSDevice,
|
|
} {
|
|
for _, d := range devices {
|
|
resources.IO.Max = append(resources.IO.Max, Entry{
|
|
Type: t,
|
|
Major: d.Major,
|
|
Minor: d.Minor,
|
|
Rate: d.Rate,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
if i := spec.Rdma; i != nil {
|
|
resources.RDMA = &RDMA{}
|
|
for device, value := range spec.Rdma {
|
|
if device != "" && (value.HcaHandles != nil && value.HcaObjects != nil) {
|
|
resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{
|
|
Device: device,
|
|
HcaHandles: *value.HcaHandles,
|
|
HcaObjects: *value.HcaObjects,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
return &resources
|
|
}
|
|
|
|
// Gets uint64 parsed content of single value cgroup stat file
|
|
func getStatFileContentUint64(filePath string) uint64 {
|
|
contents, err := ioutil.ReadFile(filePath)
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
trimmed := strings.TrimSpace(string(contents))
|
|
if trimmed == "max" {
|
|
return math.MaxUint64
|
|
}
|
|
|
|
res, err := parseUint(trimmed, 10, 64)
|
|
if err != nil {
|
|
logrus.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), filePath)
|
|
return res
|
|
}
|
|
|
|
return res
|
|
}
|
|
|
|
func readIoStats(path string) []*stats.IOEntry {
|
|
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
|
var usage []*stats.IOEntry
|
|
fpath := filepath.Join(path, "io.stat")
|
|
currentData, err := ioutil.ReadFile(fpath)
|
|
if err != nil {
|
|
return usage
|
|
}
|
|
entries := strings.Split(string(currentData), "\n")
|
|
|
|
for _, entry := range entries {
|
|
parts := strings.Split(entry, " ")
|
|
if len(parts) < 2 {
|
|
continue
|
|
}
|
|
majmin := strings.Split(parts[0], ":")
|
|
if len(majmin) != 2 {
|
|
continue
|
|
}
|
|
major, err := strconv.ParseUint(majmin[0], 10, 0)
|
|
if err != nil {
|
|
return usage
|
|
}
|
|
minor, err := strconv.ParseUint(majmin[1], 10, 0)
|
|
if err != nil {
|
|
return usage
|
|
}
|
|
parts = parts[1:]
|
|
ioEntry := stats.IOEntry{
|
|
Major: major,
|
|
Minor: minor,
|
|
}
|
|
for _, s := range parts {
|
|
keyPairValue := strings.Split(s, "=")
|
|
if len(keyPairValue) != 2 {
|
|
continue
|
|
}
|
|
v, err := strconv.ParseUint(keyPairValue[1], 10, 0)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
switch keyPairValue[0] {
|
|
case "rbytes":
|
|
ioEntry.Rbytes = v
|
|
case "wbytes":
|
|
ioEntry.Wbytes = v
|
|
case "rios":
|
|
ioEntry.Rios = v
|
|
case "wios":
|
|
ioEntry.Wios = v
|
|
}
|
|
}
|
|
usage = append(usage, &ioEntry)
|
|
}
|
|
return usage
|
|
}
|
|
|
|
func rdmaStats(filepath string) []*stats.RdmaEntry {
|
|
currentData, err := ioutil.ReadFile(filepath)
|
|
if err != nil {
|
|
return []*stats.RdmaEntry{}
|
|
}
|
|
return toRdmaEntry(strings.Split(string(currentData), "\n"))
|
|
}
|
|
|
|
func parseRdmaKV(raw string, entry *stats.RdmaEntry) {
|
|
var value uint64
|
|
var err error
|
|
|
|
parts := strings.Split(raw, "=")
|
|
switch len(parts) {
|
|
case 2:
|
|
if parts[1] == "max" {
|
|
value = math.MaxUint32
|
|
} else {
|
|
value, err = parseUint(parts[1], 10, 32)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
if parts[0] == "hca_handle" {
|
|
entry.HcaHandles = uint32(value)
|
|
} else if parts[0] == "hca_object" {
|
|
entry.HcaObjects = uint32(value)
|
|
}
|
|
}
|
|
}
|
|
|
|
func toRdmaEntry(strEntries []string) []*stats.RdmaEntry {
|
|
var rdmaEntries []*stats.RdmaEntry
|
|
for i := range strEntries {
|
|
parts := strings.Fields(strEntries[i])
|
|
switch len(parts) {
|
|
case 3:
|
|
entry := new(stats.RdmaEntry)
|
|
entry.Device = parts[0]
|
|
parseRdmaKV(parts[1], entry)
|
|
parseRdmaKV(parts[2], entry)
|
|
|
|
rdmaEntries = append(rdmaEntries, entry)
|
|
default:
|
|
continue
|
|
}
|
|
}
|
|
return rdmaEntries
|
|
}
|
|
|
|
// isUnitExists returns true if the error is that a systemd unit already exists.
|
|
func isUnitExists(err error) bool {
|
|
if err != nil {
|
|
if dbusError, ok := err.(dbus.Error); ok {
|
|
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// systemdUnitFromPath returns the final element of path, i.e. everything
// after the last path separator, which is used as the systemd unit name.
// The result is empty when path is empty or ends in a separator.
func systemdUnitFromPath(path string) string {
	_, unitName := filepath.Split(path)
	return unitName
}
|
|
|
|
func readHugeTlbStats(path string) []*stats.HugeTlbStat {
|
|
var usage = []*stats.HugeTlbStat{}
|
|
var keyUsage = make(map[string]*stats.HugeTlbStat)
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return usage
|
|
}
|
|
files, err := f.Readdir(-1)
|
|
f.Close()
|
|
if err != nil {
|
|
return usage
|
|
}
|
|
|
|
for _, file := range files {
|
|
if strings.Contains(file.Name(), "hugetlb") &&
|
|
(strings.HasSuffix(file.Name(), "max") || strings.HasSuffix(file.Name(), "current")) {
|
|
var hugeTlb *stats.HugeTlbStat
|
|
var ok bool
|
|
fileName := strings.Split(file.Name(), ".")
|
|
pageSize := fileName[1]
|
|
if hugeTlb, ok = keyUsage[pageSize]; !ok {
|
|
hugeTlb = &stats.HugeTlbStat{}
|
|
}
|
|
hugeTlb.Pagesize = pageSize
|
|
out, err := ioutil.ReadFile(filepath.Join(path, file.Name()))
|
|
if err != nil {
|
|
continue
|
|
}
|
|
var value uint64
|
|
stringVal := strings.TrimSpace(string(out))
|
|
if stringVal == "max" {
|
|
value = math.MaxUint64
|
|
} else {
|
|
value, err = strconv.ParseUint(stringVal, 10, 64)
|
|
}
|
|
if err != nil {
|
|
continue
|
|
}
|
|
switch fileName[2] {
|
|
case "max":
|
|
hugeTlb.Max = value
|
|
case "current":
|
|
hugeTlb.Current = value
|
|
}
|
|
keyUsage[pageSize] = hugeTlb
|
|
}
|
|
}
|
|
for _, entry := range keyUsage {
|
|
usage = append(usage, entry)
|
|
}
|
|
return usage
|
|
}
|