mirror of https://gitee.com/openkylin/runc.git
merge upstream 1.1.0
This commit is contained in:
parent
f67506f80e
commit
1dc29861c3
|
@ -0,0 +1,158 @@
|
|||
---
|
||||
# We use Cirrus for Vagrant tests and native CentOS 7 and 8, because macOS
|
||||
# instances of GHA are too slow and flaky, and Linux instances of GHA do not
|
||||
# support KVM.
|
||||
|
||||
# NOTE Cirrus execution environments lack a terminal, needed for
|
||||
# some integration tests. So we use `ssh -tt` command to fake a terminal.
|
||||
|
||||
task:
|
||||
timeout_in: 30m
|
||||
|
||||
env:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
HOME: /root
|
||||
# yamllint disable rule:key-duplicates
|
||||
matrix:
|
||||
DISTRO: fedora
|
||||
|
||||
name: vagrant DISTRO:$DISTRO
|
||||
|
||||
compute_engine_instance:
|
||||
image_project: cirrus-images
|
||||
image: family/docker-kvm
|
||||
platform: linux
|
||||
nested_virtualization: true
|
||||
# CPU limit: `16 / NTASK`: see https://cirrus-ci.org/faq/#are-there-any-limits
|
||||
cpu: 8
|
||||
# Memory limit: `4GB * NCPU`
|
||||
memory: 32G
|
||||
|
||||
host_info_script: |
|
||||
uname -a
|
||||
echo "-----"
|
||||
cat /etc/os-release
|
||||
echo "-----"
|
||||
cat /proc/cpuinfo
|
||||
echo "-----"
|
||||
df -T
|
||||
install_libvirt_vagrant_script: |
|
||||
apt-get update
|
||||
apt-get install -y libvirt-daemon libvirt-daemon-system vagrant vagrant-libvirt
|
||||
systemctl enable --now libvirtd
|
||||
vagrant_cache:
|
||||
fingerprint_script: uname -s ; cat Vagrantfile.$DISTRO
|
||||
folder: /root/.vagrant.d
|
||||
vagrant_up_script: |
|
||||
ln -sf Vagrantfile.$DISTRO Vagrantfile
|
||||
# Retry if it fails (download.fedoraproject.org returns 404 sometimes)
|
||||
vagrant up --no-tty || vagrant up --no-tty
|
||||
mkdir -p -m 0700 /root/.ssh
|
||||
vagrant ssh-config >> /root/.ssh/config
|
||||
guest_info_script: |
|
||||
ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release"'
|
||||
unit_tests_script: |
|
||||
ssh default 'sudo -i make -C /vagrant localunittest'
|
||||
integration_systemd_script: |
|
||||
ssh -tt default "sudo -i make -C /vagrant localintegration RUNC_USE_SYSTEMD=yes"
|
||||
integration_fs_script: |
|
||||
ssh -tt default "sudo -i make -C /vagrant localintegration"
|
||||
integration_systemd_rootless_script: |
|
||||
ssh -tt default "sudo -i make -C /vagrant localrootlessintegration RUNC_USE_SYSTEMD=yes"
|
||||
integration_fs_rootless_script: |
|
||||
ssh -tt default "sudo -i make -C /vagrant localrootlessintegration"
|
||||
|
||||
task:
|
||||
timeout_in: 30m
|
||||
|
||||
env:
|
||||
HOME: /root
|
||||
CIRRUS_WORKING_DIR: /home/runc
|
||||
GO_VERSION: "1.17.3"
|
||||
BATS_VERSION: "v1.3.0"
|
||||
# yamllint disable rule:key-duplicates
|
||||
matrix:
|
||||
DISTRO: centos-7
|
||||
DISTRO: centos-stream-8
|
||||
|
||||
name: ci / $DISTRO
|
||||
|
||||
compute_engine_instance:
|
||||
image_project: centos-cloud
|
||||
image: family/$DISTRO
|
||||
platform: linux
|
||||
cpu: 4
|
||||
memory: 8G
|
||||
|
||||
install_dependencies_script: |
|
||||
case $DISTRO in
|
||||
centos-7)
|
||||
(cd /etc/yum.repos.d && curl -O https://copr.fedorainfracloud.org/coprs/adrian/criu-el7/repo/epel-7/adrian-criu-el7-epel-7.repo)
|
||||
# sysctl
|
||||
echo "user.max_user_namespaces=15076" > /etc/sysctl.d/userns.conf
|
||||
sysctl --system
|
||||
;;
|
||||
centos-stream-8)
|
||||
yum config-manager --set-enabled powertools # for glibc-static
|
||||
;;
|
||||
esac
|
||||
# Work around dnf mirror failures by retrying a few times.
|
||||
for i in $(seq 0 2); do
|
||||
sleep $i
|
||||
yum install -y -q gcc git iptables jq glibc-static libseccomp-devel make criu fuse-sshfs && break
|
||||
done
|
||||
[ $? -eq 0 ] # fail if yum failed
|
||||
# install Go
|
||||
curl -fsSL "https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz" | tar Cxz /usr/local
|
||||
# install bats
|
||||
cd /tmp
|
||||
git clone https://github.com/bats-core/bats-core
|
||||
cd bats-core
|
||||
git checkout $BATS_VERSION
|
||||
./install.sh /usr/local
|
||||
cd -
|
||||
# Add a user for rootless tests
|
||||
useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
|
||||
# Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
|
||||
ssh-keygen -t ecdsa -N "" -f /root/rootless.key
|
||||
mkdir -m 0700 -p /home/rootless/.ssh
|
||||
cp /root/rootless.key /home/rootless/.ssh/id_ecdsa
|
||||
cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
|
||||
chown -R rootless.rootless /home/rootless
|
||||
# set PATH
|
||||
echo 'export PATH=/usr/local/go/bin:/usr/local/bin:$PATH' >> /root/.bashrc
|
||||
# Setup ssh localhost for terminal emulation (script -e did not work)
|
||||
ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -N ""
|
||||
cat /root/.ssh/id_ed25519.pub >> /root/.ssh/authorized_keys
|
||||
chmod 400 /root/.ssh/authorized_keys
|
||||
ssh-keyscan localhost >> /root/.ssh/known_hosts
|
||||
echo -e "Host localhost\n\tStrictHostKeyChecking no\t\nIdentityFile /root/.ssh/id_ed25519\n" >> /root/.ssh/config
|
||||
sed -e "s,PermitRootLogin.*,PermitRootLogin prohibit-password,g" -i /etc/ssh/sshd_config
|
||||
systemctl restart sshd
|
||||
host_info_script: |
|
||||
uname -a
|
||||
echo "-----"
|
||||
cat /etc/os-release
|
||||
echo "-----"
|
||||
cat /proc/cpuinfo
|
||||
echo "-----"
|
||||
df -T
|
||||
echo "-----"
|
||||
systemctl --version
|
||||
unit_tests_script: |
|
||||
ssh -tt localhost "make -C /home/runc localunittest"
|
||||
integration_systemd_script: |
|
||||
ssh -tt localhost "make -C /home/runc localintegration RUNC_USE_SYSTEMD=yes"
|
||||
integration_fs_script: |
|
||||
ssh -tt localhost "make -C /home/runc localintegration"
|
||||
integration_systemd_rootless_script: |
|
||||
echo "SKIP: integration_systemd_rootless_script requires cgroup v2"
|
||||
integration_fs_rootless_script: |
|
||||
case $DISTRO in
|
||||
centos-7)
|
||||
echo "SKIP: FIXME: integration_fs_rootless_script is skipped because of EPERM on writing cgroup.procs"
|
||||
;;
|
||||
centos-stream-8)
|
||||
ssh -tt localhost "make -C /home/runc localrootlessintegration"
|
||||
;;
|
||||
esac
|
|
@ -0,0 +1,3 @@
|
|||
[codespell]
|
||||
skip = ./vendor,./.git
|
||||
ignore-words-list = clos,creat
|
|
@ -0,0 +1,25 @@
|
|||
# Please see the documentation for all configuration options:
|
||||
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
# Dependencies listed in go.mod
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/" # Location of package manifests
|
||||
schedule:
|
||||
interval: "daily"
|
||||
ignore:
|
||||
# a regression in v1.22.2, see https://github.com/urfave/cli/issues/1092
|
||||
- dependency-name: "github.com/urfave/cli"
|
||||
|
||||
# Dependencies listed in .github/workflows/*.yml
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
|
||||
# Dependencies listed in Dockerfile
|
||||
- package-ecosystem: "docker"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
|
@ -0,0 +1,129 @@
|
|||
# NOTE Github Actions execution environments lack a terminal, needed for
|
||||
# some integration tests. So we use `script` command to fake a terminal.
|
||||
|
||||
name: ci
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- v*
|
||||
branches:
|
||||
- master
|
||||
- release-*
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
# Don't ignore C warnings. Note that the output of "go env CGO_CFLAGS" by default is "-g -O2", so we keep them.
|
||||
CGO_CFLAGS: -g -O2 -Werror
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
go-version: [1.16.x, 1.17.x]
|
||||
rootless: ["rootless", ""]
|
||||
race: ["-race", ""]
|
||||
criu: [""]
|
||||
include:
|
||||
# Also test against latest criu-dev
|
||||
- go-version: 1.17.x
|
||||
rootless: ""
|
||||
race: ""
|
||||
criu: "criu-dev"
|
||||
|
||||
steps:
|
||||
|
||||
- name: checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: install deps
|
||||
if: matrix.criu == ''
|
||||
env:
|
||||
REPO: https://download.opensuse.org/repositories/devel:/tools:/criu/xUbuntu_20.04
|
||||
run: |
|
||||
# criu repo
|
||||
curl -fSsl $REPO/Release.key | sudo apt-key add -
|
||||
echo "deb $REPO/ /" | sudo tee /etc/apt/sources.list.d/criu.list
|
||||
sudo apt update
|
||||
sudo apt install libseccomp-dev criu sshfs
|
||||
|
||||
- name: install deps (criu ${{ matrix.criu }})
|
||||
if: matrix.criu != ''
|
||||
run: |
|
||||
sudo apt -q update
|
||||
sudo apt -q install libseccomp-dev sshfs \
|
||||
libcap-dev libnet1-dev libnl-3-dev \
|
||||
libprotobuf-c-dev libprotobuf-dev protobuf-c-compiler protobuf-compiler
|
||||
git clone https://github.com/checkpoint-restore/criu.git ~/criu
|
||||
(cd ~/criu && git checkout ${{ matrix.criu }} && sudo make install-criu)
|
||||
rm -rf ~/criu
|
||||
|
||||
- name: install go ${{ matrix.go-version }}
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
stable: '!contains(${{ matrix.go-version }}, "beta") && !contains(${{ matrix.go-version }}, "rc")'
|
||||
go-version: ${{ matrix.go-version }}
|
||||
|
||||
- name: build
|
||||
run: sudo -E PATH="$PATH" make EXTRA_FLAGS="${{ matrix.race }}" all
|
||||
|
||||
- name: install bats
|
||||
uses: mig4/setup-bats@v1
|
||||
with:
|
||||
bats-version: 1.3.0
|
||||
|
||||
- name: unit test
|
||||
if: matrix.rootless != 'rootless'
|
||||
run: sudo -E PATH="$PATH" -- make TESTFLAGS="${{ matrix.race }}" localunittest
|
||||
|
||||
- name: add rootless user
|
||||
if: matrix.rootless == 'rootless'
|
||||
run: |
|
||||
sudo useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
|
||||
# Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
|
||||
ssh-keygen -t ecdsa -N "" -f $HOME/rootless.key
|
||||
sudo mkdir -m 0700 -p /home/rootless/.ssh
|
||||
sudo cp $HOME/rootless.key /home/rootless/.ssh/id_ecdsa
|
||||
sudo cp $HOME/rootless.key.pub /home/rootless/.ssh/authorized_keys
|
||||
sudo chown -R rootless.rootless /home/rootless
|
||||
|
||||
- name: integration test (fs driver)
|
||||
run: sudo -E PATH="$PATH" script -e -c 'make local${{ matrix.rootless }}integration'
|
||||
|
||||
- name: integration test (systemd driver)
|
||||
# can't use systemd driver with cgroupv1
|
||||
if: matrix.rootless != 'rootless'
|
||||
run: sudo -E PATH="$PATH" script -e -c 'make RUNC_USE_SYSTEMD=yes local${{ matrix.rootless }}integration'
|
||||
|
||||
# We need to continue support for 32-bit ARM.
|
||||
# However, we do not have 32-bit ARM CI, so we use i386 for testing 32bit stuff.
|
||||
# We are not interested in providing official support for i386.
|
||||
cross-i386:
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
steps:
|
||||
|
||||
- name: checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: install deps
|
||||
run: |
|
||||
sudo dpkg --add-architecture i386
|
||||
# add criu repo
|
||||
sudo add-apt-repository -y ppa:criu/ppa
|
||||
# apt-add-repository runs apt update so we don't have to.
|
||||
|
||||
# Due to a bug in apt, we have to update it first
|
||||
# (see https://bugs.launchpad.net/ubuntu-cdimage/+bug/1871268)
|
||||
sudo apt -q install apt
|
||||
sudo apt -q install libseccomp-dev libseccomp-dev:i386 gcc-multilib criu
|
||||
|
||||
- name: install go
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.x # Latest stable
|
||||
|
||||
- name: unit test
|
||||
# cgo is disabled by default when cross-compiling
|
||||
run: sudo -E PATH="$PATH" -- make GOARCH=386 CGO_ENABLED=1 localunittest
|
|
@ -0,0 +1,198 @@
|
|||
name: validate
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- v*
|
||||
branches:
|
||||
- master
|
||||
- release-*
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
|
||||
lint:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: install deps
|
||||
run: |
|
||||
sudo apt -q update
|
||||
sudo apt -q install libseccomp-dev
|
||||
- uses: golangci/golangci-lint-action@v2
|
||||
with:
|
||||
# must be specified without patch version
|
||||
version: v1.42
|
||||
|
||||
lint-extra:
|
||||
# Extra linters, only checking new code from pull requests.
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-20.04
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: install deps
|
||||
run: |
|
||||
sudo apt -q update
|
||||
sudo apt -q install libseccomp-dev
|
||||
- uses: golangci/golangci-lint-action@v2
|
||||
with:
|
||||
only-new-issues: true
|
||||
args: --config .golangci-extra.yml
|
||||
# must be specified without patch version
|
||||
version: v1.43
|
||||
|
||||
|
||||
compile-buildtags:
|
||||
runs-on: ubuntu-20.04
|
||||
env:
|
||||
# Don't ignore C warnings. Note that the output of "go env CGO_CFLAGS" by default is "-g -O2", so we keep them.
|
||||
CGO_CFLAGS: -g -O2 -Werror
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: install go
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.x # Latest stable
|
||||
- name: compile with no build tags
|
||||
run: make BUILDTAGS=""
|
||||
|
||||
codespell:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: install deps
|
||||
# Version of codespell bundled with Ubuntu is way old, so use pip.
|
||||
run: pip install codespell
|
||||
- name: run codespell
|
||||
run: codespell
|
||||
|
||||
shfmt:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: vars
|
||||
run: |
|
||||
echo "VERSION=3.3.1" >> $GITHUB_ENV
|
||||
echo "$(go env GOPATH)/bin" >> $GITHUB_PATH
|
||||
- name: cache go mod and $GOCACHE
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
~/go/pkg/mod
|
||||
~/.cache/go-build
|
||||
key: ${{ runner.os }}-shfmt-${{ env.VERSION }}
|
||||
restore-keys: ${{ runner.os }}-shfmt-
|
||||
- name: install shfmt
|
||||
run: |
|
||||
command -v shfmt || \
|
||||
(cd ~ && GO111MODULE=on time go get mvdan.cc/sh/v3/cmd/shfmt@v$VERSION)
|
||||
- name: shfmt
|
||||
run: make shfmt
|
||||
|
||||
shellcheck:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: vars
|
||||
run: |
|
||||
echo 'VERSION=v0.7.2' >> $GITHUB_ENV
|
||||
echo 'BASEURL=https://github.com/koalaman/shellcheck/releases/download' >> $GITHUB_ENV
|
||||
echo 'SHA256SUM=12ee2e0b90a3d1e9cae24ac9b2838be66b48573cb2c8e8f3c566b959df6f050c' >> $GITHUB_ENV
|
||||
echo ~/bin >> $GITHUB_PATH
|
||||
- name: install shellcheck
|
||||
run: |
|
||||
mkdir ~/bin
|
||||
curl -sSfL --retry 5 $BASEURL/$VERSION/shellcheck-$VERSION.linux.x86_64.tar.xz |
|
||||
tar xfJ - -C ~/bin --strip 1 shellcheck-$VERSION/shellcheck
|
||||
sha256sum ~/bin/shellcheck | grep -q $SHA256SUM
|
||||
# make sure to remove the old version
|
||||
sudo rm -f /usr/bin/shellcheck
|
||||
- uses: lumaxis/shellcheck-problem-matchers@v1
|
||||
- name: shellcheck
|
||||
run: |
|
||||
make shellcheck
|
||||
|
||||
deps:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: install go
|
||||
uses: actions/setup-go@v2
|
||||
with:
|
||||
go-version: 1.x # Latest stable
|
||||
- name: cache go mod and $GOCACHE
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
~/go/pkg/mod
|
||||
~/.cache/go-build
|
||||
key: ${{ runner.os }}-go.sum-${{ hashFiles('**/go.sum') }}
|
||||
restore-keys: ${{ runner.os }}-go.sum-
|
||||
- name: verify deps
|
||||
run: make verify-dependencies
|
||||
|
||||
|
||||
commit:
|
||||
runs-on: ubuntu-20.04
|
||||
# Only check commits on pull requests.
|
||||
if: github.event_name == 'pull_request'
|
||||
steps:
|
||||
- name: get pr commits
|
||||
id: 'get-pr-commits'
|
||||
uses: tim-actions/get-pr-commits@v1.1.0
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: check subject line length
|
||||
uses: tim-actions/commit-message-checker-with-regex@v0.3.1
|
||||
with:
|
||||
commits: ${{ steps.get-pr-commits.outputs.commits }}
|
||||
pattern: '^.{0,72}(\n.*)*$'
|
||||
error: 'Subject too long (max 72)'
|
||||
|
||||
|
||||
cfmt:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: install deps
|
||||
run: |
|
||||
sudo apt -qq update
|
||||
sudo apt -qq install indent
|
||||
- name: cfmt
|
||||
run: |
|
||||
make cfmt
|
||||
git diff --exit-code
|
||||
|
||||
|
||||
release:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
# We have to run this under Docker as Ubuntu (host) does not support all
|
||||
# the architectures we want to compile test against, and Dockerfile uses
|
||||
# Debian (which does).
|
||||
#
|
||||
# XXX: as currently this is the only job that is using Docker, we are
|
||||
# building and using the runcimage locally. In case more jobs running
|
||||
# under Docker will emerge, it will be good to have a separate make
|
||||
# runcimage job and share its result (the docker image) with whoever
|
||||
# needs it.
|
||||
- uses: satackey/action-docker-layer-caching@v0.0.11
|
||||
continue-on-error: true
|
||||
- name: build docker image
|
||||
run: make runcimage
|
||||
- name: make releaseall
|
||||
run: make releaseall
|
||||
- name: upload artifacts
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: release-${{ github.run_id }}
|
||||
path: release/*
|
|
@ -2,5 +2,9 @@ vendor/pkg
|
|||
/runc
|
||||
/runc-*
|
||||
contrib/cmd/recvtty/recvtty
|
||||
contrib/cmd/sd-helper/sd-helper
|
||||
contrib/cmd/seccompagent/seccompagent
|
||||
man/man8
|
||||
release
|
||||
Vagrantfile
|
||||
.vagrant
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
# This is golangci-lint config file which is used to check new code in
|
||||
# github PRs only (see lint-extra job in .github/workflows/validate.yml).
|
||||
#
|
||||
# For the default linter config, see .golangci.yml. This config should
|
||||
# only enable additional linters not enabled in the default config.
|
||||
|
||||
run:
|
||||
build-tags:
|
||||
- seccomp
|
||||
|
||||
linters:
|
||||
disable-all: true
|
||||
enable:
|
||||
- godot
|
||||
- revive
|
|
@ -0,0 +1,12 @@
|
|||
# For documentation, see https://golangci-lint.run/usage/configuration/
|
||||
|
||||
run:
|
||||
build-tags:
|
||||
- seccomp
|
||||
|
||||
linters:
|
||||
enable:
|
||||
- gofumpt
|
||||
- errorlint
|
||||
- unconvert
|
||||
- unparam
|
|
@ -1,10 +0,0 @@
|
|||
approve_by_comment: true
|
||||
approve_regex: ^LGTM
|
||||
reject_regex: ^Rejected
|
||||
reset_on_push: true
|
||||
author_approval: ignored
|
||||
reviewers:
|
||||
teams:
|
||||
- runc-maintainers
|
||||
name: default
|
||||
required: 2
|
54
.travis.yml
54
.travis.yml
|
@ -1,54 +0,0 @@
|
|||
dist: bionic
|
||||
language: go
|
||||
go:
|
||||
- 1.11.x
|
||||
- 1.12.x
|
||||
- tip
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- go: 1.12.x
|
||||
env:
|
||||
- RUNC_USE_SYSTEMD=1
|
||||
script:
|
||||
- make BUILDTAGS="${BUILDTAGS}" all
|
||||
- sudo PATH="$PATH" make localintegration RUNC_USE_SYSTEMD=1
|
||||
- go: 1.12.x
|
||||
env:
|
||||
- VIRTUALBOX_VERSION=6.0
|
||||
- VAGRANT_VERSION=2.2.6
|
||||
- FEDORA_VERSION=31
|
||||
before_install:
|
||||
- cat /proc/cpuinfo
|
||||
- wget -q https://www.virtualbox.org/download/oracle_vbox_2016.asc -O- | sudo apt-key add - && sudo sh -c "echo deb https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib >> /etc/apt/sources.list" && sudo apt-get update && sudo apt-get install -yq build-essential gcc make linux-headers-$(uname -r) virtualbox-${VIRTUALBOX_VERSION} && sudo usermod -aG vboxusers $(whoami)
|
||||
- wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb && sudo dpkg -i vagrant_${VAGRANT_VERSION}_$(uname -m).deb
|
||||
- vagrant init bento/fedora-${FEDORA_VERSION} && vagrant up && mkdir -p ~/.ssh && vagrant ssh-config >> ~/.ssh/config
|
||||
- ssh default sudo dnf install -y podman
|
||||
script:
|
||||
- ssh default sudo podman build -t test /vagrant
|
||||
- ssh default sudo podman run --privileged --cgroupns=private test make localunittest
|
||||
allow_failures:
|
||||
- go: tip
|
||||
|
||||
go_import_path: github.com/opencontainers/runc
|
||||
|
||||
# `make ci` uses Docker.
|
||||
sudo: required
|
||||
services:
|
||||
- docker
|
||||
|
||||
env:
|
||||
global:
|
||||
- BUILDTAGS="seccomp apparmor selinux ambient"
|
||||
|
||||
before_install:
|
||||
- sudo apt-get -qq update
|
||||
- sudo apt-get install -y libseccomp-dev
|
||||
- go get -u golang.org/x/lint/golint
|
||||
- go get -u github.com/vbatts/git-validation
|
||||
- env | grep TRAVIS_
|
||||
|
||||
script:
|
||||
- git-validation -run DCO,short-subject -v
|
||||
- make BUILDTAGS="${BUILDTAGS}"
|
||||
- make BUILDTAGS="${BUILDTAGS}" clean ci cross
|
|
@ -0,0 +1,248 @@
|
|||
# Changelog
|
||||
This file documents all notable changes made to this project since runc 1.0.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [1.1.0] - 2022-01-14
|
||||
|
||||
> A plan depends as much upon execution as it does upon concept.
|
||||
|
||||
### Changed
|
||||
* libcontainer will now refuse to build without the nsenter package being
|
||||
correctly compiled (specifically this requires CGO to be enabled). This
|
||||
should avoid folks accidentally creating broken runc binaries (and
|
||||
incorrectly importing our internal libraries into their projects). (#3331)
|
||||
|
||||
## [1.1.0-rc.1] - 2021-12-14
|
||||
|
||||
> He who controls the spice controls the universe.
|
||||
|
||||
### Deprecated
|
||||
* runc run/start now warns if a new container cgroup is non-empty or frozen;
|
||||
this warning will become an error in runc 1.2. (#3132, #3223)
|
||||
* runc can only be built with Go 1.16 or later from this release onwards.
|
||||
(#3100, #3245, #3325)
|
||||
|
||||
### Removed
|
||||
* `cgroup.GetHugePageSizes` has been removed entirely, and been replaced with
|
||||
`cgroup.HugePageSizes` which is more efficient. (#3234)
|
||||
* `intelrdt.GetIntelRdtPath` has been removed. Users who were using this
|
||||
function to get the intelrdt root should use the new `intelrdt.Root`
|
||||
instead. (#2920, #3239)
|
||||
|
||||
### Added
|
||||
* Add support for RDMA cgroup added in Linux 4.11. (#2883)
|
||||
* runc exec now produces exit code of 255 when the exec failed.
|
||||
This may help in distinguishing between runc exec failures
|
||||
(such as invalid options, non-running container or non-existent
|
||||
binary etc.) and failures of the command being executed. (#3073)
|
||||
* runc run: new `--keep` option to skip removal of exited containers' artefacts.
|
||||
This might be useful to check the state (e.g. of cgroup controllers) after
|
||||
the container has exited. (#2817, #2825)
|
||||
* seccomp: add support for `SCMP_ACT_KILL_PROCESS` and `SCMP_ACT_KILL_THREAD`
|
||||
(the latter is just an alias for `SCMP_ACT_KILL`). (#3204)
|
||||
* seccomp: add support for `SCMP_ACT_NOTIFY` (seccomp actions). This allows
|
||||
users to create sophisticated seccomp filters where syscalls can be
|
||||
efficiently emulated by privileged processes on the host. (#2682)
|
||||
* checkpoint/restore: add an option (`--lsm-mount-context`) to set
|
||||
a different LSM mount context on restore. (#3068)
|
||||
* runc releases are now cross-compiled for several architectures. Static
|
||||
builds for said architectures will be available for all future releases.
|
||||
(#3197)
|
||||
* intelrdt: support ClosID parameter. (#2920)
|
||||
* runc exec --cgroup: an option to specify a (non-top) in-container cgroup
|
||||
to use for the process being executed. (#3040, #3059)
|
||||
* cgroup v1 controllers now support hybrid hierarchy (i.e. when on a cgroup v1
|
||||
machine a cgroup2 filesystem is mounted to /sys/fs/cgroup/unified, runc
|
||||
run/exec now adds the container to the appropriate cgroup under it). (#2087,
|
||||
#3059)
|
||||
* sysctl: allow slashes in sysctl names, to better match `sysctl(8)`'s
|
||||
behaviour. (#3254, #3257)
|
||||
* mounts: add support for bind-mounts which are inaccessible after switching
|
||||
the user namespace. Note that this does not permit the container any
|
||||
additional access to the host filesystem, it simply allows containers to
|
||||
have bind-mounts configured for paths the user can access but have
|
||||
restrictive access control settings for other users. (#2576)
|
||||
* Add support for recursive mount attributes using `mount_setattr(2)`. These
|
||||
have the same names as the proposed `mount(8)` options -- just prepend `r`
|
||||
to the option name (such as `rro`). (#3272)
|
||||
* Add `runc features` subcommand to allow runc users to detect what features
|
||||
runc has been built with. This includes critical information such as
|
||||
supported mount flags, hook names, and so on. Note that the output of this
|
||||
command is subject to change and will not be considered stable until runc
|
||||
1.2 at the earliest. The runtime-spec specification for this feature is
|
||||
being developed in [opencontainers/runtime-spec#1130]. (#3296)
|
||||
|
||||
[opencontainers/runtime-spec#1130]: https://github.com/opencontainers/runtime-spec/pull/1130
|
||||
|
||||
### Changed
|
||||
* system: improve performance of `/proc/$pid/stat` parsing. (#2696)
|
||||
* cgroup2: when `/sys/fs/cgroup` is configured as a read-write mount, change
|
||||
the ownership of certain cgroup control files (as per
|
||||
`/sys/kernel/cgroup/delegate`) to allow for proper deferral to the container
|
||||
process. (#3057)
|
||||
* docs: series of improvements to man pages to make them easier to read and
|
||||
use. (#3032)
|
||||
|
||||
#### libcontainer API
|
||||
* internal api: remove internal error types and handling system, switch to Go
|
||||
wrapped errors. (#3033)
|
||||
* New configs.Cgroup structure fields (#3177):
|
||||
* Systemd (whether to use systemd cgroup manager); and
|
||||
* Rootless (whether to use rootless cgroups).
|
||||
* New cgroups/manager package aiming to simplify cgroup manager instantiation.
|
||||
(#3177)
|
||||
* All cgroup managers' instantiation methods now initialize cgroup paths and
|
||||
can return errors. This allows using any cgroup manager method (e.g.
|
||||
Exists, Destroy, Set, GetStats) right after instantiation, which was not
|
||||
possible before (as paths were initialized in Apply only). (#3178)
|
||||
|
||||
### Fixed
|
||||
* nsenter: do not try to close already-closed fds during container setup and
|
||||
bail on close(2) failures. (#3058)
|
||||
* runc checkpoint/restore: fixed for containers with an external bind mount
|
||||
whose destination is a symlink. (#3047)
|
||||
* cgroup: improve openat2 handling for cgroup directory handle hardening.
|
||||
(#3030)
|
||||
* `runc delete -f` now succeeds (rather than timing out) on a paused
|
||||
container. (#3134)
|
||||
* runc run/start/exec now refuses a frozen cgroup (paused container in case of
|
||||
exec). Users can disable this using `--ignore-paused`. (#3132, #3223)
|
||||
* config: do not permit null bytes in mount fields. (#3287)
|
||||
|
||||
|
||||
## [1.0.3] - 2021-12-06
|
||||
|
||||
> If you were waiting for the opportune moment, that was it.
|
||||
|
||||
### Security
|
||||
* A potential vulnerability was discovered in runc (related to an internal
|
||||
usage of netlink), however upon further investigation we discovered that
|
||||
while this bug was exploitable on the master branch of runc, no released
|
||||
version of runc could be exploited using this bug. The exploit required being
|
||||
able to create a netlink attribute with a length that would overflow a uint16
|
||||
but this was not possible in any released version of runc. For more
|
||||
information, see [GHSA-v95c-p5hm-xq8f][] and CVE-2021-43784.
|
||||
|
||||
### Fixed
|
||||
* Fixed inability to start a container with read-write bind mount of a
|
||||
read-only fuse host mount. (#3283, #3292)
|
||||
* Fixed inability to start when read-only /dev is set in spec (#3276, #3277)
|
||||
* Fixed not removing sub-cgroups upon container delete, when rootless cgroup v2
|
||||
is used with older systemd. (#3226, #3297)
|
||||
* Fixed returning error from GetStats when hugetlb is unsupported (which causes
|
||||
excessive logging for Kubernetes). (#3233, #3295)
|
||||
* Improved an error message when dbus-user-session is not installed and
|
||||
rootless + cgroup2 + systemd are used (#3212)
|
||||
|
||||
[GHSA-v95c-p5hm-xq8f]: https://github.com/opencontainers/runc/security/advisories/GHSA-v95c-p5hm-xq8f
|
||||
|
||||
|
||||
## [1.0.2] - 2021-07-16
|
||||
|
||||
> Given the right lever, you can move a planet.
|
||||
|
||||
### Changed
|
||||
* Made release builds reproducible from now on. (#3099, #3142)
|
||||
|
||||
### Fixed
|
||||
* Fixed a failure to set CPU quota period in some cases on cgroup v1. (#3090,
|
||||
#3115)
|
||||
* Fixed the inability to start a container with the "adding seccomp filter
|
||||
rule for syscall ..." error, caused by redundant seccomp rules (i.e. those
|
||||
that have an action equal to the default one). Such redundant rules are now
|
||||
skipped. (#3109, #3129)
|
||||
* Fixed a rare debug log race in runc init, which can result in occasional
|
||||
harmful "failed to decode ..." errors from runc run or exec. (#3120, #3130)
|
||||
* Fixed the check in cgroup v1 systemd manager if a container needs to be
|
||||
frozen before Set, and add a setting to skip such freeze unconditionally.
|
||||
The previous fix for that issue, done in runc 1.0.1, was not working.
|
||||
(#3166, #3167)
|
||||
|
||||
|
||||
## [1.0.1] - 2021-07-16
|
||||
|
||||
> If in doubt, Meriadoc, always follow your nose.
|
||||
|
||||
### Fixed
|
||||
* Fixed occasional runc exec/run failure ("interrupted system call") on an
|
||||
Azure volume. (#3045, #3074)
|
||||
* Fixed "unable to find groups ... token too long" error with /etc/group
|
||||
containing lines longer than 64K characters. (#3062, #3079)
|
||||
* cgroup/systemd/v1: fix leaving cgroup frozen after Set if a parent cgroup is
|
||||
frozen. This is a regression in 1.0.0, not affecting runc itself but some
|
||||
of libcontainer users (e.g. Kubernetes). (#3081, #3085)
|
||||
* cgroupv2: bpf: Ignore inaccessible existing programs in case of
|
||||
permission error when handling replacement of existing bpf cgroup
|
||||
programs. This fixes a regression in 1.0.0, where some SELinux
|
||||
policies would block runc from being able to run entirely. (#3055, #3087)
|
||||
* cgroup/systemd/v2: don't freeze cgroup on Set. (#3067, #3092)
|
||||
* cgroup/systemd/v1: avoid unnecessary freeze on Set. (#3082, #3093)
|
||||
|
||||
|
||||
## [1.0.0] - 2021-06-22
|
||||
|
||||
> A wizard is never late, nor is he early, he arrives precisely when he means
|
||||
> to.
|
||||
|
||||
As runc follows Semantic Versioning, we will endeavour to not make any
|
||||
breaking changes without bumping the major version number of runc.
|
||||
However, it should be noted that Go API usage of runc's internal
|
||||
implementation (libcontainer) is *not* covered by this policy.
|
||||
|
||||
### Removed
|
||||
* Removed libcontainer/configs.Device* identifiers (deprecated since rc94,
|
||||
use libcontainer/devices). (#2999)
|
||||
* Removed libcontainer/system.RunningInUserNS function (deprecated since
|
||||
rc94, use libcontainer/userns). (#2999)
|
||||
|
||||
### Deprecated
|
||||
* The usage of relative paths for mountpoints will now produce a warning
|
||||
(such configurations are outside of the spec, and in future runc will
|
||||
produce an error when given such configurations). (#2917, #3004)
|
||||
|
||||
### Fixed
|
||||
* cgroupv2: devices: rework the filter generation to produce consistent
|
||||
results with cgroupv1, and always clobber any existing eBPF
|
||||
program(s) to fix `runc update` and avoid leaking eBPF programs
|
||||
(resulting in errors when managing containers). (#2951)
|
||||
* cgroupv2: correctly convert "number of IOs" statistics in a
|
||||
cgroupv1-compatible way. (#2965, #2967, #2968, #2964)
|
||||
* cgroupv2: support larger than 32-bit IO statistics on 32-bit architectures.
|
||||
* cgroupv2: wait for freeze to finish before returning from the freezing
|
||||
code, optimize the method for checking whether a cgroup is frozen. (#2955)
|
||||
* cgroups/systemd: fixed "retry on dbus disconnect" logic introduced in rc94
|
||||
* cgroups/systemd: fixed returning "unit already exists" error from a systemd
|
||||
cgroup manager (regression in rc94) (#2997, #2996)
|
||||
|
||||
### Added
|
||||
* cgroupv2: support SkipDevices with systemd driver. (#2958, #3019)
|
||||
* cgroup1: blkio: support BFQ weights. (#3010)
|
||||
* cgroupv2: set per-device io weights if BFQ IO scheduler is available.
|
||||
(#3022)
|
||||
|
||||
### Changed
|
||||
* cgroup/systemd: return, not ignore, stop unit error from Destroy (#2946)
|
||||
* Fix all golangci-lint failures. (#2781, #2962)
|
||||
* Make `runc --version` output sane even when built with `go get` or
|
||||
otherwise outside of our build scripts. (#2962)
|
||||
* cgroups: set SkipDevices during runc update (so we don't modify
|
||||
cgroups at all during `runc update`). (#2994)
|
||||
|
||||
<!-- minor releases -->
|
||||
[Unreleased]: https://github.com/opencontainers/runc/compare/v1.1.0...HEAD
|
||||
[1.1.0]: https://github.com/opencontainers/runc/compare/v1.1.0-rc.1...v1.1.0
|
||||
[1.0.0]: https://github.com/opencontainers/runc/releases/tag/v1.0.0
|
||||
|
||||
<!-- 1.0.z patch releases -->
|
||||
[Unreleased 1.0.z]: https://github.com/opencontainers/runc/compare/v1.0.3...release-1.0
|
||||
[1.0.3]: https://github.com/opencontainers/runc/compare/v1.0.2...v1.0.3
|
||||
[1.0.2]: https://github.com/opencontainers/runc/compare/v1.0.1...v1.0.2
|
||||
[1.0.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.0.1
|
||||
|
||||
<!-- 1.1.z patch releases -->
|
||||
[Unreleased 1.1.z]: https://github.com/opencontainers/runc/compare/v1.1.0...release-1.1
|
||||
[1.1.0-rc.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.1.0-rc.1
|
96
Dockerfile
96
Dockerfile
|
@ -1,34 +1,41 @@
|
|||
FROM golang:1.12-stretch
|
||||
ARG GO_VERSION=1.17
|
||||
ARG BATS_VERSION=v1.3.0
|
||||
ARG LIBSECCOMP_VERSION=2.5.3
|
||||
|
||||
RUN dpkg --add-architecture armel \
|
||||
FROM golang:${GO_VERSION}-bullseye
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG CRIU_REPO=https://download.opensuse.org/repositories/devel:/tools:/criu/Debian_11
|
||||
|
||||
RUN KEYFILE=/usr/share/keyrings/criu-repo-keyring.gpg; \
|
||||
wget -nv $CRIU_REPO/Release.key -O- | gpg --dearmor > "$KEYFILE" \
|
||||
&& echo "deb [signed-by=$KEYFILE] $CRIU_REPO/ /" > /etc/apt/sources.list.d/criu.list \
|
||||
&& dpkg --add-architecture armel \
|
||||
&& dpkg --add-architecture armhf \
|
||||
&& dpkg --add-architecture arm64 \
|
||||
&& dpkg --add-architecture ppc64el \
|
||||
&& apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
curl \
|
||||
sudo \
|
||||
gawk \
|
||||
iptables \
|
||||
jq \
|
||||
pkg-config \
|
||||
libaio-dev \
|
||||
libcap-dev \
|
||||
libprotobuf-dev \
|
||||
libprotobuf-c0-dev \
|
||||
libnl-3-dev \
|
||||
libnet-dev \
|
||||
libseccomp2 \
|
||||
libseccomp-dev \
|
||||
protobuf-c-compiler \
|
||||
protobuf-compiler \
|
||||
python-minimal \
|
||||
uidmap \
|
||||
kmod \
|
||||
crossbuild-essential-armel crossbuild-essential-armhf crossbuild-essential-arm64 crossbuild-essential-ppc64el \
|
||||
libseccomp-dev:armel libseccomp-dev:armhf libseccomp-dev:arm64 libseccomp-dev:ppc64el \
|
||||
--no-install-recommends \
|
||||
&& apt-get clean
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
build-essential \
|
||||
criu \
|
||||
crossbuild-essential-arm64 \
|
||||
crossbuild-essential-armel \
|
||||
crossbuild-essential-armhf \
|
||||
crossbuild-essential-ppc64el \
|
||||
crossbuild-essential-s390x \
|
||||
curl \
|
||||
gawk \
|
||||
gcc \
|
||||
gperf \
|
||||
iptables \
|
||||
jq \
|
||||
kmod \
|
||||
pkg-config \
|
||||
python3-minimal \
|
||||
sshfs \
|
||||
sudo \
|
||||
uidmap \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/cache/apt /var/lib/apt/lists/* /etc/apt/sources.list.d/*.list
|
||||
|
||||
# Add a dummy user for the rootless integration tests. While runC does
|
||||
# not require an entry in /etc/passwd to operate, one of the tests uses
|
||||
|
@ -37,30 +44,21 @@ RUN dpkg --add-architecture armel \
|
|||
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
|
||||
|
||||
# install bats
|
||||
ARG BATS_VERSION
|
||||
RUN cd /tmp \
|
||||
&& git clone https://github.com/sstephenson/bats.git \
|
||||
&& cd bats \
|
||||
&& git reset --hard 03608115df2071fff4eaaff1605768c275e5f81f \
|
||||
&& git clone https://github.com/bats-core/bats-core.git \
|
||||
&& cd bats-core \
|
||||
&& git reset --hard "${BATS_VERSION}" \
|
||||
&& ./install.sh /usr/local \
|
||||
&& rm -rf /tmp/bats
|
||||
&& rm -rf /tmp/bats-core
|
||||
|
||||
# install criu
|
||||
ENV CRIU_VERSION v3.12
|
||||
RUN mkdir -p /usr/src/criu \
|
||||
&& curl -sSL https://github.com/checkpoint-restore/criu/archive/${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
|
||||
&& cd /usr/src/criu \
|
||||
&& make install-criu \
|
||||
&& rm -rf /usr/src/criu
|
||||
# install libseccomp
|
||||
ARG LIBSECCOMP_VERSION
|
||||
COPY script/* /tmp/script/
|
||||
RUN mkdir -p /opt/libseccomp \
|
||||
&& /tmp/script/seccomp.sh "$LIBSECCOMP_VERSION" /opt/libseccomp arm64 armel armhf ppc64le s390x
|
||||
ENV LIBSECCOMP_VERSION=$LIBSECCOMP_VERSION
|
||||
ENV LD_LIBRARY_PATH=/opt/libseccomp/lib
|
||||
ENV PKG_CONFIG_PATH=/opt/libseccomp/lib/pkgconfig
|
||||
|
||||
# setup a playground for us to spawn containers in
|
||||
ENV ROOTFS /busybox
|
||||
RUN mkdir -p ${ROOTFS}
|
||||
|
||||
COPY script/tmpmount /
|
||||
WORKDIR /go/src/github.com/opencontainers/runc
|
||||
ENTRYPOINT ["/tmpmount"]
|
||||
|
||||
ADD . /go/src/github.com/opencontainers/runc
|
||||
|
||||
RUN . tests/integration/multi-arch.bash \
|
||||
&& curl -o- -sSL `get_busybox` | tar xfJC - ${ROOTFS}
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
## Emeritus ##
|
||||
|
||||
We would like to acknowledge previous runc maintainers and their huge
|
||||
contributions to our collective success:
|
||||
|
||||
* Alexander Morozov (@lk4d4)
|
||||
* Andrei Vagin (@avagin)
|
||||
* Rohit Jnagal (@rjnagal)
|
||||
* Victor Marmol (@vmarmol)
|
||||
|
||||
We thank these members for their service to the OCI community.
|
|
@ -1,5 +1,8 @@
|
|||
Michael Crosby <michael@docker.com> (@crosbymichael)
|
||||
Michael Crosby <michael@thepasture.io> (@crosbymichael)
|
||||
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
|
||||
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
|
||||
Qiang Huang <h.huangqiang@huawei.com> (@hqhq)
|
||||
Aleksa Sarai <asarai@suse.de> (@cyphar)
|
||||
Aleksa Sarai <cyphar@cyphar.com> (@cyphar)
|
||||
Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp> (@AkihiroSuda)
|
||||
Kir Kolyshkin <kolyshkin@gmail.com> (@kolyshkin)
|
||||
Sebastiaan van Stijn <github@gone.nl> (@thaJeztah)
|
||||
|
|
175
Makefile
175
Makefile
|
@ -1,133 +1,158 @@
|
|||
.PHONY: all shell dbuild man release \
|
||||
localtest localunittest localintegration \
|
||||
test unittest integration \
|
||||
cross localcross
|
||||
|
||||
CONTAINER_ENGINE := docker
|
||||
GO := go
|
||||
GO ?= go
|
||||
|
||||
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
|
||||
PREFIX := $(DESTDIR)/usr/local
|
||||
PREFIX ?= /usr/local
|
||||
BINDIR := $(PREFIX)/sbin
|
||||
MANDIR := $(PREFIX)/share/man
|
||||
|
||||
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
|
||||
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
|
||||
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
|
||||
PROJECT := github.com/opencontainers/runc
|
||||
BUILDTAGS ?= seccomp
|
||||
COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
|
||||
COMMIT ?= $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")
|
||||
COMMIT ?= $(shell git describe --dirty --long --always)
|
||||
VERSION := $(shell cat ./VERSION)
|
||||
|
||||
MAN_DIR := $(CURDIR)/man/man8
|
||||
MAN_PAGES = $(shell ls $(MAN_DIR)/*.8)
|
||||
MAN_PAGES_BASE = $(notdir $(MAN_PAGES))
|
||||
MAN_INSTALL_PATH := ${PREFIX}/share/man/man8/
|
||||
ifeq ($(shell $(GO) env GOOS),linux)
|
||||
ifeq (,$(filter $(shell $(GO) env GOARCH),mips mipsle mips64 mips64le ppc64))
|
||||
ifeq (,$(findstring -race,$(EXTRA_FLAGS)))
|
||||
GO_BUILDMODE := "-buildmode=pie"
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
GO_BUILD := $(GO) build -trimpath $(GO_BUILDMODE) $(EXTRA_FLAGS) -tags "$(BUILDTAGS)" \
|
||||
-ldflags "-X main.gitCommit=$(COMMIT) -X main.version=$(VERSION) $(EXTRA_LDFLAGS)"
|
||||
GO_BUILD_STATIC := CGO_ENABLED=1 $(GO) build -trimpath $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" \
|
||||
-ldflags "-extldflags -static -X main.gitCommit=$(COMMIT) -X main.version=$(VERSION) $(EXTRA_LDFLAGS)"
|
||||
|
||||
RELEASE_DIR := $(CURDIR)/release
|
||||
|
||||
VERSION := ${shell cat ./VERSION}
|
||||
|
||||
SHELL := $(shell command -v bash 2>/dev/null)
|
||||
GPG_KEYID ?= asarai@suse.de
|
||||
|
||||
.DEFAULT: runc
|
||||
|
||||
runc: $(SOURCES)
|
||||
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc .
|
||||
runc:
|
||||
$(GO_BUILD) -o runc .
|
||||
|
||||
all: runc recvtty
|
||||
all: runc recvtty sd-helper seccompagent
|
||||
|
||||
recvtty: contrib/cmd/recvtty/recvtty
|
||||
recvtty sd-helper seccompagent:
|
||||
$(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@
|
||||
|
||||
contrib/cmd/recvtty/recvtty: $(SOURCES)
|
||||
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
||||
static:
|
||||
$(GO_BUILD_STATIC) -o runc .
|
||||
|
||||
static: $(SOURCES)
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc .
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
||||
releaseall: RELEASE_ARGS := "-a arm64 -a armel -a armhf -a ppc64le -a s390x"
|
||||
releaseall: release
|
||||
|
||||
release:
|
||||
script/release.sh -r release/$(VERSION) -v $(VERSION)
|
||||
release: runcimage
|
||||
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||
--rm -v $(CURDIR):/go/src/$(PROJECT) \
|
||||
-e RELEASE_ARGS=$(RELEASE_ARGS) \
|
||||
$(RUNC_IMAGE) make localrelease
|
||||
script/release_sign.sh -S $(GPG_KEYID) -r release/$(VERSION) -v $(VERSION)
|
||||
|
||||
localrelease:
|
||||
script/release_build.sh -r release/$(VERSION) -v $(VERSION) $(RELEASE_ARGS)
|
||||
|
||||
dbuild: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
|
||||
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||
--privileged --rm \
|
||||
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||
$(RUNC_IMAGE) make clean all
|
||||
|
||||
lint:
|
||||
$(GO) vet $(allpackages)
|
||||
$(GO) fmt $(allpackages)
|
||||
golangci-lint run ./...
|
||||
|
||||
man:
|
||||
man/md2man-all.sh
|
||||
|
||||
runcimage:
|
||||
$(CONTAINER_ENGINE) build ${CONTAINER_ENGINE_BUILD_FLAGS} -t $(RUNC_IMAGE) .
|
||||
$(CONTAINER_ENGINE) build $(CONTAINER_ENGINE_BUILD_FLAGS) -t $(RUNC_IMAGE) .
|
||||
|
||||
test:
|
||||
make unittest integration rootlessintegration
|
||||
test: unittest integration rootlessintegration
|
||||
|
||||
localtest:
|
||||
make localunittest localintegration localrootlessintegration
|
||||
localtest: localunittest localintegration localrootlessintegration
|
||||
|
||||
unittest: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest TESTFLAGS=${TESTFLAGS}
|
||||
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||
-t --privileged --rm \
|
||||
-v /lib/modules:/lib/modules:ro \
|
||||
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||
$(RUNC_IMAGE) make localunittest TESTFLAGS=$(TESTFLAGS)
|
||||
|
||||
localunittest: all
|
||||
$(GO) test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages)
|
||||
$(GO) test -timeout 3m -tags "$(BUILDTAGS)" $(TESTFLAGS) -v ./...
|
||||
|
||||
integration: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration TESTPATH=${TESTPATH}
|
||||
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||
-t --privileged --rm \
|
||||
-v /lib/modules:/lib/modules:ro \
|
||||
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||
$(RUNC_IMAGE) make localintegration TESTPATH=$(TESTPATH)
|
||||
|
||||
localintegration: all
|
||||
bats -t tests/integration${TESTPATH}
|
||||
bats -t tests/integration$(TESTPATH)
|
||||
|
||||
rootlessintegration: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration
|
||||
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||
-t --privileged --rm \
|
||||
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||
-e ROOTLESS_TESTPATH \
|
||||
$(RUNC_IMAGE) make localrootlessintegration
|
||||
|
||||
localrootlessintegration: all
|
||||
tests/rootless.sh
|
||||
|
||||
shell: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
|
||||
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||
-ti --privileged --rm \
|
||||
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||
$(RUNC_IMAGE) bash
|
||||
|
||||
install:
|
||||
install -D -m0755 runc $(BINDIR)/runc
|
||||
install -D -m0755 runc $(DESTDIR)$(BINDIR)/runc
|
||||
|
||||
install-bash:
|
||||
install -D -m0644 contrib/completions/bash/runc $(PREFIX)/share/bash-completion/completions/runc
|
||||
install -D -m0644 contrib/completions/bash/runc $(DESTDIR)$(PREFIX)/share/bash-completion/completions/runc
|
||||
|
||||
install-man:
|
||||
install -d -m 755 $(MAN_INSTALL_PATH)
|
||||
install -m 644 $(MAN_PAGES) $(MAN_INSTALL_PATH)
|
||||
|
||||
uninstall:
|
||||
rm -f $(BINDIR)/runc
|
||||
|
||||
uninstall-bash:
|
||||
rm -f $(PREFIX)/share/bash-completion/completions/runc
|
||||
|
||||
uninstall-man:
|
||||
rm -f $(addprefix $(MAN_INSTALL_PATH),$(MAN_PAGES_BASE))
|
||||
install-man: man
|
||||
install -d -m 755 $(DESTDIR)$(MANDIR)/man8
|
||||
install -D -m 644 man/man8/*.8 $(DESTDIR)$(MANDIR)/man8
|
||||
|
||||
clean:
|
||||
rm -f runc runc-*
|
||||
rm -f contrib/cmd/recvtty/recvtty
|
||||
rm -rf $(RELEASE_DIR)
|
||||
rm -rf $(MAN_DIR)
|
||||
rm -f contrib/cmd/sd-helper/sd-helper
|
||||
rm -f contrib/cmd/seccompagent/seccompagent
|
||||
rm -rf release
|
||||
rm -rf man/man8
|
||||
|
||||
validate:
|
||||
script/validate-gofmt
|
||||
script/validate-c
|
||||
$(GO) vet $(allpackages)
|
||||
cfmt: C_SRC=$(shell git ls-files '*.c' | grep -v '^vendor/')
|
||||
cfmt:
|
||||
indent -linux -l120 -il0 -ppi2 -cp1 -T size_t -T jmp_buf $(C_SRC)
|
||||
|
||||
ci: validate test release
|
||||
shellcheck:
|
||||
shellcheck tests/integration/*.bats tests/integration/*.sh \
|
||||
tests/integration/*.bash tests/*.sh \
|
||||
script/release_*.sh script/seccomp.sh script/lib.sh
|
||||
# TODO: add shellcheck for more sh files
|
||||
|
||||
cross: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -e BUILDTAGS="$(BUILDTAGS)" --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localcross
|
||||
shfmt:
|
||||
shfmt -ln bats -d -w tests/integration/*.bats
|
||||
shfmt -ln bash -d -w man/*.sh script/* tests/*.sh tests/integration/*.bash
|
||||
|
||||
localcross:
|
||||
CGO_ENABLED=1 GOARCH=arm GOARM=6 CC=arm-linux-gnueabi-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armel .
|
||||
CGO_ENABLED=1 GOARCH=arm GOARM=7 CC=arm-linux-gnueabihf-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armhf .
|
||||
CGO_ENABLED=1 GOARCH=arm64 CC=aarch64-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-arm64 .
|
||||
CGO_ENABLED=1 GOARCH=ppc64le CC=powerpc64le-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-ppc64le .
|
||||
vendor:
|
||||
$(GO) mod tidy
|
||||
$(GO) mod vendor
|
||||
$(GO) mod verify
|
||||
|
||||
# memoize allpackages, so that it's executed only once and only if used
|
||||
_allpackages = $(shell $(GO) list ./... | grep -v vendor)
|
||||
allpackages = $(if $(__allpackages),,$(eval __allpackages := $$(_allpackages)))$(__allpackages)
|
||||
verify-dependencies: vendor
|
||||
@test -z "$$(git status --porcelain -- go.mod go.sum vendor/)" \
|
||||
|| (echo -e "git status:\n $$(git status -- go.mod go.sum vendor/)\nerror: vendor/, go.mod and/or go.sum not up to date. Run \"make vendor\" to update"; exit 1) \
|
||||
&& echo "all vendor files are up to date."
|
||||
|
||||
.PHONY: runc all recvtty sd-helper seccompagent static releaseall release \
|
||||
localrelease dbuild lint man runcimage \
|
||||
test localtest unittest localunittest integration localintegration \
|
||||
rootlessintegration localrootlessintegration shell install install-bash \
|
||||
install-man clean cfmt shfmt shellcheck \
|
||||
vendor verify-dependencies
|
||||
|
|
88
README.md
88
README.md
|
@ -1,39 +1,33 @@
|
|||
# runc
|
||||
|
||||
[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
|
||||
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
|
||||
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
|
||||
[![gha/validate](https://github.com/opencontainers/runc/workflows/validate/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Avalidate)
|
||||
[![gha/ci](https://github.com/opencontainers/runc/workflows/ci/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Aci)
|
||||
|
||||
## Introduction
|
||||
|
||||
`runc` is a CLI tool for spawning and running containers according to the OCI specification.
|
||||
`runc` is a CLI tool for spawning and running containers on Linux according to the OCI specification.
|
||||
|
||||
## Releases
|
||||
|
||||
`runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository.
|
||||
We will try to make sure that `runc` and the OCI specification major versions stay in lockstep.
|
||||
This means that `runc` 1.0.0 should implement the 1.0 version of the specification.
|
||||
|
||||
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
||||
|
||||
Currently, the following features are not considered to be production-ready:
|
||||
|
||||
* Support for cgroup v2
|
||||
|
||||
## Security
|
||||
|
||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
||||
The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
|
||||
|
||||
### Security Audit
|
||||
A third-party security audit was performed by Cure53; you can see the full report [here](https://github.com/opencontainers/runc/blob/master/docs/Security-Audit.pdf).
|
||||
|
||||
## Building
|
||||
|
||||
`runc` currently supports the Linux platform with various architecture support.
|
||||
It must be built with Go version 1.6 or higher in order for some features to function properly.
|
||||
`runc` only supports Linux. It must be built with Go version 1.16 or higher.
|
||||
|
||||
In order to enable seccomp support you will need to install `libseccomp` on your platform.
|
||||
> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
|
||||
|
||||
Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make.
|
||||
|
||||
```bash
|
||||
# create a 'github.com/opencontainers' in your GOPATH/src
|
||||
cd github.com/opencontainers
|
||||
|
@ -58,21 +52,24 @@ sudo make install
|
|||
|
||||
#### Build Tags
|
||||
|
||||
`runc` supports optional build tags for compiling support of various features.
|
||||
To add build tags to the make option the `BUILDTAGS` variable must be set.
|
||||
`runc` supports optional build tags for compiling support of various features,
|
||||
with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
|
||||
|
||||
To change build tags from the default, set the `BUILDTAGS` variable for make,
|
||||
e.g. to disable seccomp:
|
||||
|
||||
```bash
|
||||
make BUILDTAGS='seccomp apparmor'
|
||||
make BUILDTAGS=""
|
||||
```
|
||||
|
||||
| Build Tag | Feature | Dependency |
|
||||
|-----------|------------------------------------|-------------|
|
||||
| seccomp | Syscall filtering | libseccomp |
|
||||
| selinux | selinux process and mount labeling | <none> |
|
||||
| apparmor | apparmor profile support | <none> |
|
||||
| ambient | ambient capability support | kernel 4.3 |
|
||||
| nokmem | disable kernel memory account | <none> |
|
||||
| Build Tag | Feature | Enabled by default | Dependency |
|
||||
|-----------|------------------------------------|--------------------|------------|
|
||||
| seccomp | Syscall filtering | yes | libseccomp |
|
||||
|
||||
The following build tags were used earlier, but are now obsolete:
|
||||
- **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored)
|
||||
- **apparmor** (since runc v1.0.0-rc93 the feature is always enabled)
|
||||
- **selinux** (since runc v1.0.0-rc93 the feature is always enabled)
|
||||
|
||||
### Running the test suite
|
||||
|
||||
|
@ -97,20 +94,41 @@ You can run a specific integration test by setting the `TESTPATH` variable.
|
|||
# make test TESTPATH="/checkpoint.bats"
|
||||
```
|
||||
|
||||
You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables.
|
||||
You can run a specific rootless integration test by setting the `ROOTLESS_TESTPATH` variable.
|
||||
|
||||
```bash
|
||||
# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/"
|
||||
# make test ROOTLESS_TESTPATH="/checkpoint.bats"
|
||||
```
|
||||
|
||||
You can run a test using your container engine's flags by setting `CONTAINER_ENGINE_BUILD_FLAGS` and `CONTAINER_ENGINE_RUN_FLAGS` variables.
|
||||
|
||||
```bash
|
||||
# make test CONTAINER_ENGINE_BUILD_FLAGS="--build-arg http_proxy=http://yourproxy/" CONTAINER_ENGINE_RUN_FLAGS="-e http_proxy=http://yourproxy/"
|
||||
```
|
||||
|
||||
### Dependencies Management
|
||||
|
||||
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
|
||||
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
|
||||
`runc` uses [Go Modules](https://github.com/golang/go/wiki/Modules) for dependency management.
|
||||
Please refer to [Go Modules](https://github.com/golang/go/wiki/Modules) for how to add or update
|
||||
new dependencies.
|
||||
|
||||
```
|
||||
# Update vendored dependencies
|
||||
make vendor
|
||||
# Verify all dependencies
|
||||
make verify-dependencies
|
||||
```
|
||||
|
||||
## Using runc
|
||||
|
||||
Please note that runc is a low level tool not designed with an end user
|
||||
in mind. It is mostly employed by other higher level container software.
|
||||
|
||||
Therefore, unless there is some specific use case that prevents the use
|
||||
of tools like Docker or Podman, it is not recommended to use runc directly.
|
||||
|
||||
If you still want to use runc, here's how.
|
||||
|
||||
### Creating an OCI Bundle
|
||||
|
||||
In order to use runc you must have your container in the format of an OCI bundle.
|
||||
|
@ -152,7 +170,9 @@ If you used the unmodified `runc spec` template this should give you a `sh` sess
|
|||
|
||||
The second way to start a container is using the specs lifecycle operations.
|
||||
This gives you more power over how the container is created and managed while it is running.
|
||||
This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here.
|
||||
This will also launch the container in the background so you will have to edit
|
||||
the `config.json` to remove the `terminal` setting for the simple examples
|
||||
below (see more details about [runc terminal handling](docs/terminals.md)).
|
||||
The process field in your `config.json` should look like the example below, with `"terminal": false` and `"args": ["sleep", "5"]`.
|
||||
|
||||
|
||||
|
@ -275,6 +295,14 @@ PIDFile=/run/mycontainerid.pid
|
|||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
## More documentation
|
||||
|
||||
* [cgroup v2](./docs/cgroup-v2.md)
|
||||
* [Checkpoint and restore](./docs/checkpoint-restore.md)
|
||||
* [systemd cgroup driver](./docs/systemd.md)
|
||||
* [Terminals and standard IO](./docs/terminals.md)
|
||||
* [Experimental features](./docs/experimental.md)
|
||||
|
||||
## License
|
||||
|
||||
The code and docs are released under the [Apache 2.0 license](LICENSE).
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
# Security
|
||||
|
||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
||||
The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
# -*- mode: ruby -*-
|
||||
# vi: set ft=ruby :
|
||||
|
||||
Vagrant.configure("2") do |config|
|
||||
# Fedora box is used for testing cgroup v2 support
|
||||
config.vm.box = "fedora/35-cloud-base"
|
||||
config.vm.provider :virtualbox do |v|
|
||||
v.memory = 2048
|
||||
v.cpus = 2
|
||||
end
|
||||
config.vm.provider :libvirt do |v|
|
||||
v.memory = 2048
|
||||
v.cpus = 2
|
||||
end
|
||||
config.vm.provision "shell", inline: <<-SHELL
|
||||
set -e -u -o pipefail
|
||||
# Work around dnf mirror failures by retrying a few times
|
||||
for i in $(seq 0 2); do
|
||||
sleep $i
|
||||
# "config exclude" dnf shell command is not working in Fedora 35
|
||||
# (see https://bugzilla.redhat.com/show_bug.cgi?id=2022571);
|
||||
# the workaround is to specify it as an option.
|
||||
cat << EOF | dnf -y --exclude=kernel,kernel-core shell && break
|
||||
config install_weak_deps false
|
||||
update
|
||||
install iptables gcc make golang-go glibc-static libseccomp-devel bats jq git-core criu fuse-sshfs
|
||||
ts run
|
||||
EOF
|
||||
done
|
||||
dnf clean all
|
||||
|
||||
# Add a user for rootless tests
|
||||
useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
|
||||
|
||||
# Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
|
||||
ssh-keygen -t ecdsa -N "" -f /root/rootless.key
|
||||
mkdir -m 0700 -p /home/rootless/.ssh
|
||||
cp /root/rootless.key /home/rootless/.ssh/id_ecdsa
|
||||
cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
|
||||
chown -R rootless.rootless /home/rootless
|
||||
|
||||
# Delegate cgroup v2 controllers to rootless user via --systemd-cgroup
|
||||
mkdir -p /etc/systemd/system/user@.service.d
|
||||
cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
|
||||
[Service]
|
||||
# default: Delegate=pids memory
|
||||
# NOTE: delegation of cpuset requires systemd >= 244 (Fedora >= 32, Ubuntu >= 20.04).
|
||||
Delegate=yes
|
||||
EOF
|
||||
systemctl daemon-reload
|
||||
SHELL
|
||||
end
|
|
@ -1,19 +1,19 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
criu "github.com/checkpoint-restore/go-criu/v5/rpc"
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
|
@ -34,7 +34,7 @@ checkpointed.`,
|
|||
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
||||
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
||||
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
|
||||
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
||||
cli.IntFlag{Name: "status-fd", Value: -1, Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
||||
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
||||
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
||||
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
||||
|
@ -47,7 +47,7 @@ checkpointed.`,
|
|||
return err
|
||||
}
|
||||
// XXX: Currently this is untested with rootless containers.
|
||||
if os.Geteuid() != 0 || system.RunningInUserNS() {
|
||||
if os.Geteuid() != 0 || userns.RunningInUserNS() {
|
||||
logrus.Warn("runc checkpoint is untested with rootless containers")
|
||||
}
|
||||
|
||||
|
@ -60,10 +60,13 @@ checkpointed.`,
|
|||
return err
|
||||
}
|
||||
if status == libcontainer.Created || status == libcontainer.Stopped {
|
||||
fatalf("Container cannot be checkpointed in %s state", status.String())
|
||||
fatal(fmt.Errorf("Container cannot be checkpointed in %s state", status.String()))
|
||||
}
|
||||
defer destroy(container)
|
||||
options := criuOptions(context)
|
||||
if !(options.LeaveRunning || options.PreDump) {
|
||||
// destroy container unless we tell CRIU to keep it
|
||||
defer destroy(container)
|
||||
}
|
||||
// these are the mandatory criu options for a container
|
||||
setPageServer(context, options)
|
||||
setManageCgroupsMode(context, options)
|
||||
|
@ -74,28 +77,53 @@ checkpointed.`,
|
|||
},
|
||||
}
|
||||
|
||||
func getCheckpointImagePath(context *cli.Context) string {
|
||||
func prepareImagePaths(context *cli.Context) (string, string, error) {
|
||||
imagePath := context.String("image-path")
|
||||
if imagePath == "" {
|
||||
imagePath = getDefaultImagePath(context)
|
||||
imagePath = getDefaultImagePath()
|
||||
}
|
||||
return imagePath
|
||||
|
||||
if err := os.MkdirAll(imagePath, 0o600); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
parentPath := context.String("parent-path")
|
||||
if parentPath == "" {
|
||||
return imagePath, parentPath, nil
|
||||
}
|
||||
|
||||
if filepath.IsAbs(parentPath) {
|
||||
return "", "", errors.New("--parent-path must be relative")
|
||||
}
|
||||
|
||||
realParent := filepath.Join(imagePath, parentPath)
|
||||
fi, err := os.Stat(realParent)
|
||||
if err == nil && !fi.IsDir() {
|
||||
err = &os.PathError{Path: realParent, Err: unix.ENOTDIR}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return "", "", fmt.Errorf("invalid --parent-path: %w", err)
|
||||
}
|
||||
|
||||
return imagePath, parentPath, nil
|
||||
}
|
||||
|
||||
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
|
||||
// xxx following criu opts are optional
|
||||
// The dump image can be sent to a criu page server
|
||||
if psOpt := context.String("page-server"); psOpt != "" {
|
||||
addressPort := strings.Split(psOpt, ":")
|
||||
if len(addressPort) != 2 {
|
||||
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
|
||||
address, port, err := net.SplitHostPort(psOpt)
|
||||
|
||||
if err != nil || address == "" || port == "" {
|
||||
fatal(errors.New("Use --page-server ADDRESS:PORT to specify page server"))
|
||||
}
|
||||
portInt, err := strconv.Atoi(addressPort[1])
|
||||
portInt, err := strconv.Atoi(port)
|
||||
if err != nil {
|
||||
fatal(fmt.Errorf("Invalid port number"))
|
||||
fatal(errors.New("Invalid port number"))
|
||||
}
|
||||
options.PageServer = libcontainer.CriuPageServerInfo{
|
||||
Address: addressPort[0],
|
||||
Address: address,
|
||||
Port: int32(portInt),
|
||||
}
|
||||
}
|
||||
|
@ -105,13 +133,13 @@ func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts)
|
|||
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
|
||||
switch cgOpt {
|
||||
case "soft":
|
||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
|
||||
options.ManageCgroupsMode = criu.CriuCgMode_SOFT
|
||||
case "full":
|
||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
|
||||
options.ManageCgroupsMode = criu.CriuCgMode_FULL
|
||||
case "strict":
|
||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
|
||||
options.ManageCgroupsMode = criu.CriuCgMode_STRICT
|
||||
default:
|
||||
fatal(fmt.Errorf("Invalid manage cgroups mode"))
|
||||
fatal(errors.New("Invalid manage cgroups mode"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,12 +17,13 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
@ -65,7 +66,7 @@ func bail(err error) {
|
|||
os.Exit(1)
|
||||
}
|
||||
|
||||
func handleSingle(path string) error {
|
||||
func handleSingle(path string, noStdin bool) error {
|
||||
// Open a socket.
|
||||
ln, err := net.Listen("unix", path)
|
||||
if err != nil {
|
||||
|
@ -87,7 +88,7 @@ func handleSingle(path string) error {
|
|||
// Get the fd of the connection.
|
||||
unixconn, ok := conn.(*net.UnixConn)
|
||||
if !ok {
|
||||
return fmt.Errorf("failed to cast to unixconn")
|
||||
return errors.New("failed to cast to unixconn")
|
||||
}
|
||||
|
||||
socket, err := unixconn.File()
|
||||
|
@ -105,23 +106,37 @@ func handleSingle(path string) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
console.ClearONLCR(c.Fd())
|
||||
if err := console.ClearONLCR(c.Fd()); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy from our stdio to the master fd.
|
||||
quitChan := make(chan struct{})
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
inErr, outErr error
|
||||
)
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
io.Copy(os.Stdout, c)
|
||||
quitChan <- struct{}{}
|
||||
}()
|
||||
go func() {
|
||||
io.Copy(c, os.Stdin)
|
||||
quitChan <- struct{}{}
|
||||
_, outErr = io.Copy(os.Stdout, c)
|
||||
wg.Done()
|
||||
}()
|
||||
if !noStdin {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
_, inErr = io.Copy(c, os.Stdin)
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
// Only close the master fd once we've stopped copying.
|
||||
<-quitChan
|
||||
wg.Wait()
|
||||
c.Close()
|
||||
return nil
|
||||
|
||||
if outErr != nil {
|
||||
return outErr
|
||||
}
|
||||
|
||||
return inErr
|
||||
}
|
||||
|
||||
func handleNull(path string) error {
|
||||
|
@ -161,15 +176,7 @@ func handleNull(path string) error {
|
|||
return
|
||||
}
|
||||
|
||||
// Just do a dumb copy to /dev/null.
|
||||
devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
// TODO: Handle this nicely.
|
||||
return
|
||||
}
|
||||
|
||||
io.Copy(devnull, master)
|
||||
devnull.Close()
|
||||
_, _ = io.Copy(io.Discard, master)
|
||||
}(conn)
|
||||
}
|
||||
}
|
||||
|
@ -185,7 +192,7 @@ func main() {
|
|||
v = append(v, version)
|
||||
}
|
||||
if gitCommit != "" {
|
||||
v = append(v, fmt.Sprintf("commit: %s", gitCommit))
|
||||
v = append(v, "commit: "+gitCommit)
|
||||
}
|
||||
app.Version = strings.Join(v, "\n")
|
||||
|
||||
|
@ -201,26 +208,31 @@ func main() {
|
|||
Value: "",
|
||||
Usage: "Path to write daemon process ID to",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-stdin",
|
||||
Usage: "Disable stdin handling (no-op for null mode)",
|
||||
},
|
||||
}
|
||||
|
||||
app.Action = func(ctx *cli.Context) error {
|
||||
args := ctx.Args()
|
||||
if len(args) != 1 {
|
||||
return fmt.Errorf("need to specify a single socket path")
|
||||
return errors.New("need to specify a single socket path")
|
||||
}
|
||||
path := ctx.Args()[0]
|
||||
|
||||
pidPath := ctx.String("pid-file")
|
||||
if pidPath != "" {
|
||||
pid := fmt.Sprintf("%d\n", os.Getpid())
|
||||
if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil {
|
||||
if err := os.WriteFile(pidPath, []byte(pid), 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
noStdin := ctx.Bool("no-stdin")
|
||||
switch ctx.String("mode") {
|
||||
case "single":
|
||||
if err := handleSingle(path); err != nil {
|
||||
if err := handleSingle(path, noStdin); err != nil {
|
||||
return err
|
||||
}
|
||||
case "null":
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// usage prints the sd-helper help text to standard output and then
// terminates the process with exit status 1. It never returns to its caller.
func usage() {
|
||||
fmt.Print(`Open Container Initiative contrib/cmd/sd-helper
|
||||
|
||||
sd-helper is a tool that uses runc/libcontainer/cgroups/systemd package
|
||||
functionality to communicate to systemd in order to perform various operations.
|
||||
Currently this is limited to starting and stopping systemd transient slice
|
||||
units.
|
||||
|
||||
Usage:
|
||||
sd-helper [-debug] [-parent <pname>] {start|stop} <name>
|
||||
|
||||
Example:
|
||||
sd-helper -parent system.slice start system-pod123.slice
|
||||
`)
|
||||
// Showing usage is always treated as a failure by this tool.
os.Exit(1)
|
||||
}
|
||||
|
||||
var (
|
||||
debug = flag.Bool("debug", false, "enable debug output")
|
||||
parent = flag.String("parent", "", "parent unit name")
|
||||
)
|
||||
|
||||
func main() {
|
||||
if !systemd.IsRunningSystemd() {
|
||||
logrus.Fatal("systemd is required")
|
||||
}
|
||||
|
||||
// Set the flags.
|
||||
flag.Parse()
|
||||
if *debug {
|
||||
logrus.SetLevel(logrus.DebugLevel)
|
||||
}
|
||||
if flag.NArg() != 2 {
|
||||
usage()
|
||||
}
|
||||
|
||||
cmd := flag.Arg(0)
|
||||
unit := flag.Arg(1)
|
||||
|
||||
err := unitCommand(cmd, unit, *parent)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func newManager(config *configs.Cgroup) (cgroups.Manager, error) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return systemd.NewUnifiedManager(config, "")
|
||||
}
|
||||
return systemd.NewLegacyManager(config, nil)
|
||||
}
|
||||
|
||||
func unitCommand(cmd, name, parent string) error {
|
||||
podConfig := &configs.Cgroup{
|
||||
Name: name,
|
||||
Parent: parent,
|
||||
Resources: &configs.Resources{},
|
||||
}
|
||||
pm, err := newManager(podConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch cmd {
|
||||
case "start":
|
||||
return pm.Apply(-1)
|
||||
case "stop":
|
||||
return pm.Destroy()
|
||||
}
|
||||
|
||||
return fmt.Errorf("unknown command: %s", cmd)
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
# Seccomp Agent
|
||||
|
||||
## Warning
|
||||
|
||||
Please note that this is an example agent; as such, it is possible that specially
|
||||
crafted messages can make it misbehave. Please use it as an example only.
|
||||
|
||||
Also, this agent is used for integration tests. Be aware that changing the
|
||||
behaviour can break the integration tests.
|
||||
|
||||
## Get started
|
||||
|
||||
Compile runc and seccompagent:
|
||||
```bash
|
||||
make all
|
||||
```
|
||||
|
||||
Run the seccomp agent in the background:
|
||||
```bash
|
||||
sudo ./contrib/cmd/seccompagent/seccompagent &
|
||||
```
|
||||
|
||||
Prepare a container:
|
||||
```bash
|
||||
mkdir container-seccomp-notify
|
||||
cd container-seccomp-notify
|
||||
mkdir rootfs
|
||||
docker export $(docker create busybox) | tar -C rootfs -xvf -
|
||||
```
|
||||
|
||||
Then, generate a config.json by running the script gen-seccomp-example-cfg.sh
|
||||
(located in the same directory as this README.md) from inside the container
|
||||
directory you prepared earlier (`container-seccomp-notify`).
|
||||
|
||||
Then start the container:
|
||||
```bash
|
||||
runc run mycontainerid
|
||||
```
|
||||
|
||||
The container will output something like this:
|
||||
```bash
|
||||
+ cd /dev/shm
|
||||
+ mkdir test-dir
|
||||
+ touch test-file
|
||||
+ chmod 777 test-file
|
||||
chmod: changing permissions of 'test-file': No medium found
|
||||
+ stat /dev/shm/test-dir-foo
|
||||
File: /dev/shm/test-dir-foo
|
||||
Size: 40 Blocks: 0 IO Block: 4096 directory
|
||||
Device: 3eh/62d Inode: 2 Links: 2
|
||||
Access: (0755/drwxr-xr-x) Uid: ( 0/ root) Gid: ( 0/ root)
|
||||
Access: 2021-09-09 15:03:13.043716040 +0000
|
||||
Modify: 2021-09-09 15:03:13.043716040 +0000
|
||||
Change: 2021-09-09 15:03:13.043716040 +0000
|
||||
Birth: -
|
||||
+ ls -l /dev/shm
|
||||
total 0
|
||||
drwxr-xr-x 2 root root 40 Sep 9 15:03 test-dir-foo
|
||||
-rw-r--r-- 1 root root 0 Sep 9 15:03 test-file
|
||||
+ echo Note the agent added a suffix for the directory name and chmod fails
|
||||
Note the agent added a suffix for the directory name and chmod fails
|
||||
```
|
||||
|
||||
This shows a simple example that runs in /dev/shm just because it is a tmpfs in
|
||||
the example config.json.
|
||||
|
||||
The agent makes all chmod calls fail with ENOMEDIUM, as the example output shows.
|
||||
|
||||
For mkdir, the agent adds a "-foo" suffix: the container runs "mkdir test-dir"
|
||||
but the directory created is "test-dir-foo".
|
|
@ -0,0 +1,35 @@
|
|||
#!/usr/bin/env bash
|
||||
# Detect if we are running inside bats (i.e. inside integration tests) or just
|
||||
# called by an end-user
|
||||
# bats-core v1.2.1 defines BATS_RUN_TMPDIR
|
||||
if [ -z "$BATS_RUN_TMPDIR" ]; then
|
||||
# When not running in bats, we create the config.json
|
||||
# Note: "set -e" is only turned on in this end-user branch, and it stays in
# effect for the remainder of the script once enabled.
set -e
|
||||
runc spec
|
||||
fi
|
||||
|
||||
# We can't source $(dirname $0)/../../../tests/integration/helpers.bash as that
|
||||
# exits when not running inside bats. We can do hacks, but just to redefine
|
||||
# update_config() seems clearer. We don't even really need to keep them in sync.
|
||||
# update_config FILTER
#
# Applies the jq filter passed as $1 to ./config.json and writes the result
# back to ./config.json.
#
# The awk stage is what makes the in-place rewrite work: it first reads jq's
# output from stdin ("-") as a single record (RS="" makes blank lines the
# record separator) and only afterwards prints that record to the file named
# by its first argument, ./config.json.
function update_config() {
|
||||
jq "$1" "./config.json" | awk 'BEGIN{RS="";getline<"-";print>ARGV[1]}' "./config.json"
|
||||
}
|
||||
|
||||
# Install the seccomp profile: every syscall is allowed by default, while
# chmod, fchmod, fchmodat and mkdir are turned into notifications
# (SCMP_ACT_NOTIFY) for the listener at /run/seccomp-agent.socket, with "foo"
# as the listener metadata.
update_config '.linux.seccomp = {
|
||||
"defaultAction": "SCMP_ACT_ALLOW",
|
||||
"listenerPath": "/run/seccomp-agent.socket",
|
||||
"listenerMetadata": "foo",
|
||||
"architectures": [ "SCMP_ARCH_X86", "SCMP_ARCH_X32", "SCMP_ARCH_X86_64" ],
|
||||
"syscalls": [
|
||||
{
|
||||
"names": [ "chmod", "fchmod", "fchmodat", "mkdir" ],
|
||||
"action": "SCMP_ACT_NOTIFY"
|
||||
}
|
||||
]
|
||||
}'
|
||||
|
||||
# Replace the container command with a shell one-liner that exercises the
# notified syscalls in /dev/shm: it runs mkdir and chmod, then stats
# test-dir-foo and lists the directory.
update_config '.process.args = [
|
||||
"sh",
|
||||
"-c",
|
||||
"set -x; cd /dev/shm; mkdir test-dir; touch test-file; chmod 777 test-file; stat /dev/shm/test-dir-foo && ls -l /dev/shm && echo \"Note the agent added a suffix for the directory name and chmod fails\" "
|
||||
]'
|
|
@ -0,0 +1,291 @@
|
|||
//go:build linux && seccomp
|
||||
// +build linux,seccomp
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
socketFile string
|
||||
pidFile string
|
||||
)
|
||||
|
||||
func closeStateFds(recvFds []int) {
|
||||
for i := range recvFds {
|
||||
unix.Close(i)
|
||||
}
|
||||
}
|
||||
|
||||
// parseStateFds returns the seccomp-fd and closes the rest of the fds in recvFds.
|
||||
// In case of error, no fd is closed.
|
||||
// StateFds is assumed to be formatted as specs.ContainerProcessState.Fds and
|
||||
// recvFds the corresponding list of received fds in the same SCM_RIGHT message.
|
||||
func parseStateFds(stateFds []string, recvFds []int) (uintptr, error) {
|
||||
// Let's find the index in stateFds of the seccomp-fd.
|
||||
idx := -1
|
||||
err := false
|
||||
|
||||
for i, name := range stateFds {
|
||||
if name == specs.SeccompFdName && idx == -1 {
|
||||
idx = i
|
||||
continue
|
||||
}
|
||||
|
||||
// We found the seccompFdName twice. Error out!
|
||||
if name == specs.SeccompFdName && idx != -1 {
|
||||
err = true
|
||||
}
|
||||
}
|
||||
|
||||
if idx == -1 || err {
|
||||
return 0, errors.New("seccomp fd not found or malformed containerProcessState.Fds")
|
||||
}
|
||||
|
||||
if idx >= len(recvFds) || idx < 0 {
|
||||
return 0, errors.New("seccomp fd index out of range")
|
||||
}
|
||||
|
||||
fd := uintptr(recvFds[idx])
|
||||
|
||||
for i := range recvFds {
|
||||
if i == idx {
|
||||
continue
|
||||
}
|
||||
|
||||
unix.Close(recvFds[i])
|
||||
}
|
||||
|
||||
return fd, nil
|
||||
}
|
||||
|
||||
func handleNewMessage(sockfd int) (uintptr, string, error) {
|
||||
const maxNameLen = 4096
|
||||
stateBuf := make([]byte, maxNameLen)
|
||||
oobSpace := unix.CmsgSpace(4)
|
||||
oob := make([]byte, oobSpace)
|
||||
|
||||
n, oobn, _, _, err := unix.Recvmsg(sockfd, stateBuf, oob, 0)
|
||||
if err != nil {
|
||||
return 0, "", err
|
||||
}
|
||||
if n >= maxNameLen || oobn != oobSpace {
|
||||
return 0, "", fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
||||
}
|
||||
|
||||
// Truncate.
|
||||
stateBuf = stateBuf[:n]
|
||||
oob = oob[:oobn]
|
||||
|
||||
scms, err := unix.ParseSocketControlMessage(oob)
|
||||
if err != nil {
|
||||
return 0, "", err
|
||||
}
|
||||
if len(scms) != 1 {
|
||||
return 0, "", fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
||||
}
|
||||
scm := scms[0]
|
||||
|
||||
fds, err := unix.ParseUnixRights(&scm)
|
||||
if err != nil {
|
||||
return 0, "", err
|
||||
}
|
||||
|
||||
containerProcessState := &specs.ContainerProcessState{}
|
||||
err = json.Unmarshal(stateBuf, containerProcessState)
|
||||
if err != nil {
|
||||
closeStateFds(fds)
|
||||
return 0, "", fmt.Errorf("cannot parse OCI state: %w", err)
|
||||
}
|
||||
|
||||
fd, err := parseStateFds(containerProcessState.Fds, fds)
|
||||
if err != nil {
|
||||
closeStateFds(fds)
|
||||
return 0, "", err
|
||||
}
|
||||
|
||||
return fd, containerProcessState.Metadata, nil
|
||||
}
|
||||
|
||||
func readArgString(pid uint32, offset int64) (string, error) {
|
||||
buffer := make([]byte, 4096) // PATH_MAX
|
||||
|
||||
memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_RDONLY, 0o777)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer unix.Close(memfd)
|
||||
|
||||
_, err = unix.Pread(memfd, buffer, offset)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
buffer[len(buffer)-1] = 0
|
||||
s := buffer[:bytes.IndexByte(buffer, 0)]
|
||||
return string(s), nil
|
||||
}
|
||||
|
||||
func runMkdirForContainer(pid uint32, fileName string, mode uint32, metadata string) error {
|
||||
// We validated before that metadata is not a string that can make
|
||||
// newFile a file in a different location other than root.
|
||||
newFile := fmt.Sprintf("%s-%s", fileName, metadata)
|
||||
root := fmt.Sprintf("/proc/%d/cwd/", pid)
|
||||
|
||||
if strings.HasPrefix(fileName, "/") {
|
||||
// If it starts with /, use the rootfs as base
|
||||
root = fmt.Sprintf("/proc/%d/root/", pid)
|
||||
}
|
||||
|
||||
path, err := securejoin.SecureJoin(root, newFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return unix.Mkdir(path, mode)
|
||||
}
|
||||
|
||||
// notifHandler handles seccomp notifications and responses
|
||||
func notifHandler(fd libseccomp.ScmpFd, metadata string) {
|
||||
defer unix.Close(int(fd))
|
||||
for {
|
||||
req, err := libseccomp.NotifReceive(fd)
|
||||
if err != nil {
|
||||
logrus.Errorf("Error in NotifReceive(): %s", err)
|
||||
continue
|
||||
}
|
||||
syscallName, err := req.Data.Syscall.GetName()
|
||||
if err != nil {
|
||||
logrus.Errorf("Error decoding syscall %v(): %s", req.Data.Syscall, err)
|
||||
continue
|
||||
}
|
||||
logrus.Debugf("Received syscall %q, pid %v, arch %q, args %+v", syscallName, req.Pid, req.Data.Arch, req.Data.Args)
|
||||
|
||||
resp := &libseccomp.ScmpNotifResp{
|
||||
ID: req.ID,
|
||||
Error: 0,
|
||||
Val: 0,
|
||||
Flags: libseccomp.NotifRespFlagContinue,
|
||||
}
|
||||
|
||||
// TOCTOU check
|
||||
if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
|
||||
logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
switch syscallName {
|
||||
case "mkdir":
|
||||
fileName, err := readArgString(req.Pid, int64(req.Data.Args[0]))
|
||||
if err != nil {
|
||||
logrus.Errorf("Cannot read argument: %s", err)
|
||||
resp.Error = int32(unix.ENOSYS)
|
||||
resp.Val = ^uint64(0) // -1
|
||||
goto sendResponse
|
||||
}
|
||||
|
||||
logrus.Debugf("mkdir: %q", fileName)
|
||||
|
||||
// TOCTOU check
|
||||
if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
|
||||
logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
err = runMkdirForContainer(req.Pid, fileName, uint32(req.Data.Args[1]), metadata)
|
||||
if err != nil {
|
||||
resp.Error = int32(unix.ENOSYS)
|
||||
resp.Val = ^uint64(0) // -1
|
||||
}
|
||||
resp.Flags = 0
|
||||
case "chmod", "fchmod", "fchmodat":
|
||||
resp.Error = int32(unix.ENOMEDIUM)
|
||||
resp.Val = ^uint64(0) // -1
|
||||
resp.Flags = 0
|
||||
}
|
||||
|
||||
sendResponse:
|
||||
if err = libseccomp.NotifRespond(fd, resp); err != nil {
|
||||
logrus.Errorf("Error in notification response: %s", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.StringVar(&socketFile, "socketfile", "/run/seccomp-agent.socket", "Socket file")
|
||||
flag.StringVar(&pidFile, "pid-file", "", "Pid file")
|
||||
logrus.SetLevel(logrus.DebugLevel)
|
||||
|
||||
// Parse arguments
|
||||
flag.Parse()
|
||||
if flag.NArg() > 0 {
|
||||
flag.PrintDefaults()
|
||||
logrus.Fatal("Invalid command")
|
||||
}
|
||||
|
||||
if err := os.Remove(socketFile); err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
logrus.Fatalf("Cannot cleanup socket file: %v", err)
|
||||
}
|
||||
|
||||
if pidFile != "" {
|
||||
pid := fmt.Sprintf("%d", os.Getpid())
|
||||
if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil {
|
||||
logrus.Fatalf("Cannot write pid file: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
logrus.Info("Waiting for seccomp file descriptors")
|
||||
l, err := net.Listen("unix", socketFile)
|
||||
if err != nil {
|
||||
logrus.Fatalf("Cannot listen: %s", err)
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
for {
|
||||
conn, err := l.Accept()
|
||||
if err != nil {
|
||||
logrus.Errorf("Cannot accept connection: %s", err)
|
||||
continue
|
||||
}
|
||||
socket, err := conn.(*net.UnixConn).File()
|
||||
conn.Close()
|
||||
if err != nil {
|
||||
logrus.Errorf("Cannot get socket: %v", err)
|
||||
continue
|
||||
}
|
||||
newFd, metadata, err := handleNewMessage(int(socket.Fd()))
|
||||
socket.Close()
|
||||
if err != nil {
|
||||
logrus.Errorf("Error receiving seccomp file descriptor: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Make sure we don't allow strings like "/../p", as that means
|
||||
// a file in a different location than expected. We just want
|
||||
// safe things to use as a suffix for a file name.
|
||||
metadata = filepath.Base(metadata)
|
||||
if strings.Contains(metadata, "/") {
|
||||
// Fallback to a safe string.
|
||||
metadata = "agent-generated-suffix"
|
||||
}
|
||||
|
||||
logrus.Infof("Received new seccomp fd: %v", newFd)
|
||||
go notifHandler(libseccomp.ScmpFd(newFd), metadata)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
//go:build !linux || !seccomp
|
||||
// +build !linux !seccomp
|
||||
|
||||
package main
|
||||
|
||||
import "fmt"
|
||||
|
||||
// main is the stub entry point compiled when the build is not for linux or
// the seccomp build tag is missing. It prints a one-line notice explaining
// how to enable the agent and returns normally.
func main() {
	const notice = "Not supported, to use this compile with build tag: seccomp."
	fmt.Println(notice)
}
|
|
@ -113,6 +113,8 @@ __runc_complete_capabilities() {
|
|||
AUDIT_WRITE
|
||||
AUDIT_READ
|
||||
BLOCK_SUSPEND
|
||||
BPF
|
||||
CHECKPOINT_RESTORE
|
||||
CHOWN
|
||||
DAC_OVERRIDE
|
||||
DAC_READ_SEARCH
|
||||
|
@ -130,6 +132,7 @@ __runc_complete_capabilities() {
|
|||
NET_BIND_SERVICE
|
||||
NET_BROADCAST
|
||||
NET_RAW
|
||||
PERFMON
|
||||
SETFCAP
|
||||
SETGID
|
||||
SETPCAP
|
||||
|
@ -170,6 +173,7 @@ _runc_exec() {
|
|||
--apparmor
|
||||
--cap, -c
|
||||
--preserve-fds
|
||||
--ignore-paused
|
||||
"
|
||||
|
||||
local all_options="$options_with_args $boolean_options"
|
||||
|
@ -221,6 +225,7 @@ _runc_runc() {
|
|||
--help
|
||||
--version -v
|
||||
--debug
|
||||
--systemd-cgroup
|
||||
"
|
||||
local options_with_args="
|
||||
--log
|
||||
|
@ -733,8 +738,6 @@ _runc_update() {
|
|||
--cpu-share
|
||||
--cpuset-cpus
|
||||
--cpuset-mems
|
||||
--kernel-memory
|
||||
--kernel-memory-tcp
|
||||
--memory
|
||||
--memory-reservation
|
||||
--memory-swap
|
||||
|
@ -769,7 +772,6 @@ _runc() {
|
|||
delete
|
||||
events
|
||||
exec
|
||||
init
|
||||
kill
|
||||
list
|
||||
pause
|
||||
|
|
21
create.go
21
create.go
|
@ -1,6 +1,7 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/urfave/cli"
|
||||
|
@ -55,20 +56,12 @@ command(s) that get executed on start, edit the args parameter of the spec. See
|
|||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := revisePidFile(context); err != nil {
|
||||
return err
|
||||
status, err := startContainer(context, CT_ACT_CREATE, nil)
|
||||
if err == nil {
|
||||
// exit with the container's exit status so any external supervisor
|
||||
// is notified of the exit with the correct exit status.
|
||||
os.Exit(status)
|
||||
}
|
||||
spec, err := setupSpec(context)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
status, err := startContainer(context, spec, CT_ACT_CREATE, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// exit with the container's exit status so any external supervisor is
|
||||
// notified of the exit with the correct exit status.
|
||||
os.Exit(status)
|
||||
return nil
|
||||
return fmt.Errorf("runc create failed: %w", err)
|
||||
},
|
||||
}
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
runc (1.1.0-ok1) yangtze; urgency=medium
|
||||
|
||||
* Merge new upstream version 1.1.0
|
||||
|
||||
-- Luoyaoming <luoyaoming@kylinos.cn> Fri, 30 Dec 2022 11:11:29 +0800
|
||||
|
||||
runc (1.0.0~rc10-ok2) yangtze; urgency=medium
|
||||
|
||||
* Update version.
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
From: Dmitry Smirnov <onlyjob@debian.org>
|
||||
Date: Thu, 28 Jul 2022 16:28:22 +0800
|
||||
Subject: fix FTBFS on i686
|
||||
|
||||
src/github.com/opencontainers/runc/libcontainer/user/user_test.go:448:36: constant 2147483648 overflows int
|
||||
Last-Update: 2018-06-16
|
||||
Forwarded: https://github.com/opencontainers/runc/pull/1821
|
||||
Bug-Upstream: https://github.com/opencontainers/runc/issues/941
|
||||
---
|
||||
libcontainer/user/user.go | 2 +-
|
||||
libcontainer/user/user_test.go | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/libcontainer/user/user.go b/libcontainer/user/user.go
|
||||
index 7b912bb..38caded 100644
|
||||
--- a/libcontainer/user/user.go
|
||||
+++ b/libcontainer/user/user.go
|
||||
@@ -473,7 +473,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
|
||||
return nil, fmt.Errorf("Unable to find group %s", ag)
|
||||
}
|
||||
// Ensure gid is inside gid range.
|
||||
- if gid < minId || gid > maxId {
|
||||
+ if gid < minId || gid >= maxId {
|
||||
return nil, ErrRange
|
||||
}
|
||||
gidMap[gid] = struct{}{}
|
||||
diff --git a/libcontainer/user/user_test.go b/libcontainer/user/user_test.go
|
||||
index 24ee559..a4aabdc 100644
|
||||
--- a/libcontainer/user/user_test.go
|
||||
+++ b/libcontainer/user/user_test.go
|
||||
@@ -445,7 +445,7 @@ this is just some garbage data
|
||||
if utils.GetIntSize() > 4 {
|
||||
tests = append(tests, foo{
|
||||
// groups with too large id
|
||||
- groups: []string{strconv.Itoa(1 << 31)},
|
||||
+ groups: []string{strconv.Itoa( 1<<31 -1 )},
|
||||
expected: nil,
|
||||
hasError: true,
|
||||
})
|
|
@ -0,0 +1,48 @@
|
|||
From: Dmitry Smirnov <onlyjob@debian.org>
|
||||
Date: Thu, 28 Jul 2022 16:28:22 +0800
|
||||
Subject: disabled unreliable tests due to random failures on [ppc64el,
|
||||
s390x].
|
||||
|
||||
Last-Update: 2018-09-27
|
||||
Forwarded: not-needed
|
||||
Bug-Upstream: https://github.com/opencontainers/runc/issues/1822
|
||||
---
|
||||
libcontainer/cgroups/fs/hugetlb_test.go | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/libcontainer/cgroups/fs/hugetlb_test.go b/libcontainer/cgroups/fs/hugetlb_test.go
|
||||
index 9ddacfe..9b60650 100644
|
||||
--- a/libcontainer/cgroups/fs/hugetlb_test.go
|
||||
+++ b/libcontainer/cgroups/fs/hugetlb_test.go
|
||||
@@ -89,6 +89,7 @@ func TestHugetlbStats(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||
+t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
@@ -104,6 +105,7 @@ func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||
+t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
@@ -121,6 +123,7 @@ func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||
+t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
@@ -139,6 +142,7 @@ func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
|
||||
+t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
|
@ -0,0 +1,22 @@
|
|||
From: Dmitry Smirnov <onlyjob@debian.org>
|
||||
Date: Thu, 28 Jul 2022 16:28:22 +0800
|
||||
Subject: disable test (requires root)
|
||||
|
||||
Last-Update: 2018-06-15
|
||||
Forwarded: not-needed
|
||||
---
|
||||
libcontainer/factory_linux_test.go | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/libcontainer/factory_linux_test.go b/libcontainer/factory_linux_test.go
|
||||
index 8d0ca8a..1dc0180 100644
|
||||
--- a/libcontainer/factory_linux_test.go
|
||||
+++ b/libcontainer/factory_linux_test.go
|
||||
@@ -78,6 +78,7 @@ func TestFactoryNewIntelRdt(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestFactoryNewTmpfs(t *testing.T) {
|
||||
+t.Skip("DM - skipping privileged test")
|
||||
root, rerr := newTestRoot()
|
||||
if rerr != nil {
|
||||
t.Fatal(rerr)
|
12
delete.go
12
delete.go
|
@ -1,12 +1,10 @@
|
|||
// +build !solaris
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
|
@ -19,12 +17,12 @@ func killContainer(container libcontainer.Container) error {
|
|||
_ = container.Signal(unix.SIGKILL, false)
|
||||
for i := 0; i < 100; i++ {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
if err := container.Signal(syscall.Signal(0), false); err != nil {
|
||||
if err := container.Signal(unix.Signal(0), false); err != nil {
|
||||
destroy(container)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("container init still running")
|
||||
return errors.New("container init still running")
|
||||
}
|
||||
|
||||
var deleteCommand = cli.Command{
|
||||
|
@ -55,7 +53,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
|
|||
force := context.Bool("force")
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists {
|
||||
if errors.Is(err, libcontainer.ErrNotExist) {
|
||||
// if there was an aborted start or something of the sort then the container's directory could exist but
|
||||
// libcontainer does not see it because the state.json file inside that directory was never created.
|
||||
path := filepath.Join(context.GlobalString("root"), id)
|
||||
|
@ -81,7 +79,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
|
|||
if force {
|
||||
return killContainer(container)
|
||||
}
|
||||
return fmt.Errorf("cannot delete container %s that is not stopped: %s\n", id, s)
|
||||
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,62 @@
|
|||
# cgroup v2
|
||||
|
||||
runc fully supports cgroup v2 (unified mode) since v1.0.0-rc93.
|
||||
|
||||
To use cgroup v2, you might need to change the configuration of the host init system.
|
||||
Fedora (>= 31) uses cgroup v2 by default and no extra configuration is required.
|
||||
On other systemd-based distros, cgroup v2 can be enabled by adding `systemd.unified_cgroup_hierarchy=1` to the kernel cmdline.
|
||||
|
||||
## Am I using cgroup v2?
|
||||
|
||||
Yes if `/sys/fs/cgroup/cgroup.controllers` is present.
|
||||
|
||||
## Host Requirements
|
||||
### Kernel
|
||||
* Recommended version: 5.2 or later
|
||||
* Minimum version: 4.15
|
||||
|
||||
Kernels older than 5.2 are not recommended due to the lack of the freezer.
|
||||
|
||||
Notably, kernels older than 4.15 MUST NOT be used (unless you are running containers with user namespaces), as they lack support for controlling permissions of devices.
|
||||
|
||||
### Systemd
|
||||
On cgroup v2 hosts, it is highly recommended to run runc with the systemd cgroup driver (`runc --systemd-cgroup`), though not mandatory.
|
||||
|
||||
The recommended systemd version is 244 or later. Older systemd does not support delegation of `cpuset` controller.
|
||||
|
||||
Make sure you also have the `dbus-user-session` (Debian/Ubuntu) or `dbus-daemon` (CentOS/Fedora) package installed, and that `dbus` is running. On Debian-flavored distros, this can be accomplished like so:
|
||||
|
||||
```console
|
||||
$ sudo apt install -y dbus-user-session
|
||||
$ systemctl --user start dbus
|
||||
```
|
||||
|
||||
## Rootless
|
||||
On cgroup v2 hosts, rootless runc can talk to systemd to get cgroup permissions to be delegated.
|
||||
|
||||
```console
|
||||
$ runc spec --rootless
|
||||
$ jq '.linux.cgroupsPath="user.slice:runc:foo"' config.json | sponge config.json
|
||||
$ runc --systemd-cgroup run foo
|
||||
```
|
||||
|
||||
The container processes are executed in a cgroup like `/user.slice/user-$(id -u).slice/user@$(id -u).service/user.slice/runc-foo.scope`.
|
||||
|
||||
### Configuring delegation
|
||||
Typically, only `memory` and `pids` controllers are delegated to non-root users by default.
|
||||
|
||||
```console
|
||||
$ cat /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers
|
||||
memory pids
|
||||
```
|
||||
|
||||
To allow delegation of other controllers, you need to change the systemd configuration as follows:
|
||||
|
||||
```console
|
||||
# mkdir -p /etc/systemd/system/user@.service.d
|
||||
# cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
|
||||
[Service]
|
||||
Delegate=cpu cpuset io memory pids
|
||||
EOF
|
||||
# systemctl daemon-reload
|
||||
```
|
|
@ -0,0 +1,11 @@
|
|||
# Experimental features
|
||||
|
||||
The following features are experimental and subject to change:
|
||||
|
||||
- The `runc features` command (since runc v1.1.0)
|
||||
|
||||
The following features were experimental in the past:
|
||||
|
||||
Feature | Experimental release | Graduation release
|
||||
---------------------------------------- | -------------------- | ------------------
|
||||
cgroup v2 | v1.0.0-rc91 | v1.0.0-rc93
|
|
@ -0,0 +1,130 @@
|
|||
## systemd cgroup driver
|
||||
|
||||
By default, runc creates cgroups and sets cgroup limits on its own (this mode
|
||||
is known as the fs cgroup driver). When the `--systemd-cgroup` global option is given
|
||||
(as in e.g. `runc --systemd-cgroup run ...`), runc switches to systemd cgroup
|
||||
driver. This document describes its features and peculiarities.
|
||||
|
||||
### systemd unit name and placement
|
||||
|
||||
When creating a container, runc requests systemd (over dbus) to create
|
||||
a transient unit for the container, and place it into a specified slice.
|
||||
|
||||
The names of the unit and the containing slice are derived from the container
|
||||
runtime spec in the following way:
|
||||
|
||||
1. If `Linux.CgroupsPath` is set, it is expected to be in the form
|
||||
`[slice]:[prefix]:[name]`.
|
||||
|
||||
Here `slice` is a systemd slice under which the container is placed.
|
||||
If empty, it defaults to `system.slice`, except when cgroup v2 is
|
||||
used and rootless container is created, in which case it defaults
|
||||
to `user.slice`.
|
||||
|
||||
Note that `slice` can contain dashes to denote a sub-slice
|
||||
(e.g. `user-1000.slice` is a correct notation, meaning a subslice
|
||||
of `user.slice`), but it must not contain slashes (e.g.
|
||||
`user.slice/user-1000.slice` is invalid).
|
||||
|
||||
A `slice` of `-` represents a root slice.
|
||||
|
||||
Next, `prefix` and `name` are used to compose the unit name, which
|
||||
is `<prefix>-<name>.scope`, unless `name` has `.slice` suffix, in
|
||||
which case `prefix` is ignored and the `name` is used as is.
|
||||
|
||||
2. If `Linux.CgroupsPath` is not set or empty, it works the same way as if it
|
||||
were set to `:runc:<container-id>`. See the description above to see
|
||||
what it transforms to.
|
||||
|
||||
As described above, a unit being created can either be a scope or a slice.
|
||||
For a scope, runc specifies its parent slice via a _Slice=_ systemd property,
|
||||
and also sets _Delegate=true_. For a slice, runc specifies a weak dependency on
|
||||
the parent slice via a _Wants=_ property.
|
||||
|
||||
### Resource limits
|
||||
|
||||
runc always enables accounting for all controllers, regardless of any limits
|
||||
being set. This means it unconditionally sets the following properties for the
|
||||
systemd unit being created:
|
||||
|
||||
* _CPUAccounting=true_
|
||||
* _IOAccounting=true_ (_BlockIOAccounting_ for cgroup v1)
|
||||
* _MemoryAccounting=true_
|
||||
* _TasksAccounting=true_
|
||||
|
||||
The resource limits of the systemd unit are set by runc by translating the
|
||||
runtime spec resources to systemd unit properties.
|
||||
|
||||
Such translation is by no means complete, as there are some cgroup properties
|
||||
that cannot be set via systemd. Therefore, the runc systemd cgroup driver is
|
||||
backed by fs driver (in other words, cgroup limits are first set via systemd
|
||||
unit properties, and then by writing to cgroupfs files).
|
||||
|
||||
The set of runtime spec resources which is translated by runc to systemd unit
|
||||
properties depends on kernel cgroup version being used (v1 or v2), and on the
|
||||
systemd version being run. If an older systemd version (which does not support
|
||||
some resources) is used, runc does not set those resources.
|
||||
|
||||
The following tables summarize which properties are translated.
|
||||
|
||||
#### cgroup v1
|
||||
|
||||
| runtime spec resource | systemd property name | min systemd version |
|
||||
|-----------------------|-----------------------|---------------------|
|
||||
| memory.limit | MemoryLimit | |
|
||||
| cpu.shares | CPUShares | |
|
||||
| blockIO.weight | BlockIOWeight | |
|
||||
| pids.limit | TasksMax | |
|
||||
| cpu.cpus | AllowedCPUs | v244 |
|
||||
| cpu.mems | AllowedMemoryNodes | v244 |
|
||||
|
||||
#### cgroup v2
|
||||
|
||||
| runtime spec resource | systemd property name | min systemd version |
|
||||
|-------------------------|-----------------------|---------------------|
|
||||
| memory.limit | MemoryMax | |
|
||||
| memory.reservation | MemoryLow | |
|
||||
| memory.swap | MemorySwapMax | |
|
||||
| cpu.shares | CPUWeight | |
|
||||
| pids.limit | TasksMax | |
|
||||
| cpu.cpus | AllowedCPUs | v244 |
|
||||
| cpu.mems | AllowedMemoryNodes | v244 |
|
||||
| unified.cpu.max | CPUQuota, CPUQuotaPeriodSec | v242 |
|
||||
| unified.cpu.weight | CPUWeight | |
|
||||
| unified.cpuset.cpus | AllowedCPUs | v244 |
|
||||
| unified.cpuset.mems | AllowedMemoryNodes | v244 |
|
||||
| unified.memory.high | MemoryHigh | |
|
||||
| unified.memory.low | MemoryLow | |
|
||||
| unified.memory.min | MemoryMin | |
|
||||
| unified.memory.max | MemoryMax | |
|
||||
| unified.memory.swap.max | MemorySwapMax | |
|
||||
| unified.pids.max | TasksMax | |
|
||||
|
||||
For documentation on systemd unit resource properties, see
|
||||
`systemd.resource-control(5)` man page.
|
||||
|
||||
### Auxiliary properties
|
||||
|
||||
Auxiliary properties of a systemd unit (as shown by `systemctl show
|
||||
<unit-name>` after the container is created) can be set (or overwritten) by
|
||||
adding annotations to the container runtime spec (`config.json`).
|
||||
|
||||
For example:
|
||||
|
||||
```json
|
||||
"annotations": {
|
||||
"org.systemd.property.TimeoutStopUSec": "uint64 123456789",
|
||||
"org.systemd.property.CollectMode":"'inactive-or-failed'"
|
||||
},
|
||||
```
|
||||
|
||||
The above will set the following properties:
|
||||
|
||||
* `TimeoutStopSec` to 2 minutes and 3 seconds;
|
||||
* `CollectMode` to "inactive-or-failed".
|
||||
|
||||
The values must be in the gvariant format (for details, see
|
||||
[gvariant documentation](https://developer.gnome.org/glib/stable/gvariant-text.html)).
|
||||
|
||||
To find out which type systemd expects for a particular parameter, please
|
||||
consult systemd sources.
|
|
@ -113,6 +113,33 @@ interact with pseudo-terminal `stdio`][tty_ioctl(4)].
|
|||
> means that it is not really possible to uniquely distinguish between `stdout`
|
||||
> and `stderr` from the caller's perspective.
|
||||
|
||||
#### Issues
|
||||
|
||||
If you see an error like
|
||||
|
||||
```
|
||||
open /dev/tty: no such device or address
|
||||
```
|
||||
|
||||
from runc, it means it can't open a terminal (because there isn't one). This
|
||||
can happen when stdin (and possibly also stdout and stderr) are redirected,
|
||||
or in some environments that lack a tty (such as GitHub Actions runners).
|
||||
|
||||
The solution to this is to *not* use a terminal for the container, i.e. have
|
||||
`terminal: false` in `config.json`. If the container really needs a terminal
|
||||
(some programs require one), you can provide one, using one of the following
|
||||
methods.
|
||||
|
||||
One way is to use `ssh` with the `-tt` flag. The second `t` forces a terminal
|
||||
allocation even if there's no local one -- and so it is required when stdin is
|
||||
not a terminal (some `ssh` implementations only look for a terminal on stdin).
|
||||
|
||||
Another way is to run runc under the `script` utility, like this:
|
||||
|
||||
```console
|
||||
$ script -e -c 'runc run <container>'
|
||||
```
|
||||
|
||||
[tty_ioctl(4)]: https://linux.die.net/man/4/tty_ioctl
|
||||
|
||||
### <a name="pass-through"> Pass-Through ###
|
||||
|
@ -124,7 +151,7 @@ passing of file descriptors -- [details below](#runc-modes)). As an example
|
|||
(assuming that `terminal: false` is set in `config.json`):
|
||||
|
||||
```
|
||||
% echo input | runc run some_container > /tmp/log.out 2>& /tmp/log.err
|
||||
% echo input | runc run some_container > /tmp/log.out 2> /tmp/log.err
|
||||
```
|
||||
|
||||
Here the container's various `stdio` file descriptors will be substituted with
|
||||
|
@ -228,6 +255,19 @@ Unfortunately using detached mode is a bit more complicated and requires more
|
|||
care than the foreground mode -- mainly because it is now up to the caller to
|
||||
handle the `stdio` of the container.
|
||||
|
||||
Another complication is that the parent process is responsible for acting as
|
||||
the subreaper for the container. In short, you need to call
|
||||
`prctl(PR_SET_CHILD_SUBREAPER, 1, ...)` in the parent process and correctly
|
||||
handle the implications of being a subreaper. Failing to do so may result in
|
||||
zombie processes being accumulated on your host.
|
||||
|
||||
These tasks are usually performed by a dedicated (and minimal) monitor process
|
||||
per-container. For the sake of comparison, other runtimes such as LXC do not
|
||||
have an equivalent detached mode and instead integrate this monitor process
|
||||
into the container runtime itself -- this has several tradeoffs, and runc has
|
||||
opted to support delegating the monitoring responsibility to the parent process
|
||||
through this detached mode.
|
||||
|
||||
#### Detached Pass-Through ####
|
||||
|
||||
In detached mode, pass-through actually does what it says on the tin -- the
|
||||
|
|
41
events.go
41
events.go
|
@ -1,9 +1,8 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
@ -40,7 +39,7 @@ information is displayed once every 5 seconds.`,
|
|||
}
|
||||
duration := context.Duration("interval")
|
||||
if duration <= 0 {
|
||||
return fmt.Errorf("duration interval must be greater than 0")
|
||||
return errors.New("duration interval must be greater than 0")
|
||||
}
|
||||
status, err := container.Status()
|
||||
if err != nil {
|
||||
|
@ -125,10 +124,14 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
|
|||
s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode
|
||||
s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage
|
||||
s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage
|
||||
s.CPU.Usage.PercpuKernel = cg.CpuStats.CpuUsage.PercpuUsageInKernelmode
|
||||
s.CPU.Usage.PercpuUser = cg.CpuStats.CpuUsage.PercpuUsageInUsermode
|
||||
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
|
||||
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
|
||||
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
|
||||
|
||||
s.CPUSet = types.CPUSet(cg.CPUSetStats)
|
||||
|
||||
s.Memory.Cache = cg.MemoryStats.Cache
|
||||
s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage)
|
||||
s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage)
|
||||
|
@ -151,16 +154,22 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
|
|||
}
|
||||
|
||||
if is := ls.IntelRdtStats; is != nil {
|
||||
if intelrdt.IsCatEnabled() {
|
||||
if intelrdt.IsCATEnabled() {
|
||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() {
|
||||
if intelrdt.IsMBAEnabled() {
|
||||
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
|
||||
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
|
||||
s.IntelRdt.MemBwSchema = is.MemBwSchema
|
||||
}
|
||||
if intelrdt.IsMBMEnabled() {
|
||||
s.IntelRdt.MBMStats = is.MBMStats
|
||||
}
|
||||
if intelrdt.IsCMTEnabled() {
|
||||
s.IntelRdt.CMTStats = is.CMTStats
|
||||
}
|
||||
}
|
||||
|
||||
s.NetworkInterfaces = ls.Interfaces
|
||||
|
@ -187,29 +196,17 @@ func convertMemoryEntry(c cgroups.MemoryData) types.MemoryEntry {
|
|||
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []types.BlkioEntry {
|
||||
var out []types.BlkioEntry
|
||||
for _, e := range c {
|
||||
out = append(out, types.BlkioEntry{
|
||||
Major: e.Major,
|
||||
Minor: e.Minor,
|
||||
Op: e.Op,
|
||||
Value: e.Value,
|
||||
})
|
||||
out = append(out, types.BlkioEntry(e))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *types.L3CacheInfo {
|
||||
return &types.L3CacheInfo{
|
||||
CbmMask: i.CbmMask,
|
||||
MinCbmBits: i.MinCbmBits,
|
||||
NumClosids: i.NumClosids,
|
||||
}
|
||||
ci := types.L3CacheInfo(*i)
|
||||
return &ci
|
||||
}
|
||||
|
||||
func convertMemBwInfo(i *intelrdt.MemBwInfo) *types.MemBwInfo {
|
||||
return &types.MemBwInfo{
|
||||
BandwidthGran: i.BandwidthGran,
|
||||
DelayLinear: i.DelayLinear,
|
||||
MinBandwidth: i.MinBandwidth,
|
||||
NumClosids: i.NumClosids,
|
||||
}
|
||||
mi := types.MemBwInfo(*i)
|
||||
return &mi
|
||||
}
|
||||
|
|
68
exec.go
68
exec.go
|
@ -1,9 +1,8 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
|
@ -84,15 +83,18 @@ following will output a list of processes running in the container:
|
|||
Value: &cli.StringSlice{},
|
||||
Usage: "add a capability to the bounding set for the process",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-subreaper",
|
||||
Usage: "disable the use of the subreaper used to reap reparented processes",
|
||||
Hidden: true,
|
||||
},
|
||||
cli.IntFlag{
|
||||
Name: "preserve-fds",
|
||||
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
|
||||
},
|
||||
cli.StringSliceFlag{
|
||||
Name: "cgroup",
|
||||
Usage: "run the process in an (existing) sub-cgroup(s). Format is [<controller>:]<cgroup>.",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "ignore-paused",
|
||||
Usage: "allow exec in a paused container",
|
||||
},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, minArgs); err != nil {
|
||||
|
@ -105,11 +107,38 @@ following will output a list of processes running in the container:
|
|||
if err == nil {
|
||||
os.Exit(status)
|
||||
}
|
||||
return fmt.Errorf("exec failed: %v", err)
|
||||
fatalWithCode(fmt.Errorf("exec failed: %w", err), 255)
|
||||
return nil // to satisfy the linter
|
||||
},
|
||||
SkipArgReorder: true,
|
||||
}
|
||||
|
||||
// getSubCgroupPaths parses the values of the --cgroup flag into a map from
// controller name to sub-cgroup path.
//
// Each argument has the form [<controller>[,<controller>...]:]<cgroup>. An
// argument without a "<controller>:" prefix is only accepted when it is the
// sole argument, and is stored under the empty key "". With no arguments the
// function returns a nil map and a nil error.
//
// An error is returned when an argument contains more than one colon, when a
// prefix-less argument is combined with other arguments, when a controller
// name is empty, or when the same controller is named more than once.
func getSubCgroupPaths(args []string) (map[string]string, error) {
	if len(args) == 0 {
		return nil, nil
	}
	paths := make(map[string]string, len(args))
	for _, c := range args {
		// Split into controller:path; a third field means a stray colon.
		cs := strings.SplitN(c, ":", 3)
		switch len(cs) {
		case 1: // no controller: prefix
			if len(args) != 1 {
				return nil, fmt.Errorf("invalid --cgroup argument: %s (missing <controller>: prefix)", c)
			}
			paths[""] = c
		case 2:
			// There may be a few comma-separated controllers.
			for _, ctrl := range strings.Split(cs[0], ",") {
				if ctrl == "" {
					// The empty key is reserved for the prefix-less form
					// above; accepting it here would bypass the
					// single-argument check.
					return nil, fmt.Errorf("invalid --cgroup argument: %s (empty <controller> prefix)", c)
				}
				if _, dup := paths[ctrl]; dup {
					// Do not silently let a later argument override an
					// earlier one.
					return nil, fmt.Errorf("invalid --cgroup argument(s): controller %s specified multiple times", ctrl)
				}
				paths[ctrl] = cs[1]
			}
		default:
			return nil, fmt.Errorf("invalid --cgroup argument: %s", c)
		}
	}
	return paths, nil
}
|
||||
|
||||
func execProcess(context *cli.Context) (int, error) {
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
|
@ -120,13 +149,15 @@ func execProcess(context *cli.Context) (int, error) {
|
|||
return -1, err
|
||||
}
|
||||
if status == libcontainer.Stopped {
|
||||
return -1, fmt.Errorf("cannot exec a container that has stopped")
|
||||
return -1, errors.New("cannot exec in a stopped container")
|
||||
}
|
||||
if status == libcontainer.Paused && !context.Bool("ignore-paused") {
|
||||
return -1, errors.New("cannot exec in a paused container (use --ignore-paused to override)")
|
||||
}
|
||||
path := context.String("process")
|
||||
if path == "" && len(context.Args()) == 1 {
|
||||
return -1, fmt.Errorf("process args cannot be empty")
|
||||
return -1, errors.New("process args cannot be empty")
|
||||
}
|
||||
detach := context.Bool("detach")
|
||||
state, err := container.State()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
|
@ -137,9 +168,9 @@ func execProcess(context *cli.Context) (int, error) {
|
|||
return -1, err
|
||||
}
|
||||
|
||||
logLevel := "info"
|
||||
if context.GlobalBool("debug") {
|
||||
logLevel = "debug"
|
||||
cgPaths, err := getSubCgroupPaths(context.StringSlice("cgroup"))
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
r := &runner{
|
||||
|
@ -147,12 +178,12 @@ func execProcess(context *cli.Context) (int, error) {
|
|||
shouldDestroy: false,
|
||||
container: container,
|
||||
consoleSocket: context.String("console-socket"),
|
||||
detach: detach,
|
||||
detach: context.Bool("detach"),
|
||||
pidFile: context.String("pid-file"),
|
||||
action: CT_ACT_RUN,
|
||||
init: false,
|
||||
preserveFDs: context.Int("preserve-fds"),
|
||||
logLevel: logLevel,
|
||||
subCgroupPaths: cgPaths,
|
||||
}
|
||||
return r.run(p)
|
||||
}
|
||||
|
@ -203,6 +234,7 @@ func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
|
|||
p.Env = append(p.Env, context.StringSlice("env")...)
|
||||
|
||||
// set the tty
|
||||
p.Terminal = false
|
||||
if context.IsSet("tty") {
|
||||
p.Terminal = context.Bool("tty")
|
||||
}
|
||||
|
@ -215,13 +247,13 @@ func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
|
|||
if len(u) > 1 {
|
||||
gid, err := strconv.Atoi(u[1])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err)
|
||||
return nil, fmt.Errorf("parsing %s as int for gid failed: %w", u[1], err)
|
||||
}
|
||||
p.User.GID = uint32(gid)
|
||||
}
|
||||
uid, err := strconv.Atoi(u[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err)
|
||||
return nil, fmt.Errorf("parsing %s as int for uid failed: %w", u[0], err)
|
||||
}
|
||||
p.User.UID = uint32(uid)
|
||||
}
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/capabilities"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
"github.com/opencontainers/runc/types/features"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
// featuresCommand defines the "features" subcommand. Its action validates the
// arguments via checkArgs(context, 0, exactArgs), builds a features.Features
// value from the Known*() helpers of the libcontainer packages, and writes it
// as indented JSON to context.App.Writer.
var featuresCommand = cli.Command{
|
||||
Name: "features",
|
||||
Usage: "show the enabled features",
|
||||
ArgsUsage: "",
|
||||
Description: `Show the enabled features.
|
||||
The result is parsable as a JSON.
|
||||
See https://pkg.go.dev/github.com/opencontainers/runc/types/features for the type definition.
|
||||
The types are experimental and subject to change.
|
||||
`,
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 0, exactArgs); err != nil {
|
||||
return err
|
||||
|
||||
}
|
||||
|
||||
// The boolean fields below are set from &tru, so an addressable true value is needed.
tru := true
|
||||
|
||||
feat := features.Features{
|
||||
OCIVersionMin: "1.0.0",
|
||||
OCIVersionMax: specs.Version,
|
||||
Annotations: map[string]string{
|
||||
features.AnnotationRuncVersion: version,
|
||||
features.AnnotationRuncCommit: gitCommit,
|
||||
features.AnnotationRuncCheckpointEnabled: "true",
|
||||
},
|
||||
Hooks: configs.KnownHookNames(),
|
||||
MountOptions: specconv.KnownMountOptions(),
|
||||
Linux: &features.Linux{
|
||||
Namespaces: specconv.KnownNamespaces(),
|
||||
Capabilities: capabilities.KnownCapabilities(),
|
||||
Cgroup: &features.Cgroup{
|
||||
V1: &tru,
|
||||
V2: &tru,
|
||||
Systemd: &tru,
|
||||
SystemdUser: &tru,
|
||||
},
|
||||
Apparmor: &features.Apparmor{
|
||||
Enabled: &tru,
|
||||
},
|
||||
Selinux: &features.Selinux{
|
||||
Enabled: &tru,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
// The seccomp section and the libseccomp version annotation are only filled in when seccomp.Enabled is true.
if seccomp.Enabled {
|
||||
feat.Linux.Seccomp = &features.Seccomp{
|
||||
Enabled: &tru,
|
||||
Actions: seccomp.KnownActions(),
|
||||
Operators: seccomp.KnownOperators(),
|
||||
Archs: seccomp.KnownArchs(),
|
||||
}
|
||||
major, minor, patch := seccomp.Version()
|
||||
feat.Annotations[features.AnnotationLibseccompVersion] = fmt.Sprintf("%d.%d.%d", major, minor, patch)
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(context.App.Writer)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(feat)
|
||||
},
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
module github.com/opencontainers/runc
|
||||
|
||||
go 1.16
|
||||
|
||||
require (
|
||||
github.com/checkpoint-restore/go-criu/v5 v5.3.0
|
||||
github.com/cilium/ebpf v0.7.0
|
||||
github.com/containerd/console v1.0.3
|
||||
github.com/coreos/go-systemd/v22 v22.3.2
|
||||
github.com/cyphar/filepath-securejoin v0.2.3
|
||||
github.com/docker/go-units v0.4.0
|
||||
github.com/godbus/dbus/v5 v5.0.6
|
||||
github.com/moby/sys/mountinfo v0.5.0
|
||||
github.com/mrunalp/fileutils v0.5.0
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
|
||||
github.com/opencontainers/selinux v1.10.0
|
||||
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
|
||||
// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
|
||||
github.com/urfave/cli v1.22.1
|
||||
github.com/vishvananda/netlink v1.1.0
|
||||
golang.org/x/net v0.0.0-20201224014010-6772e930b67b
|
||||
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c
|
||||
google.golang.org/protobuf v1.27.1
|
||||
)
|
|
@ -0,0 +1,80 @@
|
|||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/checkpoint-restore/go-criu/v5 v5.3.0 h1:wpFFOoomK3389ue2lAb0Boag6XPht5QYpipxmSNL4d8=
|
||||
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
|
||||
github.com/cilium/ebpf v0.7.0 h1:1k/q3ATgxSXRdrmPfH8d7YK0GfqVsEKZAX9dQZvs56k=
|
||||
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
|
||||
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
|
||||
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
|
||||
github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=
|
||||
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||
github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI=
|
||||
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
|
||||
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||
github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY=
|
||||
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro=
|
||||
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
|
||||
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI=
|
||||
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
|
||||
github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4=
|
||||
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||
github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU=
|
||||
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
|
||||
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 h1:58EBmR2dMNL2n/FnbQewK3D14nXr0V9CObDSvMJLq+Y=
|
||||
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
|
||||
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
|
||||
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
|
||||
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
||||
github.com/urfave/cli v1.22.1 h1:+mkCCcOFKPnCmVYVcURKps1Xe+3zP90gSYGNfRkjoIY=
|
||||
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
|
||||
github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0=
|
||||
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
|
||||
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k=
|
||||
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
|
||||
golang.org/x/net v0.0.0-20201224014010-6772e930b67b h1:iFwSg7t5GZmB/Q5TjiEAsdoLDrdJRC1RiF2WhuV29Qw=
|
||||
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c h1:DHcbWVXeY+0Y8HHKR+rbLwnoh2F4tNCY7rTiHJ30RmA=
|
||||
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||
google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ=
|
||||
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
31
init.go
31
init.go
|
@ -1,44 +1,37 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/logs"
|
||||
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
func init() {
|
||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||
// This is the golang entry point for runc init, executed
|
||||
// before main() but after libcontainer/nsenter's nsexec().
|
||||
runtime.GOMAXPROCS(1)
|
||||
runtime.LockOSThread()
|
||||
|
||||
level := os.Getenv("_LIBCONTAINER_LOGLEVEL")
|
||||
logLevel, err := logrus.ParseLevel(level)
|
||||
level, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGLEVEL"))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("libcontainer: failed to parse log level: %q: %v", level, err))
|
||||
panic(err)
|
||||
}
|
||||
|
||||
err = logs.ConfigureLogging(logs.Config{
|
||||
LogPipeFd: os.Getenv("_LIBCONTAINER_LOGPIPE"),
|
||||
LogFormat: "json",
|
||||
LogLevel: logLevel,
|
||||
})
|
||||
logPipeFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE"))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("libcontainer: failed to configure logging: %v", err))
|
||||
panic(err)
|
||||
}
|
||||
|
||||
logrus.SetLevel(logrus.Level(level))
|
||||
logrus.SetOutput(os.NewFile(uintptr(logPipeFd), "logpipe"))
|
||||
logrus.SetFormatter(new(logrus.JSONFormatter))
|
||||
logrus.Debug("child process in init()")
|
||||
}
|
||||
}
|
||||
|
||||
var initCommand = cli.Command{
|
||||
Name: "init",
|
||||
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
|
||||
Action: func(context *cli.Context) error {
|
||||
factory, _ := libcontainer.New("")
|
||||
if err := factory.StartInitialization(); err != nil {
|
||||
// as the error is sent back to the parent there is no need to log
|
||||
|
@ -46,5 +39,5 @@ var initCommand = cli.Command{
|
|||
os.Exit(1)
|
||||
}
|
||||
panic("libcontainer: container init failed to exec")
|
||||
},
|
||||
}
|
||||
}
|
||||
|
|
16
kill.go
16
kill.go
|
@ -1,14 +1,12 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var killCommand = cli.Command{
|
||||
|
@ -55,13 +53,17 @@ signal to the init process of the "ubuntu01" container:
|
|||
},
|
||||
}
|
||||
|
||||
func parseSignal(rawSignal string) (syscall.Signal, error) {
|
||||
func parseSignal(rawSignal string) (unix.Signal, error) {
|
||||
s, err := strconv.Atoi(rawSignal)
|
||||
if err == nil {
|
||||
return syscall.Signal(s), nil
|
||||
return unix.Signal(s), nil
|
||||
}
|
||||
signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")]
|
||||
if !ok {
|
||||
sig := strings.ToUpper(rawSignal)
|
||||
if !strings.HasPrefix(sig, "SIG") {
|
||||
sig = "SIG" + sig
|
||||
}
|
||||
signal := unix.SignalNum(sig)
|
||||
if signal == 0 {
|
||||
return -1, fmt.Errorf("unknown signal %q", rawSignal)
|
||||
}
|
||||
return signal, nil
|
||||
|
|
|
@ -57,90 +57,94 @@ struct describing how the container is to be created. A sample would look simila
|
|||
|
||||
```go
|
||||
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
var devices []*configs.DeviceRule
|
||||
for _, device := range specconv.AllowedDevices {
|
||||
devices = append(devices, &device.Rule)
|
||||
}
|
||||
config := &configs.Config{
|
||||
Rootfs: "/your/path/to/rootfs",
|
||||
Capabilities: &configs.Capabilities{
|
||||
Bounding: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
},
|
||||
Bounding: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
},
|
||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||
{Type: configs.NEWNS},
|
||||
{Type: configs.NEWUTS},
|
||||
|
@ -155,8 +159,7 @@ config := &configs.Config{
|
|||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: nil,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
Devices: devices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
|
@ -166,7 +169,7 @@ config := &configs.Config{
|
|||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Devices: specconv.AllowedDevices,
|
||||
Hostname: "testing",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
|
@ -314,7 +317,7 @@ state, err := container.State()
|
|||
#### Checkpoint & Restore
|
||||
|
||||
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
||||
This let's you save the state of a process running inside a container to disk, and then restore
|
||||
This lets you save the state of a process running inside a container to disk, and then restore
|
||||
that state into a new process, on the same machine or on another machine.
|
||||
|
||||
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
||||
|
|
|
@ -1,60 +1,16 @@
|
|||
// +build apparmor,linux
|
||||
|
||||
package apparmor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
import "errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
var (
|
||||
// IsEnabled returns true if apparmor is enabled for the host.
|
||||
IsEnabled = isEnabled
|
||||
|
||||
// ApplyProfile will apply the profile with the specified name to the process after
|
||||
// the next exec. It is only supported on Linux and produces an ErrApparmorNotEnabled
|
||||
// on other platforms.
|
||||
ApplyProfile = applyProfile
|
||||
|
||||
// ErrApparmorNotEnabled indicates that AppArmor is not enabled or not supported.
|
||||
ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
|
||||
)
|
||||
|
||||
// IsEnabled returns true if apparmor is enabled for the host.
|
||||
func IsEnabled() bool {
|
||||
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
|
||||
if _, err = os.Stat("/sbin/apparmor_parser"); err == nil {
|
||||
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||
return err == nil && len(buf) > 1 && buf[0] == 'Y'
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func setProcAttr(attr, value string) error {
|
||||
// Under AppArmor you can only change your own attr, so use /proc/self/
|
||||
// instead of /proc/<tid>/ like libapparmor does
|
||||
path := fmt.Sprintf("/proc/self/attr/%s", attr)
|
||||
|
||||
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := utils.EnsureProcHandle(f); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = fmt.Fprintf(f, "%s", value)
|
||||
return err
|
||||
}
|
||||
|
||||
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
||||
func changeOnExec(name string) error {
|
||||
value := "exec " + name
|
||||
if err := setProcAttr("exec", value); err != nil {
|
||||
return fmt.Errorf("apparmor failed to apply profile: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApplyProfile will apply the profile with the specified name to the process after
|
||||
// the next exec.
|
||||
func ApplyProfile(name string) error {
|
||||
if name == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return changeOnExec(name)
|
||||
}
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
// +build !apparmor !linux
|
||||
|
||||
package apparmor
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
|
||||
|
||||
func IsEnabled() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func ApplyProfile(name string) error {
|
||||
if name != "" {
|
||||
return ErrApparmorNotEnabled
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
package apparmor
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
var (
	// appArmorEnabled caches the result of the host probe done by isEnabled.
	appArmorEnabled bool
	// checkAppArmor guards the probe so it runs at most once per process.
	checkAppArmor sync.Once
)

// isEnabled reports whether AppArmor is enabled on the host. The host is
// probed on the first call only; later calls return the cached answer.
func isEnabled() bool {
	checkAppArmor.Do(func() {
		// Without the securityfs directory there is nothing more to check.
		if _, statErr := os.Stat("/sys/kernel/security/apparmor"); statErr != nil {
			return
		}
		data, readErr := os.ReadFile("/sys/module/apparmor/parameters/enabled")
		enabled := false
		if readErr == nil {
			// The parameter counts as enabled when it starts with 'Y' and
			// holds more than one byte.
			enabled = len(data) > 1 && data[0] == 'Y'
		}
		appArmorEnabled = enabled
	})
	return appArmorEnabled
}
|
||||
|
||||
func setProcAttr(attr, value string) error {
|
||||
// Under AppArmor you can only change your own attr, so use /proc/self/
|
||||
// instead of /proc/<tid>/ like libapparmor does
|
||||
attrPath := "/proc/self/attr/apparmor/" + attr
|
||||
if _, err := os.Stat(attrPath); errors.Is(err, os.ErrNotExist) {
|
||||
// fall back to the old convention
|
||||
attrPath = "/proc/self/attr/" + attr
|
||||
}
|
||||
|
||||
f, err := os.OpenFile(attrPath, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := utils.EnsureProcHandle(f); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = f.WriteString(value)
|
||||
return err
|
||||
}
|
||||
|
||||
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
||||
func changeOnExec(name string) error {
|
||||
if err := setProcAttr("exec", "exec "+name); err != nil {
|
||||
return fmt.Errorf("apparmor failed to apply profile: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// applyProfile will apply the profile with the specified name to the process after
|
||||
// the next exec. It is only supported on Linux and produces an error on other
|
||||
// platforms.
|
||||
func applyProfile(name string) error {
|
||||
if name == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return changeOnExec(name)
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package apparmor
|
||||
|
||||
// isEnabled always reports false in this build (the file is constrained to
// !linux), i.e. AppArmor is never considered enabled here.
func isEnabled() bool {
	const enabled = false
	return enabled
}
|
||||
|
||||
func applyProfile(name string) error {
|
||||
if name != "" {
|
||||
return ErrApparmorNotEnabled
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package capabilities
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDING | capability.AMBIENT
|
||||
|
||||
var (
|
||||
capabilityMap map[string]capability.Cap
|
||||
capTypes = []capability.CapType{
|
||||
capability.BOUNDING,
|
||||
capability.PERMITTED,
|
||||
capability.INHERITABLE,
|
||||
capability.EFFECTIVE,
|
||||
capability.AMBIENT,
|
||||
}
|
||||
)
|
||||
|
||||
func init() {
|
||||
capabilityMap = make(map[string]capability.Cap, capability.CAP_LAST_CAP+1)
|
||||
for _, c := range capability.List() {
|
||||
if c > capability.CAP_LAST_CAP {
|
||||
continue
|
||||
}
|
||||
capabilityMap["CAP_"+strings.ToUpper(c.String())] = c
|
||||
}
|
||||
}
|
||||
|
||||
// KnownCapabilities returns the list of the known capabilities.
|
||||
// Used by `runc features`.
|
||||
func KnownCapabilities() []string {
|
||||
list := capability.List()
|
||||
res := make([]string, len(list))
|
||||
for i, c := range list {
|
||||
res[i] = "CAP_" + strings.ToUpper(c.String())
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// New creates a new Caps from the given Capabilities config. Unknown Capabilities
|
||||
// or Capabilities that are unavailable in the current environment are ignored,
|
||||
// printing a warning instead.
|
||||
func New(capConfig *configs.Capabilities) (*Caps, error) {
|
||||
var (
|
||||
err error
|
||||
c Caps
|
||||
)
|
||||
|
||||
unknownCaps := make(map[string]struct{})
|
||||
c.caps = map[capability.CapType][]capability.Cap{
|
||||
capability.BOUNDING: capSlice(capConfig.Bounding, unknownCaps),
|
||||
capability.EFFECTIVE: capSlice(capConfig.Effective, unknownCaps),
|
||||
capability.INHERITABLE: capSlice(capConfig.Inheritable, unknownCaps),
|
||||
capability.PERMITTED: capSlice(capConfig.Permitted, unknownCaps),
|
||||
capability.AMBIENT: capSlice(capConfig.Ambient, unknownCaps),
|
||||
}
|
||||
if c.pid, err = capability.NewPid2(0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err = c.pid.Load(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(unknownCaps) > 0 {
|
||||
logrus.Warn("ignoring unknown or unavailable capabilities: ", mapKeys(unknownCaps))
|
||||
}
|
||||
return &c, nil
|
||||
}
|
||||
|
||||
// capSlice converts the slice of capability names in caps, to their numeric
|
||||
// equivalent, and returns them as a slice. Unknown or unavailable capabilities
|
||||
// are not returned, but appended to unknownCaps.
|
||||
func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap {
|
||||
var out []capability.Cap
|
||||
for _, c := range caps {
|
||||
if v, ok := capabilityMap[c]; !ok {
|
||||
unknownCaps[c] = struct{}{}
|
||||
} else {
|
||||
out = append(out, v)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// mapKeys returns the keys of input in sorted order. An empty or nil input
// yields a nil slice.
func mapKeys(input map[string]struct{}) []string {
	if len(input) == 0 {
		return nil
	}
	// The final length is known up front, so allocate once instead of
	// growing the slice through repeated appends.
	keys := make([]string, 0, len(input))
	for c := range input {
		keys = append(keys, c)
	}
	sort.Strings(keys)
	return keys
}
|
||||
|
||||
// Caps holds the capabilities for a container.
|
||||
type Caps struct {
|
||||
pid capability.Capabilities
|
||||
caps map[capability.CapType][]capability.Cap
|
||||
}
|
||||
|
||||
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
||||
func (c *Caps) ApplyBoundingSet() error {
|
||||
c.pid.Clear(capability.BOUNDING)
|
||||
c.pid.Set(capability.BOUNDING, c.caps[capability.BOUNDING]...)
|
||||
return c.pid.Apply(capability.BOUNDING)
|
||||
}
|
||||
|
||||
// Apply sets all the capabilities for the current process in the config.
|
||||
func (c *Caps) ApplyCaps() error {
|
||||
c.pid.Clear(allCapabilityTypes)
|
||||
for _, g := range capTypes {
|
||||
c.pid.Set(g, c.caps[g]...)
|
||||
}
|
||||
return c.pid.Apply(allCapabilityTypes)
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
package capabilities
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
cs := []string{"CAP_CHOWN", "CAP_UNKNOWN", "CAP_UNKNOWN2"}
|
||||
conf := configs.Capabilities{
|
||||
Bounding: cs,
|
||||
Effective: cs,
|
||||
Inheritable: cs,
|
||||
Permitted: cs,
|
||||
Ambient: cs,
|
||||
}
|
||||
|
||||
hook := test.NewGlobal()
|
||||
defer hook.Reset()
|
||||
|
||||
logrus.SetOutput(io.Discard)
|
||||
caps, err := New(&conf)
|
||||
logrus.SetOutput(os.Stderr)
|
||||
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
e := hook.AllEntries()
|
||||
if len(e) != 1 {
|
||||
t.Errorf("expected 1 warning, got %d", len(e))
|
||||
}
|
||||
|
||||
expectedLogs := logrus.Entry{
|
||||
Level: logrus.WarnLevel,
|
||||
Message: "ignoring unknown or unavailable capabilities: [CAP_UNKNOWN CAP_UNKNOWN2]",
|
||||
}
|
||||
|
||||
l := hook.LastEntry()
|
||||
if l == nil {
|
||||
t.Fatal("expected a warning, but got none")
|
||||
}
|
||||
if l.Level != expectedLogs.Level {
|
||||
t.Errorf("expected %q, got %q", expectedLogs.Level, l.Level)
|
||||
}
|
||||
if l.Message != expectedLogs.Message {
|
||||
t.Errorf("expected %q, got %q", expectedLogs.Message, l.Message)
|
||||
}
|
||||
|
||||
if len(caps.caps) != len(capTypes) {
|
||||
t.Errorf("expected %d capability types, got %d: %v", len(capTypes), len(caps.caps), caps.caps)
|
||||
}
|
||||
|
||||
for _, cType := range capTypes {
|
||||
if i := len(caps.caps[cType]); i != 1 {
|
||||
t.Errorf("expected 1 capability for %s, got %d: %v", cType, i, caps.caps[cType])
|
||||
continue
|
||||
}
|
||||
if caps.caps[cType][0] != capability.CAP_CHOWN {
|
||||
t.Errorf("expected CAP_CHOWN, got %s: ", caps.caps[cType][0])
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
hook.Reset()
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package capabilities
|
|
@ -1,117 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
|
||||
|
||||
var capabilityMap map[string]capability.Cap
|
||||
|
||||
func init() {
|
||||
capabilityMap = make(map[string]capability.Cap)
|
||||
last := capability.CAP_LAST_CAP
|
||||
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
|
||||
if last == capability.Cap(63) {
|
||||
last = capability.CAP_BLOCK_SUSPEND
|
||||
}
|
||||
for _, cap := range capability.List() {
|
||||
if cap > last {
|
||||
continue
|
||||
}
|
||||
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
|
||||
capabilityMap[capKey] = cap
|
||||
}
|
||||
}
|
||||
|
||||
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
|
||||
bounding := []capability.Cap{}
|
||||
for _, c := range capConfig.Bounding {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
bounding = append(bounding, v)
|
||||
}
|
||||
effective := []capability.Cap{}
|
||||
for _, c := range capConfig.Effective {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
effective = append(effective, v)
|
||||
}
|
||||
inheritable := []capability.Cap{}
|
||||
for _, c := range capConfig.Inheritable {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
inheritable = append(inheritable, v)
|
||||
}
|
||||
permitted := []capability.Cap{}
|
||||
for _, c := range capConfig.Permitted {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
permitted = append(permitted, v)
|
||||
}
|
||||
ambient := []capability.Cap{}
|
||||
for _, c := range capConfig.Ambient {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
ambient = append(ambient, v)
|
||||
}
|
||||
pid, err := capability.NewPid2(0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = pid.Load()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &containerCapabilities{
|
||||
bounding: bounding,
|
||||
effective: effective,
|
||||
inheritable: inheritable,
|
||||
permitted: permitted,
|
||||
ambient: ambient,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type containerCapabilities struct {
|
||||
pid capability.Capabilities
|
||||
bounding []capability.Cap
|
||||
effective []capability.Cap
|
||||
inheritable []capability.Cap
|
||||
permitted []capability.Cap
|
||||
ambient []capability.Cap
|
||||
}
|
||||
|
||||
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
||||
func (c *containerCapabilities) ApplyBoundingSet() error {
|
||||
c.pid.Clear(capability.BOUNDS)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
return c.pid.Apply(capability.BOUNDS)
|
||||
}
|
||||
|
||||
// Apply sets all the capabilities for the current process in the config.
|
||||
func (c *containerCapabilities) ApplyCaps() error {
|
||||
c.pid.Clear(allCapabilityTypes)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
c.pid.Set(capability.PERMITTED, c.permitted...)
|
||||
c.pid.Set(capability.INHERITABLE, c.inheritable...)
|
||||
c.pid.Set(capability.EFFECTIVE, c.effective...)
|
||||
c.pid.Set(capability.AMBIENT, c.ambient...)
|
||||
return c.pid.Apply(allCapabilityTypes)
|
||||
}
|
|
@ -1,74 +1,59 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Manager interface {
|
||||
// Applies cgroup configuration to the process with the specified pid
|
||||
// Apply creates a cgroup, if not yet created, and adds a process
|
||||
// with the specified pid into that cgroup. A special value of -1
|
||||
// can be used to merely create a cgroup.
|
||||
Apply(pid int) error
|
||||
|
||||
// Returns the PIDs inside the cgroup set
|
||||
// GetPids returns the PIDs of all processes inside the cgroup.
|
||||
GetPids() ([]int, error)
|
||||
|
||||
// Returns the PIDs inside the cgroup set & all sub-cgroups
|
||||
// GetAllPids returns the PIDs of all processes inside the cgroup
|
||||
// and all its sub-cgroups.
|
||||
GetAllPids() ([]int, error)
|
||||
|
||||
// Returns statistics for the cgroup set
|
||||
// GetStats returns cgroups statistics.
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Toggles the freezer cgroup according with specified state
|
||||
// Freeze sets the freezer cgroup to the specified state.
|
||||
Freeze(state configs.FreezerState) error
|
||||
|
||||
// Destroys the cgroup set
|
||||
// Destroy removes cgroup.
|
||||
Destroy() error
|
||||
|
||||
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
|
||||
// Paths map[string]string
|
||||
// Cgroups *configs.Cgroup
|
||||
// Paths maps cgroup subsystem to path at which it is mounted.
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems
|
||||
// Path returns a cgroup path to the specified controller/subsystem.
|
||||
// For cgroupv2, the argument is unused and can be empty.
|
||||
Path(string) string
|
||||
|
||||
// Returns cgroup paths to save in a state file and to be able to
|
||||
// restore the object later.
|
||||
// Set sets cgroup resources parameters/limits. If the argument is nil,
|
||||
// the resources specified during Manager creation (or the previous call
|
||||
// to Set) are used.
|
||||
Set(r *configs.Resources) error
|
||||
|
||||
// GetPaths returns cgroup path(s) to save in a state file in order to
|
||||
// restore later.
|
||||
//
|
||||
// For cgroup v1, a key is cgroup subsystem name, and the value is the
|
||||
// path to the cgroup for this subsystem.
|
||||
//
|
||||
// For cgroup v2 unified hierarchy, a key is "", and the value is the
|
||||
// unified path.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetUnifiedPath returns the unified path when running in unified mode.
|
||||
// The value corresponds to the all values of GetPaths() map.
|
||||
//
|
||||
// GetUnifiedPath returns error when running in hybrid mode as well as
|
||||
// in legacy mode.
|
||||
GetUnifiedPath() (string, error)
|
||||
|
||||
// Sets the cgroup as configured.
|
||||
Set(container *configs.Config) error
|
||||
|
||||
// Gets the cgroup as configured.
|
||||
// GetCgroups returns the cgroup data as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
Subsystem string
|
||||
}
|
||||
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||
GetFreezerState() (configs.FreezerState, error)
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
||||
}
|
||||
// Exists returns whether the cgroup path exists or not.
|
||||
Exists() bool
|
||||
|
||||
func NewNotFoundError(sub string) error {
|
||||
return &NotFoundError{
|
||||
Subsystem: sub,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
// OOMKillCount reports OOM kill count for the cgroup.
|
||||
OOMKillCount() (uint64, error)
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
// +build !linux
|
||||
|
||||
package cgroups
|
|
@ -0,0 +1,386 @@
|
|||
// SPDX-License-Identifier: Apache-2.0
|
||||
/*
|
||||
* Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
|
||||
* Copyright (C) 2020 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package devices
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
)
|
||||
|
||||
// deviceMeta is a Rule without the Allow or Permissions fields, and no
|
||||
// wildcard-type support. It's effectively the "match" portion of a metadata
|
||||
// rule, for the purposes of our emulation.
|
||||
type deviceMeta struct {
|
||||
node devices.Type
|
||||
major int64
|
||||
minor int64
|
||||
}
|
||||
|
||||
// deviceRule is effectively the tuple (deviceMeta, Permissions).
|
||||
type deviceRule struct {
|
||||
meta deviceMeta
|
||||
perms devices.Permissions
|
||||
}
|
||||
|
||||
// deviceRules is a mapping of device metadata rules to the associated
|
||||
// permissions in the ruleset.
|
||||
type deviceRules map[deviceMeta]devices.Permissions
|
||||
|
||||
func (r deviceRules) orderedEntries() []deviceRule {
|
||||
var rules []deviceRule
|
||||
for meta, perms := range r {
|
||||
rules = append(rules, deviceRule{meta: meta, perms: perms})
|
||||
}
|
||||
sort.Slice(rules, func(i, j int) bool {
|
||||
// Sort by (major, minor, type).
|
||||
a, b := rules[i].meta, rules[j].meta
|
||||
return a.major < b.major ||
|
||||
(a.major == b.major && a.minor < b.minor) ||
|
||||
(a.major == b.major && a.minor == b.minor && a.node < b.node)
|
||||
})
|
||||
return rules
|
||||
}
|
||||
|
||||
type Emulator struct {
|
||||
defaultAllow bool
|
||||
rules deviceRules
|
||||
}
|
||||
|
||||
func (e *Emulator) IsBlacklist() bool {
|
||||
return e.defaultAllow
|
||||
}
|
||||
|
||||
func (e *Emulator) IsAllowAll() bool {
|
||||
return e.IsBlacklist() && len(e.rules) == 0
|
||||
}
|
||||
|
||||
func parseLine(line string) (*deviceRule, error) {
|
||||
// Input: node major:minor perms.
|
||||
fields := strings.FieldsFunc(line, func(r rune) bool {
|
||||
return r == ' ' || r == ':'
|
||||
})
|
||||
if len(fields) != 4 {
|
||||
return nil, fmt.Errorf("malformed devices.list rule %s", line)
|
||||
}
|
||||
|
||||
var (
|
||||
rule deviceRule
|
||||
node = fields[0]
|
||||
major = fields[1]
|
||||
minor = fields[2]
|
||||
perms = fields[3]
|
||||
)
|
||||
|
||||
// Parse the node type.
|
||||
switch node {
|
||||
case "a":
|
||||
// Super-special case -- "a" always means every device with every
|
||||
// access mode. In fact, for devices.list this actually indicates that
|
||||
// the cgroup is in black-list mode.
|
||||
// TODO: Double-check that the entire file is "a *:* rwm".
|
||||
return nil, nil
|
||||
case "b":
|
||||
rule.meta.node = devices.BlockDevice
|
||||
case "c":
|
||||
rule.meta.node = devices.CharDevice
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown device type %q", node)
|
||||
}
|
||||
|
||||
// Parse the major number.
|
||||
if major == "*" {
|
||||
rule.meta.major = devices.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(major, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid major number: %w", err)
|
||||
}
|
||||
rule.meta.major = int64(val)
|
||||
}
|
||||
|
||||
// Parse the minor number.
|
||||
if minor == "*" {
|
||||
rule.meta.minor = devices.Wildcard
|
||||
} else {
|
||||
val, err := strconv.ParseUint(minor, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid minor number: %w", err)
|
||||
}
|
||||
rule.meta.minor = int64(val)
|
||||
}
|
||||
|
||||
// Parse the access permissions.
|
||||
rule.perms = devices.Permissions(perms)
|
||||
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
|
||||
return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||
}
|
||||
return &rule, nil
|
||||
}
|
||||
|
||||
func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
|
||||
if e.rules == nil {
|
||||
e.rules = make(map[deviceMeta]devices.Permissions)
|
||||
}
|
||||
|
||||
// Merge with any pre-existing permissions.
|
||||
oldPerms := e.rules[rule.meta]
|
||||
newPerms := rule.perms.Union(oldPerms)
|
||||
e.rules[rule.meta] = newPerms
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) rmRule(rule deviceRule) error {
|
||||
// Give an error if any of the permissions requested to be removed are
|
||||
// present in a partially-matching wildcard rule, because such rules will
|
||||
// be ignored by cgroupv1.
|
||||
//
|
||||
// This is a diversion from cgroupv1, but is necessary to avoid leading
|
||||
// users into a false sense of security. cgroupv1 will silently(!) ignore
|
||||
// requests to remove partial exceptions, but we really shouldn't do that.
|
||||
//
|
||||
// It may seem like we could just "split" wildcard rules which hit this
|
||||
// issue, but unfortunately there are 2^32 possible major and minor
|
||||
// numbers, which would exhaust kernel memory quickly if we did this. Not
|
||||
// to mention it'd be really slow (the kernel side is implemented as a
|
||||
// linked-list of exceptions).
|
||||
for _, partialMeta := range []deviceMeta{
|
||||
{node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor},
|
||||
{node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard},
|
||||
{node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard},
|
||||
} {
|
||||
// This wildcard rule is equivalent to the requested rule, so skip it.
|
||||
if rule.meta == partialMeta {
|
||||
continue
|
||||
}
|
||||
// Only give an error if the set of permissions overlap.
|
||||
partialPerms := e.rules[partialMeta]
|
||||
if !partialPerms.Intersection(rule.perms).IsEmpty() {
|
||||
return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||
}
|
||||
}
|
||||
|
||||
// Subtract all of the permissions listed from the full match rule. If the
|
||||
// rule didn't exist, all of this is a no-op.
|
||||
newPerms := e.rules[rule.meta].Difference(rule.perms)
|
||||
if newPerms.IsEmpty() {
|
||||
delete(e.rules, rule.meta)
|
||||
} else {
|
||||
e.rules[rule.meta] = newPerms
|
||||
}
|
||||
// TODO: The actual cgroup code doesn't care if an exception didn't exist
|
||||
// during removal, so not erroring out here is /accurate/ but quite
|
||||
// worrying. Maybe we should do additional validation, but again we
|
||||
// have to worry about backwards-compatibility.
|
||||
return nil
|
||||
}
|
||||
|
||||
func (e *Emulator) allow(rule *deviceRule) error {
|
||||
// This cgroup is configured as a black-list. Reset the entire emulator,
|
||||
// and put is into black-list mode.
|
||||
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: true,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
|
||||
} else {
|
||||
err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) deny(rule *deviceRule) error {
|
||||
// This cgroup is configured as a white-list. Reset the entire emulator,
|
||||
// and put is into white-list mode.
|
||||
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||
*e = Emulator{
|
||||
defaultAllow: false,
|
||||
rules: nil,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if e.defaultAllow {
|
||||
err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
|
||||
} else {
|
||||
err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (e *Emulator) Apply(rule devices.Rule) error {
|
||||
if !rule.Type.CanCgroup() {
|
||||
return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||
}
|
||||
|
||||
innerRule := &deviceRule{
|
||||
meta: deviceMeta{
|
||||
node: rule.Type,
|
||||
major: rule.Major,
|
||||
minor: rule.Minor,
|
||||
},
|
||||
perms: rule.Permissions,
|
||||
}
|
||||
if innerRule.meta.node == devices.WildcardDevice {
|
||||
innerRule = nil
|
||||
}
|
||||
|
||||
if rule.Allow {
|
||||
return e.allow(innerRule)
|
||||
}
|
||||
|
||||
return e.deny(innerRule)
|
||||
}
|
||||
|
||||
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
|
||||
// a new Emulator that represents the state of the devices cgroup. Note that
|
||||
// black-list devices cgroups cannot be fully reconstructed, due to limitations
|
||||
// in the devices cgroup API. Instead, such cgroups are always treated as
|
||||
// "allow all" cgroups.
|
||||
func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||
// Normally cgroups are in black-list mode by default, but the way we
|
||||
// figure out the current mode is whether or not devices.list has an
|
||||
// allow-all rule. So we default to a white-list, and the existence of an
|
||||
// "a *:* rwm" entry will tell us otherwise.
|
||||
e := &Emulator{
|
||||
defaultAllow: false,
|
||||
}
|
||||
|
||||
// Parse the "devices.list".
|
||||
s := bufio.NewScanner(list)
|
||||
for s.Scan() {
|
||||
line := s.Text()
|
||||
deviceRule, err := parseLine(line)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing line %q: %w", line, err)
|
||||
}
|
||||
// "devices.list" is an allow list. Note that this means that in
|
||||
// black-list mode, we have no idea what rules are in play. As a
|
||||
// result, we need to be very careful in Transition().
|
||||
if err := e.allow(deviceRule); err != nil {
|
||||
return nil, fmt.Errorf("error adding devices.list rule: %w", err)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error reading devices.list lines: %w", err)
|
||||
}
|
||||
return e, nil
|
||||
}
|
||||
|
||||
// Transition calculates what is the minimally-disruptive set of rules need to
|
||||
// be applied to a devices cgroup in order to transition to the given target.
|
||||
// This means that any already-existing rules will not be applied, and
|
||||
// disruptive rules (like denying all device access) will only be applied if
|
||||
// necessary.
|
||||
//
|
||||
// This function is the sole reason for all of Emulator -- to allow us
|
||||
// to figure out how to update a containers' cgroups without causing spurious
|
||||
// device errors (if possible).
|
||||
func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
|
||||
var transitionRules []*devices.Rule
|
||||
oldRules := source.rules
|
||||
|
||||
// If the default policy doesn't match, we need to include a "disruptive"
|
||||
// rule (either allow-all or deny-all) in order to switch the cgroup to the
|
||||
// correct default policy.
|
||||
//
|
||||
// However, due to a limitation in "devices.list" we cannot be sure what
|
||||
// deny rules are in place in a black-list cgroup. Thus if the source is a
|
||||
// black-list we also have to include a disruptive rule.
|
||||
if source.IsBlacklist() || source.defaultAllow != target.defaultAllow {
|
||||
transitionRules = append(transitionRules, &devices.Rule{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
Minor: -1,
|
||||
Permissions: devices.Permissions("rwm"),
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
// The old rules are only relevant if we aren't starting out with a
|
||||
// disruptive rule.
|
||||
oldRules = nil
|
||||
}
|
||||
|
||||
// NOTE: We traverse through the rules in a sorted order so we always write
|
||||
// the same set of rules (this is to aid testing).
|
||||
|
||||
// First, we create inverse rules for any old rules not in the new set.
|
||||
// This includes partial-inverse rules for specific permissions. This is a
|
||||
// no-op if we added a disruptive rule, since oldRules will be empty.
|
||||
for _, rule := range oldRules.orderedEntries() {
|
||||
meta, oldPerms := rule.meta, rule.perms
|
||||
newPerms := target.rules[meta]
|
||||
droppedPerms := oldPerms.Difference(newPerms)
|
||||
if !droppedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &devices.Rule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: droppedPerms,
|
||||
Allow: target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Add any additional rules which weren't in the old set. We happen to
|
||||
// filter out rules which are present in both sets, though this isn't
|
||||
// strictly necessary.
|
||||
for _, rule := range target.rules.orderedEntries() {
|
||||
meta, newPerms := rule.meta, rule.perms
|
||||
oldPerms := oldRules[meta]
|
||||
gainedPerms := newPerms.Difference(oldPerms)
|
||||
if !gainedPerms.IsEmpty() {
|
||||
transitionRules = append(transitionRules, &devices.Rule{
|
||||
Type: meta.node,
|
||||
Major: meta.major,
|
||||
Minor: meta.minor,
|
||||
Permissions: gainedPerms,
|
||||
Allow: !target.defaultAllow,
|
||||
})
|
||||
}
|
||||
}
|
||||
return transitionRules, nil
|
||||
}
|
||||
|
||||
// Rules returns the minimum set of rules necessary to convert a *deny-all*
|
||||
// cgroup to the emulated filter state (note that this is not the same as a
|
||||
// default cgroupv1 cgroup -- which is allow-all). This is effectively just a
|
||||
// wrapper around Transition() with the source emulator being an empty cgroup.
|
||||
func (e *Emulator) Rules() ([]*devices.Rule, error) {
|
||||
defaultCgroup := &Emulator{defaultAllow: false}
|
||||
return defaultCgroup.Transition(e)
|
||||
}
|
||||
|
||||
// wrapErr prefixes err with text while keeping err in the error chain (so
// errors.Is / errors.As still see it). A nil err is passed through as nil.
//
// text is treated as plain data, not as a format string, so a '%' inside it
// is reproduced verbatim instead of being interpreted as a formatting verb.
func wrapErr(err error, text string) error {
	if err == nil {
		return nil
	}
	return fmt.Errorf("%s: %w", text, err)
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
// Package devicefilter containes eBPF device filter program
|
||||
// Package devicefilter contains eBPF device filter program
|
||||
//
|
||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||
//
|
||||
|
@ -7,12 +7,14 @@
|
|||
package devicefilter
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
|
@ -22,22 +24,54 @@ const (
|
|||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
if err := p.appendDevice(devices[i]); err != nil {
|
||||
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
|
||||
// Generate the minimum ruleset for the device rules we are given. While we
|
||||
// don't care about minimum transitions in cgroupv2, using the emulator
|
||||
// gives us a guarantee that the behaviour of devices filtering is the same
|
||||
// as cgroupv1, including security hardenings to avoid misconfiguration
|
||||
// (such as punching holes in wildcard rules).
|
||||
emu := new(devicesemulator.Emulator)
|
||||
for _, rule := range rules {
|
||||
if err := emu.Apply(*rule); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
insts, err := p.finalize()
|
||||
return insts, license, err
|
||||
cleanRules, err := emu.Rules()
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
p := &program{
|
||||
defaultAllow: emu.IsBlacklist(),
|
||||
}
|
||||
p.init()
|
||||
|
||||
for idx, rule := range cleanRules {
|
||||
if rule.Type == devices.WildcardDevice {
|
||||
// We can safely skip over wildcard entries because there should
|
||||
// only be one (at most) at the very start to instruct cgroupv1 to
|
||||
// go into allow-list mode. However we do double-check this here.
|
||||
if idx != 0 || rule.Allow != emu.IsBlacklist() {
|
||||
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
continue
|
||||
}
|
||||
if rule.Allow == p.defaultAllow {
|
||||
// There should be no rules which have an action equal to the
|
||||
// default action, the emulator removes those.
|
||||
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
|
||||
}
|
||||
if err := p.appendRule(rule); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
return p.finalize(), license, nil
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
hasWildCard bool
|
||||
blockID int
|
||||
insts asm.Instructions
|
||||
defaultAllow bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
|
@ -49,7 +83,8 @@ func (p *program) init() {
|
|||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
|
||||
asm.And.Imm32(asm.R2, 0xFFFF))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
|
@ -66,39 +101,35 @@ func (p *program) init() {
|
|||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev *configs.Device) error {
|
||||
// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
|
||||
// to the in-progress filter program. In order to operate properly, it must be
|
||||
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
|
||||
// with any "a" rules removed).
|
||||
func (p *program) appendRule(rule *devices.Rule) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
if p.hasWildCard {
|
||||
// All entries after wildcard entry are ignored
|
||||
return nil
|
||||
}
|
||||
|
||||
bpfType := int32(-1)
|
||||
hasType := true
|
||||
switch dev.Type {
|
||||
case 'c':
|
||||
var bpfType int32
|
||||
switch rule.Type {
|
||||
case devices.CharDevice:
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case 'b':
|
||||
case devices.BlockDevice:
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
case 'a':
|
||||
hasType = false
|
||||
default:
|
||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
||||
return errors.Errorf("invalid DeviceType %q", string(dev.Type))
|
||||
// We do not permit 'a', nor any other types we don't know about.
|
||||
return fmt.Errorf("invalid type %q", string(rule.Type))
|
||||
}
|
||||
if dev.Major > math.MaxUint32 {
|
||||
return errors.Errorf("invalid major %d", dev.Major)
|
||||
if rule.Major > math.MaxUint32 {
|
||||
return fmt.Errorf("invalid major %d", rule.Major)
|
||||
}
|
||||
if dev.Minor > math.MaxUint32 {
|
||||
return errors.Errorf("invalid minor %d", dev.Major)
|
||||
if rule.Minor > math.MaxUint32 {
	// Report the minor number that failed the range check.
	return fmt.Errorf("invalid minor %d", rule.Minor)
}
|
||||
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := dev.Minor >= 0
|
||||
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := rule.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range dev.Permissions {
|
||||
for _, r := range rule.Permissions {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
|
@ -107,68 +138,65 @@ func (p *program) appendDevice(dev *configs.Device) error {
|
|||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return errors.Errorf("unknown device access %v", r)
|
||||
return fmt.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
||||
prevBlockLastIdx := len(p.insts) - 1
|
||||
if hasType {
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
}
|
||||
var (
|
||||
blockSym = "block-" + strconv.Itoa(p.blockID)
|
||||
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
|
||||
prevBlockLastIdx = len(p.insts) - 1
|
||||
)
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
||||
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
||||
asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
|
||||
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
|
||||
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
||||
p.hasWildCard = true
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
||||
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||
p.blockID++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
if p.hasWildCard {
|
||||
// acceptBlock with asm.Return() is already inserted
|
||||
return p.insts, nil
|
||||
func (p *program) finalize() asm.Instructions {
|
||||
var v int32
|
||||
if p.defaultAllow {
|
||||
v = 1
|
||||
}
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
blockSym := "block-" + strconv.Itoa(p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- 0
|
||||
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts, nil
|
||||
return p.insts
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
v := int32(0)
|
||||
var v int32
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import (
|
|||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
)
|
||||
|
||||
|
@ -20,13 +20,12 @@ func hash(s, comm string) string {
|
|||
return strings.Join(res, "\n")
|
||||
}
|
||||
|
||||
func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr string) {
|
||||
func testDeviceFilter(t testing.TB, devices []*devices.Rule, expectedStr string) {
|
||||
insts, _, err := DeviceFilter(devices)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices)
|
||||
}
|
||||
s := insts.String()
|
||||
t.Logf("%s: devices: %+v\n%s", t.Name(), devices, s)
|
||||
if expectedStr != "" {
|
||||
hashed := hash(s, "//")
|
||||
expectedHashed := hash(expectedStr, "//")
|
||||
|
@ -39,15 +38,16 @@ func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr strin
|
|||
func TestDeviceFilter_Nil(t *testing.T) {
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||
1: And32Imm dst: r2 imm: 65535
|
||||
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
3: RSh32Imm dst: r3 imm: 16
|
||||
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 0 (reject)
|
||||
5: Mov32Imm dst: r0 imm: 0
|
||||
6: Exit
|
||||
6: Mov32Imm dst: r0 imm: 0
|
||||
7: Exit
|
||||
`
|
||||
testDeviceFilter(t, nil, expected)
|
||||
}
|
||||
|
@ -55,97 +55,96 @@ block-0:
|
|||
func TestDeviceFilter_BuiltInAllowList(t *testing.T) {
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||
1: And32Imm dst: r2 imm: 65535
|
||||
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
3: RSh32Imm dst: r3 imm: 16
|
||||
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// tuntap (c, 10, 200, rwm, allow)
|
||||
5: JNEImm dst: r2 off: -1 imm: 2 <block-1>
|
||||
6: JNEImm dst: r4 off: -1 imm: 10 <block-1>
|
||||
7: JNEImm dst: r5 off: -1 imm: 200 <block-1>
|
||||
8: Mov32Imm dst: r0 imm: 1
|
||||
9: Exit
|
||||
block-1:
|
||||
10: JNEImm dst: r2 off: -1 imm: 2 <block-2>
|
||||
11: JNEImm dst: r4 off: -1 imm: 5 <block-2>
|
||||
12: JNEImm dst: r5 off: -1 imm: 2 <block-2>
|
||||
13: Mov32Imm dst: r0 imm: 1
|
||||
14: Exit
|
||||
block-2:
|
||||
// /dev/pts (c, 136, wildcard, rwm, true)
|
||||
15: JNEImm dst: r2 off: -1 imm: 2 <block-3>
|
||||
16: JNEImm dst: r4 off: -1 imm: 136 <block-3>
|
||||
17: Mov32Imm dst: r0 imm: 1
|
||||
18: Exit
|
||||
block-3:
|
||||
19: JNEImm dst: r2 off: -1 imm: 2 <block-4>
|
||||
20: JNEImm dst: r4 off: -1 imm: 5 <block-4>
|
||||
21: JNEImm dst: r5 off: -1 imm: 1 <block-4>
|
||||
22: Mov32Imm dst: r0 imm: 1
|
||||
23: Exit
|
||||
block-4:
|
||||
24: JNEImm dst: r2 off: -1 imm: 2 <block-5>
|
||||
25: JNEImm dst: r4 off: -1 imm: 1 <block-5>
|
||||
26: JNEImm dst: r5 off: -1 imm: 9 <block-5>
|
||||
27: Mov32Imm dst: r0 imm: 1
|
||||
28: Exit
|
||||
block-5:
|
||||
29: JNEImm dst: r2 off: -1 imm: 2 <block-6>
|
||||
30: JNEImm dst: r4 off: -1 imm: 1 <block-6>
|
||||
31: JNEImm dst: r5 off: -1 imm: 5 <block-6>
|
||||
32: Mov32Imm dst: r0 imm: 1
|
||||
33: Exit
|
||||
block-6:
|
||||
34: JNEImm dst: r2 off: -1 imm: 2 <block-7>
|
||||
35: JNEImm dst: r4 off: -1 imm: 5 <block-7>
|
||||
36: JNEImm dst: r5 off: -1 imm: 0 <block-7>
|
||||
37: Mov32Imm dst: r0 imm: 1
|
||||
38: Exit
|
||||
block-7:
|
||||
39: JNEImm dst: r2 off: -1 imm: 2 <block-8>
|
||||
40: JNEImm dst: r4 off: -1 imm: 1 <block-8>
|
||||
41: JNEImm dst: r5 off: -1 imm: 7 <block-8>
|
||||
42: Mov32Imm dst: r0 imm: 1
|
||||
43: Exit
|
||||
block-8:
|
||||
44: JNEImm dst: r2 off: -1 imm: 2 <block-9>
|
||||
45: JNEImm dst: r4 off: -1 imm: 1 <block-9>
|
||||
46: JNEImm dst: r5 off: -1 imm: 8 <block-9>
|
||||
47: Mov32Imm dst: r0 imm: 1
|
||||
48: Exit
|
||||
block-9:
|
||||
49: JNEImm dst: r2 off: -1 imm: 2 <block-10>
|
||||
50: JNEImm dst: r4 off: -1 imm: 1 <block-10>
|
||||
51: JNEImm dst: r5 off: -1 imm: 3 <block-10>
|
||||
52: Mov32Imm dst: r0 imm: 1
|
||||
53: Exit
|
||||
block-10:
|
||||
// (b, wildcard, wildcard, m, true)
|
||||
54: JNEImm dst: r2 off: -1 imm: 1 <block-11>
|
||||
55: Mov32Reg dst: r1 src: r3
|
||||
56: And32Imm dst: r1 imm: 1
|
||||
57: JEqImm dst: r1 off: -1 imm: 0 <block-11>
|
||||
58: Mov32Imm dst: r0 imm: 1
|
||||
59: Exit
|
||||
block-11:
|
||||
6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||
7: Mov32Reg dst: r1 src: r3
|
||||
8: And32Imm dst: r1 imm: 1
|
||||
9: JNEReg dst: r1 off: -1 src: r3 <block-1>
|
||||
10: Mov32Imm dst: r0 imm: 1
|
||||
11: Exit
|
||||
block-1:
|
||||
// (c, wildcard, wildcard, m, true)
|
||||
60: JNEImm dst: r2 off: -1 imm: 2 <block-12>
|
||||
61: Mov32Reg dst: r1 src: r3
|
||||
62: And32Imm dst: r1 imm: 1
|
||||
63: JEqImm dst: r1 off: -1 imm: 0 <block-12>
|
||||
64: Mov32Imm dst: r0 imm: 1
|
||||
65: Exit
|
||||
block-12:
|
||||
66: Mov32Imm dst: r0 imm: 0
|
||||
67: Exit
|
||||
12: JNEImm dst: r2 off: -1 imm: 2 <block-2>
|
||||
13: Mov32Reg dst: r1 src: r3
|
||||
14: And32Imm dst: r1 imm: 1
|
||||
15: JNEReg dst: r1 off: -1 src: r3 <block-2>
|
||||
16: Mov32Imm dst: r0 imm: 1
|
||||
17: Exit
|
||||
block-2:
|
||||
18: JNEImm dst: r2 off: -1 imm: 2 <block-3>
|
||||
19: JNEImm dst: r4 off: -1 imm: 1 <block-3>
|
||||
20: JNEImm dst: r5 off: -1 imm: 3 <block-3>
|
||||
21: Mov32Imm dst: r0 imm: 1
|
||||
22: Exit
|
||||
block-3:
|
||||
23: JNEImm dst: r2 off: -1 imm: 2 <block-4>
|
||||
24: JNEImm dst: r4 off: -1 imm: 1 <block-4>
|
||||
25: JNEImm dst: r5 off: -1 imm: 5 <block-4>
|
||||
26: Mov32Imm dst: r0 imm: 1
|
||||
27: Exit
|
||||
block-4:
|
||||
28: JNEImm dst: r2 off: -1 imm: 2 <block-5>
|
||||
29: JNEImm dst: r4 off: -1 imm: 1 <block-5>
|
||||
30: JNEImm dst: r5 off: -1 imm: 7 <block-5>
|
||||
31: Mov32Imm dst: r0 imm: 1
|
||||
32: Exit
|
||||
block-5:
|
||||
33: JNEImm dst: r2 off: -1 imm: 2 <block-6>
|
||||
34: JNEImm dst: r4 off: -1 imm: 1 <block-6>
|
||||
35: JNEImm dst: r5 off: -1 imm: 8 <block-6>
|
||||
36: Mov32Imm dst: r0 imm: 1
|
||||
37: Exit
|
||||
block-6:
|
||||
38: JNEImm dst: r2 off: -1 imm: 2 <block-7>
|
||||
39: JNEImm dst: r4 off: -1 imm: 1 <block-7>
|
||||
40: JNEImm dst: r5 off: -1 imm: 9 <block-7>
|
||||
41: Mov32Imm dst: r0 imm: 1
|
||||
42: Exit
|
||||
block-7:
|
||||
43: JNEImm dst: r2 off: -1 imm: 2 <block-8>
|
||||
44: JNEImm dst: r4 off: -1 imm: 5 <block-8>
|
||||
45: JNEImm dst: r5 off: -1 imm: 0 <block-8>
|
||||
46: Mov32Imm dst: r0 imm: 1
|
||||
47: Exit
|
||||
block-8:
|
||||
48: JNEImm dst: r2 off: -1 imm: 2 <block-9>
|
||||
49: JNEImm dst: r4 off: -1 imm: 5 <block-9>
|
||||
50: JNEImm dst: r5 off: -1 imm: 2 <block-9>
|
||||
51: Mov32Imm dst: r0 imm: 1
|
||||
52: Exit
|
||||
block-9:
|
||||
// tuntap (c, 10, 200, rwm, allow)
|
||||
53: JNEImm dst: r2 off: -1 imm: 2 <block-10>
|
||||
54: JNEImm dst: r4 off: -1 imm: 10 <block-10>
|
||||
55: JNEImm dst: r5 off: -1 imm: 200 <block-10>
|
||||
56: Mov32Imm dst: r0 imm: 1
|
||||
57: Exit
|
||||
block-10:
|
||||
// /dev/pts (c, 136, wildcard, rwm, true)
|
||||
58: JNEImm dst: r2 off: -1 imm: 2 <block-11>
|
||||
59: JNEImm dst: r4 off: -1 imm: 136 <block-11>
|
||||
60: Mov32Imm dst: r0 imm: 1
|
||||
61: Exit
|
||||
block-11:
|
||||
62: Mov32Imm dst: r0 imm: 0
|
||||
63: Exit
|
||||
`
|
||||
testDeviceFilter(t, specconv.AllowedDevices, expected)
|
||||
var devices []*devices.Rule
|
||||
for _, device := range specconv.AllowedDevices {
|
||||
devices = append(devices, &device.Rule)
|
||||
}
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_Privileged(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
devices := []*devices.Rule{
|
||||
{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
|
@ -157,21 +156,22 @@ func TestDeviceFilter_Privileged(t *testing.T) {
|
|||
expected :=
|
||||
`
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||
1: And32Imm dst: r2 imm: 65535
|
||||
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
3: RSh32Imm dst: r3 imm: 16
|
||||
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 1 (accept)
|
||||
5: Mov32Imm dst: r0 imm: 1
|
||||
6: Exit
|
||||
6: Mov32Imm dst: r0 imm: 1
|
||||
7: Exit
|
||||
`
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
devices := []*devices.Rule{
|
||||
{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
|
@ -189,28 +189,29 @@ func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
|
|||
}
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||
1: And32Imm dst: r2 imm: 65535
|
||||
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
3: RSh32Imm dst: r3 imm: 16
|
||||
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 0 (reject) if type==b && major == 8 && minor == 0
|
||||
5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||
6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||
7: JNEImm dst: r5 off: -1 imm: 0 <block-1>
|
||||
8: Mov32Imm dst: r0 imm: 0
|
||||
9: Exit
|
||||
6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||
7: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||
8: JNEImm dst: r5 off: -1 imm: 0 <block-1>
|
||||
9: Mov32Imm dst: r0 imm: 0
|
||||
10: Exit
|
||||
block-1:
|
||||
// return 1 (accept)
|
||||
10: Mov32Imm dst: r0 imm: 1
|
||||
11: Exit
|
||||
11: Mov32Imm dst: r0 imm: 1
|
||||
12: Exit
|
||||
`
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_Weird(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
devices := []*devices.Rule{
|
||||
{
|
||||
Type: 'b',
|
||||
Major: 8,
|
||||
|
@ -237,22 +238,23 @@ func TestDeviceFilter_Weird(t *testing.T) {
|
|||
// This conforms to runc v1.0.0-rc.9 (cgroup1) behavior.
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||
1: And32Imm dst: r2 imm: 65535
|
||||
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
3: RSh32Imm dst: r3 imm: 16
|
||||
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 0 (reject) if type==b && major == 8 && minor == 2
|
||||
5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||
6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||
7: JNEImm dst: r5 off: -1 imm: 2 <block-1>
|
||||
8: Mov32Imm dst: r0 imm: 0
|
||||
9: Exit
|
||||
6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||
7: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||
8: JNEImm dst: r5 off: -1 imm: 2 <block-1>
|
||||
9: Mov32Imm dst: r0 imm: 0
|
||||
10: Exit
|
||||
block-1:
|
||||
// return 1 (accept)
|
||||
10: Mov32Imm dst: r0 imm: 1
|
||||
11: Exit
|
||||
11: Mov32Imm dst: r0 imm: 1
|
||||
12: Exit
|
||||
`
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
||||
nilCloser := func() error {
|
||||
return nil
|
||||
}
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
closer := func() error {
|
||||
if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return closer, nil
|
||||
}
|
|
@ -0,0 +1,253 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/cilium/ebpf/link"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// nilCloser is a detach function that does nothing and always returns nil.
func nilCloser() error { return nil }
|
||||
|
||||
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
|
||||
type bpfAttrQuery struct {
|
||||
TargetFd uint32
|
||||
AttachType uint32
|
||||
QueryType uint32
|
||||
AttachFlags uint32
|
||||
ProgIds uint64 // __aligned_u64
|
||||
ProgCnt uint32
|
||||
}
|
||||
|
||||
// Currently you can only have 64 eBPF programs attached to a cgroup.
|
||||
size := 64
|
||||
retries := 0
|
||||
for retries < 10 {
|
||||
progIds := make([]uint32, size)
|
||||
query := bpfAttrQuery{
|
||||
TargetFd: uint32(dirFd),
|
||||
AttachType: uint32(unix.BPF_CGROUP_DEVICE),
|
||||
ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))),
|
||||
ProgCnt: uint32(len(progIds)),
|
||||
}
|
||||
|
||||
// Fetch the list of program ids.
|
||||
_, _, errno := unix.Syscall(unix.SYS_BPF,
|
||||
uintptr(unix.BPF_PROG_QUERY),
|
||||
uintptr(unsafe.Pointer(&query)),
|
||||
unsafe.Sizeof(query))
|
||||
size = int(query.ProgCnt)
|
||||
runtime.KeepAlive(query)
|
||||
if errno != 0 {
|
||||
// On ENOSPC we get the correct number of programs.
|
||||
if errno == unix.ENOSPC {
|
||||
retries++
|
||||
continue
|
||||
}
|
||||
return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
|
||||
}
|
||||
|
||||
// Convert the ids to program handles.
|
||||
progIds = progIds[:size]
|
||||
programs := make([]*ebpf.Program, 0, len(progIds))
|
||||
for _, progId := range progIds {
|
||||
program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
|
||||
if err != nil {
|
||||
// We skip over programs that give us -EACCES or -EPERM. This
|
||||
// is necessary because there may be BPF programs that have
|
||||
// been attached (such as with --systemd-cgroup) which have an
|
||||
// LSM label that blocks us from interacting with the program.
|
||||
//
|
||||
// Because additional BPF_CGROUP_DEVICE programs only can add
|
||||
// restrictions, there's no real issue with just ignoring these
|
||||
// programs (and stops runc from breaking on distributions with
|
||||
// very strict SELinux policies).
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err)
|
||||
continue
|
||||
}
|
||||
return nil, fmt.Errorf("cannot fetch program from id: %w", err)
|
||||
}
|
||||
programs = append(programs, program)
|
||||
}
|
||||
runtime.KeepAlive(progIds)
|
||||
return programs, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
|
||||
}
|
||||
|
||||
var (
|
||||
haveBpfProgReplaceBool bool
|
||||
haveBpfProgReplaceOnce sync.Once
|
||||
)
|
||||
|
||||
// Loosely based on the BPF_F_REPLACE support check in
|
||||
// <https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go>.
|
||||
//
|
||||
// TODO: move this logic to cilium/ebpf
|
||||
func haveBpfProgReplace() bool {
|
||||
haveBpfProgReplaceOnce.Do(func() {
|
||||
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
License: "MIT",
|
||||
Instructions: asm.Instructions{
|
||||
asm.Mov.Imm(asm.R0, 0),
|
||||
asm.Return(),
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
|
||||
return
|
||||
}
|
||||
defer prog.Close()
|
||||
|
||||
devnull, err := os.Open("/dev/null")
|
||||
if err != nil {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
|
||||
return
|
||||
}
|
||||
defer devnull.Close()
|
||||
|
||||
// We know that we have BPF_PROG_ATTACH since we can load
|
||||
// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
|
||||
// we know that the feature isn't present.
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
// We rely on this fd being checked after attachFlags.
|
||||
Target: int(devnull.Fd()),
|
||||
// Attempt to "replace" bad fds with this program.
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
|
||||
})
|
||||
if errors.Is(err, unix.EINVAL) {
|
||||
// not supported
|
||||
return
|
||||
}
|
||||
// attach_flags test succeeded.
|
||||
if !errors.Is(err, unix.EBADF) {
|
||||
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
|
||||
}
|
||||
haveBpfProgReplaceBool = true
|
||||
})
|
||||
return haveBpfProgReplaceBool
|
||||
}
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
|
||||
// Get the list of existing programs.
|
||||
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1
|
||||
|
||||
// Generate new program.
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
|
||||
// If there is only one old program, we can just replace it directly.
|
||||
var (
|
||||
replaceProg *ebpf.Program
|
||||
attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
|
||||
)
|
||||
if useReplaceProg {
|
||||
replaceProg = oldProgs[0]
|
||||
attachFlags |= unix.BPF_F_REPLACE
|
||||
}
|
||||
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: prog,
|
||||
Replace: replaceProg,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
Flags: attachFlags,
|
||||
})
|
||||
if err != nil {
|
||||
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
|
||||
}
|
||||
closer := func() error {
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: prog,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
|
||||
}
|
||||
// TODO: Should we attach the old filters back in this case? Otherwise
|
||||
// we fail-open on a security feature, which is a bit scary.
|
||||
return nil
|
||||
}
|
||||
if !useReplaceProg {
|
||||
logLevel := logrus.DebugLevel
|
||||
// If there was more than one old program, give a warning (since this
|
||||
// really shouldn't happen with runc-managed cgroups) and then detach
|
||||
// all the old programs.
|
||||
if len(oldProgs) > 1 {
|
||||
// NOTE: Ideally this should be a warning but it turns out that
|
||||
// systemd-managed cgroups trigger this warning (apparently
|
||||
// systemd doesn't delete old non-systemd programs when
|
||||
// setting properties).
|
||||
logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
|
||||
logLevel = logrus.InfoLevel
|
||||
}
|
||||
for idx, oldProg := range oldProgs {
|
||||
// Output some extra debug info.
|
||||
if info, err := oldProg.Info(); err == nil {
|
||||
fields := logrus.Fields{
|
||||
"type": info.Type.String(),
|
||||
"tag": info.Tag,
|
||||
"name": info.Name,
|
||||
}
|
||||
if id, ok := info.ID(); ok {
|
||||
fields["id"] = id
|
||||
}
|
||||
if runCount, ok := info.RunCount(); ok {
|
||||
fields["run_count"] = runCount
|
||||
}
|
||||
if runtime, ok := info.Runtime(); ok {
|
||||
fields["runtime"] = runtime.String()
|
||||
}
|
||||
logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
|
||||
}
|
||||
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||
Target: dirFd,
|
||||
Program: oldProg,
|
||||
Attach: ebpf.AttachCGroupDevice,
|
||||
})
|
||||
if err != nil {
|
||||
return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return closer, nil
|
||||
}
|
|
@ -0,0 +1,190 @@
|
|||
package cgroups
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenFile opens a cgroup file in a given dir with given flags.
|
||||
// It is supposed to be used for cgroup files only, and returns
|
||||
// an error if the file is not a cgroup file.
|
||||
//
|
||||
// Arguments dir and file are joined together to form an absolute path
|
||||
// to a file being opened.
|
||||
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
return openFile(dir, file, flags)
|
||||
}
|
||||
|
||||
// ReadFile reads data from a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func ReadFile(dir, file string) (string, error) {
|
||||
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer fd.Close()
|
||||
var buf bytes.Buffer
|
||||
|
||||
_, err = buf.ReadFrom(fd)
|
||||
return buf.String(), err
|
||||
}
|
||||
|
||||
// WriteFile writes data to a cgroup file in dir.
|
||||
// It is supposed to be used for cgroup files only.
|
||||
func WriteFile(dir, file, data string) error {
|
||||
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
if err := retryingWriteFile(fd, data); err != nil {
|
||||
// Having data in the error message helps in debugging.
|
||||
return fmt.Errorf("failed to write %q: %w", data, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func retryingWriteFile(fd *os.File, data string) error {
|
||||
for {
|
||||
_, err := fd.Write([]byte(data))
|
||||
if errors.Is(err, unix.EINTR) {
|
||||
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
const (
|
||||
cgroupfsDir = "/sys/fs/cgroup"
|
||||
cgroupfsPrefix = cgroupfsDir + "/"
|
||||
)
|
||||
|
||||
var (
|
||||
// TestMode is set to true by unit tests that need "fake" cgroupfs.
|
||||
TestMode bool
|
||||
|
||||
cgroupFd int = -1
|
||||
prepOnce sync.Once
|
||||
prepErr error
|
||||
resolveFlags uint64
|
||||
)
|
||||
|
||||
func prepareOpenat2() error {
|
||||
prepOnce.Do(func() {
|
||||
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
||||
Flags: unix.O_DIRECTORY | unix.O_PATH,
|
||||
})
|
||||
if err != nil {
|
||||
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
||||
if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
} else {
|
||||
logrus.Debug("openat2 not available, falling back to securejoin")
|
||||
}
|
||||
return
|
||||
}
|
||||
var st unix.Statfs_t
|
||||
if err = unix.Fstatfs(fd, &st); err != nil {
|
||||
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
||||
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||
return
|
||||
}
|
||||
|
||||
cgroupFd = fd
|
||||
|
||||
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
||||
if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
||||
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
||||
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
||||
}
|
||||
})
|
||||
|
||||
return prepErr
|
||||
}
|
||||
|
||||
func openFile(dir, file string, flags int) (*os.File, error) {
|
||||
mode := os.FileMode(0)
|
||||
if TestMode && flags&os.O_WRONLY != 0 {
|
||||
// "emulate" cgroup fs for unit tests
|
||||
flags |= os.O_TRUNC | os.O_CREATE
|
||||
mode = 0o600
|
||||
}
|
||||
path := path.Join(dir, file)
|
||||
if prepareOpenat2() != nil {
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
relPath := strings.TrimPrefix(path, cgroupfsPrefix)
|
||||
if len(relPath) == len(path) { // non-standard path, old system?
|
||||
return openFallback(path, flags, mode)
|
||||
}
|
||||
|
||||
fd, err := unix.Openat2(cgroupFd, relPath,
|
||||
&unix.OpenHow{
|
||||
Resolve: resolveFlags,
|
||||
Flags: uint64(flags) | unix.O_CLOEXEC,
|
||||
Mode: uint64(mode),
|
||||
})
|
||||
if err != nil {
|
||||
err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
||||
// Check if cgroupFd is still opened to cgroupfsDir
|
||||
// (happens when this package is incorrectly used
|
||||
// across the chroot/pivot_root/mntns boundary, or
|
||||
// when /sys/fs/cgroup is remounted).
|
||||
//
|
||||
// TODO: if such usage will ever be common, amend this
|
||||
// to reopen cgroupFd and retry openat2.
|
||||
fdStr := strconv.Itoa(cgroupFd)
|
||||
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
|
||||
if fdDest != cgroupfsDir {
|
||||
// Wrap the error so it is clear that cgroupFd
|
||||
// is opened to an unexpected/wrong directory.
|
||||
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
|
||||
fdStr, fdDest, cgroupfsDir, err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return os.NewFile(uintptr(fd), path), nil
|
||||
}
|
||||
|
||||
var errNotCgroupfs = errors.New("not a cgroup file")
|
||||
|
||||
// Can be changed by unit tests.
|
||||
var openFallback = openAndCheck
|
||||
|
||||
// openAndCheck is used when openat2(2) is not available. It checks the opened
|
||||
// file is on cgroupfs, returning an error otherwise.
|
||||
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
||||
fd, err := os.OpenFile(path, flags, mode)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if TestMode {
|
||||
return fd, nil
|
||||
}
|
||||
// Check this is a cgroupfs file.
|
||||
var st unix.Statfs_t
|
||||
if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||
}
|
||||
if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
||||
_ = fd.Close()
|
||||
return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
||||
}
|
||||
|
||||
return fd, nil
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
package cgroups
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestWriteCgroupFileHandlesInterrupt(t *testing.T) {
|
||||
const (
|
||||
memoryCgroupMount = "/sys/fs/cgroup/memory"
|
||||
memoryLimit = "memory.limit_in_bytes"
|
||||
)
|
||||
if _, err := os.Stat(memoryCgroupMount); err != nil {
|
||||
// most probably cgroupv2
|
||||
t.Skip(err)
|
||||
}
|
||||
|
||||
cgroupName := fmt.Sprintf("test-eint-%d", time.Now().Nanosecond())
|
||||
cgroupPath := filepath.Join(memoryCgroupMount, cgroupName)
|
||||
if err := os.MkdirAll(cgroupPath, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(cgroupPath)
|
||||
|
||||
if _, err := os.Stat(filepath.Join(cgroupPath, memoryLimit)); err != nil {
|
||||
// either cgroupv2, or memory controller is not available
|
||||
t.Skip(err)
|
||||
}
|
||||
|
||||
for i := 0; i < 100000; i++ {
|
||||
limit := 1024*1024 + i
|
||||
if err := WriteFile(cgroupPath, memoryLimit, strconv.Itoa(limit)); err != nil {
|
||||
t.Fatalf("Failed to write %d on attempt %d: %+v", limit, i, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpenat2(t *testing.T) {
|
||||
if !IsCgroup2UnifiedMode() {
|
||||
// The reason is many test cases below test opening files from
|
||||
// the top-level directory, where cgroup v1 has no files.
|
||||
t.Skip("test requires cgroup v2")
|
||||
}
|
||||
|
||||
// Make sure we test openat2, not its fallback.
|
||||
openFallback = func(_ string, _ int, _ os.FileMode) (*os.File, error) {
|
||||
return nil, errors.New("fallback")
|
||||
}
|
||||
defer func() { openFallback = openAndCheck }()
|
||||
|
||||
for _, tc := range []struct{ dir, file string }{
|
||||
{"/sys/fs/cgroup", "cgroup.controllers"},
|
||||
{"/sys/fs/cgroup", "/cgroup.controllers"},
|
||||
{"/sys/fs/cgroup/", "cgroup.controllers"},
|
||||
{"/sys/fs/cgroup/", "/cgroup.controllers"},
|
||||
{"/sys/fs/cgroup/user.slice", "cgroup.controllers"},
|
||||
{"/sys/fs/cgroup/user.slice/", "/cgroup.controllers"},
|
||||
{"/", "/sys/fs/cgroup/cgroup.controllers"},
|
||||
{"/", "sys/fs/cgroup/cgroup.controllers"},
|
||||
{"/sys/fs/cgroup/cgroup.controllers", ""},
|
||||
} {
|
||||
fd, err := OpenFile(tc.dir, tc.file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
t.Errorf("case %+v: %v", tc, err)
|
||||
}
|
||||
fd.Close()
|
||||
}
|
||||
}
|
|
@ -1,411 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
subsystemsLegacy = subsystemSet{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
func (s subsystemSet) Get(name string) (subsystem, error) {
|
||||
for _, ss := range s {
|
||||
if ss.Name() == name {
|
||||
return ss, nil
|
||||
}
|
||||
}
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
// subsystem is the set of operations each cgroup v1 controller in
// subsystemsLegacy provides to the Manager.
type subsystem interface {
	// Name returns the name of the subsystem.
	Name() string
	// GetStats fills 'stats' with the statistics of the cgroup under 'path'.
	GetStats(path string, stats *cgroups.Stats) error
	// Remove removes the cgroup represented by 'cgroupData'.
	Remove(*cgroupData) error
	// Apply creates and joins the cgroup represented by 'cgroupData'.
	Apply(*cgroupData) error
	// Set applies the settings in 'cgroup' to the cgroup under 'path'.
	Set(path string, cgroup *configs.Cgroup) error
}
|
||||
|
||||
// Manager manages the cgroup v1 hierarchies of a single container.
type Manager struct {
	// mu is held by Apply, Destroy, GetPaths and GetStats while they access
	// Paths.
	mu sync.Mutex
	// Cgroups is the cgroup configuration this manager works with.
	Cgroups  *configs.Cgroup
	Rootless bool // ignore permission-related errors
	// Paths maps a subsystem name to its cgroup path. It is filled in by Apply
	// and reset by Destroy.
	Paths map[string]string
}
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var cgroupRootLock sync.Mutex
|
||||
var cgroupRoot string
|
||||
|
||||
// Gets the cgroupRoot.
|
||||
func getCgroupRoot() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
root, err := cgroups.FindCgroupMountpointDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// cgroupData carries what the subsystems need to locate and join a
// container's cgroup. It is built by getCgroupData.
type cgroupData struct {
	// root is the cgroup root directory as returned by getCgroupRoot.
	root string
	// innerPath is the cleaned cgroup path from the config: Path if set,
	// otherwise Parent joined with Name.
	innerPath string
	// config is the cgroup configuration the data was built from.
	config *configs.Cgroup
	// pid is the process id written to cgroup.procs by join.
	pid int
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(rootless bool, err error) bool {
|
||||
// We do not ignore errors if we are root.
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// Is it an ordinary EPERM?
|
||||
if os.IsPermission(errors.Cause(err)) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Try to handle other errnos.
|
||||
var errno error
|
||||
switch err := errors.Cause(err).(type) {
|
||||
case *os.PathError:
|
||||
errno = err.Err
|
||||
case *os.LinkError:
|
||||
errno = err.Err
|
||||
case *os.SyscallError:
|
||||
errno = err.Err
|
||||
}
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
|
||||
func (m *Manager) getSubsystems() subsystemSet {
|
||||
return subsystemsLegacy
|
||||
}
|
||||
|
||||
// Apply places pid into the container's cgroups and records the resulting
// per-subsystem paths in m.Paths.
//
// If the configuration carries explicit Paths, the process only enters those
// existing cgroups. Otherwise every subsystem from getSubsystems is applied in
// turn. It is a no-op when m.Cgroups is nil.
func (m *Manager) Apply(pid int) (err error) {
	if m.Cgroups == nil {
		return nil
	}
	m.mu.Lock()
	defer m.mu.Unlock()

	var c = m.Cgroups

	d, err := getCgroupData(m.Cgroups, pid)
	if err != nil {
		return err
	}

	m.Paths = make(map[string]string)
	if c.Paths != nil {
		// Explicit paths were configured: keep those whose subsystem can be
		// resolved and join them, without applying any subsystem.
		for name, path := range c.Paths {
			_, err := d.path(name)
			if err != nil {
				if cgroups.IsNotFound(err) {
					continue
				}
				return err
			}
			m.Paths[name] = path
		}
		return cgroups.EnterPid(m.Paths, pid)
	}

	for _, sys := range m.getSubsystems() {
		// TODO: Apply should, ideally, be reentrant or be broken up into a separate
		// create and join phase so that the cgroup hierarchy for a container can be
		// created then join consists of writing the process pids to cgroup.procs
		p, err := d.path(sys.Name())
		if err != nil {
			// The non-presence of the devices subsystem is
			// considered fatal for security reasons.
			if cgroups.IsNotFound(err) && sys.Name() != "devices" {
				continue
			}
			return err
		}
		// Record the path before applying; it is removed again below if the
		// failure turns out to be ignorable.
		m.Paths[sys.Name()] = p

		if err := sys.Apply(d); err != nil {
			// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
			// been set, we don't bail on error in case of permission problems.
			// Cases where limits have been set (and we couldn't create our own
			// cgroup) are handled by Set.
			if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" {
				delete(m.Paths, sys.Name())
				continue
			}
			return err
		}

	}
	return nil
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
if m.Cgroups == nil || m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := m.getSubsystems().Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
if container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups != nil && m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
for _, sys := range m.getSubsystems() {
|
||||
path := paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if m.Rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
if m.Cgroups == nil {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
dir := paths["freezer"]
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := m.getSubsystems().Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(dir, m.Cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetPids(paths["devices"])
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetAllPids(paths["devices"])
|
||||
}
|
||||
|
||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return &cgroupData{
|
||||
root: root,
|
||||
innerPath: innerPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(raw.innerPath) {
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, raw.innerPath), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) join(subsystem string) (string, error) {
|
||||
path, err := raw.path(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// removePath removes the directory tree at p. It takes a (path, error) pair:
// a non-nil err is returned unchanged without touching the filesystem, and an
// empty p is a no-op.
func removePath(p string, err error) error {
	switch {
	case err != nil:
		return err
	case p == "":
		return nil
	default:
		return os.RemoveAll(p)
	}
}
|
||||
|
||||
// CheckCpushares compares the requested cpu shares c with the value read from
// the cpu.shares file under path. It returns an error when the two differ,
// worded as a maximum when c is larger and as a minimum when c is smaller.
// A c of zero skips the check entirely.
func CheckCpushares(path string, c uint64) error {
	if c == 0 {
		return nil
	}

	sharesFile, err := os.Open(filepath.Join(path, "cpu.shares"))
	if err != nil {
		return err
	}
	defer sharesFile.Close()

	var cpuShares uint64
	// An EOF from the scan is tolerated and leaves cpuShares at whatever was
	// parsed so far.
	if _, scanErr := fmt.Fscanf(sharesFile, "%d", &cpuShares); scanErr != nil && scanErr != io.EOF {
		return scanErr
	}

	switch {
	case c > cpuShares:
		return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares)
	case c < cpuShares:
		return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares)
	}
	return nil
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
|
@ -1,297 +0,0 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestInvalidCgroupPath(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Path: "../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidAbsoluteCgroupPath(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Path: "/../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidCgroupParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "../../../../../../../../../../some/path",
|
||||
Name: "name",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidAbsoluteCgroupParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "/../../../../../../../../../../some/path",
|
||||
Name: "name",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidCgroupName(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "parent",
|
||||
Name: "../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidAbsoluteCgroupName(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "parent",
|
||||
Name: "/../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidCgroupNameAndParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "../../../../../../../../../../some/path",
|
||||
Name: "../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "/../../../../../../../../../../some/path",
|
||||
Name: "/../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
|
@ -1,72 +1,71 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// BlkioGroup is the handler for the "blkio" cgroup subsystem. It caches the
// names of the weight files to write to, so they only need to be detected
// once per instance.
type BlkioGroup struct {
	// weightFilename is the file that receives the blkio weight value; an
	// empty string means detection has not happened yet.
	weightFilename string
	// weightDeviceFilename is the file that receives per-device weight
	// entries.
	weightDeviceFilename string
}
|
||||
|
||||
func (s *BlkioGroup) Name() string {
|
||||
return "blkio"
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("blkio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||
s.detectWeightFilenames(path)
|
||||
if r.BlkioWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
if r.BlkioLeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
for _, wd := range r.BlkioWeightDevice {
|
||||
if wd.Weight != 0 {
|
||||
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
if wd.LeafWeight != 0 {
|
||||
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -74,10 +73,6 @@ func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("blkio"))
|
||||
}
|
||||
|
||||
/*
|
||||
examples:
|
||||
|
||||
|
@ -113,9 +108,9 @@ func splitBlkioStatLine(r rune) bool {
|
|||
return r == ' ' || r == ':'
|
||||
}
|
||||
|
||||
func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
||||
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
f, err := os.Open(path)
|
||||
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return blkioStats, nil
|
||||
|
@ -133,19 +128,19 @@ func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
|||
// skip total line
|
||||
continue
|
||||
} else {
|
||||
return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
|
||||
return nil, malformedLine(dir, file, sc.Text())
|
||||
}
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
major := v
|
||||
|
||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
minor := v
|
||||
|
||||
|
@ -157,82 +152,160 @@ func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
|||
}
|
||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||
}
|
||||
if err := sc.Err(); err != nil {
|
||||
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||
}
|
||||
|
||||
return blkioStats, nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Try to read CFQ stats available on all CFQ enabled kernels first
|
||||
if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil {
|
||||
return getCFQStats(path, stats)
|
||||
type blkioStatInfo struct {
|
||||
filename string
|
||||
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
|
||||
}
|
||||
bfqDebugStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.bfq.sectors_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_wait_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_merged_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_queued_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
bfqStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.bfq.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
cfqStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.sectors_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_service_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_wait_time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_merged_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_queued_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.time_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
throttleRecursiveStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.throttle.io_serviced_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.throttle.io_service_bytes_recursive",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
baseStats := []blkioStatInfo{
|
||||
{
|
||||
filename: "blkio.throttle.io_serviced",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||
},
|
||||
{
|
||||
filename: "blkio.throttle.io_service_bytes",
|
||||
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||
},
|
||||
}
|
||||
orderedStats := [][]blkioStatInfo{
|
||||
bfqDebugStats,
|
||||
bfqStats,
|
||||
cfqStats,
|
||||
throttleRecursiveStats,
|
||||
baseStats,
|
||||
}
|
||||
return getStats(path, stats) // Use generic stats as fallback
|
||||
}
|
||||
|
||||
func getCFQStats(path string, stats *cgroups.Stats) error {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
|
||||
return err
|
||||
for _, statGroup := range orderedStats {
|
||||
for i, statInfo := range statGroup {
|
||||
if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
|
||||
// if error occurs on first file, move to next group
|
||||
if i == 0 {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
*statInfo.blkioStatEntriesPtr = blkioStats
|
||||
// finish if all stats are gathered
|
||||
if i == len(statGroup)-1 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
stats.BlkioStats.SectorsRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoQueuedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceTimeRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoWaitTimeRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoMergedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoTimeRecursive = blkioStats
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getStats(path string, stats *cgroups.Stats) error {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil {
|
||||
return err
|
||||
func (s *BlkioGroup) detectWeightFilenames(path string) {
|
||||
if s.weightFilename != "" {
|
||||
// Already detected.
|
||||
return
|
||||
}
|
||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil {
|
||||
return err
|
||||
if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
|
||||
s.weightFilename = "blkio.weight"
|
||||
s.weightDeviceFilename = "blkio.weight_device"
|
||||
} else {
|
||||
s.weightFilename = "blkio.bfq.weight"
|
||||
s.weightDeviceFilename = "blkio.bfq.weight_device"
|
||||
}
|
||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,94 +1,105 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type CpuGroup struct {
|
||||
}
|
||||
type CpuGroup struct{}
|
||||
|
||||
func (s *CpuGroup) Name() string {
|
||||
return "cpu"
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Apply(d *cgroupData) error {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
path, err := d.path("cpu")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(path, d.config, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpu cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We should set the real-Time group scheduling settings before moving
|
||||
// in the process because if the process is already in SCHED_RR mode
|
||||
// and no RT bandwidth is set, adding it will fail.
|
||||
if err := s.SetRtSched(path, cgroup); err != nil {
|
||||
if err := s.SetRtSched(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
|
||||
if r.CpuRtPeriod != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
if r.CpuRtRuntime != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpuShares != 0 {
|
||||
shares := r.CpuShares
|
||||
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
// read it back
|
||||
sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuQuota != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
// ... and check
|
||||
if shares > sharesRead {
|
||||
return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
|
||||
} else if shares < sharesRead {
|
||||
return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
|
||||
}
|
||||
}
|
||||
return s.SetRtSched(path, cgroup)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpu"))
|
||||
var period string
|
||||
if r.CpuPeriod != 0 {
|
||||
period = strconv.FormatUint(r.CpuPeriod, 10)
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||
// Sometimes when the period to be set is smaller
|
||||
// than the current one, it is rejected by the kernel
|
||||
// (EINVAL) as old_quota/new_period exceeds the parent
|
||||
// cgroup quota limit. If this happens and the quota is
|
||||
// going to be set, ignore the error for now and retry
|
||||
// after setting the quota.
|
||||
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
period = ""
|
||||
}
|
||||
}
|
||||
if r.CpuQuota != 0 {
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if period != "" {
|
||||
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return s.SetRtSched(path, r)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
f, err := os.Open(filepath.Join(path, "cpu.stat"))
|
||||
const file = "cpu.stat"
|
||||
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
|
@ -99,9 +110,9 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
switch t {
|
||||
case "nr_periods":
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -9,40 +7,40 @@ import (
|
|||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestCpuSetShares(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "cpu")
|
||||
|
||||
const (
|
||||
sharesBefore = 1024
|
||||
sharesAfter = 512
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpu.shares": strconv.Itoa(sharesBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuShares = sharesAfter
|
||||
r := &configs.Resources{
|
||||
CpuShares: sharesAfter,
|
||||
}
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := cpu.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.shares")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.shares - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if value != sharesAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.shares failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuSetBandWidth(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "cpu")
|
||||
|
||||
const (
|
||||
quotaBefore = 8000
|
||||
|
@ -55,47 +53,51 @@ func TestCpuSetBandWidth(t *testing.T) {
|
|||
rtPeriodAfter = 7000
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
|
||||
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
|
||||
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
||||
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuQuota = quotaAfter
|
||||
helper.CgroupData.config.Resources.CpuPeriod = periodAfter
|
||||
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
|
||||
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
|
||||
r := &configs.Resources{
|
||||
CpuQuota: quotaAfter,
|
||||
CpuPeriod: periodAfter,
|
||||
CpuRtRuntime: rtRuntimeAfter,
|
||||
CpuRtPeriod: rtPeriodAfter,
|
||||
}
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := cpu.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
quota, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
|
||||
quota, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_quota_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if quota != quotaAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
|
||||
}
|
||||
|
||||
period, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
|
||||
period, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if period != periodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
|
||||
}
|
||||
rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
|
||||
|
||||
rtRuntime, err := fscommon.GetCgroupParamUint(path, "cpu.rt_runtime_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if rtRuntime != rtRuntimeAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
||||
}
|
||||
rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
|
||||
|
||||
rtPeriod, err := fscommon.GetCgroupParamUint(path, "cpu.rt_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if rtPeriod != rtPeriodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
||||
|
@ -103,8 +105,7 @@ func TestCpuSetBandWidth(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestCpuStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "cpu")
|
||||
|
||||
const (
|
||||
nrPeriods = 2000
|
||||
|
@ -112,15 +113,15 @@ func TestCpuStats(t *testing.T) {
|
|||
throttledTime = uint64(18446744073709551615)
|
||||
)
|
||||
|
||||
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
|
||||
cpuStatContent := fmt.Sprintf("nr_periods %d\nnr_throttled %d\nthrottled_time %d\n",
|
||||
nrPeriods, nrThrottled, throttledTime)
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpu.stat": cpuStatContent,
|
||||
})
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := cpu.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -128,44 +129,43 @@ func TestCpuStats(t *testing.T) {
|
|||
expectedStats := cgroups.ThrottlingData{
|
||||
Periods: nrPeriods,
|
||||
ThrottledPeriods: nrThrottled,
|
||||
ThrottledTime: throttledTime}
|
||||
ThrottledTime: throttledTime,
|
||||
}
|
||||
|
||||
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
|
||||
}
|
||||
|
||||
func TestNoCpuStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "cpu")
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := cpu.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal("Expected not to fail, but did")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidCpuStat(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "cpu")
|
||||
|
||||
cpuStatContent := `nr_periods 2000
|
||||
nr_throttled 200
|
||||
throttled_time fortytwo`
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpu.stat": cpuStatContent,
|
||||
})
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := cpu.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failed stat parsing.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuSetRtSchedAtApply(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "cpu")
|
||||
|
||||
const (
|
||||
rtRuntimeBefore = 0
|
||||
|
@ -174,35 +174,40 @@ func TestCpuSetRtSchedAtApply(t *testing.T) {
|
|||
rtPeriodAfter = 7000
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
||||
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
|
||||
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
|
||||
r := &configs.Resources{
|
||||
CpuRtRuntime: rtRuntimeAfter,
|
||||
CpuRtPeriod: rtPeriodAfter,
|
||||
}
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != nil {
|
||||
|
||||
if err := cpu.Apply(path, r, 1234); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
|
||||
rtRuntime, err := fscommon.GetCgroupParamUint(path, "cpu.rt_runtime_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if rtRuntime != rtRuntimeAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
||||
}
|
||||
rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
|
||||
|
||||
rtPeriod, err := fscommon.GetCgroupParamUint(path, "cpu.rt_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if rtPeriod != rtPeriodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
||||
}
|
||||
pid, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cgroup.procs")
|
||||
|
||||
pid, err := fscommon.GetCgroupParamUint(path, "cgroup.procs")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cgroup.procs - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if pid != 1234 {
|
||||
t.Fatal("Got the wrong value, set cgroup.procs failed.")
|
||||
|
|
|
@ -1,52 +1,51 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
cgroupCpuacctUsageAll = "cpuacct.usage_all"
|
||||
|
||||
nanosecondsInSecond = 1000000000
|
||||
|
||||
userModeColumn = 1
|
||||
kernelModeColumn = 2
|
||||
cuacctUsageAllColumnsNumber = 3
|
||||
|
||||
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
||||
// on Linux it's a constant which is safe to be hard coded,
|
||||
// so we can avoid using cgo here. For details, see:
|
||||
// https://github.com/containerd/cgroups/pull/12
|
||||
clockTicks uint64 = 100
|
||||
)
|
||||
|
||||
var clockTicks = uint64(system.GetClockTicks())
|
||||
|
||||
type CpuacctGroup struct {
|
||||
}
|
||||
type CpuacctGroup struct{}
|
||||
|
||||
func (s *CpuacctGroup) Name() string {
|
||||
return "cpuacct"
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Apply(d *cgroupData) error {
|
||||
// we just want to join this group even though we don't set anything
|
||||
if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpuacct"))
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||
if err != nil {
|
||||
return err
|
||||
|
@ -62,8 +61,15 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
return err
|
||||
}
|
||||
|
||||
percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
|
||||
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||
return nil
|
||||
|
@ -71,52 +77,90 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
|
||||
// Returns user and kernel usage breakdown in nanoseconds.
|
||||
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||
userModeUsage := uint64(0)
|
||||
kernelModeUsage := uint64(0)
|
||||
var userModeUsage, kernelModeUsage uint64
|
||||
const (
|
||||
userField = "user"
|
||||
systemField = "system"
|
||||
file = cgroupCpuacctStat
|
||||
)
|
||||
|
||||
// Expected format:
|
||||
// user <usage in ticks>
|
||||
// system <usage in ticks>
|
||||
data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat))
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) < 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
}
|
||||
if fields[0] != userField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
|
||||
}
|
||||
if fields[2] != systemField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
|
||||
// TODO: use strings.SplitN instead.
|
||||
fields := strings.Fields(data)
|
||||
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
|
||||
return 0, 0, malformedLine(path, file, data)
|
||||
}
|
||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||
}
|
||||
|
||||
func getPercpuUsage(path string) ([]uint64, error) {
|
||||
const file = "cpuacct.usage_percpu"
|
||||
percpuUsage := []uint64{}
|
||||
data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
|
||||
data, err := cgroups.ReadFile(path, file)
|
||||
if err != nil {
|
||||
return percpuUsage, err
|
||||
}
|
||||
for _, value := range strings.Fields(string(data)) {
|
||||
// TODO: use strings.SplitN instead.
|
||||
for _, value := range strings.Fields(data) {
|
||||
value, err := strconv.ParseUint(value, 10, 64)
|
||||
if err != nil {
|
||||
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
|
||||
return percpuUsage, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
percpuUsage = append(percpuUsage, value)
|
||||
}
|
||||
return percpuUsage, nil
|
||||
}
|
||||
|
||||
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||
usageKernelMode := []uint64{}
|
||||
usageUserMode := []uint64{}
|
||||
const file = cgroupCpuacctUsageAll
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
} else if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
scanner.Scan() // skipping header line
|
||||
|
||||
for scanner.Scan() {
|
||||
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
|
||||
if len(lineFields) != cuacctUsageAllColumnsNumber {
|
||||
continue
|
||||
}
|
||||
|
||||
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||
|
||||
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||
if err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return usageKernelMode, usageUserMode, nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
const (
|
||||
cpuAcctUsageContents = "12262454190222160"
|
||||
cpuAcctUsagePerCPUContents = "1564936537989058 1583937096487821 1604195415465681 1596445226820187 1481069084155629 1478735613864327 1477610593414743 1476362015778086"
|
||||
cpuAcctStatContents = "user 452278264\nsystem 291429664"
|
||||
cpuAcctUsageAll = `cpu user system
|
||||
0 962250696038415 637727786389114
|
||||
1 981956408513304 638197595421064
|
||||
2 1002658817529022 638956774598358
|
||||
3 994937703492523 637985531181620
|
||||
4 874843781648690 638837766495476
|
||||
5 872544369885276 638763309884944
|
||||
6 870104915696359 640081778921247
|
||||
7 870202363887496 638716766259495
|
||||
`
|
||||
)
|
||||
|
||||
func TestCpuacctStats(t *testing.T) {
|
||||
path := tempDir(t, "cpuacct")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpuacct.usage": cpuAcctUsageContents,
|
||||
"cpuacct.usage_percpu": cpuAcctUsagePerCPUContents,
|
||||
"cpuacct.stat": cpuAcctStatContents,
|
||||
"cpuacct.usage_all": cpuAcctUsageAll,
|
||||
})
|
||||
|
||||
cpuacct := &CpuacctGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpuacct.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
expectedStats := cgroups.CpuUsage{
|
||||
TotalUsage: uint64(12262454190222160),
|
||||
PercpuUsage: []uint64{
|
||||
1564936537989058, 1583937096487821, 1604195415465681, 1596445226820187,
|
||||
1481069084155629, 1478735613864327, 1477610593414743, 1476362015778086,
|
||||
},
|
||||
PercpuUsageInKernelmode: []uint64{
|
||||
637727786389114, 638197595421064, 638956774598358, 637985531181620,
|
||||
638837766495476, 638763309884944, 640081778921247, 638716766259495,
|
||||
},
|
||||
PercpuUsageInUsermode: []uint64{
|
||||
962250696038415, 981956408513304, 1002658817529022, 994937703492523,
|
||||
874843781648690, 872544369885276, 870104915696359, 870202363887496,
|
||||
},
|
||||
UsageInKernelmode: (uint64(291429664) * nanosecondsInSecond) / clockTicks,
|
||||
UsageInUsermode: (uint64(452278264) * nanosecondsInSecond) / clockTicks,
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expectedStats, actualStats.CpuStats.CpuUsage) {
|
||||
t.Errorf("Expected CPU usage %#v but found %#v\n",
|
||||
expectedStats, actualStats.CpuStats.CpuUsage)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuacctStatsWithoutUsageAll(t *testing.T) {
|
||||
path := tempDir(t, "cpuacct")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpuacct.usage": cpuAcctUsageContents,
|
||||
"cpuacct.usage_percpu": cpuAcctUsagePerCPUContents,
|
||||
"cpuacct.stat": cpuAcctStatContents,
|
||||
})
|
||||
|
||||
cpuacct := &CpuacctGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpuacct.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
expectedStats := cgroups.CpuUsage{
|
||||
TotalUsage: uint64(12262454190222160),
|
||||
PercpuUsage: []uint64{
|
||||
1564936537989058, 1583937096487821, 1604195415465681, 1596445226820187,
|
||||
1481069084155629, 1478735613864327, 1477610593414743, 1476362015778086,
|
||||
},
|
||||
PercpuUsageInKernelmode: []uint64{},
|
||||
PercpuUsageInUsermode: []uint64{},
|
||||
UsageInKernelmode: (uint64(291429664) * nanosecondsInSecond) / clockTicks,
|
||||
UsageInUsermode: (uint64(452278264) * nanosecondsInSecond) / clockTicks,
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(expectedStats, actualStats.CpuStats.CpuUsage) {
|
||||
t.Errorf("Expected CPU usage %#v but found %#v\n",
|
||||
expectedStats, actualStats.CpuStats.CpuUsage)
|
||||
}
|
||||
}
|
|
@ -1,75 +1,159 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
type CpusetGroup struct {
|
||||
}
|
||||
type CpusetGroup struct{}
|
||||
|
||||
func (s *CpusetGroup) Name() string {
|
||||
return "cpuset"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.path("cpuset")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(dir, d.config, d.pid)
|
||||
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
return s.ApplyDir(path, r, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.CpusetCpus != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
if r.CpusetMems != "" {
|
||||
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpuset"))
|
||||
func getCpusetStat(path string, file string) ([]uint16, error) {
|
||||
var extracted []uint16
|
||||
fileContent, err := fscommon.GetCgroupParamString(path, file)
|
||||
if err != nil {
|
||||
return extracted, err
|
||||
}
|
||||
if len(fileContent) == 0 {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
|
||||
}
|
||||
|
||||
for _, s := range strings.Split(fileContent, ",") {
|
||||
sp := strings.SplitN(s, "-", 3)
|
||||
switch len(sp) {
|
||||
case 3:
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
|
||||
case 2:
|
||||
min, err := strconv.ParseUint(sp[0], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
max, err := strconv.ParseUint(sp[1], 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
if min > max {
|
||||
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
|
||||
}
|
||||
for i := min; i <= max; i++ {
|
||||
extracted = append(extracted, uint16(i))
|
||||
}
|
||||
case 1:
|
||||
value, err := strconv.ParseUint(s, 10, 16)
|
||||
if err != nil {
|
||||
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
extracted = append(extracted, uint16(value))
|
||||
}
|
||||
}
|
||||
|
||||
return extracted, nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
var err error
|
||||
|
||||
stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
|
||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
|
||||
func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
|
||||
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
// We didn't inherit cpuset configs from parent, but we have
|
||||
|
@ -79,82 +163,83 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
|
|||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
|
||||
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
// Since we are not using apply(), we need to place the pid
|
||||
// into the procs file.
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
|
||||
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
|
||||
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
|
||||
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
|
||||
return
|
||||
}
|
||||
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
|
||||
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
|
||||
return
|
||||
}
|
||||
return cpus, mems, nil
|
||||
}
|
||||
|
||||
// ensureParent makes sure that the parent directory of current is created
|
||||
// and populated with the proper cpus and mems files copied from
|
||||
// it's parent.
|
||||
func (s *CpusetGroup) ensureParent(current, root string) error {
|
||||
// cpusetEnsureParent makes sure that the parent directories of current
|
||||
// are created and populated with the proper cpus and mems files copied
|
||||
// from their respective parent. It does that recursively, starting from
|
||||
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
|
||||
func cpusetEnsureParent(current string) error {
|
||||
var st unix.Statfs_t
|
||||
|
||||
parent := filepath.Dir(current)
|
||||
if libcontainerUtils.CleanPath(parent) == root {
|
||||
err := unix.Statfs(parent, &st)
|
||||
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return nil
|
||||
}
|
||||
// Avoid infinite recursion.
|
||||
if parent == current {
|
||||
return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
|
||||
// Treat non-existing directory as cgroupfs as it will be created,
|
||||
// and the root cpuset directory obviously exists.
|
||||
if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||
return &os.PathError{Op: "statfs", Path: parent, Err: err}
|
||||
}
|
||||
if err := s.ensureParent(parent, root); err != nil {
|
||||
|
||||
if err := cpusetEnsureParent(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(current, 0755); err != nil {
|
||||
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(current, parent)
|
||||
return cpusetCopyIfNeeded(current, parent)
|
||||
}
|
||||
|
||||
// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// directory to the current directory if the file's contents are 0
|
||||
func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
|
||||
var (
|
||||
err error
|
||||
currentCpus, currentMems []byte
|
||||
parentCpus, parentMems []byte
|
||||
)
|
||||
|
||||
if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
|
||||
func cpusetCopyIfNeeded(current, parent string) error {
|
||||
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
|
||||
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.isEmpty(currentCpus) {
|
||||
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
if isEmptyCpuset(currentCpus) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if s.isEmpty(currentMems) {
|
||||
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
if isEmptyCpuset(currentMems) {
|
||||
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) isEmpty(b []byte) bool {
|
||||
return len(bytes.Trim(b, "\n")) == 0
|
||||
// isEmptyCpuset reports whether str, the raw content of a cpuset file, holds
// no value: it is either the empty string or exactly one newline.
func isEmptyCpuset(str string) bool {
	switch str {
	case "", "\n":
		return true
	}
	return false
}
|
||||
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
|
||||
if err := s.Set(path, cgroup); err != nil {
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
|
||||
if err := s.Set(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(path, filepath.Dir(path))
|
||||
return cpusetCopyIfNeeded(path, filepath.Dir(path))
|
||||
}
|
||||
|
|
|
@ -1,67 +1,242 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestCpusetSetCpus(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpuset", t)
|
||||
defer helper.cleanup()
|
||||
const (
|
||||
cpus = "0-2,7,12-14\n"
|
||||
cpuExclusive = "1\n"
|
||||
mems = "1-4,6,9\n"
|
||||
memHardwall = "0\n"
|
||||
memExclusive = "0\n"
|
||||
memoryMigrate = "1\n"
|
||||
memorySpreadPage = "0\n"
|
||||
memorySpeadSlab = "1\n"
|
||||
memoryPressure = "34377\n"
|
||||
schedLoadBalance = "1\n"
|
||||
schedRelaxDomainLevel = "-1\n"
|
||||
)
|
||||
|
||||
var cpusetTestFiles = map[string]string{
|
||||
"cpuset.cpus": cpus,
|
||||
"cpuset.cpu_exclusive": cpuExclusive,
|
||||
"cpuset.mems": mems,
|
||||
"cpuset.mem_hardwall": memHardwall,
|
||||
"cpuset.mem_exclusive": memExclusive,
|
||||
"cpuset.memory_migrate": memoryMigrate,
|
||||
"cpuset.memory_spread_page": memorySpreadPage,
|
||||
"cpuset.memory_spread_slab": memorySpeadSlab,
|
||||
"cpuset.memory_pressure": memoryPressure,
|
||||
"cpuset.sched_load_balance": schedLoadBalance,
|
||||
"cpuset.sched_relax_domain_level": schedRelaxDomainLevel,
|
||||
}
|
||||
|
||||
func TestCPUSetSetCpus(t *testing.T) {
|
||||
path := tempDir(t, "cpuset")
|
||||
|
||||
const (
|
||||
cpusBefore = "0"
|
||||
cpusAfter = "1-3"
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpuset.cpus": cpusBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
|
||||
r := &configs.Resources{
|
||||
CpusetCpus: cpusAfter,
|
||||
}
|
||||
cpuset := &CpusetGroup{}
|
||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := cpuset.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.cpus")
|
||||
value, err := fscommon.GetCgroupParamString(path, "cpuset.cpus")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpuset.cpus - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if value != cpusAfter {
|
||||
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpusetSetMems(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpuset", t)
|
||||
defer helper.cleanup()
|
||||
func TestCPUSetSetMems(t *testing.T) {
|
||||
path := tempDir(t, "cpuset")
|
||||
|
||||
const (
|
||||
memsBefore = "0"
|
||||
memsAfter = "1"
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"cpuset.mems": memsBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpusetMems = memsAfter
|
||||
r := &configs.Resources{
|
||||
CpusetMems: memsAfter,
|
||||
}
|
||||
cpuset := &CpusetGroup{}
|
||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := cpuset.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.mems")
|
||||
value, err := fscommon.GetCgroupParamString(path, "cpuset.mems")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpuset.mems - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if value != memsAfter {
|
||||
t.Fatal("Got the wrong value, set cpuset.mems failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCPUSetStatsCorrect(t *testing.T) {
|
||||
path := tempDir(t, "cpuset")
|
||||
writeFileContents(t, path, cpusetTestFiles)
|
||||
|
||||
cpuset := &CpusetGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpuset.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedStats := cgroups.CPUSetStats{
|
||||
CPUs: []uint16{0, 1, 2, 7, 12, 13, 14},
|
||||
CPUExclusive: 1,
|
||||
Mems: []uint16{1, 2, 3, 4, 6, 9},
|
||||
MemoryMigrate: 1,
|
||||
MemHardwall: 0,
|
||||
MemExclusive: 0,
|
||||
MemorySpreadPage: 0,
|
||||
MemorySpreadSlab: 1,
|
||||
MemoryPressure: 34377,
|
||||
SchedLoadBalance: 1,
|
||||
SchedRelaxDomainLevel: -1,
|
||||
}
|
||||
if !reflect.DeepEqual(expectedStats, actualStats.CPUSetStats) {
|
||||
t.Fatalf("Expected Cpuset stats usage %#v but found %#v",
|
||||
expectedStats, actualStats.CPUSetStats)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCPUSetStatsMissingFiles(t *testing.T) {
|
||||
for _, testCase := range []struct {
|
||||
desc string
|
||||
filename, contents string
|
||||
removeFile bool
|
||||
}{
|
||||
{
|
||||
desc: "empty cpus file",
|
||||
filename: "cpuset.cpus",
|
||||
contents: "",
|
||||
removeFile: false,
|
||||
},
|
||||
{
|
||||
desc: "empty mems file",
|
||||
filename: "cpuset.mems",
|
||||
contents: "",
|
||||
removeFile: false,
|
||||
},
|
||||
{
|
||||
desc: "corrupted cpus file",
|
||||
filename: "cpuset.cpus",
|
||||
contents: "0-3,*4^2",
|
||||
removeFile: false,
|
||||
},
|
||||
{
|
||||
desc: "corrupted mems file",
|
||||
filename: "cpuset.mems",
|
||||
contents: "0,1,2-5,8-7",
|
||||
removeFile: false,
|
||||
},
|
||||
{
|
||||
desc: "missing cpu_exclusive file",
|
||||
filename: "cpuset.cpu_exclusive",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing memory_migrate file",
|
||||
filename: "cpuset.memory_migrate",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing mem_hardwall file",
|
||||
filename: "cpuset.mem_hardwall",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing mem_exclusive file",
|
||||
filename: "cpuset.mem_exclusive",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing memory_spread_page file",
|
||||
filename: "cpuset.memory_spread_page",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing memory_spread_slab file",
|
||||
filename: "cpuset.memory_spread_slab",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing memory_pressure file",
|
||||
filename: "cpuset.memory_pressure",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing sched_load_balance file",
|
||||
filename: "cpuset.sched_load_balance",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
{
|
||||
desc: "missing sched_relax_domain_level file",
|
||||
filename: "cpuset.sched_relax_domain_level",
|
||||
contents: "",
|
||||
removeFile: true,
|
||||
},
|
||||
} {
|
||||
t.Run(testCase.desc, func(t *testing.T) {
|
||||
path := tempDir(t, "cpuset")
|
||||
|
||||
tempCpusetTestFiles := map[string]string{}
|
||||
for i, v := range cpusetTestFiles {
|
||||
tempCpusetTestFiles[i] = v
|
||||
}
|
||||
|
||||
if testCase.removeFile {
|
||||
delete(tempCpusetTestFiles, testCase.filename)
|
||||
writeFileContents(t, path, tempCpusetTestFiles)
|
||||
cpuset := &CpusetGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpuset.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Errorf("failed unexpectedly: %q", err)
|
||||
}
|
||||
} else {
|
||||
tempCpusetTestFiles[testCase.filename] = testCase.contents
|
||||
writeFileContents(t, path, tempCpusetTestFiles)
|
||||
cpuset := &CpusetGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpuset.GetStats(path, &actualStats)
|
||||
|
||||
if err == nil {
|
||||
t.Error("failed to return expected error")
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,81 +1,109 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"reflect"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
"github.com/opencontainers/runc/libcontainer/userns"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
TestingSkipFinalCheck bool
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
return "devices"
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("devices")
|
||||
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||
if r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
if path == "" {
|
||||
// Return error here, since devices cgroup
|
||||
// is a hard requirement for container's security.
|
||||
return errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
|
||||
list, err := cgroups.ReadFile(path, "devices.list")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list))
|
||||
}
|
||||
|
||||
func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) {
|
||||
// This defaults to a white-list -- which is what we want!
|
||||
emu := &cgroupdevices.Emulator{}
|
||||
for _, rule := range rules {
|
||||
if err := emu.Apply(*rule); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return emu, nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||
if userns.RunningInUserNS() || r.SkipDevices {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Generate two emulators, one for the current state of the cgroup and one
|
||||
// for the requested state by the user.
|
||||
current, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
// We will return error even it's `not found` error, devices
|
||||
// cgroup is hard requirement for container's security.
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if system.RunningInUserNS() {
|
||||
return nil
|
||||
target, err := buildEmulator(r.Devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
// Compute the minimal set of transition rules needed to achieve the
|
||||
// requested state.
|
||||
transitionRules, err := current.Transition(target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
for _, rule := range transitionRules {
|
||||
file := "devices.deny"
|
||||
if rule.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
||||
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
// Final safety check -- ensure that the resulting state is what was
|
||||
// requested. This is only really correct for white-lists, but for
|
||||
// black-lists we can at least check that the cgroup is in the right mode.
|
||||
//
|
||||
// This safety-check is skipped for the unit tests because we cannot
|
||||
// currently mock devices.list correctly.
|
||||
if !s.TestingSkipFinalCheck {
|
||||
currentAfter, err := loadEmulator(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
|
||||
return errors.New("resulting devices cgroup doesn't precisely match target")
|
||||
} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
|
||||
return errors.New("resulting devices cgroup doesn't match target mode")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("devices"))
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -7,93 +5,48 @@ import (
|
|||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
allowedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
allowedList = "c 1:5 rwm"
|
||||
deniedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
deniedList = "c 1:3 rwm"
|
||||
"github.com/opencontainers/runc/libcontainer/devices"
|
||||
)
|
||||
|
||||
func TestDevicesSetAllow(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "devices")
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.deny": "a",
|
||||
})
|
||||
allowAllDevices := false
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
|
||||
if value != allowedList {
|
||||
t.Fatal("Got the wrong value, set devices.allow failed.")
|
||||
}
|
||||
|
||||
// When AllowAllDevices is nil, devices.allow file should not be modified.
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = nil
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
if value != allowedList {
|
||||
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDevicesSetDeny(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.allow": "a",
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"devices.allow": "",
|
||||
"devices.deny": "",
|
||||
"devices.list": "a *:* rwm",
|
||||
})
|
||||
|
||||
allowAllDevices := true
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
r := &configs.Resources{
|
||||
Devices: []*devices.Rule{
|
||||
{
|
||||
Type: devices.CharDevice,
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: devices.Permissions("rwm"),
|
||||
Allow: true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
d := &DevicesGroup{TestingSkipFinalCheck: true}
|
||||
if err := d.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny")
|
||||
// The default deny rule must be written.
|
||||
value, err := fscommon.GetCgroupParamString(path, "devices.deny")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.deny - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value[0] != 'a' {
|
||||
t.Errorf("Got the wrong value (%q), set devices.deny failed.", value)
|
||||
}
|
||||
|
||||
if value != deniedList {
|
||||
t.Fatal("Got the wrong value, set devices.deny failed.")
|
||||
// Permitted rule must be written.
|
||||
if value, err := fscommon.GetCgroupParamString(path, "devices.allow"); err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != "c 1:5 rwm" {
|
||||
t.Errorf("Got the wrong value (%q), set devices.allow failed.", value)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
type parseError = fscommon.ParseError
|
||||
|
||||
// malformedLine is used by all cgroupfs file parsers that expect a line
|
||||
// in a particular format but get some garbage instead.
|
||||
func malformedLine(path, file, line string) error {
|
||||
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
|
||||
}
|
|
@ -1,67 +1,158 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type FreezerGroup struct {
|
||||
}
|
||||
type FreezerGroup struct{}
|
||||
|
||||
func (s *FreezerGroup) Name() string {
|
||||
return "freezer"
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
switch cgroup.Resources.Freezer {
|
||||
case configs.Frozen, configs.Thawed:
|
||||
for {
|
||||
// In case this loop does not exit because it doesn't get the expected
|
||||
// state, let's write again this state, hoping it's going to be properly
|
||||
// set this time. Otherwise, this loop could run infinitely, waiting for
|
||||
// a state change that would never happen.
|
||||
if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||
switch r.Freezer {
|
||||
case configs.Frozen:
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
// Freezing failed, and it is bad and dangerous
|
||||
// to leave the cgroup in FROZEN or FREEZING
|
||||
// state, so (try to) thaw it back.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
}
|
||||
}()
|
||||
|
||||
// As per older kernel docs (freezer-subsystem.txt before
|
||||
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
|
||||
// userspace should either retry or thaw. While current
|
||||
// kernel cgroup v1 docs no longer mention a need to retry,
|
||||
// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
|
||||
// freeze a cgroup v1 while new processes keep appearing in it
|
||||
// (either via fork/clone or by writing new PIDs to
|
||||
// cgroup.procs).
|
||||
//
|
||||
// The numbers below are empirically chosen to have a decent
|
||||
// chance to succeed in various scenarios ("runc pause/unpause
|
||||
// with parallel runc exec" and "bare freeze/unfreeze on a very
|
||||
// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
|
||||
//
|
||||
// Adding any amount of sleep in between retries did not
|
||||
// increase the chances of successful freeze in "pause/unpause
|
||||
// with parallel exec" reproducer. OTOH, adding an occasional
|
||||
// sleep helped for the case where the system is extremely slow
|
||||
// (CentOS 7 VM on GHA CI).
|
||||
//
|
||||
// Alas, this is still a game of chances, since the real fix
|
||||
// belongs in the kernel (cgroup v2 does not have this bug).
|
||||
|
||||
for i := 0; i < 1000; i++ {
|
||||
if i%50 == 49 {
|
||||
// Occasional thaw and sleep improves
|
||||
// the chances to succeed in freezing
|
||||
// in case new processes keep appearing
|
||||
// in the cgroup.
|
||||
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
|
||||
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
if i%25 == 24 {
|
||||
// Occasional short sleep before reading
|
||||
// the state back also improves the chances to
|
||||
// succeed in freezing in case of a very slow
|
||||
// system.
|
||||
time.Sleep(10 * time.Microsecond)
|
||||
}
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
|
||||
break
|
||||
state = strings.TrimSpace(state)
|
||||
switch state {
|
||||
case "FREEZING":
|
||||
continue
|
||||
case string(configs.Frozen):
|
||||
if i > 1 {
|
||||
logrus.Debugf("frozen after %d retries", i)
|
||||
}
|
||||
return nil
|
||||
default:
|
||||
// should never happen
|
||||
return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
// Despite our best efforts, it got stuck in FREEZING.
|
||||
return errors.New("unable to freeze")
|
||||
case configs.Thawed:
|
||||
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("freezer"))
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||
for {
|
||||
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat the freezer as
|
||||
// being in an "undefined" state.
|
||||
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Undefined, err
|
||||
}
|
||||
switch strings.TrimSpace(state) {
|
||||
case "THAWED":
|
||||
return configs.Thawed, nil
|
||||
case "FROZEN":
|
||||
// Find out whether the cgroup is frozen directly,
|
||||
// or indirectly via an ancestor.
|
||||
self, err := cgroups.ReadFile(path, "freezer.self_freezing")
|
||||
if err != nil {
|
||||
// If the kernel is too old, then we just treat
|
||||
// it as being frozen.
|
||||
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
|
||||
err = nil
|
||||
}
|
||||
return configs.Frozen, err
|
||||
}
|
||||
switch self {
|
||||
case "0\n":
|
||||
return configs.Thawed, nil
|
||||
case "1\n":
|
||||
return configs.Frozen, nil
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
|
||||
}
|
||||
case "FREEZING":
|
||||
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||
// is still undergoing freezing. This should be a temporary delay.
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -10,22 +8,23 @@ import (
|
|||
)
|
||||
|
||||
func TestFreezerSetState(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("freezer", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "freezer")
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"freezer.state": string(configs.Frozen),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Freezer = configs.Thawed
|
||||
r := &configs.Resources{
|
||||
Freezer: configs.Thawed,
|
||||
}
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := freezer.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "freezer.state")
|
||||
value, err := fscommon.GetCgroupParamString(path, "freezer.state")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse freezer.state - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != string(configs.Thawed) {
|
||||
t.Fatal("Got the wrong value, set freezer.state failed.")
|
||||
|
@ -33,16 +32,15 @@ func TestFreezerSetState(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestFreezerSetInvalidState(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("freezer", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "freezer")
|
||||
|
||||
const (
|
||||
invalidArg configs.FreezerState = "Invalid"
|
||||
)
|
||||
const invalidArg configs.FreezerState = "Invalid"
|
||||
|
||||
helper.CgroupData.config.Resources.Freezer = invalidArg
|
||||
r := &configs.Resources{
|
||||
Freezer: invalidArg,
|
||||
}
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
|
||||
if err := freezer.Set(path, r); err == nil {
|
||||
t.Fatal("Failed to return invalid argument error")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,264 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var subsystems = []subsystem{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&RdmaGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
func init() {
|
||||
// If using cgroups-hybrid mode then add a "" controller indicating
|
||||
// it should join the cgroups v2.
|
||||
if cgroups.IsCgroup2HybridMode() {
|
||||
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
|
||||
}
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// GetStats fills in the stats for the subsystem.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Apply creates and joins a cgroup, adding pid into it. Some
|
||||
// subsystems use resources to pre-configure the cgroup parents
|
||||
// before creating or joining it.
|
||||
Apply(path string, r *configs.Resources, pid int) error
|
||||
// Set sets the cgroup resources.
|
||||
Set(path string, r *configs.Resources) error
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
mu sync.Mutex
|
||||
cgroups *configs.Cgroup
|
||||
paths map[string]string
|
||||
}
|
||||
|
||||
func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||
// Some v1 controllers (cpu, cpuset, and devices) expect
|
||||
// cgroups.Resources to not be nil in Apply.
|
||||
if cg.Resources == nil {
|
||||
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
|
||||
}
|
||||
if cg.Resources.Unified != nil {
|
||||
return nil, cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
if paths == nil {
|
||||
var err error
|
||||
paths, err = initPaths(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &manager{
|
||||
cgroups: cg,
|
||||
paths: paths,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(rootless bool, err error) bool {
|
||||
// We do not ignore errors if we are root.
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// Is it an ordinary EPERM?
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
return true
|
||||
}
|
||||
// Handle some specific syscall errors.
|
||||
var errno unix.Errno
|
||||
if errors.As(err, &errno) {
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) (err error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
c := m.cgroups
|
||||
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
p, ok := m.paths[name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := sys.Apply(p, c.Resources, pid); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an
|
||||
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||
// case of permission problems here, but do delete the path from
|
||||
// the m.paths map, since it is either non-existent and could not
|
||||
// be created, or the pid could not be added to it.
|
||||
//
|
||||
// Cases where limits for the subsystem have been set are handled
|
||||
// later by Set, which fails with a friendly error (see
|
||||
// if path == "" in Set).
|
||||
if isIgnorableError(c.Rootless, err) && c.Path == "" {
|
||||
delete(m.paths, name)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return cgroups.RemovePaths(m.paths)
|
||||
}
|
||||
|
||||
func (m *manager) Path(subsys string) string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths[subsys]
|
||||
}
|
||||
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if path == "" {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *manager) Set(r *configs.Resources) error {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if r.Unified != nil {
|
||||
return cgroups.ErrV1NoUnified
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for _, sys := range subsystems {
|
||||
path := m.paths[sys.Name()]
|
||||
if err := sys.Set(path, r); err != nil {
|
||||
// When rootless is true, errors from the device subsystem
|
||||
// are ignored, as it is really not expected to work.
|
||||
if m.cgroups.Rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
path := m.Path("freezer")
|
||||
if path == "" {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
prevState := m.cgroups.Resources.Freezer
|
||||
m.cgroups.Resources.Freezer = state
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||
m.cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.Path("devices"))
|
||||
}
|
||||
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
return m.paths
|
||||
}
|
||||
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.cgroups, nil
|
||||
}
|
||||
|
||||
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||
dir := m.Path("freezer")
|
||||
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||
if dir == "" {
|
||||
return configs.Undefined, nil
|
||||
}
|
||||
freezer := &FreezerGroup{}
|
||||
return freezer.GetState(dir)
|
||||
}
|
||||
|
||||
func (m *manager) Exists() bool {
|
||||
return cgroups.PathExists(m.Path("devices"))
|
||||
}
|
||||
|
||||
func OOMKillCount(path string) (uint64, error) {
|
||||
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
|
||||
}
|
||||
|
||||
func (m *manager) OOMKillCount() (uint64, error) {
|
||||
c, err := OOMKillCount(m.Path("memory"))
|
||||
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
|
||||
err = nil
|
||||
}
|
||||
|
||||
return c, err
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func BenchmarkGetStats(b *testing.B) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
b.Skip("cgroup v2 is not supported")
|
||||
}
|
||||
|
||||
// Unset TestMode as we work with real cgroupfs here,
|
||||
// and we want OpenFile to perform the fstype check.
|
||||
cgroups.TestMode = false
|
||||
defer func() {
|
||||
cgroups.TestMode = true
|
||||
}()
|
||||
|
||||
cg := &configs.Cgroup{
|
||||
Path: "/some/kind/of/a/path/here",
|
||||
Resources: &configs.Resources{},
|
||||
}
|
||||
m, err := NewManager(cg, nil)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
err = m.Apply(-1)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
defer func() {
|
||||
_ = m.Destroy()
|
||||
}()
|
||||
|
||||
var st *cgroups.Stats
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
st, err = m.GetStats()
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
if st.CpuStats.CpuUsage.TotalUsage != 0 {
|
||||
b.Fatalf("stats: %+v", st)
|
||||
}
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
// +build !linux
|
||||
|
||||
package fs
|
|
@ -1,35 +1,26 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type HugetlbGroup struct {
|
||||
}
|
||||
type HugetlbGroup struct{}
|
||||
|
||||
func (s *HugetlbGroup) Name() string {
|
||||
return "hugetlb"
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("hugetlb")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, hugetlb := range r.HugetlbLimit {
|
||||
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -37,31 +28,30 @@ func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("hugetlb"))
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
usage := "hugetlb." + pageSize + ".usage_in_bytes"
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
|
||||
maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
return err
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
|
||||
failcnt := "hugetlb." + pageSize + ".failcnt"
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
return err
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -18,7 +16,7 @@ const (
|
|||
hugetlbFailcnt = "100\n"
|
||||
)
|
||||
|
||||
var (
|
||||
const (
|
||||
usage = "hugetlb.%s.usage_in_bytes"
|
||||
limit = "hugetlb.%s.limit_in_bytes"
|
||||
maxUsage = "hugetlb.%s.max_usage_in_bytes"
|
||||
|
@ -26,38 +24,38 @@ var (
|
|||
)
|
||||
|
||||
func TestHugetlbSetHugetlb(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "hugetlb")
|
||||
|
||||
const (
|
||||
hugetlbBefore = 256
|
||||
hugetlbAfter = 512
|
||||
)
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
writeFileContents(t, path, map[string]string{
|
||||
fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
|
||||
})
|
||||
}
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
|
||||
r := &configs.Resources{}
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
r.HugetlbLimit = []*configs.HugepageLimit{
|
||||
{
|
||||
Pagesize: pageSize,
|
||||
Limit: hugetlbAfter,
|
||||
},
|
||||
}
|
||||
hugetlb := &HugetlbGroup{}
|
||||
if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := hugetlb.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
limit := fmt.Sprintf(limit, pageSize)
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, limit)
|
||||
value, err := fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse %s - %s", limit, err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != hugetlbAfter {
|
||||
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
|
||||
|
@ -66,10 +64,9 @@ func TestHugetlbSetHugetlb(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestHugetlbStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "hugetlb")
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
writeFileContents(t, path, map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||
fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
|
||||
fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt,
|
||||
|
@ -78,56 +75,50 @@ func TestHugetlbStats(t *testing.T) {
|
|||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := hugetlb.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||
t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "hugetlb")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
maxUsage: hugetlbMaxUsageContents,
|
||||
})
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := hugetlb.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||
t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "hugetlb")
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
writeFileContents(t, path, map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||
})
|
||||
}
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := hugetlb.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||
t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "hugetlb")
|
||||
for _, pageSize := range cgroups.HugePageSizes() {
|
||||
writeFileContents(t, path, map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): "bad",
|
||||
maxUsage: hugetlbMaxUsageContents,
|
||||
})
|
||||
|
@ -135,24 +126,22 @@ t.Skip("Disabled unreliable test")
|
|||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := hugetlb.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
|
||||
t.Skip("Disabled unreliable test")
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "hugetlb")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
usage: hugetlbUsageContents,
|
||||
maxUsage: "bad",
|
||||
})
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := hugetlb.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
|
|
|
@ -1,62 +0,0 @@
|
|||
// +build linux,!nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // for Errno type only
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Ensure that kernel memory is available in this kernel build. If it
|
||||
// isn't, we just ignore it because EnableKernelMemoryAccounting is
|
||||
// automatically called for all memory limits.
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
return nil
|
||||
}
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// We have specifically been asked to set a kmem limit. If the kernel
|
||||
// doesn't support it we *must* error out.
|
||||
return errors.New("kernel memory accounting not supported by this kernel")
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
// +build linux,nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// EnableKernelMemoryAccounting is the no-op variant used by this file, which
// is guarded by the "linux,nokmem" build constraint: it ignores path and
// always returns nil.
func EnableKernelMemoryAccounting(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// setKernelMemory always fails in this file, which is guarded by the
// "linux,nokmem" build constraint: both arguments are ignored and a fixed
// error is returned.
func setKernelMemory(path string, kernelMemoryLimit int64) error {
	errDisabled := errors.New("kernel memory accounting disabled in this runc build")
	return errDisabled
}
|
|
@ -1,15 +1,17 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
@ -18,65 +20,66 @@ import (
|
|||
const (
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
cgroupMemoryUsage = "memory.usage_in_bytes"
|
||||
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
}
|
||||
type MemoryGroup struct{}
|
||||
|
||||
func (s *MemoryGroup) Name() string {
|
||||
return "memory"
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
||||
path, err := d.path("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
} else if path == "" {
|
||||
return nil
|
||||
}
|
||||
if memoryAssigned(d.config) {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// Only enable kernel memory accounting when this cgroup
|
||||
// is created by libcontainer, otherwise we might get
|
||||
// error when people use `cgroupsPath` to join an existing
|
||||
// cgroup whose kernel memory is not initialized.
|
||||
if err := EnableKernelMemoryAccounting(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(path)
|
||||
}
|
||||
}()
|
||||
|
||||
// We need to join memory cgroup after set memory limits, because
|
||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||
_, err = d.join("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
// If the memory update is set to -1 we should also
|
||||
// set swap to -1, it means unlimited memory.
|
||||
if cgroup.Resources.Memory == -1 {
|
||||
func setMemory(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
|
||||
if !errors.Is(err, unix.EBUSY) {
|
||||
return err
|
||||
}
|
||||
|
||||
// EBUSY means the kernel can't set new limit as it's too low
|
||||
// (lower than the current usage). Return more specific error.
|
||||
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||
}
|
||||
|
||||
func setSwap(path string, val int64) error {
|
||||
if val == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, r *configs.Resources) error {
|
||||
// If the memory update is set to -1 and the swap is not explicitly
|
||||
// set, we should also set swap to -1, it means unlimited memory.
|
||||
if r.Memory == -1 && r.MemorySwap == 0 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
cgroup.Resources.MemorySwap = -1
|
||||
r.MemorySwap = -1
|
||||
}
|
||||
}
|
||||
|
||||
// When memory and swap memory are both set, we need to handle the cases
|
||||
// for updating container.
|
||||
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if r.Memory != 0 && r.MemorySwap != 0 {
|
||||
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -84,84 +87,61 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
|||
// When update memory limit, we should adapt the write sequence
|
||||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if err := setMemoryAndSwap(path, cgroup); err != nil {
|
||||
if err := setMemory(path, r.Memory); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||
if err := setMemoryAndSwap(path, r); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// ignore KernelMemory and KernelMemoryTCP
|
||||
|
||||
if r.MemoryReservation != 0 {
|
||||
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
if r.OomKillDisable {
|
||||
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemoryTCP != 0 {
|
||||
if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.OomKillDisable {
|
||||
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
||||
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *cgroup.Resources.MemorySwappiness <= 100 {
|
||||
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
} else if *r.MemorySwappiness <= 100 {
|
||||
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
|
||||
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("memory"))
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
|
||||
const file = "memory.stat"
|
||||
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
|
@ -172,9 +152,9 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
return &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
|
@ -201,25 +181,21 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
|||
}
|
||||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
|
||||
value, err := fscommon.GetCgroupParamUint(path, useHierarchy)
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == 1 {
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func memoryAssigned(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.Memory != 0 ||
|
||||
cgroup.Resources.MemoryReservation != 0 ||
|
||||
cgroup.Resources.MemorySwap > 0 ||
|
||||
cgroup.Resources.KernelMemory > 0 ||
|
||||
cgroup.Resources.KernelMemoryTCP > 0 ||
|
||||
cgroup.Resources.OomKillDisable ||
|
||||
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
|
||||
pagesByNUMA, err := getPageUsageByNUMA(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
|
@ -227,45 +203,146 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
|||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
||||
moduleName = "memory." + name
|
||||
}
|
||||
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
|
||||
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
|
||||
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
|
||||
var (
|
||||
usage = moduleName + ".usage_in_bytes"
|
||||
maxUsage = moduleName + ".max_usage_in_bytes"
|
||||
failcnt = moduleName + ".failcnt"
|
||||
limit = moduleName + ".limit_in_bytes"
|
||||
)
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
if name != "" && os.IsNotExist(err) {
|
||||
// Ignore ENOENT as swap and kmem controllers
|
||||
// are optional in the kernel.
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
||||
return cgroups.MemoryData{}, err
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
||||
|
||||
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
|
||||
const (
|
||||
maxColumns = math.MaxUint8 + 1
|
||||
file = "memory.numa_stat"
|
||||
)
|
||||
stats := cgroups.PageUsageByNUMA{}
|
||||
|
||||
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||
if os.IsNotExist(err) {
|
||||
return stats, nil
|
||||
} else if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
|
||||
// and it looks like this:
|
||||
//
|
||||
// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||
|
||||
scanner := bufio.NewScanner(fd)
|
||||
for scanner.Scan() {
|
||||
var field *cgroups.PageStats
|
||||
|
||||
line := scanner.Text()
|
||||
columns := strings.SplitN(line, " ", maxColumns)
|
||||
for i, column := range columns {
|
||||
byNode := strings.SplitN(column, "=", 2)
|
||||
// Some custom kernels have non-standard fields, like
|
||||
// numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||
// numa_exectime 0
|
||||
if len(byNode) < 2 {
|
||||
if i == 0 {
|
||||
// Ignore/skip those.
|
||||
break
|
||||
} else {
|
||||
// The first column was already validated,
|
||||
// so be strict to the rest.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
}
|
||||
key, val := byNode[0], byNode[1]
|
||||
if i == 0 { // First column: key is name, val is total.
|
||||
field = getNUMAField(&stats, key)
|
||||
if field == nil { // unknown field (new kernel?)
|
||||
break
|
||||
}
|
||||
field.Total, err = strconv.ParseUint(val, 0, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
field.Nodes = map[uint8]uint64{}
|
||||
} else { // Subsequent columns: key is N<id>, val is usage.
|
||||
if len(key) < 2 || key[0] != 'N' {
|
||||
// This is definitely an error.
|
||||
return stats, malformedLine(path, file, line)
|
||||
}
|
||||
|
||||
n, err := strconv.ParseUint(key[1:], 10, 8)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
usage, err := strconv.ParseUint(val, 10, 64)
|
||||
if err != nil {
|
||||
return stats, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
field.Nodes[uint8(n)] = usage
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
|
||||
switch name {
|
||||
case "total":
|
||||
return &stats.Total
|
||||
case "file":
|
||||
return &stats.File
|
||||
case "anon":
|
||||
return &stats.Anon
|
||||
case "unevictable":
|
||||
return &stats.Unevictable
|
||||
case "hierarchical_total":
|
||||
return &stats.Hierarchical.Total
|
||||
case "hierarchical_file":
|
||||
return &stats.Hierarchical.File
|
||||
case "hierarchical_anon":
|
||||
return &stats.Hierarchical.Anon
|
||||
case "hierarchical_unevictable":
|
||||
return &stats.Hierarchical.Unevictable
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -8,6 +6,7 @@ import (
|
|||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -18,11 +17,29 @@ rss 1024`
|
|||
memoryFailcnt = "100\n"
|
||||
memoryLimitContents = "8192\n"
|
||||
memoryUseHierarchyContents = "1\n"
|
||||
memoryNUMAStatContents = `total=44611 N0=32631 N1=7501 N2=1982 N3=2497
|
||||
file=44428 N0=32614 N1=7335 N2=1982 N3=2497
|
||||
anon=183 N0=17 N1=166 N2=0 N3=0
|
||||
unevictable=0 N0=0 N1=0 N2=0 N3=0
|
||||
hierarchical_total=768133 N0=509113 N1=138887 N2=20464 N3=99669
|
||||
hierarchical_file=722017 N0=496516 N1=119997 N2=20181 N3=85323
|
||||
hierarchical_anon=46096 N0=12597 N1=18890 N2=283 N3=14326
|
||||
hierarchical_unevictable=20 N0=0 N1=0 N2=0 N3=20
|
||||
`
|
||||
memoryNUMAStatNoHierarchyContents = `total=44611 N0=32631 N1=7501 N2=1982 N3=2497
|
||||
file=44428 N0=32614 N1=7335 N2=1982 N3=2497
|
||||
anon=183 N0=17 N1=166 N2=0 N3=0
|
||||
unevictable=0 N0=0 N1=0 N2=0 N3=0
|
||||
`
|
||||
// Some custom kernels have extra fields that should be ignored.
|
||||
memoryNUMAStatExtraContents = `numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||
numa_exectime 0
|
||||
whatever=100 N0=0
|
||||
`
|
||||
)
|
||||
|
||||
func TestMemorySetMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "memory")
|
||||
|
||||
const (
|
||||
memoryBefore = 314572800 // 300M
|
||||
|
@ -31,29 +48,31 @@ func TestMemorySetMemory(t *testing.T) {
|
|||
reservationAfter = 314572800 // 300M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
||||
helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
|
||||
r := &configs.Resources{
|
||||
Memory: memoryAfter,
|
||||
MemoryReservation: reservationAfter,
|
||||
}
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := memory.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != memoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||
}
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
|
||||
value, err = fscommon.GetCgroupParamUint(path, "memory.soft_limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != reservationAfter {
|
||||
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
|
||||
|
@ -61,27 +80,28 @@ func TestMemorySetMemory(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestMemorySetMemoryswap(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "memory")
|
||||
|
||||
const (
|
||||
memoryswapBefore = 314572800 // 300M
|
||||
memoryswapAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
||||
r := &configs.Resources{
|
||||
MemorySwap: memoryswapAfter,
|
||||
}
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := memory.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != memoryswapAfter {
|
||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||
|
@ -89,8 +109,7 @@ func TestMemorySetMemoryswap(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "memory")
|
||||
|
||||
const (
|
||||
memoryBefore = 314572800 // 300M
|
||||
|
@ -99,7 +118,7 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
|||
memoryswapAfter = 838860800 // 800M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||
// Set will call getMemoryData when memory and swap memory are
|
||||
|
@ -109,23 +128,26 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
|||
"memory.failcnt": "0",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
||||
r := &configs.Resources{
|
||||
Memory: memoryAfter,
|
||||
MemorySwap: memoryswapAfter,
|
||||
}
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := memory.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != memoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != memoryswapAfter {
|
||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||
|
@ -133,8 +155,7 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "memory")
|
||||
|
||||
const (
|
||||
memoryBefore = 629145600 // 600M
|
||||
|
@ -143,115 +164,58 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
|
|||
memoryswapAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||
// Set will call getMemoryData when memory and swap memory are
|
||||
// both set, fake these fields so we don't get error.
|
||||
"memory.usage_in_bytes": "0",
|
||||
"memory.max_usage_in_bytes": "0",
|
||||
"memory.failcnt": "0",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
||||
r := &configs.Resources{
|
||||
Memory: memoryAfter,
|
||||
MemorySwap: memoryswapAfter,
|
||||
}
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := memory.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != memoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||
t.Fatalf("Got the wrong value (%d != %d), set memory.limit_in_bytes failed", value, memoryAfter)
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != memoryswapAfter {
|
||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetKernelMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
kernelMemoryBefore = 314572800 // 300M
|
||||
kernelMemoryAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != kernelMemoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetKernelMemoryTCP(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
kernelMemoryTCPBefore = 314572800 // 300M
|
||||
kernelMemoryTCPAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != kernelMemoryTCPAfter {
|
||||
t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.")
|
||||
t.Fatalf("Got the wrong value (%d != %d), set memory.memsw.limit_in_bytes failed", value, memoryswapAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "memory")
|
||||
|
||||
swappinessBefore := 60 //default is 60
|
||||
swappinessBefore := 60 // default is 60
|
||||
swappinessAfter := uint64(0)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.swappiness": strconv.Itoa(swappinessBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter
|
||||
r := &configs.Resources{
|
||||
MemorySwappiness: &swappinessAfter,
|
||||
}
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := memory.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.swappiness")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.swappiness")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.swappiness - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != swappinessAfter {
|
||||
t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter)
|
||||
|
@ -259,9 +223,8 @@ func TestMemorySetMemorySwappinessDefault(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestMemoryStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
|
@ -276,22 +239,43 @@ func TestMemoryStats(t *testing.T) {
|
|||
"memory.kmem.failcnt": memoryFailcnt,
|
||||
"memory.kmem.limit_in_bytes": memoryLimitContents,
|
||||
"memory.use_hierarchy": memoryUseHierarchyContents,
|
||||
"memory.numa_stat": memoryNUMAStatContents + memoryNUMAStatExtraContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}, UseHierarchy: true}
|
||||
expectedStats := cgroups.MemoryStats{
|
||||
Cache: 512,
|
||||
Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
|
||||
SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
|
||||
KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
|
||||
Stats: map[string]uint64{"cache": 512, "rss": 1024},
|
||||
UseHierarchy: true,
|
||||
PageUsageByNUMA: cgroups.PageUsageByNUMA{
|
||||
PageUsageByNUMAInner: cgroups.PageUsageByNUMAInner{
|
||||
Total: cgroups.PageStats{Total: 44611, Nodes: map[uint8]uint64{0: 32631, 1: 7501, 2: 1982, 3: 2497}},
|
||||
File: cgroups.PageStats{Total: 44428, Nodes: map[uint8]uint64{0: 32614, 1: 7335, 2: 1982, 3: 2497}},
|
||||
Anon: cgroups.PageStats{Total: 183, Nodes: map[uint8]uint64{0: 17, 1: 166, 2: 0, 3: 0}},
|
||||
Unevictable: cgroups.PageStats{Total: 0, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 0}},
|
||||
},
|
||||
Hierarchical: cgroups.PageUsageByNUMAInner{
|
||||
Total: cgroups.PageStats{Total: 768133, Nodes: map[uint8]uint64{0: 509113, 1: 138887, 2: 20464, 3: 99669}},
|
||||
File: cgroups.PageStats{Total: 722017, Nodes: map[uint8]uint64{0: 496516, 1: 119997, 2: 20181, 3: 85323}},
|
||||
Anon: cgroups.PageStats{Total: 46096, Nodes: map[uint8]uint64{0: 12597, 1: 18890, 2: 283, 3: 14326}},
|
||||
Unevictable: cgroups.PageStats{Total: 20, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 20}},
|
||||
},
|
||||
},
|
||||
}
|
||||
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
|
@ -299,16 +283,15 @@ func TestMemoryStatsNoStatFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
|
@ -316,16 +299,15 @@ func TestMemoryStatsNoUsageFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
|
@ -333,16 +315,15 @@ func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
|
@ -350,16 +331,15 @@ func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": "rss rss",
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
|
@ -368,16 +348,15 @@ func TestMemoryStatsBadStatFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": "bad",
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
|
@ -386,16 +365,15 @@ func TestMemoryStatsBadUsageFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": "bad",
|
||||
|
@ -404,16 +382,15 @@ func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
|
@ -422,35 +399,108 @@ func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
|
|||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
err := memory.GetStats(path, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetOomControl(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "memory")
|
||||
|
||||
const (
|
||||
oomKillDisable = 1 // disable oom killer, default is 0
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.oom_control": strconv.Itoa(oomKillDisable),
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
r := &configs.Resources{}
|
||||
if err := memory.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.oom_control")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "memory.oom_control")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.oom_control - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if value != oomKillDisable {
|
||||
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNoHierarchicalNumaStat(t *testing.T) {
|
||||
path := tempDir(t, "memory")
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.numa_stat": memoryNUMAStatNoHierarchyContents + memoryNUMAStatExtraContents,
|
||||
})
|
||||
|
||||
actualStats, err := getPageUsageByNUMA(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
pageUsageByNUMA := cgroups.PageUsageByNUMA{
|
||||
PageUsageByNUMAInner: cgroups.PageUsageByNUMAInner{
|
||||
Total: cgroups.PageStats{Total: 44611, Nodes: map[uint8]uint64{0: 32631, 1: 7501, 2: 1982, 3: 2497}},
|
||||
File: cgroups.PageStats{Total: 44428, Nodes: map[uint8]uint64{0: 32614, 1: 7335, 2: 1982, 3: 2497}},
|
||||
Anon: cgroups.PageStats{Total: 183, Nodes: map[uint8]uint64{0: 17, 1: 166, 2: 0, 3: 0}},
|
||||
Unevictable: cgroups.PageStats{Total: 0, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 0}},
|
||||
},
|
||||
Hierarchical: cgroups.PageUsageByNUMAInner{},
|
||||
}
|
||||
expectPageUsageByNUMAEquals(t, pageUsageByNUMA, actualStats)
|
||||
}
|
||||
|
||||
func TestBadNumaStat(t *testing.T) {
|
||||
memoryNUMAStatBadContents := []struct {
|
||||
desc, contents string
|
||||
}{
|
||||
{
|
||||
desc: "Nx where x is not a number",
|
||||
contents: `total=44611 N0=44611,
|
||||
file=44428 Nx=0
|
||||
`,
|
||||
}, {
|
||||
desc: "Nx where x > 255",
|
||||
contents: `total=44611 N333=444`,
|
||||
}, {
|
||||
desc: "Nx argument missing",
|
||||
contents: `total=44611 N0=123 N1=`,
|
||||
}, {
|
||||
desc: "Nx argument is not a number",
|
||||
contents: `total=44611 N0=123 N1=a`,
|
||||
}, {
|
||||
desc: "Missing = after Nx",
|
||||
contents: `total=44611 N0=123 N1`,
|
||||
}, {
|
||||
desc: "No Nx at non-first position",
|
||||
contents: `total=44611 N0=32631
|
||||
file=44428 N0=32614
|
||||
anon=183 N0=12 badone
|
||||
`,
|
||||
},
|
||||
}
|
||||
path := tempDir(t, "memory")
|
||||
for _, c := range memoryNUMAStatBadContents {
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"memory.numa_stat": c.contents,
|
||||
})
|
||||
|
||||
_, err := getPageUsageByNUMA(path)
|
||||
if err == nil {
|
||||
t.Errorf("case %q: expected error, got nil", c.desc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithoutNumaStat(t *testing.T) {
|
||||
path := tempDir(t, "memory")
|
||||
|
||||
actualStats, err := getPageUsageByNUMA(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectPageUsageByNUMAEquals(t, cgroups.PageUsageByNUMA{}, actualStats)
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -16,22 +14,15 @@ func (s *NameGroup) Name() string {
|
|||
return s.GroupName
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(d *cgroupData) error {
|
||||
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
if s.Join {
|
||||
// ignore errors if the named cgroup does not exist
|
||||
d.join(s.GroupName)
|
||||
// Ignore errors if the named cgroup does not exist.
|
||||
_ = apply(path, pid)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Remove(d *cgroupData) error {
|
||||
if s.Join {
|
||||
removePath(d.path(s.GroupName))
|
||||
}
|
||||
func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,33 +1,25 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetClsGroup struct {
|
||||
}
|
||||
type NetClsGroup struct{}
|
||||
|
||||
func (s *NetClsGroup) Name() string {
|
||||
return "net_cls"
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("net_cls")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.NetClsClassid != 0 {
|
||||
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.NetClsClassid != 0 {
|
||||
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -35,10 +27,6 @@ func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("net_cls"))
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -7,6 +5,7 @@ import (
|
|||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -15,25 +14,26 @@ const (
|
|||
)
|
||||
|
||||
func TestNetClsSetClassid(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("net_cls", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "net_cls")
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"net_cls.classid": strconv.FormatUint(classidBefore, 10),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.NetClsClassid = classidAfter
|
||||
r := &configs.Resources{
|
||||
NetClsClassid: classidAfter,
|
||||
}
|
||||
netcls := &NetClsGroup{}
|
||||
if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := netcls.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// As we are in mock environment, we can't get correct value of classid from
|
||||
// net_cls.classid.
|
||||
// So we just check that the classid was successfully written to the file.
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "net_cls.classid")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "net_cls.classid")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse net_cls.classid - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if value != classidAfter {
|
||||
t.Fatal("Got the wrong value, set net_cls.classid failed.")
|
||||
|
|
|
@ -1,31 +1,23 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetPrioGroup struct {
|
||||
}
|
||||
type NetPrioGroup struct{}
|
||||
|
||||
func (s *NetPrioGroup) Name() string {
|
||||
return "net_prio"
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("net_prio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
|
||||
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||
for _, prioMap := range r.NetPrioIfpriomap {
|
||||
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -33,10 +25,6 @@ func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("net_prio"))
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -10,28 +8,27 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
prioMap = []*configs.IfPrioMap{
|
||||
{
|
||||
Interface: "test",
|
||||
Priority: 5,
|
||||
},
|
||||
}
|
||||
)
|
||||
var prioMap = []*configs.IfPrioMap{
|
||||
{
|
||||
Interface: "test",
|
||||
Priority: 5,
|
||||
},
|
||||
}
|
||||
|
||||
func TestNetPrioSetIfPrio(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("net_prio", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "net_prio")
|
||||
|
||||
helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
|
||||
r := &configs.Resources{
|
||||
NetPrioIfpriomap: prioMap,
|
||||
}
|
||||
netPrio := &NetPrioGroup{}
|
||||
if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := netPrio.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
|
||||
value, err := fscommon.GetCgroupParamString(path, "net_prio.ifpriomap")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !strings.Contains(value, "test 5") {
|
||||
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
|
||||
|
|
|
@ -0,0 +1,186 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var (
|
||||
cgroupRootLock sync.Mutex
|
||||
cgroupRoot string
|
||||
)
|
||||
|
||||
const defaultCgroupRoot = "/sys/fs/cgroup"
|
||||
|
||||
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
|
||||
root, err := rootPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
inner, err := innerPath(cg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
path, err := subsysPath(root, inner, name)
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem
|
||||
// is considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
|
||||
return paths, nil
|
||||
}
|
||||
|
||||
func tryDefaultCgroupRoot() string {
|
||||
var st, pst unix.Stat_t
|
||||
|
||||
// (1) it should be a directory...
|
||||
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (2) ... and a mount point ...
|
||||
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
if st.Dev == pst.Dev {
|
||||
// parent dir has the same dev -- not a mount point
|
||||
return ""
|
||||
}
|
||||
|
||||
// (3) ... of 'tmpfs' fs type.
|
||||
var fst unix.Statfs_t
|
||||
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
// (4) it should have at least 1 entry ...
|
||||
dir, err := os.Open(defaultCgroupRoot)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
names, err := dir.Readdirnames(1)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
if len(names) < 1 {
|
||||
return ""
|
||||
}
|
||||
// ... which is a cgroup mount point.
|
||||
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||
return ""
|
||||
}
|
||||
|
||||
return defaultCgroupRoot
|
||||
}
|
||||
|
||||
// rootPath finds and returns path to the root of the cgroup hierarchies.
|
||||
func rootPath() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// fast path
|
||||
cgroupRoot = tryDefaultCgroupRoot()
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
// slow path: parse mountinfo
|
||||
mi, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if len(mi) < 1 {
|
||||
return "", errors.New("no cgroup mount found in mountinfo")
|
||||
}
|
||||
|
||||
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||
// use its parent directory.
|
||||
root := filepath.Dir(mi[0].Mountpoint)
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
func innerPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||
innerPath := utils.CleanPath(c.Path)
|
||||
if innerPath == "" {
|
||||
cgParent := utils.CleanPath(c.Parent)
|
||||
cgName := utils.CleanPath(c.Name)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return innerPath, nil
|
||||
}
|
||||
|
||||
func subsysPath(root, inner, subsystem string) (string, error) {
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(inner) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(root, filepath.Base(mnt), inner), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, inner), nil
|
||||
}
|
||||
|
||||
func apply(path string, pid int) error {
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestInvalidCgroupPath(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v2 is not supported")
|
||||
}
|
||||
|
||||
root, err := rootPath()
|
||||
if err != nil {
|
||||
t.Fatalf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
test string
|
||||
path, name, parent string
|
||||
}{
|
||||
{
|
||||
test: "invalid cgroup path",
|
||||
path: "../../../../../../../../../../some/path",
|
||||
},
|
||||
{
|
||||
test: "invalid absolute cgroup path",
|
||||
path: "/../../../../../../../../../../some/path",
|
||||
},
|
||||
{
|
||||
test: "invalid cgroup parent",
|
||||
parent: "../../../../../../../../../../some/path",
|
||||
name: "name",
|
||||
},
|
||||
{
|
||||
test: "invalid absolute cgroup parent",
|
||||
parent: "/../../../../../../../../../../some/path",
|
||||
name: "name",
|
||||
},
|
||||
{
|
||||
test: "invalid cgroup name",
|
||||
parent: "parent",
|
||||
name: "../../../../../../../../../../some/path",
|
||||
},
|
||||
{
|
||||
test: "invalid absolute cgroup name",
|
||||
parent: "parent",
|
||||
name: "/../../../../../../../../../../some/path",
|
||||
},
|
||||
{
|
||||
test: "invalid cgroup name and parent",
|
||||
parent: "../../../../../../../../../../some/path",
|
||||
name: "../../../../../../../../../../some/path",
|
||||
},
|
||||
{
|
||||
test: "invalid absolute cgroup name and parent",
|
||||
parent: "/../../../../../../../../../../some/path",
|
||||
name: "/../../../../../../../../../../some/path",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.test, func(t *testing.T) {
|
||||
config := &configs.Cgroup{Path: tc.path, Name: tc.name, Parent: tc.parent}
|
||||
|
||||
inner, err := innerPath(config)
|
||||
if err != nil {
|
||||
t.Fatalf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final inner path doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(inner, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := subsysPath(root, inner, "devices")
|
||||
if err != nil {
|
||||
t.Fatalf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTryDefaultCgroupRoot(t *testing.T) {
|
||||
res := tryDefaultCgroupRoot()
|
||||
exp := defaultCgroupRoot
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
// checking that tryDefaultCgroupRoot does return ""
|
||||
// in case /sys/fs/cgroup is not cgroup v1 root dir.
|
||||
exp = ""
|
||||
}
|
||||
if res != exp {
|
||||
t.Errorf("tryDefaultCgroupRoot: want %q, got %q", exp, res)
|
||||
}
|
||||
}
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -7,29 +5,20 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PerfEventGroup struct {
|
||||
}
|
||||
type PerfEventGroup struct{}
|
||||
|
||||
func (s *PerfEventGroup) Name() string {
|
||||
return "perf_event"
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Apply(d *cgroupData) error {
|
||||
// we just want to join this group even though we don't set anything
|
||||
if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("perf_event"))
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"math"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
|
@ -12,31 +9,26 @@ import (
|
|||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct {
|
||||
}
|
||||
type PidsGroup struct{}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("pids")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||
if r.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
if r.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(r.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
|
||||
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
@ -44,28 +36,24 @@ func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("pids"))
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
if !cgroups.PathExists(path) {
|
||||
return nil
|
||||
}
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
return err
|
||||
}
|
||||
|
||||
maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
|
||||
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
||||
}
|
||||
// If no limit is set, read from pids.max returns "max", which is
|
||||
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||
// represent "no limit" for pids as 0, thus this conversion.
|
||||
if max == math.MaxUint64 {
|
||||
max = 0
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
|
@ -8,6 +6,7 @@ import (
|
|||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -16,65 +15,64 @@ const (
|
|||
)
|
||||
|
||||
func TestPidsSetMax(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "pids")
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"pids.max": "max",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxLimited
|
||||
r := &configs.Resources{
|
||||
PidsLimit: maxLimited,
|
||||
}
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := pids.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "pids.max")
|
||||
value, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if value != maxLimited {
|
||||
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsSetUnlimited(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "pids")
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
|
||||
r := &configs.Resources{
|
||||
PidsLimit: maxUnlimited,
|
||||
}
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
if err := pids.Set(path, r); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "pids.max")
|
||||
value, err := fscommon.GetCgroupParamString(path, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if value != "max" {
|
||||
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "pids")
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"pids.current": strconv.Itoa(1337),
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
pids := &PidsGroup{}
|
||||
stats := *cgroups.NewStats()
|
||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
||||
if err := pids.GetStats(path, &stats); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
@ -88,17 +86,16 @@ func TestPidsStats(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestPidsStatsUnlimited(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
path := tempDir(t, "pids")
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
writeFileContents(t, path, map[string]string{
|
||||
"pids.current": strconv.Itoa(4096),
|
||||
"pids.max": "max",
|
||||
})
|
||||
|
||||
pids := &PidsGroup{}
|
||||
stats := *cgroups.NewStats()
|
||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
||||
if err := pids.GetStats(path, &stats); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type RdmaGroup struct{}
|
||||
|
||||
func (s *RdmaGroup) Name() string {
|
||||
return "rdma"
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||
return apply(path, pid)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
|
||||
return fscommon.RdmaSet(path, r)
|
||||
}
|
||||
|
||||
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return fscommon.RdmaGetStats(path, stats)
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue