mirror of https://gitee.com/openkylin/runc.git
merge upstream 1.1.0
This commit is contained in:
parent
f67506f80e
commit
1dc29861c3
|
@ -0,0 +1,158 @@
|
||||||
|
---
|
||||||
|
# We use Cirrus for Vagrant tests and native CentOS 7 and 8, because macOS
|
||||||
|
# instances of GHA are too slow and flaky, and Linux instances of GHA do not
|
||||||
|
# support KVM.
|
||||||
|
|
||||||
|
# NOTE Cirrus execution environments lack a terminal, needed for
|
||||||
|
# some integration tests. So we use `ssh -tt` command to fake a terminal.
|
||||||
|
|
||||||
|
task:
|
||||||
|
timeout_in: 30m
|
||||||
|
|
||||||
|
env:
|
||||||
|
DEBIAN_FRONTEND: noninteractive
|
||||||
|
HOME: /root
|
||||||
|
# yamllint disable rule:key-duplicates
|
||||||
|
matrix:
|
||||||
|
DISTRO: fedora
|
||||||
|
|
||||||
|
name: vagrant DISTRO:$DISTRO
|
||||||
|
|
||||||
|
compute_engine_instance:
|
||||||
|
image_project: cirrus-images
|
||||||
|
image: family/docker-kvm
|
||||||
|
platform: linux
|
||||||
|
nested_virtualization: true
|
||||||
|
# CPU limit: `16 / NTASK`: see https://cirrus-ci.org/faq/#are-there-any-limits
|
||||||
|
cpu: 8
|
||||||
|
# Memory limit: `4GB * NCPU`
|
||||||
|
memory: 32G
|
||||||
|
|
||||||
|
host_info_script: |
|
||||||
|
uname -a
|
||||||
|
echo "-----"
|
||||||
|
cat /etc/os-release
|
||||||
|
echo "-----"
|
||||||
|
cat /proc/cpuinfo
|
||||||
|
echo "-----"
|
||||||
|
df -T
|
||||||
|
install_libvirt_vagrant_script: |
|
||||||
|
apt-get update
|
||||||
|
apt-get install -y libvirt-daemon libvirt-daemon-system vagrant vagrant-libvirt
|
||||||
|
systemctl enable --now libvirtd
|
||||||
|
vagrant_cache:
|
||||||
|
fingerprint_script: uname -s ; cat Vagrantfile.$DISTRO
|
||||||
|
folder: /root/.vagrant.d
|
||||||
|
vagrant_up_script: |
|
||||||
|
ln -sf Vagrantfile.$DISTRO Vagrantfile
|
||||||
|
# Retry if it fails (download.fedoraproject.org returns 404 sometimes)
|
||||||
|
vagrant up --no-tty || vagrant up --no-tty
|
||||||
|
mkdir -p -m 0700 /root/.ssh
|
||||||
|
vagrant ssh-config >> /root/.ssh/config
|
||||||
|
guest_info_script: |
|
||||||
|
ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release"'
|
||||||
|
unit_tests_script: |
|
||||||
|
ssh default 'sudo -i make -C /vagrant localunittest'
|
||||||
|
integration_systemd_script: |
|
||||||
|
ssh -tt default "sudo -i make -C /vagrant localintegration RUNC_USE_SYSTEMD=yes"
|
||||||
|
integration_fs_script: |
|
||||||
|
ssh -tt default "sudo -i make -C /vagrant localintegration"
|
||||||
|
integration_systemd_rootless_script: |
|
||||||
|
ssh -tt default "sudo -i make -C /vagrant localrootlessintegration RUNC_USE_SYSTEMD=yes"
|
||||||
|
integration_fs_rootless_script: |
|
||||||
|
ssh -tt default "sudo -i make -C /vagrant localrootlessintegration"
|
||||||
|
|
||||||
|
task:
|
||||||
|
timeout_in: 30m
|
||||||
|
|
||||||
|
env:
|
||||||
|
HOME: /root
|
||||||
|
CIRRUS_WORKING_DIR: /home/runc
|
||||||
|
GO_VERSION: "1.17.3"
|
||||||
|
BATS_VERSION: "v1.3.0"
|
||||||
|
# yamllint disable rule:key-duplicates
|
||||||
|
matrix:
|
||||||
|
DISTRO: centos-7
|
||||||
|
DISTRO: centos-stream-8
|
||||||
|
|
||||||
|
name: ci / $DISTRO
|
||||||
|
|
||||||
|
compute_engine_instance:
|
||||||
|
image_project: centos-cloud
|
||||||
|
image: family/$DISTRO
|
||||||
|
platform: linux
|
||||||
|
cpu: 4
|
||||||
|
memory: 8G
|
||||||
|
|
||||||
|
install_dependencies_script: |
|
||||||
|
case $DISTRO in
|
||||||
|
centos-7)
|
||||||
|
(cd /etc/yum.repos.d && curl -O https://copr.fedorainfracloud.org/coprs/adrian/criu-el7/repo/epel-7/adrian-criu-el7-epel-7.repo)
|
||||||
|
# sysctl
|
||||||
|
echo "user.max_user_namespaces=15076" > /etc/sysctl.d/userns.conf
|
||||||
|
sysctl --system
|
||||||
|
;;
|
||||||
|
centos-stream-8)
|
||||||
|
yum config-manager --set-enabled powertools # for glibc-static
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
# Work around dnf mirror failures by retrying a few times.
|
||||||
|
for i in $(seq 0 2); do
|
||||||
|
sleep $i
|
||||||
|
yum install -y -q gcc git iptables jq glibc-static libseccomp-devel make criu fuse-sshfs && break
|
||||||
|
done
|
||||||
|
[ $? -eq 0 ] # fail if yum failed
|
||||||
|
# install Go
|
||||||
|
curl -fsSL "https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz" | tar Cxz /usr/local
|
||||||
|
# install bats
|
||||||
|
cd /tmp
|
||||||
|
git clone https://github.com/bats-core/bats-core
|
||||||
|
cd bats-core
|
||||||
|
git checkout $BATS_VERSION
|
||||||
|
./install.sh /usr/local
|
||||||
|
cd -
|
||||||
|
# Add a user for rootless tests
|
||||||
|
useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
|
||||||
|
# Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
|
||||||
|
ssh-keygen -t ecdsa -N "" -f /root/rootless.key
|
||||||
|
mkdir -m 0700 -p /home/rootless/.ssh
|
||||||
|
cp /root/rootless.key /home/rootless/.ssh/id_ecdsa
|
||||||
|
cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
|
||||||
|
chown -R rootless.rootless /home/rootless
|
||||||
|
# set PATH
|
||||||
|
echo 'export PATH=/usr/local/go/bin:/usr/local/bin:$PATH' >> /root/.bashrc
|
||||||
|
# Setup ssh localhost for terminal emulation (script -e did not work)
|
||||||
|
ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -N ""
|
||||||
|
cat /root/.ssh/id_ed25519.pub >> /root/.ssh/authorized_keys
|
||||||
|
chmod 400 /root/.ssh/authorized_keys
|
||||||
|
ssh-keyscan localhost >> /root/.ssh/known_hosts
|
||||||
|
echo -e "Host localhost\n\tStrictHostKeyChecking no\t\nIdentityFile /root/.ssh/id_ed25519\n" >> /root/.ssh/config
|
||||||
|
sed -e "s,PermitRootLogin.*,PermitRootLogin prohibit-password,g" -i /etc/ssh/sshd_config
|
||||||
|
systemctl restart sshd
|
||||||
|
host_info_script: |
|
||||||
|
uname -a
|
||||||
|
echo "-----"
|
||||||
|
cat /etc/os-release
|
||||||
|
echo "-----"
|
||||||
|
cat /proc/cpuinfo
|
||||||
|
echo "-----"
|
||||||
|
df -T
|
||||||
|
echo "-----"
|
||||||
|
systemctl --version
|
||||||
|
unit_tests_script: |
|
||||||
|
ssh -tt localhost "make -C /home/runc localunittest"
|
||||||
|
integration_systemd_script: |
|
||||||
|
ssh -tt localhost "make -C /home/runc localintegration RUNC_USE_SYSTEMD=yes"
|
||||||
|
integration_fs_script: |
|
||||||
|
ssh -tt localhost "make -C /home/runc localintegration"
|
||||||
|
integration_systemd_rootless_script: |
|
||||||
|
echo "SKIP: integration_systemd_rootless_script requires cgroup v2"
|
||||||
|
integration_fs_rootless_script: |
|
||||||
|
case $DISTRO in
|
||||||
|
centos-7)
|
||||||
|
echo "SKIP: FIXME: integration_fs_rootless_script is skipped because of EPERM on writing cgroup.procs"
|
||||||
|
;;
|
||||||
|
centos-stream-8)
|
||||||
|
ssh -tt localhost "make -C /home/runc localrootlessintegration"
|
||||||
|
;;
|
||||||
|
esac
|
|
@ -0,0 +1,3 @@
|
||||||
|
[codespell]
|
||||||
|
skip = ./vendor,./.git
|
||||||
|
ignore-words-list = clos,creat
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Please see the documentation for all configuration options:
|
||||||
|
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
||||||
|
|
||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
# Dependencies listed in go.mod
|
||||||
|
- package-ecosystem: "gomod"
|
||||||
|
directory: "/" # Location of package manifests
|
||||||
|
schedule:
|
||||||
|
interval: "daily"
|
||||||
|
ignore:
|
||||||
|
# a regression in v1.22.2, see https://github.com/urfave/cli/issues/1092
|
||||||
|
- dependency-name: "github.com/urfave/cli"
|
||||||
|
|
||||||
|
# Dependencies listed in .github/workflows/*.yml
|
||||||
|
- package-ecosystem: "github-actions"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "daily"
|
||||||
|
|
||||||
|
# Dependencies listed in Dockerfile
|
||||||
|
- package-ecosystem: "docker"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "daily"
|
|
@ -0,0 +1,129 @@
|
||||||
|
# NOTE Github Actions execution environments lack a terminal, needed for
|
||||||
|
# some integration tests. So we use `script` command to fake a terminal.
|
||||||
|
|
||||||
|
name: ci
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- v*
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- release-*
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
env:
|
||||||
|
# Don't ignore C warnings. Note that the output of "go env CGO_CFLAGS" by default is "-g -O2", so we keep them.
|
||||||
|
CGO_CFLAGS: -g -O2 -Werror
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
go-version: [1.16.x, 1.17.x]
|
||||||
|
rootless: ["rootless", ""]
|
||||||
|
race: ["-race", ""]
|
||||||
|
criu: [""]
|
||||||
|
include:
|
||||||
|
# Also test against latest criu-dev
|
||||||
|
- go-version: 1.17.x
|
||||||
|
rootless: ""
|
||||||
|
race: ""
|
||||||
|
criu: "criu-dev"
|
||||||
|
|
||||||
|
steps:
|
||||||
|
|
||||||
|
- name: checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: install deps
|
||||||
|
if: matrix.criu == ''
|
||||||
|
env:
|
||||||
|
REPO: https://download.opensuse.org/repositories/devel:/tools:/criu/xUbuntu_20.04
|
||||||
|
run: |
|
||||||
|
# criu repo
|
||||||
|
curl -fSsl $REPO/Release.key | sudo apt-key add -
|
||||||
|
echo "deb $REPO/ /" | sudo tee /etc/apt/sources.list.d/criu.list
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install libseccomp-dev criu sshfs
|
||||||
|
|
||||||
|
- name: install deps (criu ${{ matrix.criu }})
|
||||||
|
if: matrix.criu != ''
|
||||||
|
run: |
|
||||||
|
sudo apt -q update
|
||||||
|
sudo apt -q install libseccomp-dev sshfs \
|
||||||
|
libcap-dev libnet1-dev libnl-3-dev \
|
||||||
|
libprotobuf-c-dev libprotobuf-dev protobuf-c-compiler protobuf-compiler
|
||||||
|
git clone https://github.com/checkpoint-restore/criu.git ~/criu
|
||||||
|
(cd ~/criu && git checkout ${{ matrix.criu }} && sudo make install-criu)
|
||||||
|
rm -rf ~/criu
|
||||||
|
|
||||||
|
- name: install go ${{ matrix.go-version }}
|
||||||
|
uses: actions/setup-go@v2
|
||||||
|
with:
|
||||||
|
stable: '!contains(${{ matrix.go-version }}, "beta") && !contains(${{ matrix.go-version }}, "rc")'
|
||||||
|
go-version: ${{ matrix.go-version }}
|
||||||
|
|
||||||
|
- name: build
|
||||||
|
run: sudo -E PATH="$PATH" make EXTRA_FLAGS="${{ matrix.race }}" all
|
||||||
|
|
||||||
|
- name: install bats
|
||||||
|
uses: mig4/setup-bats@v1
|
||||||
|
with:
|
||||||
|
bats-version: 1.3.0
|
||||||
|
|
||||||
|
- name: unit test
|
||||||
|
if: matrix.rootless != 'rootless'
|
||||||
|
run: sudo -E PATH="$PATH" -- make TESTFLAGS="${{ matrix.race }}" localunittest
|
||||||
|
|
||||||
|
- name: add rootless user
|
||||||
|
if: matrix.rootless == 'rootless'
|
||||||
|
run: |
|
||||||
|
sudo useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
|
||||||
|
# Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
|
||||||
|
ssh-keygen -t ecdsa -N "" -f $HOME/rootless.key
|
||||||
|
sudo mkdir -m 0700 -p /home/rootless/.ssh
|
||||||
|
sudo cp $HOME/rootless.key /home/rootless/.ssh/id_ecdsa
|
||||||
|
sudo cp $HOME/rootless.key.pub /home/rootless/.ssh/authorized_keys
|
||||||
|
sudo chown -R rootless.rootless /home/rootless
|
||||||
|
|
||||||
|
- name: integration test (fs driver)
|
||||||
|
run: sudo -E PATH="$PATH" script -e -c 'make local${{ matrix.rootless }}integration'
|
||||||
|
|
||||||
|
- name: integration test (systemd driver)
|
||||||
|
# can't use systemd driver with cgroupv1
|
||||||
|
if: matrix.rootless != 'rootless'
|
||||||
|
run: sudo -E PATH="$PATH" script -e -c 'make RUNC_USE_SYSTEMD=yes local${{ matrix.rootless }}integration'
|
||||||
|
|
||||||
|
# We need to continue support for 32-bit ARM.
|
||||||
|
# However, we do not have 32-bit ARM CI, so we use i386 for testing 32bit stuff.
|
||||||
|
# We are not interested in providing official support for i386.
|
||||||
|
cross-i386:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
|
||||||
|
steps:
|
||||||
|
|
||||||
|
- name: checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: install deps
|
||||||
|
run: |
|
||||||
|
sudo dpkg --add-architecture i386
|
||||||
|
# add criu repo
|
||||||
|
sudo add-apt-repository -y ppa:criu/ppa
|
||||||
|
# apt-add-repository runs apt update so we don't have to.
|
||||||
|
|
||||||
|
# Due to a bug in apt, we have to update it first
|
||||||
|
# (see https://bugs.launchpad.net/ubuntu-cdimage/+bug/1871268)
|
||||||
|
sudo apt -q install apt
|
||||||
|
sudo apt -q install libseccomp-dev libseccomp-dev:i386 gcc-multilib criu
|
||||||
|
|
||||||
|
- name: install go
|
||||||
|
uses: actions/setup-go@v2
|
||||||
|
with:
|
||||||
|
go-version: 1.x # Latest stable
|
||||||
|
|
||||||
|
- name: unit test
|
||||||
|
# cgo is disabled by default when cross-compiling
|
||||||
|
run: sudo -E PATH="$PATH" -- make GOARCH=386 CGO_ENABLED=1 localunittest
|
|
@ -0,0 +1,198 @@
|
||||||
|
name: validate
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
tags:
|
||||||
|
- v*
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- release-*
|
||||||
|
pull_request:
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
|
||||||
|
lint:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: install deps
|
||||||
|
run: |
|
||||||
|
sudo apt -q update
|
||||||
|
sudo apt -q install libseccomp-dev
|
||||||
|
- uses: golangci/golangci-lint-action@v2
|
||||||
|
with:
|
||||||
|
# must be specified without patch version
|
||||||
|
version: v1.42
|
||||||
|
|
||||||
|
lint-extra:
|
||||||
|
# Extra linters, only checking new code from pull requests.
|
||||||
|
if: github.event_name == 'pull_request'
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: install deps
|
||||||
|
run: |
|
||||||
|
sudo apt -q update
|
||||||
|
sudo apt -q install libseccomp-dev
|
||||||
|
- uses: golangci/golangci-lint-action@v2
|
||||||
|
with:
|
||||||
|
only-new-issues: true
|
||||||
|
args: --config .golangci-extra.yml
|
||||||
|
# must be specified without patch version
|
||||||
|
version: v1.43
|
||||||
|
|
||||||
|
|
||||||
|
compile-buildtags:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
env:
|
||||||
|
# Don't ignore C warnings. Note that the output of "go env CGO_CFLAGS" by default is "-g -O2", so we keep them.
|
||||||
|
CGO_CFLAGS: -g -O2 -Werror
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: install go
|
||||||
|
uses: actions/setup-go@v2
|
||||||
|
with:
|
||||||
|
go-version: 1.x # Latest stable
|
||||||
|
- name: compile with no build tags
|
||||||
|
run: make BUILDTAGS=""
|
||||||
|
|
||||||
|
codespell:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: install deps
|
||||||
|
# Version of codespell bundled with Ubuntu is way old, so use pip.
|
||||||
|
run: pip install codespell
|
||||||
|
- name: run codespell
|
||||||
|
run: codespell
|
||||||
|
|
||||||
|
shfmt:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: vars
|
||||||
|
run: |
|
||||||
|
echo "VERSION=3.3.1" >> $GITHUB_ENV
|
||||||
|
echo "$(go env GOPATH)/bin" >> $GITHUB_PATH
|
||||||
|
- name: cache go mod and $GOCACHE
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/go/pkg/mod
|
||||||
|
~/.cache/go-build
|
||||||
|
key: ${{ runner.os }}-shfmt-${{ env.VERSION }}
|
||||||
|
restore-keys: ${{ runner.os }}-shfmt-
|
||||||
|
- name: install shfmt
|
||||||
|
run: |
|
||||||
|
command -v shfmt || \
|
||||||
|
(cd ~ && GO111MODULE=on time go get mvdan.cc/sh/v3/cmd/shfmt@v$VERSION)
|
||||||
|
- name: shfmt
|
||||||
|
run: make shfmt
|
||||||
|
|
||||||
|
shellcheck:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: vars
|
||||||
|
run: |
|
||||||
|
echo 'VERSION=v0.7.2' >> $GITHUB_ENV
|
||||||
|
echo 'BASEURL=https://github.com/koalaman/shellcheck/releases/download' >> $GITHUB_ENV
|
||||||
|
echo 'SHA256SUM=12ee2e0b90a3d1e9cae24ac9b2838be66b48573cb2c8e8f3c566b959df6f050c' >> $GITHUB_ENV
|
||||||
|
echo ~/bin >> $GITHUB_PATH
|
||||||
|
- name: install shellcheck
|
||||||
|
run: |
|
||||||
|
mkdir ~/bin
|
||||||
|
curl -sSfL --retry 5 $BASEURL/$VERSION/shellcheck-$VERSION.linux.x86_64.tar.xz |
|
||||||
|
tar xfJ - -C ~/bin --strip 1 shellcheck-$VERSION/shellcheck
|
||||||
|
sha256sum ~/bin/shellcheck | grep -q $SHA256SUM
|
||||||
|
# make sure to remove the old version
|
||||||
|
sudo rm -f /usr/bin/shellcheck
|
||||||
|
- uses: lumaxis/shellcheck-problem-matchers@v1
|
||||||
|
- name: shellcheck
|
||||||
|
run: |
|
||||||
|
make shellcheck
|
||||||
|
|
||||||
|
deps:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: install go
|
||||||
|
uses: actions/setup-go@v2
|
||||||
|
with:
|
||||||
|
go-version: 1.x # Latest stable
|
||||||
|
- name: cache go mod and $GOCACHE
|
||||||
|
uses: actions/cache@v2
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/go/pkg/mod
|
||||||
|
~/.cache/go-build
|
||||||
|
key: ${{ runner.os }}-go.sum-${{ hashFiles('**/go.sum') }}
|
||||||
|
restore-keys: ${{ runner.os }}-go.sum-
|
||||||
|
- name: verify deps
|
||||||
|
run: make verify-dependencies
|
||||||
|
|
||||||
|
|
||||||
|
commit:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
# Only check commits on pull requests.
|
||||||
|
if: github.event_name == 'pull_request'
|
||||||
|
steps:
|
||||||
|
- name: get pr commits
|
||||||
|
id: 'get-pr-commits'
|
||||||
|
uses: tim-actions/get-pr-commits@v1.1.0
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: check subject line length
|
||||||
|
uses: tim-actions/commit-message-checker-with-regex@v0.3.1
|
||||||
|
with:
|
||||||
|
commits: ${{ steps.get-pr-commits.outputs.commits }}
|
||||||
|
pattern: '^.{0,72}(\n.*)*$'
|
||||||
|
error: 'Subject too long (max 72)'
|
||||||
|
|
||||||
|
|
||||||
|
cfmt:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- name: checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
- name: install deps
|
||||||
|
run: |
|
||||||
|
sudo apt -qq update
|
||||||
|
sudo apt -qq install indent
|
||||||
|
- name: cfmt
|
||||||
|
run: |
|
||||||
|
make cfmt
|
||||||
|
git diff --exit-code
|
||||||
|
|
||||||
|
|
||||||
|
release:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- name: checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
# We have to run this under Docker as Ubuntu (host) does not support all
|
||||||
|
# the architectures we want to compile test against, and Dockerfile uses
|
||||||
|
# Debian (which does).
|
||||||
|
#
|
||||||
|
# XXX: as currently this is the only job that is using Docker, we are
|
||||||
|
# building and using the runcimage locally. In case more jobs running
|
||||||
|
# under Docker will emerge, it will be good to have a separate make
|
||||||
|
# runcimage job and share its result (the docker image) with whoever
|
||||||
|
# needs it.
|
||||||
|
- uses: satackey/action-docker-layer-caching@v0.0.11
|
||||||
|
continue-on-error: true
|
||||||
|
- name: build docker image
|
||||||
|
run: make runcimage
|
||||||
|
- name: make releaseall
|
||||||
|
run: make releaseall
|
||||||
|
- name: upload artifacts
|
||||||
|
uses: actions/upload-artifact@v2
|
||||||
|
with:
|
||||||
|
name: release-${{ github.run_id }}
|
||||||
|
path: release/*
|
|
@ -2,5 +2,9 @@ vendor/pkg
|
||||||
/runc
|
/runc
|
||||||
/runc-*
|
/runc-*
|
||||||
contrib/cmd/recvtty/recvtty
|
contrib/cmd/recvtty/recvtty
|
||||||
|
contrib/cmd/sd-helper/sd-helper
|
||||||
|
contrib/cmd/seccompagent/seccompagent
|
||||||
man/man8
|
man/man8
|
||||||
release
|
release
|
||||||
|
Vagrantfile
|
||||||
|
.vagrant
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
# This is golangci-lint config file which is used to check new code in
|
||||||
|
# github PRs only (see lint-extra job in .github/workflows/validate.yml).
|
||||||
|
#
|
||||||
|
# For the default linter config, see .golangci.yml. This config should
|
||||||
|
# only enable additional linters not enabled in the default config.
|
||||||
|
|
||||||
|
run:
|
||||||
|
build-tags:
|
||||||
|
- seccomp
|
||||||
|
|
||||||
|
linters:
|
||||||
|
disable-all: true
|
||||||
|
enable:
|
||||||
|
- godot
|
||||||
|
- revive
|
|
@ -0,0 +1,12 @@
|
||||||
|
# For documentation, see https://golangci-lint.run/usage/configuration/
|
||||||
|
|
||||||
|
run:
|
||||||
|
build-tags:
|
||||||
|
- seccomp
|
||||||
|
|
||||||
|
linters:
|
||||||
|
enable:
|
||||||
|
- gofumpt
|
||||||
|
- errorlint
|
||||||
|
- unconvert
|
||||||
|
- unparam
|
|
@ -1,10 +0,0 @@
|
||||||
approve_by_comment: true
|
|
||||||
approve_regex: ^LGTM
|
|
||||||
reject_regex: ^Rejected
|
|
||||||
reset_on_push: true
|
|
||||||
author_approval: ignored
|
|
||||||
reviewers:
|
|
||||||
teams:
|
|
||||||
- runc-maintainers
|
|
||||||
name: default
|
|
||||||
required: 2
|
|
54
.travis.yml
54
.travis.yml
|
@ -1,54 +0,0 @@
|
||||||
dist: bionic
|
|
||||||
language: go
|
|
||||||
go:
|
|
||||||
- 1.11.x
|
|
||||||
- 1.12.x
|
|
||||||
- tip
|
|
||||||
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
- go: 1.12.x
|
|
||||||
env:
|
|
||||||
- RUNC_USE_SYSTEMD=1
|
|
||||||
script:
|
|
||||||
- make BUILDTAGS="${BUILDTAGS}" all
|
|
||||||
- sudo PATH="$PATH" make localintegration RUNC_USE_SYSTEMD=1
|
|
||||||
- go: 1.12.x
|
|
||||||
env:
|
|
||||||
- VIRTUALBOX_VERSION=6.0
|
|
||||||
- VAGRANT_VERSION=2.2.6
|
|
||||||
- FEDORA_VERSION=31
|
|
||||||
before_install:
|
|
||||||
- cat /proc/cpuinfo
|
|
||||||
- wget -q https://www.virtualbox.org/download/oracle_vbox_2016.asc -O- | sudo apt-key add - && sudo sh -c "echo deb https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib >> /etc/apt/sources.list" && sudo apt-get update && sudo apt-get install -yq build-essential gcc make linux-headers-$(uname -r) virtualbox-${VIRTUALBOX_VERSION} && sudo usermod -aG vboxusers $(whoami)
|
|
||||||
- wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb && sudo dpkg -i vagrant_${VAGRANT_VERSION}_$(uname -m).deb
|
|
||||||
- vagrant init bento/fedora-${FEDORA_VERSION} && vagrant up && mkdir -p ~/.ssh && vagrant ssh-config >> ~/.ssh/config
|
|
||||||
- ssh default sudo dnf install -y podman
|
|
||||||
script:
|
|
||||||
- ssh default sudo podman build -t test /vagrant
|
|
||||||
- ssh default sudo podman run --privileged --cgroupns=private test make localunittest
|
|
||||||
allow_failures:
|
|
||||||
- go: tip
|
|
||||||
|
|
||||||
go_import_path: github.com/opencontainers/runc
|
|
||||||
|
|
||||||
# `make ci` uses Docker.
|
|
||||||
sudo: required
|
|
||||||
services:
|
|
||||||
- docker
|
|
||||||
|
|
||||||
env:
|
|
||||||
global:
|
|
||||||
- BUILDTAGS="seccomp apparmor selinux ambient"
|
|
||||||
|
|
||||||
before_install:
|
|
||||||
- sudo apt-get -qq update
|
|
||||||
- sudo apt-get install -y libseccomp-dev
|
|
||||||
- go get -u golang.org/x/lint/golint
|
|
||||||
- go get -u github.com/vbatts/git-validation
|
|
||||||
- env | grep TRAVIS_
|
|
||||||
|
|
||||||
script:
|
|
||||||
- git-validation -run DCO,short-subject -v
|
|
||||||
- make BUILDTAGS="${BUILDTAGS}"
|
|
||||||
- make BUILDTAGS="${BUILDTAGS}" clean ci cross
|
|
|
@ -0,0 +1,248 @@
|
||||||
|
# Changelog
|
||||||
|
This file documents all notable changes made to this project since runc 1.0.
|
||||||
|
|
||||||
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [Unreleased]
|
||||||
|
|
||||||
|
## [1.1.0] - 2022-01-14
|
||||||
|
|
||||||
|
> A plan depends as much upon execution as it does upon concept.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
* libcontainer will now refuse to build without the nsenter package being
|
||||||
|
correctly compiled (specifically this requires CGO to be enabled). This
|
||||||
|
should avoid folks accidentally creating broken runc binaries (and
|
||||||
|
incorrectly importing our internal libraries into their projects). (#3331)
|
||||||
|
|
||||||
|
## [1.1.0-rc.1] - 2021-12-14
|
||||||
|
|
||||||
|
> He who controls the spice controls the universe.
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
* runc run/start now warns if a new container cgroup is non-empty or frozen;
|
||||||
|
this warning will become an error in runc 1.2. (#3132, #3223)
|
||||||
|
* runc can only be built with Go 1.16 or later from this release onwards.
|
||||||
|
(#3100, #3245, #3325)
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
* `cgroup.GetHugePageSizes` has been removed entirely, and been replaced with
|
||||||
|
`cgroup.HugePageSizes` which is more efficient. (#3234)
|
||||||
|
* `intelrdt.GetIntelRdtPath` has been removed. Users who were using this
|
||||||
|
function to get the intelrdt root should use the new `intelrdt.Root`
|
||||||
|
instead. (#2920, #3239)
|
||||||
|
|
||||||
|
### Added
|
||||||
|
* Add support for RDMA cgroup added in Linux 4.11. (#2883)
|
||||||
|
* runc exec now produces exit code of 255 when the exec failed.
|
||||||
|
This may help in distinguishing between runc exec failures
|
||||||
|
(such as invalid options, non-running container or non-existent
|
||||||
|
binary etc.) and failures of the command being executed. (#3073)
|
||||||
|
* runc run: new `--keep` option to skip removal of exited containers' artefacts.
|
||||||
|
This might be useful to check the state (e.g. of cgroup controllers) after
|
||||||
|
the container has exited. (#2817, #2825)
|
||||||
|
* seccomp: add support for `SCMP_ACT_KILL_PROCESS` and `SCMP_ACT_KILL_THREAD`
|
||||||
|
(the latter is just an alias for `SCMP_ACT_KILL`). (#3204)
|
||||||
|
* seccomp: add support for `SCMP_ACT_NOTIFY` (seccomp actions). This allows
|
||||||
|
users to create sophisticated seccomp filters where syscalls can be
|
||||||
|
efficiently emulated by privileged processes on the host. (#2682)
|
||||||
|
* checkpoint/restore: add an option (`--lsm-mount-context`) to set
|
||||||
|
a different LSM mount context on restore. (#3068)
|
||||||
|
* runc releases are now cross-compiled for several architectures. Static
|
||||||
|
builds for said architectures will be available for all future releases.
|
||||||
|
(#3197)
|
||||||
|
* intelrdt: support ClosID parameter. (#2920)
|
||||||
|
* runc exec --cgroup: an option to specify a (non-top) in-container cgroup
|
||||||
|
to use for the process being executed. (#3040, #3059)
|
||||||
|
* cgroup v1 controllers now support hybrid hierarchy (i.e. when on a cgroup v1
|
||||||
|
machine a cgroup2 filesystem is mounted to /sys/fs/cgroup/unified, runc
|
||||||
|
run/exec now adds the container to the appropriate cgroup under it). (#2087,
|
||||||
|
#3059)
|
||||||
|
* sysctl: allow slashes in sysctl names, to better match `sysctl(8)`'s
|
||||||
|
behaviour. (#3254, #3257)
|
||||||
|
* mounts: add support for bind-mounts which are inaccessible after switching
|
||||||
|
the user namespace. Note that this does not permit the container any
|
||||||
|
additional access to the host filesystem, it simply allows containers to
|
||||||
|
have bind-mounts configured for paths the user can access but have
|
||||||
|
restrictive access control settings for other users. (#2576)
|
||||||
|
* Add support for recursive mount attributes using `mount_setattr(2)`. These
|
||||||
|
have the same names as the proposed `mount(8)` options -- just prepend `r`
|
||||||
|
to the option name (such as `rro`). (#3272)
|
||||||
|
* Add `runc features` subcommand to allow runc users to detect what features
|
||||||
|
runc has been built with. This includes critical information such as
|
||||||
|
supported mount flags, hook names, and so on. Note that the output of this
|
||||||
|
command is subject to change and will not be considered stable until runc
|
||||||
|
1.2 at the earliest. The runtime-spec specification for this feature is
|
||||||
|
being developed in [opencontainers/runtime-spec#1130]. (#3296)
|
||||||
|
|
||||||
|
[opencontainers/runtime-spec#1130]: https://github.com/opencontainers/runtime-spec/pull/1130
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
* system: improve performance of `/proc/$pid/stat` parsing. (#2696)
|
||||||
|
* cgroup2: when `/sys/fs/cgroup` is configured as a read-write mount, change
|
||||||
|
the ownership of certain cgroup control files (as per
|
||||||
|
`/sys/kernel/cgroup/delegate`) to allow for proper deferral to the container
|
||||||
|
process. (#3057)
|
||||||
|
* docs: series of improvements to man pages to make them easier to read and
|
||||||
|
use. (#3032)
|
||||||
|
|
||||||
|
#### libcontainer API
|
||||||
|
* internal api: remove internal error types and handling system, switch to Go
|
||||||
|
wrapped errors. (#3033)
|
||||||
|
* New configs.Cgroup structure fields (#3177):
|
||||||
|
* Systemd (whether to use systemd cgroup manager); and
|
||||||
|
* Rootless (whether to use rootless cgroups).
|
||||||
|
* New cgroups/manager package aiming to simplify cgroup manager instantiation.
|
||||||
|
(#3177)
|
||||||
|
* All cgroup managers' instantiation methods now initialize cgroup paths and
|
||||||
|
can return errors. This allows using any cgroup manager method (e.g.
|
||||||
|
Exists, Destroy, Set, GetStats) right after instantiation, which was not
|
||||||
|
possible before (as paths were initialized in Apply only). (#3178)
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
* nsenter: do not try to close already-closed fds during container setup and
|
||||||
|
bail on close(2) failures. (#3058)
|
||||||
|
* runc checkpoint/restore: fixed for containers with an external bind mount
|
||||||
|
whose destination is a symlink. (#3047)
|
||||||
|
* cgroup: improve openat2 handling for cgroup directory handle hardening.
|
||||||
|
(#3030)
|
||||||
|
* `runc delete -f` now succeeds (rather than timing out) on a paused
|
||||||
|
container. (#3134)
|
||||||
|
* runc run/start/exec now refuses a frozen cgroup (paused container in case of
|
||||||
|
exec). Users can disable this using `--ignore-paused`. (#3132, #3223)
|
||||||
|
* config: do not permit null bytes in mount fields. (#3287)
|
||||||
|
|
||||||
|
|
||||||
|
## [1.0.3] - 2021-12-06
|
||||||
|
|
||||||
|
> If you were waiting for the opportune moment, that was it.
|
||||||
|
|
||||||
|
### Security
|
||||||
|
* A potential vulnerability was discovered in runc (related to an internal
|
||||||
|
usage of netlink), however upon further investigation we discovered that
|
||||||
|
while this bug was exploitable on the master branch of runc, no released
|
||||||
|
version of runc could be exploited using this bug. The exploit required being
|
||||||
|
able to create a netlink attribute with a length that would overflow a uint16
|
||||||
|
but this was not possible in any released version of runc. For more
|
||||||
|
information, see [GHSA-v95c-p5hm-xq8f][] and CVE-2021-43784.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
* Fixed inability to start a container with read-write bind mount of a
|
||||||
|
read-only fuse host mount. (#3283, #3292)
|
||||||
|
* Fixed inability to start when read-only /dev is set in spec (#3276, #3277)
|
||||||
|
* Fixed not removing sub-cgroups upon container delete, when rootless cgroup v2
|
||||||
|
is used with older systemd. (#3226, #3297)
|
||||||
|
* Fixed returning error from GetStats when hugetlb is unsupported (which causes
|
||||||
|
excessive logging for Kubernetes). (#3233, #3295)
|
||||||
|
* Improved an error message when dbus-user-session is not installed and
|
||||||
|
rootless + cgroup2 + systemd are used (#3212)
|
||||||
|
|
||||||
|
[GHSA-v95c-p5hm-xq8f]: https://github.com/opencontainers/runc/security/advisories/GHSA-v95c-p5hm-xq8f
|
||||||
|
|
||||||
|
|
||||||
|
## [1.0.2] - 2021-07-16
|
||||||
|
|
||||||
|
> Given the right lever, you can move a planet.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
* Made release builds reproducible from now on. (#3099, #3142)
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
* Fixed a failure to set CPU quota period in some cases on cgroup v1. (#3090,
|
||||||
|
#3115)
|
||||||
|
* Fixed the inability to start a container with the "adding seccomp filter
|
||||||
|
rule for syscall ..." error, caused by redundant seccomp rules (i.e. those
|
||||||
|
that have an action equal to the default one). Such redundant rules are now
|
||||||
|
skipped. (#3109, #3129)
|
||||||
|
* Fixed a rare debug log race in runc init, which can result in occasional
|
||||||
|
harmful "failed to decode ..." errors from runc run or exec. (#3120, #3130)
|
||||||
|
* Fixed the check in cgroup v1 systemd manager if a container needs to be
|
||||||
|
frozen before Set, and add a setting to skip such freeze unconditionally.
|
||||||
|
The previous fix for that issue, done in runc 1.0.1, was not working.
|
||||||
|
(#3166, #3167)
|
||||||
|
|
||||||
|
|
||||||
|
## [1.0.1] - 2021-07-16
|
||||||
|
|
||||||
|
> If in doubt, Meriadoc, always follow your nose.
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
* Fixed occasional runc exec/run failure ("interrupted system call") on an
|
||||||
|
Azure volume. (#3045, #3074)
|
||||||
|
* Fixed "unable to find groups ... token too long" error with /etc/group
|
||||||
|
containing lines longer than 64K characters. (#3062, #3079)
|
||||||
|
* cgroup/systemd/v1: fix leaving cgroup frozen after Set if a parent cgroup is
|
||||||
|
frozen. This is a regression in 1.0.0, not affecting runc itself but some
|
||||||
|
of libcontainer users (e.g. Kubernetes). (#3081, #3085)
|
||||||
|
* cgroupv2: bpf: Ignore inaccessible existing programs in case of
|
||||||
|
permission error when handling replacement of existing bpf cgroup
|
||||||
|
programs. This fixes a regression in 1.0.0, where some SELinux
|
||||||
|
policies would block runc from being able to run entirely. (#3055, #3087)
|
||||||
|
* cgroup/systemd/v2: don't freeze cgroup on Set. (#3067, #3092)
|
||||||
|
* cgroup/systemd/v1: avoid unnecessary freeze on Set. (#3082, #3093)
|
||||||
|
|
||||||
|
|
||||||
|
## [1.0.0] - 2021-06-22
|
||||||
|
|
||||||
|
> A wizard is never late, nor is he early, he arrives precisely when he means
|
||||||
|
> to.
|
||||||
|
|
||||||
|
As runc follows Semantic Versioning, we will endeavour to not make any
|
||||||
|
breaking changes without bumping the major version number of runc.
|
||||||
|
However, it should be noted that Go API usage of runc's internal
|
||||||
|
implementation (libcontainer) is *not* covered by this policy.
|
||||||
|
|
||||||
|
### Removed
|
||||||
|
* Removed libcontainer/configs.Device* identifiers (deprecated since rc94,
|
||||||
|
use libcontainer/devices). (#2999)
|
||||||
|
* Removed libcontainer/system.RunningInUserNS function (deprecated since
|
||||||
|
rc94, use libcontainer/userns). (#2999)
|
||||||
|
|
||||||
|
### Deprecated
|
||||||
|
* The usage of relative paths for mountpoints will now produce a warning
|
||||||
|
(such configurations are outside of the spec, and in future runc will
|
||||||
|
produce an error when given such configurations). (#2917, #3004)
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
* cgroupv2: devices: rework the filter generation to produce consistent
|
||||||
|
results with cgroupv1, and always clobber any existing eBPF
|
||||||
|
program(s) to fix `runc update` and avoid leaking eBPF programs
|
||||||
|
(resulting in errors when managing containers). (#2951)
|
||||||
|
* cgroupv2: correctly convert "number of IOs" statistics in a
|
||||||
|
cgroupv1-compatible way. (#2965, #2967, #2968, #2964)
|
||||||
|
* cgroupv2: support larger than 32-bit IO statistics on 32-bit architectures.
|
||||||
|
* cgroupv2: wait for freeze to finish before returning from the freezing
|
||||||
|
code, optimize the method for checking whether a cgroup is frozen. (#2955)
|
||||||
|
* cgroups/systemd: fixed "retry on dbus disconnect" logic introduced in rc94
|
||||||
|
* cgroups/systemd: fixed returning "unit already exists" error from a systemd
|
||||||
|
cgroup manager (regression in rc94) (#2997, #2996)
|
||||||
|
|
||||||
|
### Added
|
||||||
|
* cgroupv2: support SkipDevices with systemd driver. (#2958, #3019)
|
||||||
|
* cgroup1: blkio: support BFQ weights. (#3010)
|
||||||
|
* cgroupv2: set per-device io weights if BFQ IO scheduler is available.
|
||||||
|
(#3022)
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
* cgroup/systemd: return, not ignore, stop unit error from Destroy (#2946)
|
||||||
|
* Fix all golangci-lint failures. (#2781, #2962)
|
||||||
|
* Make `runc --version` output sane even when built with `go get` or
|
||||||
|
otherwise outside of our build scripts. (#2962)
|
||||||
|
* cgroups: set SkipDevices during runc update (so we don't modify
|
||||||
|
cgroups at all during `runc update`). (#2994)
|
||||||
|
|
||||||
|
<!-- minor releases -->
|
||||||
|
[Unreleased]: https://github.com/opencontainers/runc/compare/v1.1.0...HEAD
|
||||||
|
[1.1.0]: https://github.com/opencontainers/runc/compare/v1.1.0-rc.1...v1.1.0
|
||||||
|
[1.0.0]: https://github.com/opencontainers/runc/releases/tag/v1.0.0
|
||||||
|
|
||||||
|
<!-- 1.0.z patch releases -->
|
||||||
|
[Unreleased 1.0.z]: https://github.com/opencontainers/runc/compare/v1.0.3...release-1.0
|
||||||
|
[1.0.3]: https://github.com/opencontainers/runc/compare/v1.0.2...v1.0.3
|
||||||
|
[1.0.2]: https://github.com/opencontainers/runc/compare/v1.0.1...v1.0.2
|
||||||
|
[1.0.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.0.1
|
||||||
|
|
||||||
|
<!-- 1.1.z patch releases -->
|
||||||
|
[Unreleased 1.1.z]: https://github.com/opencontainers/runc/compare/v1.1.0...release-1.1
|
||||||
|
[1.1.0-rc.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.1.0-rc.1
|
96
Dockerfile
96
Dockerfile
|
@ -1,34 +1,41 @@
|
||||||
FROM golang:1.12-stretch
|
ARG GO_VERSION=1.17
|
||||||
|
ARG BATS_VERSION=v1.3.0
|
||||||
|
ARG LIBSECCOMP_VERSION=2.5.3
|
||||||
|
|
||||||
RUN dpkg --add-architecture armel \
|
FROM golang:${GO_VERSION}-bullseye
|
||||||
|
ARG DEBIAN_FRONTEND=noninteractive
|
||||||
|
ARG CRIU_REPO=https://download.opensuse.org/repositories/devel:/tools:/criu/Debian_11
|
||||||
|
|
||||||
|
RUN KEYFILE=/usr/share/keyrings/criu-repo-keyring.gpg; \
|
||||||
|
wget -nv $CRIU_REPO/Release.key -O- | gpg --dearmor > "$KEYFILE" \
|
||||||
|
&& echo "deb [signed-by=$KEYFILE] $CRIU_REPO/ /" > /etc/apt/sources.list.d/criu.list \
|
||||||
|
&& dpkg --add-architecture armel \
|
||||||
&& dpkg --add-architecture armhf \
|
&& dpkg --add-architecture armhf \
|
||||||
&& dpkg --add-architecture arm64 \
|
&& dpkg --add-architecture arm64 \
|
||||||
&& dpkg --add-architecture ppc64el \
|
&& dpkg --add-architecture ppc64el \
|
||||||
&& apt-get update && apt-get install -y \
|
&& apt-get update \
|
||||||
build-essential \
|
&& apt-get install -y --no-install-recommends \
|
||||||
curl \
|
build-essential \
|
||||||
sudo \
|
criu \
|
||||||
gawk \
|
crossbuild-essential-arm64 \
|
||||||
iptables \
|
crossbuild-essential-armel \
|
||||||
jq \
|
crossbuild-essential-armhf \
|
||||||
pkg-config \
|
crossbuild-essential-ppc64el \
|
||||||
libaio-dev \
|
crossbuild-essential-s390x \
|
||||||
libcap-dev \
|
curl \
|
||||||
libprotobuf-dev \
|
gawk \
|
||||||
libprotobuf-c0-dev \
|
gcc \
|
||||||
libnl-3-dev \
|
gperf \
|
||||||
libnet-dev \
|
iptables \
|
||||||
libseccomp2 \
|
jq \
|
||||||
libseccomp-dev \
|
kmod \
|
||||||
protobuf-c-compiler \
|
pkg-config \
|
||||||
protobuf-compiler \
|
python3-minimal \
|
||||||
python-minimal \
|
sshfs \
|
||||||
uidmap \
|
sudo \
|
||||||
kmod \
|
uidmap \
|
||||||
crossbuild-essential-armel crossbuild-essential-armhf crossbuild-essential-arm64 crossbuild-essential-ppc64el \
|
&& apt-get clean \
|
||||||
libseccomp-dev:armel libseccomp-dev:armhf libseccomp-dev:arm64 libseccomp-dev:ppc64el \
|
&& rm -rf /var/cache/apt /var/lib/apt/lists/* /etc/apt/sources.list.d/*.list
|
||||||
--no-install-recommends \
|
|
||||||
&& apt-get clean
|
|
||||||
|
|
||||||
# Add a dummy user for the rootless integration tests. While runC does
|
# Add a dummy user for the rootless integration tests. While runC does
|
||||||
# not require an entry in /etc/passwd to operate, one of the tests uses
|
# not require an entry in /etc/passwd to operate, one of the tests uses
|
||||||
|
@ -37,30 +44,21 @@ RUN dpkg --add-architecture armel \
|
||||||
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
|
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
|
||||||
|
|
||||||
# install bats
|
# install bats
|
||||||
|
ARG BATS_VERSION
|
||||||
RUN cd /tmp \
|
RUN cd /tmp \
|
||||||
&& git clone https://github.com/sstephenson/bats.git \
|
&& git clone https://github.com/bats-core/bats-core.git \
|
||||||
&& cd bats \
|
&& cd bats-core \
|
||||||
&& git reset --hard 03608115df2071fff4eaaff1605768c275e5f81f \
|
&& git reset --hard "${BATS_VERSION}" \
|
||||||
&& ./install.sh /usr/local \
|
&& ./install.sh /usr/local \
|
||||||
&& rm -rf /tmp/bats
|
&& rm -rf /tmp/bats-core
|
||||||
|
|
||||||
# install criu
|
# install libseccomp
|
||||||
ENV CRIU_VERSION v3.12
|
ARG LIBSECCOMP_VERSION
|
||||||
RUN mkdir -p /usr/src/criu \
|
COPY script/* /tmp/script/
|
||||||
&& curl -sSL https://github.com/checkpoint-restore/criu/archive/${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
|
RUN mkdir -p /opt/libseccomp \
|
||||||
&& cd /usr/src/criu \
|
&& /tmp/script/seccomp.sh "$LIBSECCOMP_VERSION" /opt/libseccomp arm64 armel armhf ppc64le s390x
|
||||||
&& make install-criu \
|
ENV LIBSECCOMP_VERSION=$LIBSECCOMP_VERSION
|
||||||
&& rm -rf /usr/src/criu
|
ENV LD_LIBRARY_PATH=/opt/libseccomp/lib
|
||||||
|
ENV PKG_CONFIG_PATH=/opt/libseccomp/lib/pkgconfig
|
||||||
|
|
||||||
# setup a playground for us to spawn containers in
|
|
||||||
ENV ROOTFS /busybox
|
|
||||||
RUN mkdir -p ${ROOTFS}
|
|
||||||
|
|
||||||
COPY script/tmpmount /
|
|
||||||
WORKDIR /go/src/github.com/opencontainers/runc
|
WORKDIR /go/src/github.com/opencontainers/runc
|
||||||
ENTRYPOINT ["/tmpmount"]
|
|
||||||
|
|
||||||
ADD . /go/src/github.com/opencontainers/runc
|
|
||||||
|
|
||||||
RUN . tests/integration/multi-arch.bash \
|
|
||||||
&& curl -o- -sSL `get_busybox` | tar xfJC - ${ROOTFS}
|
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
## Emeritus ##
|
||||||
|
|
||||||
|
We would like to acknowledge previous runc maintainers and their huge
|
||||||
|
contributions to our collective success:
|
||||||
|
|
||||||
|
* Alexander Morozov (@lk4d4)
|
||||||
|
* Andrei Vagin (@avagin)
|
||||||
|
* Rohit Jnagal (@rjnagal)
|
||||||
|
* Victor Marmol (@vmarmol)
|
||||||
|
|
||||||
|
We thank these members for their service to the OCI community.
|
|
@ -1,5 +1,8 @@
|
||||||
Michael Crosby <michael@docker.com> (@crosbymichael)
|
Michael Crosby <michael@thepasture.io> (@crosbymichael)
|
||||||
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
|
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
|
||||||
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
|
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
|
||||||
Qiang Huang <h.huangqiang@huawei.com> (@hqhq)
|
Qiang Huang <h.huangqiang@huawei.com> (@hqhq)
|
||||||
Aleksa Sarai <asarai@suse.de> (@cyphar)
|
Aleksa Sarai <cyphar@cyphar.com> (@cyphar)
|
||||||
|
Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp> (@AkihiroSuda)
|
||||||
|
Kir Kolyshkin <kolyshkin@gmail.com> (@kolyshkin)
|
||||||
|
Sebastiaan van Stijn <github@gone.nl> (@thaJeztah)
|
||||||
|
|
175
Makefile
175
Makefile
|
@ -1,133 +1,158 @@
|
||||||
.PHONY: all shell dbuild man release \
|
|
||||||
localtest localunittest localintegration \
|
|
||||||
test unittest integration \
|
|
||||||
cross localcross
|
|
||||||
|
|
||||||
CONTAINER_ENGINE := docker
|
CONTAINER_ENGINE := docker
|
||||||
GO := go
|
GO ?= go
|
||||||
|
|
||||||
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
|
PREFIX ?= /usr/local
|
||||||
PREFIX := $(DESTDIR)/usr/local
|
|
||||||
BINDIR := $(PREFIX)/sbin
|
BINDIR := $(PREFIX)/sbin
|
||||||
|
MANDIR := $(PREFIX)/share/man
|
||||||
|
|
||||||
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
|
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
|
||||||
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
|
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
|
||||||
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
|
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
|
||||||
PROJECT := github.com/opencontainers/runc
|
PROJECT := github.com/opencontainers/runc
|
||||||
BUILDTAGS ?= seccomp
|
BUILDTAGS ?= seccomp
|
||||||
COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
|
COMMIT ?= $(shell git describe --dirty --long --always)
|
||||||
COMMIT ?= $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")
|
VERSION := $(shell cat ./VERSION)
|
||||||
|
|
||||||
MAN_DIR := $(CURDIR)/man/man8
|
ifeq ($(shell $(GO) env GOOS),linux)
|
||||||
MAN_PAGES = $(shell ls $(MAN_DIR)/*.8)
|
ifeq (,$(filter $(shell $(GO) env GOARCH),mips mipsle mips64 mips64le ppc64))
|
||||||
MAN_PAGES_BASE = $(notdir $(MAN_PAGES))
|
ifeq (,$(findstring -race,$(EXTRA_FLAGS)))
|
||||||
MAN_INSTALL_PATH := ${PREFIX}/share/man/man8/
|
GO_BUILDMODE := "-buildmode=pie"
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
GO_BUILD := $(GO) build -trimpath $(GO_BUILDMODE) $(EXTRA_FLAGS) -tags "$(BUILDTAGS)" \
|
||||||
|
-ldflags "-X main.gitCommit=$(COMMIT) -X main.version=$(VERSION) $(EXTRA_LDFLAGS)"
|
||||||
|
GO_BUILD_STATIC := CGO_ENABLED=1 $(GO) build -trimpath $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" \
|
||||||
|
-ldflags "-extldflags -static -X main.gitCommit=$(COMMIT) -X main.version=$(VERSION) $(EXTRA_LDFLAGS)"
|
||||||
|
|
||||||
RELEASE_DIR := $(CURDIR)/release
|
GPG_KEYID ?= asarai@suse.de
|
||||||
|
|
||||||
VERSION := ${shell cat ./VERSION}
|
|
||||||
|
|
||||||
SHELL := $(shell command -v bash 2>/dev/null)
|
|
||||||
|
|
||||||
.DEFAULT: runc
|
.DEFAULT: runc
|
||||||
|
|
||||||
runc: $(SOURCES)
|
runc:
|
||||||
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc .
|
$(GO_BUILD) -o runc .
|
||||||
|
|
||||||
all: runc recvtty
|
all: runc recvtty sd-helper seccompagent
|
||||||
|
|
||||||
recvtty: contrib/cmd/recvtty/recvtty
|
recvtty sd-helper seccompagent:
|
||||||
|
$(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@
|
||||||
|
|
||||||
contrib/cmd/recvtty/recvtty: $(SOURCES)
|
static:
|
||||||
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
$(GO_BUILD_STATIC) -o runc .
|
||||||
|
|
||||||
static: $(SOURCES)
|
releaseall: RELEASE_ARGS := "-a arm64 -a armel -a armhf -a ppc64le -a s390x"
|
||||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc .
|
releaseall: release
|
||||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
|
||||||
|
|
||||||
release:
|
release: runcimage
|
||||||
script/release.sh -r release/$(VERSION) -v $(VERSION)
|
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||||
|
--rm -v $(CURDIR):/go/src/$(PROJECT) \
|
||||||
|
-e RELEASE_ARGS=$(RELEASE_ARGS) \
|
||||||
|
$(RUNC_IMAGE) make localrelease
|
||||||
|
script/release_sign.sh -S $(GPG_KEYID) -r release/$(VERSION) -v $(VERSION)
|
||||||
|
|
||||||
|
localrelease:
|
||||||
|
script/release_build.sh -r release/$(VERSION) -v $(VERSION) $(RELEASE_ARGS)
|
||||||
|
|
||||||
dbuild: runcimage
|
dbuild: runcimage
|
||||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
|
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||||
|
--privileged --rm \
|
||||||
|
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||||
|
$(RUNC_IMAGE) make clean all
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
$(GO) vet $(allpackages)
|
golangci-lint run ./...
|
||||||
$(GO) fmt $(allpackages)
|
|
||||||
|
|
||||||
man:
|
man:
|
||||||
man/md2man-all.sh
|
man/md2man-all.sh
|
||||||
|
|
||||||
runcimage:
|
runcimage:
|
||||||
$(CONTAINER_ENGINE) build ${CONTAINER_ENGINE_BUILD_FLAGS} -t $(RUNC_IMAGE) .
|
$(CONTAINER_ENGINE) build $(CONTAINER_ENGINE_BUILD_FLAGS) -t $(RUNC_IMAGE) .
|
||||||
|
|
||||||
test:
|
test: unittest integration rootlessintegration
|
||||||
make unittest integration rootlessintegration
|
|
||||||
|
|
||||||
localtest:
|
localtest: localunittest localintegration localrootlessintegration
|
||||||
make localunittest localintegration localrootlessintegration
|
|
||||||
|
|
||||||
unittest: runcimage
|
unittest: runcimage
|
||||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest TESTFLAGS=${TESTFLAGS}
|
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||||
|
-t --privileged --rm \
|
||||||
|
-v /lib/modules:/lib/modules:ro \
|
||||||
|
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||||
|
$(RUNC_IMAGE) make localunittest TESTFLAGS=$(TESTFLAGS)
|
||||||
|
|
||||||
localunittest: all
|
localunittest: all
|
||||||
$(GO) test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages)
|
$(GO) test -timeout 3m -tags "$(BUILDTAGS)" $(TESTFLAGS) -v ./...
|
||||||
|
|
||||||
integration: runcimage
|
integration: runcimage
|
||||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration TESTPATH=${TESTPATH}
|
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||||
|
-t --privileged --rm \
|
||||||
|
-v /lib/modules:/lib/modules:ro \
|
||||||
|
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||||
|
$(RUNC_IMAGE) make localintegration TESTPATH=$(TESTPATH)
|
||||||
|
|
||||||
localintegration: all
|
localintegration: all
|
||||||
bats -t tests/integration${TESTPATH}
|
bats -t tests/integration$(TESTPATH)
|
||||||
|
|
||||||
rootlessintegration: runcimage
|
rootlessintegration: runcimage
|
||||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration
|
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||||
|
-t --privileged --rm \
|
||||||
|
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||||
|
-e ROOTLESS_TESTPATH \
|
||||||
|
$(RUNC_IMAGE) make localrootlessintegration
|
||||||
|
|
||||||
localrootlessintegration: all
|
localrootlessintegration: all
|
||||||
tests/rootless.sh
|
tests/rootless.sh
|
||||||
|
|
||||||
shell: runcimage
|
shell: runcimage
|
||||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
|
$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
|
||||||
|
-ti --privileged --rm \
|
||||||
|
-v $(CURDIR):/go/src/$(PROJECT) \
|
||||||
|
$(RUNC_IMAGE) bash
|
||||||
|
|
||||||
install:
|
install:
|
||||||
install -D -m0755 runc $(BINDIR)/runc
|
install -D -m0755 runc $(DESTDIR)$(BINDIR)/runc
|
||||||
|
|
||||||
install-bash:
|
install-bash:
|
||||||
install -D -m0644 contrib/completions/bash/runc $(PREFIX)/share/bash-completion/completions/runc
|
install -D -m0644 contrib/completions/bash/runc $(DESTDIR)$(PREFIX)/share/bash-completion/completions/runc
|
||||||
|
|
||||||
install-man:
|
install-man: man
|
||||||
install -d -m 755 $(MAN_INSTALL_PATH)
|
install -d -m 755 $(DESTDIR)$(MANDIR)/man8
|
||||||
install -m 644 $(MAN_PAGES) $(MAN_INSTALL_PATH)
|
install -D -m 644 man/man8/*.8 $(DESTDIR)$(MANDIR)/man8
|
||||||
|
|
||||||
uninstall:
|
|
||||||
rm -f $(BINDIR)/runc
|
|
||||||
|
|
||||||
uninstall-bash:
|
|
||||||
rm -f $(PREFIX)/share/bash-completion/completions/runc
|
|
||||||
|
|
||||||
uninstall-man:
|
|
||||||
rm -f $(addprefix $(MAN_INSTALL_PATH),$(MAN_PAGES_BASE))
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f runc runc-*
|
rm -f runc runc-*
|
||||||
rm -f contrib/cmd/recvtty/recvtty
|
rm -f contrib/cmd/recvtty/recvtty
|
||||||
rm -rf $(RELEASE_DIR)
|
rm -f contrib/cmd/sd-helper/sd-helper
|
||||||
rm -rf $(MAN_DIR)
|
rm -f contrib/cmd/seccompagent/seccompagent
|
||||||
|
rm -rf release
|
||||||
|
rm -rf man/man8
|
||||||
|
|
||||||
validate:
|
cfmt: C_SRC=$(shell git ls-files '*.c' | grep -v '^vendor/')
|
||||||
script/validate-gofmt
|
cfmt:
|
||||||
script/validate-c
|
indent -linux -l120 -il0 -ppi2 -cp1 -T size_t -T jmp_buf $(C_SRC)
|
||||||
$(GO) vet $(allpackages)
|
|
||||||
|
|
||||||
ci: validate test release
|
shellcheck:
|
||||||
|
shellcheck tests/integration/*.bats tests/integration/*.sh \
|
||||||
|
tests/integration/*.bash tests/*.sh \
|
||||||
|
script/release_*.sh script/seccomp.sh script/lib.sh
|
||||||
|
# TODO: add shellcheck for more sh files
|
||||||
|
|
||||||
cross: runcimage
|
shfmt:
|
||||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -e BUILDTAGS="$(BUILDTAGS)" --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localcross
|
shfmt -ln bats -d -w tests/integration/*.bats
|
||||||
|
shfmt -ln bash -d -w man/*.sh script/* tests/*.sh tests/integration/*.bash
|
||||||
|
|
||||||
localcross:
|
vendor:
|
||||||
CGO_ENABLED=1 GOARCH=arm GOARM=6 CC=arm-linux-gnueabi-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armel .
|
$(GO) mod tidy
|
||||||
CGO_ENABLED=1 GOARCH=arm GOARM=7 CC=arm-linux-gnueabihf-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armhf .
|
$(GO) mod vendor
|
||||||
CGO_ENABLED=1 GOARCH=arm64 CC=aarch64-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-arm64 .
|
$(GO) mod verify
|
||||||
CGO_ENABLED=1 GOARCH=ppc64le CC=powerpc64le-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-ppc64le .
|
|
||||||
|
|
||||||
# memoize allpackages, so that it's executed only once and only if used
|
verify-dependencies: vendor
|
||||||
_allpackages = $(shell $(GO) list ./... | grep -v vendor)
|
@test -z "$$(git status --porcelain -- go.mod go.sum vendor/)" \
|
||||||
allpackages = $(if $(__allpackages),,$(eval __allpackages := $$(_allpackages)))$(__allpackages)
|
|| (echo -e "git status:\n $$(git status -- go.mod go.sum vendor/)\nerror: vendor/, go.mod and/or go.sum not up to date. Run \"make vendor\" to update"; exit 1) \
|
||||||
|
&& echo "all vendor files are up to date."
|
||||||
|
|
||||||
|
.PHONY: runc all recvtty sd-helper seccompagent static releaseall release \
|
||||||
|
localrelease dbuild lint man runcimage \
|
||||||
|
test localtest unittest localunittest integration localintegration \
|
||||||
|
rootlessintegration localrootlessintegration shell install install-bash \
|
||||||
|
install-man clean cfmt shfmt shellcheck \
|
||||||
|
vendor verify-dependencies
|
||||||
|
|
88
README.md
88
README.md
|
@ -1,39 +1,33 @@
|
||||||
# runc
|
# runc
|
||||||
|
|
||||||
[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
|
|
||||||
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
|
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
|
||||||
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
|
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
|
||||||
|
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
|
||||||
|
[![gha/validate](https://github.com/opencontainers/runc/workflows/validate/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Avalidate)
|
||||||
|
[![gha/ci](https://github.com/opencontainers/runc/workflows/ci/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Aci)
|
||||||
|
|
||||||
## Introduction
|
## Introduction
|
||||||
|
|
||||||
`runc` is a CLI tool for spawning and running containers according to the OCI specification.
|
`runc` is a CLI tool for spawning and running containers on Linux according to the OCI specification.
|
||||||
|
|
||||||
## Releases
|
## Releases
|
||||||
|
|
||||||
`runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository.
|
|
||||||
We will try to make sure that `runc` and the OCI specification major versions stay in lockstep.
|
|
||||||
This means that `runc` 1.0.0 should implement the 1.0 version of the specification.
|
|
||||||
|
|
||||||
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
||||||
|
|
||||||
Currently, the following features are not considered to be production-ready:
|
|
||||||
|
|
||||||
* Support for cgroup v2
|
|
||||||
|
|
||||||
## Security
|
## Security
|
||||||
|
|
||||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
|
||||||
|
|
||||||
|
### Security Audit
|
||||||
|
A third-party security audit was performed by Cure53; you can see the full report [here](https://github.com/opencontainers/runc/blob/master/docs/Security-Audit.pdf).
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
|
|
||||||
`runc` currently supports the Linux platform with various architecture support.
|
`runc` only supports Linux. It must be built with Go version 1.16 or higher.
|
||||||
It must be built with Go version 1.6 or higher in order for some features to function properly.
|
|
||||||
|
|
||||||
In order to enable seccomp support you will need to install `libseccomp` on your platform.
|
In order to enable seccomp support you will need to install `libseccomp` on your platform.
|
||||||
> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
|
> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
|
||||||
|
|
||||||
Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make.
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# create a 'github.com/opencontainers' in your GOPATH/src
|
# create a 'github.com/opencontainers' in your GOPATH/src
|
||||||
cd github.com/opencontainers
|
cd github.com/opencontainers
|
||||||
|
@ -58,21 +52,24 @@ sudo make install
|
||||||
|
|
||||||
#### Build Tags
|
#### Build Tags
|
||||||
|
|
||||||
`runc` supports optional build tags for compiling support of various features.
|
`runc` supports optional build tags for compiling support of various features,
|
||||||
To add build tags to the make option the `BUILDTAGS` variable must be set.
|
with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
|
||||||
|
|
||||||
|
To change build tags from the default, set the `BUILDTAGS` variable for make,
|
||||||
|
e.g. to disable seccomp:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make BUILDTAGS='seccomp apparmor'
|
make BUILDTAGS=""
|
||||||
```
|
```
|
||||||
|
|
||||||
| Build Tag | Feature | Dependency |
|
| Build Tag | Feature | Enabled by default | Dependency |
|
||||||
|-----------|------------------------------------|-------------|
|
|-----------|------------------------------------|--------------------|------------|
|
||||||
| seccomp | Syscall filtering | libseccomp |
|
| seccomp | Syscall filtering | yes | libseccomp |
|
||||||
| selinux | selinux process and mount labeling | <none> |
|
|
||||||
| apparmor | apparmor profile support | <none> |
|
|
||||||
| ambient | ambient capability support | kernel 4.3 |
|
|
||||||
| nokmem | disable kernel memory account | <none> |
|
|
||||||
|
|
||||||
|
The following build tags were used earlier, but are now obsolete:
|
||||||
|
- **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored)
|
||||||
|
- **apparmor** (since runc v1.0.0-rc93 the feature is always enabled)
|
||||||
|
- **selinux** (since runc v1.0.0-rc93 the feature is always enabled)
|
||||||
|
|
||||||
### Running the test suite
|
### Running the test suite
|
||||||
|
|
||||||
|
@ -97,20 +94,41 @@ You can run a specific integration test by setting the `TESTPATH` variable.
|
||||||
# make test TESTPATH="/checkpoint.bats"
|
# make test TESTPATH="/checkpoint.bats"
|
||||||
```
|
```
|
||||||
|
|
||||||
You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables.
|
You can run a specific rootless integration test by setting the `ROOTLESS_TESTPATH` variable.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/"
|
# make test ROOTLESS_TESTPATH="/checkpoint.bats"
|
||||||
|
```
|
||||||
|
|
||||||
|
You can run a test using your container engine's flags by setting `CONTAINER_ENGINE_BUILD_FLAGS` and `CONTAINER_ENGINE_RUN_FLAGS` variables.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# make test CONTAINER_ENGINE_BUILD_FLAGS="--build-arg http_proxy=http://yourproxy/" CONTAINER_ENGINE_RUN_FLAGS="-e http_proxy=http://yourproxy/"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Dependencies Management
|
### Dependencies Management
|
||||||
|
|
||||||
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
|
`runc` uses [Go Modules](https://github.com/golang/go/wiki/Modules) for dependencies management.
|
||||||
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
|
Please refer to [Go Modules](https://github.com/golang/go/wiki/Modules) for how to add or update
|
||||||
new dependencies.
|
new dependencies.
|
||||||
|
|
||||||
|
```
|
||||||
|
# Update vendored dependencies
|
||||||
|
make vendor
|
||||||
|
# Verify all dependencies
|
||||||
|
make verify-dependencies
|
||||||
|
```
|
||||||
|
|
||||||
## Using runc
|
## Using runc
|
||||||
|
|
||||||
|
Please note that runc is a low level tool not designed with an end user
|
||||||
|
in mind. It is mostly employed by other higher level container software.
|
||||||
|
|
||||||
|
Therefore, unless there is some specific use case that prevents the use
|
||||||
|
of tools like Docker or Podman, it is not recommended to use runc directly.
|
||||||
|
|
||||||
|
If you still want to use runc, here's how.
|
||||||
|
|
||||||
### Creating an OCI Bundle
|
### Creating an OCI Bundle
|
||||||
|
|
||||||
In order to use runc you must have your container in the format of an OCI bundle.
|
In order to use runc you must have your container in the format of an OCI bundle.
|
||||||
|
@ -152,7 +170,9 @@ If you used the unmodified `runc spec` template this should give you a `sh` sess
|
||||||
|
|
||||||
The second way to start a container is using the specs lifecycle operations.
|
The second way to start a container is using the specs lifecycle operations.
|
||||||
This gives you more power over how the container is created and managed while it is running.
|
This gives you more power over how the container is created and managed while it is running.
|
||||||
This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here.
|
This will also launch the container in the background so you will have to edit
|
||||||
|
the `config.json` to remove the `terminal` setting for the simple examples
|
||||||
|
below (see more details about [runc terminal handling](docs/terminals.md)).
|
||||||
Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`.
|
Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`.
|
||||||
|
|
||||||
|
|
||||||
|
@ -275,6 +295,14 @@ PIDFile=/run/mycontainerid.pid
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## More documentation
|
||||||
|
|
||||||
|
* [cgroup v2](./docs/cgroup-v2.md)
|
||||||
|
* [Checkpoint and restore](./docs/checkpoint-restore.md)
|
||||||
|
* [systemd cgroup driver](./docs/systemd.md)
|
||||||
|
* [Terminals and standard IO](./docs/terminals.md)
|
||||||
|
* [Experimental features](./docs/experimental.md)
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
The code and docs are released under the [Apache 2.0 license](LICENSE).
|
The code and docs are released under the [Apache 2.0 license](LICENSE).
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
# Security
|
# Security
|
||||||
|
|
||||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Vagrant definition of the Fedora VM used by CI. The provisioning script
# installs the build/test dependencies, creates the "rootless" user used by
# the rootless tests, and enables cgroup controller delegation for user
# sessions.
Vagrant.configure("2") do |config|
  # Fedora box is used for testing cgroup v2 support
  config.vm.box = "fedora/35-cloud-base"
  config.vm.provider :virtualbox do |v|
    v.memory = 2048
    v.cpus = 2
  end
  config.vm.provider :libvirt do |v|
    v.memory = 2048
    v.cpus = 2
  end
  # NOTE: the inner "<< EOF" heredocs are not "<<-" heredocs, so their bodies
  # and terminators must stay at column 0.
  config.vm.provision "shell", inline: <<-SHELL
    set -e -u -o pipefail
    # Work around dnf mirror failures by retrying a few times
    for i in $(seq 0 2); do
      sleep $i
      # "config exclude" dnf shell command is not working in Fedora 35
      # (see https://bugzilla.redhat.com/show_bug.cgi?id=2022571);
      # the workaround is to specify it as an option.
      cat << EOF | dnf -y --exclude=kernel,kernel-core shell && break
config install_weak_deps false
update
install iptables gcc make golang-go glibc-static libseccomp-devel bats jq git-core criu fuse-sshfs
ts run
EOF
    done
    dnf clean all

    # Add a user for rootless tests
    useradd -u2000 -m -d/home/rootless -s/bin/bash rootless

    # Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
    ssh-keygen -t ecdsa -N "" -f /root/rootless.key
    mkdir -m 0700 -p /home/rootless/.ssh
    cp /root/rootless.key /home/rootless/.ssh/id_ecdsa
    cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
    # Use the portable "owner:group" form; the "owner.group" form is a
    # deprecated GNU extension.
    chown -R rootless:rootless /home/rootless

    # Delegate cgroup v2 controllers to rootless user via --systemd-cgroup
    mkdir -p /etc/systemd/system/user@.service.d
    cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
[Service]
# default: Delegate=pids memory
# NOTE: delegation of cpuset requires systemd >= 244 (Fedora >= 32, Ubuntu >= 20.04).
Delegate=yes
EOF
    systemctl daemon-reload
  SHELL
end
|
|
@ -1,19 +1,19 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
"os"
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
|
|
||||||
|
criu "github.com/checkpoint-restore/go-criu/v5/rpc"
|
||||||
"github.com/opencontainers/runc/libcontainer"
|
"github.com/opencontainers/runc/libcontainer"
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
"github.com/opencontainers/runc/libcontainer/userns"
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
|
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -34,7 +34,7 @@ checkpointed.`,
|
||||||
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
||||||
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
||||||
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
|
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
|
||||||
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
cli.IntFlag{Name: "status-fd", Value: -1, Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
||||||
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
||||||
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
||||||
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
||||||
|
@ -47,7 +47,7 @@ checkpointed.`,
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// XXX: Currently this is untested with rootless containers.
|
// XXX: Currently this is untested with rootless containers.
|
||||||
if os.Geteuid() != 0 || system.RunningInUserNS() {
|
if os.Geteuid() != 0 || userns.RunningInUserNS() {
|
||||||
logrus.Warn("runc checkpoint is untested with rootless containers")
|
logrus.Warn("runc checkpoint is untested with rootless containers")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,10 +60,13 @@ checkpointed.`,
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if status == libcontainer.Created || status == libcontainer.Stopped {
|
if status == libcontainer.Created || status == libcontainer.Stopped {
|
||||||
fatalf("Container cannot be checkpointed in %s state", status.String())
|
fatal(fmt.Errorf("Container cannot be checkpointed in %s state", status.String()))
|
||||||
}
|
}
|
||||||
defer destroy(container)
|
|
||||||
options := criuOptions(context)
|
options := criuOptions(context)
|
||||||
|
if !(options.LeaveRunning || options.PreDump) {
|
||||||
|
// destroy container unless we tell CRIU to keep it
|
||||||
|
defer destroy(container)
|
||||||
|
}
|
||||||
// these are the mandatory criu options for a container
|
// these are the mandatory criu options for a container
|
||||||
setPageServer(context, options)
|
setPageServer(context, options)
|
||||||
setManageCgroupsMode(context, options)
|
setManageCgroupsMode(context, options)
|
||||||
|
@ -74,28 +77,53 @@ checkpointed.`,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func getCheckpointImagePath(context *cli.Context) string {
|
func prepareImagePaths(context *cli.Context) (string, string, error) {
|
||||||
imagePath := context.String("image-path")
|
imagePath := context.String("image-path")
|
||||||
if imagePath == "" {
|
if imagePath == "" {
|
||||||
imagePath = getDefaultImagePath(context)
|
imagePath = getDefaultImagePath()
|
||||||
}
|
}
|
||||||
return imagePath
|
|
||||||
|
if err := os.MkdirAll(imagePath, 0o600); err != nil {
|
||||||
|
return "", "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
parentPath := context.String("parent-path")
|
||||||
|
if parentPath == "" {
|
||||||
|
return imagePath, parentPath, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if filepath.IsAbs(parentPath) {
|
||||||
|
return "", "", errors.New("--parent-path must be relative")
|
||||||
|
}
|
||||||
|
|
||||||
|
realParent := filepath.Join(imagePath, parentPath)
|
||||||
|
fi, err := os.Stat(realParent)
|
||||||
|
if err == nil && !fi.IsDir() {
|
||||||
|
err = &os.PathError{Path: realParent, Err: unix.ENOTDIR}
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return "", "", fmt.Errorf("invalid --parent-path: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return imagePath, parentPath, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
|
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
|
||||||
// xxx following criu opts are optional
|
// xxx following criu opts are optional
|
||||||
// The dump image can be sent to a criu page server
|
// The dump image can be sent to a criu page server
|
||||||
if psOpt := context.String("page-server"); psOpt != "" {
|
if psOpt := context.String("page-server"); psOpt != "" {
|
||||||
addressPort := strings.Split(psOpt, ":")
|
address, port, err := net.SplitHostPort(psOpt)
|
||||||
if len(addressPort) != 2 {
|
|
||||||
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
|
if err != nil || address == "" || port == "" {
|
||||||
|
fatal(errors.New("Use --page-server ADDRESS:PORT to specify page server"))
|
||||||
}
|
}
|
||||||
portInt, err := strconv.Atoi(addressPort[1])
|
portInt, err := strconv.Atoi(port)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fatal(fmt.Errorf("Invalid port number"))
|
fatal(errors.New("Invalid port number"))
|
||||||
}
|
}
|
||||||
options.PageServer = libcontainer.CriuPageServerInfo{
|
options.PageServer = libcontainer.CriuPageServerInfo{
|
||||||
Address: addressPort[0],
|
Address: address,
|
||||||
Port: int32(portInt),
|
Port: int32(portInt),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -105,13 +133,13 @@ func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts)
|
||||||
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
|
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
|
||||||
switch cgOpt {
|
switch cgOpt {
|
||||||
case "soft":
|
case "soft":
|
||||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
|
options.ManageCgroupsMode = criu.CriuCgMode_SOFT
|
||||||
case "full":
|
case "full":
|
||||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
|
options.ManageCgroupsMode = criu.CriuCgMode_FULL
|
||||||
case "strict":
|
case "strict":
|
||||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
|
options.ManageCgroupsMode = criu.CriuCgMode_STRICT
|
||||||
default:
|
default:
|
||||||
fatal(fmt.Errorf("Invalid manage cgroups mode"))
|
fatal(errors.New("Invalid manage cgroups mode"))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,12 +17,13 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
|
||||||
"net"
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/containerd/console"
|
"github.com/containerd/console"
|
||||||
"github.com/opencontainers/runc/libcontainer/utils"
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
@ -65,7 +66,7 @@ func bail(err error) {
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func handleSingle(path string) error {
|
func handleSingle(path string, noStdin bool) error {
|
||||||
// Open a socket.
|
// Open a socket.
|
||||||
ln, err := net.Listen("unix", path)
|
ln, err := net.Listen("unix", path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -87,7 +88,7 @@ func handleSingle(path string) error {
|
||||||
// Get the fd of the connection.
|
// Get the fd of the connection.
|
||||||
unixconn, ok := conn.(*net.UnixConn)
|
unixconn, ok := conn.(*net.UnixConn)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("failed to cast to unixconn")
|
return errors.New("failed to cast to unixconn")
|
||||||
}
|
}
|
||||||
|
|
||||||
socket, err := unixconn.File()
|
socket, err := unixconn.File()
|
||||||
|
@ -105,23 +106,37 @@ func handleSingle(path string) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
console.ClearONLCR(c.Fd())
|
if err := console.ClearONLCR(c.Fd()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// Copy from our stdio to the master fd.
|
// Copy from our stdio to the master fd.
|
||||||
quitChan := make(chan struct{})
|
var (
|
||||||
|
wg sync.WaitGroup
|
||||||
|
inErr, outErr error
|
||||||
|
)
|
||||||
|
wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
io.Copy(os.Stdout, c)
|
_, outErr = io.Copy(os.Stdout, c)
|
||||||
quitChan <- struct{}{}
|
wg.Done()
|
||||||
}()
|
|
||||||
go func() {
|
|
||||||
io.Copy(c, os.Stdin)
|
|
||||||
quitChan <- struct{}{}
|
|
||||||
}()
|
}()
|
||||||
|
if !noStdin {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
_, inErr = io.Copy(c, os.Stdin)
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
// Only close the master fd once we've stopped copying.
|
// Only close the master fd once we've stopped copying.
|
||||||
<-quitChan
|
wg.Wait()
|
||||||
c.Close()
|
c.Close()
|
||||||
return nil
|
|
||||||
|
if outErr != nil {
|
||||||
|
return outErr
|
||||||
|
}
|
||||||
|
|
||||||
|
return inErr
|
||||||
}
|
}
|
||||||
|
|
||||||
func handleNull(path string) error {
|
func handleNull(path string) error {
|
||||||
|
@ -161,15 +176,7 @@ func handleNull(path string) error {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Just do a dumb copy to /dev/null.
|
_, _ = io.Copy(io.Discard, master)
|
||||||
devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
|
||||||
if err != nil {
|
|
||||||
// TODO: Handle this nicely.
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
io.Copy(devnull, master)
|
|
||||||
devnull.Close()
|
|
||||||
}(conn)
|
}(conn)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -185,7 +192,7 @@ func main() {
|
||||||
v = append(v, version)
|
v = append(v, version)
|
||||||
}
|
}
|
||||||
if gitCommit != "" {
|
if gitCommit != "" {
|
||||||
v = append(v, fmt.Sprintf("commit: %s", gitCommit))
|
v = append(v, "commit: "+gitCommit)
|
||||||
}
|
}
|
||||||
app.Version = strings.Join(v, "\n")
|
app.Version = strings.Join(v, "\n")
|
||||||
|
|
||||||
|
@ -201,26 +208,31 @@ func main() {
|
||||||
Value: "",
|
Value: "",
|
||||||
Usage: "Path to write daemon process ID to",
|
Usage: "Path to write daemon process ID to",
|
||||||
},
|
},
|
||||||
|
cli.BoolFlag{
|
||||||
|
Name: "no-stdin",
|
||||||
|
Usage: "Disable stdin handling (no-op for null mode)",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
app.Action = func(ctx *cli.Context) error {
|
app.Action = func(ctx *cli.Context) error {
|
||||||
args := ctx.Args()
|
args := ctx.Args()
|
||||||
if len(args) != 1 {
|
if len(args) != 1 {
|
||||||
return fmt.Errorf("need to specify a single socket path")
|
return errors.New("need to specify a single socket path")
|
||||||
}
|
}
|
||||||
path := ctx.Args()[0]
|
path := ctx.Args()[0]
|
||||||
|
|
||||||
pidPath := ctx.String("pid-file")
|
pidPath := ctx.String("pid-file")
|
||||||
if pidPath != "" {
|
if pidPath != "" {
|
||||||
pid := fmt.Sprintf("%d\n", os.Getpid())
|
pid := fmt.Sprintf("%d\n", os.Getpid())
|
||||||
if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil {
|
if err := os.WriteFile(pidPath, []byte(pid), 0o644); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
noStdin := ctx.Bool("no-stdin")
|
||||||
switch ctx.String("mode") {
|
switch ctx.String("mode") {
|
||||||
case "single":
|
case "single":
|
||||||
if err := handleSingle(path); err != nil {
|
if err := handleSingle(path, noStdin); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
case "null":
|
case "null":
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
// usage prints the sd-helper help text to standard output and terminates the
// process with exit status 1; it never returns to the caller.
func usage() {
|
||||||
|
fmt.Print(`Open Container Initiative contrib/cmd/sd-helper
|
||||||
|
|
||||||
|
sd-helper is a tool that uses runc/libcontainer/cgroups/systemd package
|
||||||
|
functionality to communicate to systemd in order to perform various operations.
|
||||||
|
Currently this is limited to starting and stopping systemd transient slice
|
||||||
|
units.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
sd-helper [-debug] [-parent <pname>] {start|stop} <name>
|
||||||
|
|
||||||
|
Example:
|
||||||
|
sd-helper -parent system.slice start system-pod123.slice
|
||||||
|
`)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
debug = flag.Bool("debug", false, "enable debug output")
|
||||||
|
parent = flag.String("parent", "", "parent unit name")
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if !systemd.IsRunningSystemd() {
|
||||||
|
logrus.Fatal("systemd is required")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set the flags.
|
||||||
|
flag.Parse()
|
||||||
|
if *debug {
|
||||||
|
logrus.SetLevel(logrus.DebugLevel)
|
||||||
|
}
|
||||||
|
if flag.NArg() != 2 {
|
||||||
|
usage()
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd := flag.Arg(0)
|
||||||
|
unit := flag.Arg(1)
|
||||||
|
|
||||||
|
err := unitCommand(cmd, unit, *parent)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newManager(config *configs.Cgroup) (cgroups.Manager, error) {
|
||||||
|
if cgroups.IsCgroup2UnifiedMode() {
|
||||||
|
return systemd.NewUnifiedManager(config, "")
|
||||||
|
}
|
||||||
|
return systemd.NewLegacyManager(config, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func unitCommand(cmd, name, parent string) error {
|
||||||
|
podConfig := &configs.Cgroup{
|
||||||
|
Name: name,
|
||||||
|
Parent: parent,
|
||||||
|
Resources: &configs.Resources{},
|
||||||
|
}
|
||||||
|
pm, err := newManager(podConfig)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch cmd {
|
||||||
|
case "start":
|
||||||
|
return pm.Apply(-1)
|
||||||
|
case "stop":
|
||||||
|
return pm.Destroy()
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("unknown command: %s", cmd)
|
||||||
|
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
# Seccomp Agent
|
||||||
|
|
||||||
|
## Warning
|
||||||
|
|
||||||
|
Please note this is an example agent, as such it is possible that specially
|
||||||
|
crafted messages can produce bad behaviour. Please use it as an example only.
|
||||||
|
|
||||||
|
Also, this agent is used for integration tests. Be aware that changing the
|
||||||
|
behaviour can break the integration tests.
|
||||||
|
|
||||||
|
## Get started
|
||||||
|
|
||||||
|
Compile runc and seccompagent:
|
||||||
|
```bash
|
||||||
|
make all
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the seccomp agent in the background:
|
||||||
|
```bash
|
||||||
|
sudo ./contrib/cmd/seccompagent/seccompagent &
|
||||||
|
```
|
||||||
|
|
||||||
|
Prepare a container:
|
||||||
|
```bash
|
||||||
|
mkdir container-seccomp-notify
|
||||||
|
cd container-seccomp-notify
|
||||||
|
mkdir rootfs
|
||||||
|
docker export $(docker create busybox) | tar -C rootfs -xvf -
|
||||||
|
```
|
||||||
|
|
||||||
|
Then, generate a config.json by running the script gen-seccomp-example-cfg.sh
|
||||||
|
(located in the same directory as this README.md) from inside the container
|
||||||
|
directory you prepared earlier (`container-seccomp-notify`).
|
||||||
|
|
||||||
|
Then start the container:
|
||||||
|
```bash
|
||||||
|
runc run mycontainerid
|
||||||
|
```
|
||||||
|
|
||||||
|
The container will output something like this:
|
||||||
|
```bash
|
||||||
|
+ cd /dev/shm
|
||||||
|
+ mkdir test-dir
|
||||||
|
+ touch test-file
|
||||||
|
+ chmod 777 test-file
|
||||||
|
chmod: changing permissions of 'test-file': No medium found
|
||||||
|
+ stat /dev/shm/test-dir-foo
|
||||||
|
File: /dev/shm/test-dir-foo
|
||||||
|
Size: 40 Blocks: 0 IO Block: 4096 directory
|
||||||
|
Device: 3eh/62d Inode: 2 Links: 2
|
||||||
|
Access: (0755/drwxr-xr-x) Uid: ( 0/ root) Gid: ( 0/ root)
|
||||||
|
Access: 2021-09-09 15:03:13.043716040 +0000
|
||||||
|
Modify: 2021-09-09 15:03:13.043716040 +0000
|
||||||
|
Change: 2021-09-09 15:03:13.043716040 +0000
|
||||||
|
Birth: -
|
||||||
|
+ ls -l /dev/shm
|
||||||
|
total 0
|
||||||
|
drwxr-xr-x 2 root root 40 Sep 9 15:03 test-dir-foo
|
||||||
|
-rw-r--r-- 1 root root 0 Sep 9 15:03 test-file
|
||||||
|
+ echo Note the agent added a suffix for the directory name and chmod fails
|
||||||
|
Note the agent added a suffix for the directory name and chmod fails
|
||||||
|
```
|
||||||
|
|
||||||
|
This shows a simple example that runs in /dev/shm just because it is a tmpfs in
|
||||||
|
the example config.json.
|
||||||
|
|
||||||
|
The agent makes all chmod calls fail with ENOMEDIUM, as the example output shows.
|
||||||
|
|
||||||
|
For mkdir, the agent adds a "-foo" suffix: the container runs "mkdir test-dir"
|
||||||
|
but the directory created is "test-dir-foo".
|
|
@ -0,0 +1,35 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# Detect if we are running inside bats (i.e. inside integration tests) or just
|
||||||
|
# called by an end-user
|
||||||
|
# bats-core v1.2.1 defines BATS_RUN_TMPDIR
|
||||||
|
if [ -z "$BATS_RUN_TMPDIR" ]; then
|
||||||
|
# When not running in bats, we create the config.json
|
||||||
|
set -e
|
||||||
|
runc spec
|
||||||
|
fi
|
||||||
|
|
||||||
|
# We can't source $(dirname $0)/../../../tests/integration/helpers.bash as that
|
||||||
|
# exits when not running inside bats. We can do hacks, but just to redefine
|
||||||
|
# update_config() seems clearer. We don't even really need to keep them in sync.
|
||||||
|
# update_config FILTER
# Applies the jq filter given as $1 to ./config.json and writes the result
# back to ./config.json. The awk stage reads one record from stdin (with
# RS="" that is everything up to the first blank line) and only then opens
# ARGV[1] for writing -- presumably so the file is not truncated before jq
# has produced its output; confirm before changing the pipeline.
function update_config() {
|
||||||
|
jq "$1" "./config.json" | awk 'BEGIN{RS="";getline<"-";print>ARGV[1]}' "./config.json"
|
||||||
|
}
|
||||||
|
|
||||||
|
update_config '.linux.seccomp = {
|
||||||
|
"defaultAction": "SCMP_ACT_ALLOW",
|
||||||
|
"listenerPath": "/run/seccomp-agent.socket",
|
||||||
|
"listenerMetadata": "foo",
|
||||||
|
"architectures": [ "SCMP_ARCH_X86", "SCMP_ARCH_X32", "SCMP_ARCH_X86_64" ],
|
||||||
|
"syscalls": [
|
||||||
|
{
|
||||||
|
"names": [ "chmod", "fchmod", "fchmodat", "mkdir" ],
|
||||||
|
"action": "SCMP_ACT_NOTIFY"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
|
||||||
|
update_config '.process.args = [
|
||||||
|
"sh",
|
||||||
|
"-c",
|
||||||
|
"set -x; cd /dev/shm; mkdir test-dir; touch test-file; chmod 777 test-file; stat /dev/shm/test-dir-foo && ls -l /dev/shm && echo \"Note the agent added a suffix for the directory name and chmod fails\" "
|
||||||
|
]'
|
|
@ -0,0 +1,291 @@
|
||||||
|
//go:build linux && seccomp
|
||||||
|
// +build linux,seccomp
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
securejoin "github.com/cyphar/filepath-securejoin"
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
socketFile string
|
||||||
|
pidFile string
|
||||||
|
)
|
||||||
|
|
||||||
|
func closeStateFds(recvFds []int) {
|
||||||
|
for i := range recvFds {
|
||||||
|
unix.Close(i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseStateFds returns the seccomp-fd and closes the rest of the fds in recvFds.
|
||||||
|
// In case of error, no fd is closed.
|
||||||
|
// StateFds is assumed to be formatted as specs.ContainerProcessState.Fds and
|
||||||
|
// recvFds the corresponding list of received fds in the same SCM_RIGHT message.
|
||||||
|
func parseStateFds(stateFds []string, recvFds []int) (uintptr, error) {
|
||||||
|
// Let's find the index in stateFds of the seccomp-fd.
|
||||||
|
idx := -1
|
||||||
|
err := false
|
||||||
|
|
||||||
|
for i, name := range stateFds {
|
||||||
|
if name == specs.SeccompFdName && idx == -1 {
|
||||||
|
idx = i
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// We found the seccompFdName twice. Error out!
|
||||||
|
if name == specs.SeccompFdName && idx != -1 {
|
||||||
|
err = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx == -1 || err {
|
||||||
|
return 0, errors.New("seccomp fd not found or malformed containerProcessState.Fds")
|
||||||
|
}
|
||||||
|
|
||||||
|
if idx >= len(recvFds) || idx < 0 {
|
||||||
|
return 0, errors.New("seccomp fd index out of range")
|
||||||
|
}
|
||||||
|
|
||||||
|
fd := uintptr(recvFds[idx])
|
||||||
|
|
||||||
|
for i := range recvFds {
|
||||||
|
if i == idx {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
unix.Close(recvFds[i])
|
||||||
|
}
|
||||||
|
|
||||||
|
return fd, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleNewMessage(sockfd int) (uintptr, string, error) {
|
||||||
|
const maxNameLen = 4096
|
||||||
|
stateBuf := make([]byte, maxNameLen)
|
||||||
|
oobSpace := unix.CmsgSpace(4)
|
||||||
|
oob := make([]byte, oobSpace)
|
||||||
|
|
||||||
|
n, oobn, _, _, err := unix.Recvmsg(sockfd, stateBuf, oob, 0)
|
||||||
|
if err != nil {
|
||||||
|
return 0, "", err
|
||||||
|
}
|
||||||
|
if n >= maxNameLen || oobn != oobSpace {
|
||||||
|
return 0, "", fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncate.
|
||||||
|
stateBuf = stateBuf[:n]
|
||||||
|
oob = oob[:oobn]
|
||||||
|
|
||||||
|
scms, err := unix.ParseSocketControlMessage(oob)
|
||||||
|
if err != nil {
|
||||||
|
return 0, "", err
|
||||||
|
}
|
||||||
|
if len(scms) != 1 {
|
||||||
|
return 0, "", fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
|
||||||
|
}
|
||||||
|
scm := scms[0]
|
||||||
|
|
||||||
|
fds, err := unix.ParseUnixRights(&scm)
|
||||||
|
if err != nil {
|
||||||
|
return 0, "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
containerProcessState := &specs.ContainerProcessState{}
|
||||||
|
err = json.Unmarshal(stateBuf, containerProcessState)
|
||||||
|
if err != nil {
|
||||||
|
closeStateFds(fds)
|
||||||
|
return 0, "", fmt.Errorf("cannot parse OCI state: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err := parseStateFds(containerProcessState.Fds, fds)
|
||||||
|
if err != nil {
|
||||||
|
closeStateFds(fds)
|
||||||
|
return 0, "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return fd, containerProcessState.Metadata, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readArgString(pid uint32, offset int64) (string, error) {
|
||||||
|
buffer := make([]byte, 4096) // PATH_MAX
|
||||||
|
|
||||||
|
memfd, err := unix.Open(fmt.Sprintf("/proc/%d/mem", pid), unix.O_RDONLY, 0o777)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer unix.Close(memfd)
|
||||||
|
|
||||||
|
_, err = unix.Pread(memfd, buffer, offset)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer[len(buffer)-1] = 0
|
||||||
|
s := buffer[:bytes.IndexByte(buffer, 0)]
|
||||||
|
return string(s), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func runMkdirForContainer(pid uint32, fileName string, mode uint32, metadata string) error {
|
||||||
|
// We validated before that metadata is not a string that can make
|
||||||
|
// newFile a file in a different location other than root.
|
||||||
|
newFile := fmt.Sprintf("%s-%s", fileName, metadata)
|
||||||
|
root := fmt.Sprintf("/proc/%d/cwd/", pid)
|
||||||
|
|
||||||
|
if strings.HasPrefix(fileName, "/") {
|
||||||
|
// If it starts with /, use the rootfs as base
|
||||||
|
root = fmt.Sprintf("/proc/%d/root/", pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
path, err := securejoin.SecureJoin(root, newFile)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return unix.Mkdir(path, mode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// notifHandler handles seccomp notifications and responses
|
||||||
|
func notifHandler(fd libseccomp.ScmpFd, metadata string) {
|
||||||
|
defer unix.Close(int(fd))
|
||||||
|
for {
|
||||||
|
req, err := libseccomp.NotifReceive(fd)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("Error in NotifReceive(): %s", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
syscallName, err := req.Data.Syscall.GetName()
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("Error decoding syscall %v(): %s", req.Data.Syscall, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
logrus.Debugf("Received syscall %q, pid %v, arch %q, args %+v", syscallName, req.Pid, req.Data.Arch, req.Data.Args)
|
||||||
|
|
||||||
|
resp := &libseccomp.ScmpNotifResp{
|
||||||
|
ID: req.ID,
|
||||||
|
Error: 0,
|
||||||
|
Val: 0,
|
||||||
|
Flags: libseccomp.NotifRespFlagContinue,
|
||||||
|
}
|
||||||
|
|
||||||
|
// TOCTOU check
|
||||||
|
if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
|
||||||
|
logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
switch syscallName {
|
||||||
|
case "mkdir":
|
||||||
|
fileName, err := readArgString(req.Pid, int64(req.Data.Args[0]))
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("Cannot read argument: %s", err)
|
||||||
|
resp.Error = int32(unix.ENOSYS)
|
||||||
|
resp.Val = ^uint64(0) // -1
|
||||||
|
goto sendResponse
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Debugf("mkdir: %q", fileName)
|
||||||
|
|
||||||
|
// TOCTOU check
|
||||||
|
if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
|
||||||
|
logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err = runMkdirForContainer(req.Pid, fileName, uint32(req.Data.Args[1]), metadata)
|
||||||
|
if err != nil {
|
||||||
|
resp.Error = int32(unix.ENOSYS)
|
||||||
|
resp.Val = ^uint64(0) // -1
|
||||||
|
}
|
||||||
|
resp.Flags = 0
|
||||||
|
case "chmod", "fchmod", "fchmodat":
|
||||||
|
resp.Error = int32(unix.ENOMEDIUM)
|
||||||
|
resp.Val = ^uint64(0) // -1
|
||||||
|
resp.Flags = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
sendResponse:
|
||||||
|
if err = libseccomp.NotifRespond(fd, resp); err != nil {
|
||||||
|
logrus.Errorf("Error in notification response: %s", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
flag.StringVar(&socketFile, "socketfile", "/run/seccomp-agent.socket", "Socket file")
|
||||||
|
flag.StringVar(&pidFile, "pid-file", "", "Pid file")
|
||||||
|
logrus.SetLevel(logrus.DebugLevel)
|
||||||
|
|
||||||
|
// Parse arguments
|
||||||
|
flag.Parse()
|
||||||
|
if flag.NArg() > 0 {
|
||||||
|
flag.PrintDefaults()
|
||||||
|
logrus.Fatal("Invalid command")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.Remove(socketFile); err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
logrus.Fatalf("Cannot cleanup socket file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if pidFile != "" {
|
||||||
|
pid := fmt.Sprintf("%d", os.Getpid())
|
||||||
|
if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil {
|
||||||
|
logrus.Fatalf("Cannot write pid file: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Info("Waiting for seccomp file descriptors")
|
||||||
|
l, err := net.Listen("unix", socketFile)
|
||||||
|
if err != nil {
|
||||||
|
logrus.Fatalf("Cannot listen: %s", err)
|
||||||
|
}
|
||||||
|
defer l.Close()
|
||||||
|
|
||||||
|
for {
|
||||||
|
conn, err := l.Accept()
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("Cannot accept connection: %s", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
socket, err := conn.(*net.UnixConn).File()
|
||||||
|
conn.Close()
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("Cannot get socket: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
newFd, metadata, err := handleNewMessage(int(socket.Fd()))
|
||||||
|
socket.Close()
|
||||||
|
if err != nil {
|
||||||
|
logrus.Errorf("Error receiving seccomp file descriptor: %v", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure we don't allow strings like "/../p", as that means
|
||||||
|
// a file in a different location than expected. We just want
|
||||||
|
// safe things to use as a suffix for a file name.
|
||||||
|
metadata = filepath.Base(metadata)
|
||||||
|
if strings.Contains(metadata, "/") {
|
||||||
|
// Fallback to a safe string.
|
||||||
|
metadata = "agent-generated-suffix"
|
||||||
|
}
|
||||||
|
|
||||||
|
logrus.Infof("Received new seccomp fd: %v", newFd)
|
||||||
|
go notifHandler(libseccomp.ScmpFd(newFd), metadata)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
//go:build !linux || !seccomp
|
||||||
|
// +build !linux !seccomp
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
fmt.Println("Not supported, to use this compile with build tag: seccomp.")
|
||||||
|
}
|
|
@ -113,6 +113,8 @@ __runc_complete_capabilities() {
|
||||||
AUDIT_WRITE
|
AUDIT_WRITE
|
||||||
AUDIT_READ
|
AUDIT_READ
|
||||||
BLOCK_SUSPEND
|
BLOCK_SUSPEND
|
||||||
|
BPF
|
||||||
|
CHECKPOINT_RESTORE
|
||||||
CHOWN
|
CHOWN
|
||||||
DAC_OVERRIDE
|
DAC_OVERRIDE
|
||||||
DAC_READ_SEARCH
|
DAC_READ_SEARCH
|
||||||
|
@ -130,6 +132,7 @@ __runc_complete_capabilities() {
|
||||||
NET_BIND_SERVICE
|
NET_BIND_SERVICE
|
||||||
NET_BROADCAST
|
NET_BROADCAST
|
||||||
NET_RAW
|
NET_RAW
|
||||||
|
PERFMON
|
||||||
SETFCAP
|
SETFCAP
|
||||||
SETGID
|
SETGID
|
||||||
SETPCAP
|
SETPCAP
|
||||||
|
@ -170,6 +173,7 @@ _runc_exec() {
|
||||||
--apparmor
|
--apparmor
|
||||||
--cap, -c
|
--cap, -c
|
||||||
--preserve-fds
|
--preserve-fds
|
||||||
|
--ignore-paused
|
||||||
"
|
"
|
||||||
|
|
||||||
local all_options="$options_with_args $boolean_options"
|
local all_options="$options_with_args $boolean_options"
|
||||||
|
@ -221,6 +225,7 @@ _runc_runc() {
|
||||||
--help
|
--help
|
||||||
--version -v
|
--version -v
|
||||||
--debug
|
--debug
|
||||||
|
--systemd-cgroup
|
||||||
"
|
"
|
||||||
local options_with_args="
|
local options_with_args="
|
||||||
--log
|
--log
|
||||||
|
@ -733,8 +738,6 @@ _runc_update() {
|
||||||
--cpu-share
|
--cpu-share
|
||||||
--cpuset-cpus
|
--cpuset-cpus
|
||||||
--cpuset-mems
|
--cpuset-mems
|
||||||
--kernel-memory
|
|
||||||
--kernel-memory-tcp
|
|
||||||
--memory
|
--memory
|
||||||
--memory-reservation
|
--memory-reservation
|
||||||
--memory-swap
|
--memory-swap
|
||||||
|
@ -769,7 +772,6 @@ _runc() {
|
||||||
delete
|
delete
|
||||||
events
|
events
|
||||||
exec
|
exec
|
||||||
init
|
|
||||||
kill
|
kill
|
||||||
list
|
list
|
||||||
pause
|
pause
|
||||||
|
|
21
create.go
21
create.go
|
@ -1,6 +1,7 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
|
@ -55,20 +56,12 @@ command(s) that get executed on start, edit the args parameter of the spec. See
|
||||||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := revisePidFile(context); err != nil {
|
status, err := startContainer(context, CT_ACT_CREATE, nil)
|
||||||
return err
|
if err == nil {
|
||||||
|
// exit with the container's exit status so any external supervisor
|
||||||
|
// is notified of the exit with the correct exit status.
|
||||||
|
os.Exit(status)
|
||||||
}
|
}
|
||||||
spec, err := setupSpec(context)
|
return fmt.Errorf("runc create failed: %w", err)
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
status, err := startContainer(context, spec, CT_ACT_CREATE, nil)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// exit with the container's exit status so any external supervisor is
|
|
||||||
// notified of the exit with the correct exit status.
|
|
||||||
os.Exit(status)
|
|
||||||
return nil
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,9 @@
|
||||||
|
runc (1.1.0-ok1) yangtze; urgency=medium
|
||||||
|
|
||||||
|
* Merge new upstream version 1.1.0
|
||||||
|
|
||||||
|
-- Luoyaoming <luoyaoming@kylinos.cn> Fri, 30 Dec 2022 11:11:29 +0800
|
||||||
|
|
||||||
runc (1.0.0~rc10-ok2) yangtze; urgency=medium
|
runc (1.0.0~rc10-ok2) yangtze; urgency=medium
|
||||||
|
|
||||||
* Update version.
|
* Update version.
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
From: Dmitry Smirnov <onlyjob@debian.org>
|
||||||
|
Date: Thu, 28 Jul 2022 16:28:22 +0800
|
||||||
|
Subject: fix FTBFS on i686
|
||||||
|
|
||||||
|
src/github.com/opencontainers/runc/libcontainer/user/user_test.go:448:36: constant 2147483648 overflows int
|
||||||
|
Last-Update: 2018-06-16
|
||||||
|
Forwarded: https://github.com/opencontainers/runc/pull/1821
|
||||||
|
Bug-Upstream: https://github.com/opencontainers/runc/issues/941
|
||||||
|
---
|
||||||
|
libcontainer/user/user.go | 2 +-
|
||||||
|
libcontainer/user/user_test.go | 2 +-
|
||||||
|
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/user/user.go b/libcontainer/user/user.go
|
||||||
|
index 7b912bb..38caded 100644
|
||||||
|
--- a/libcontainer/user/user.go
|
||||||
|
+++ b/libcontainer/user/user.go
|
||||||
|
@@ -473,7 +473,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
|
||||||
|
return nil, fmt.Errorf("Unable to find group %s", ag)
|
||||||
|
}
|
||||||
|
// Ensure gid is inside gid range.
|
||||||
|
- if gid < minId || gid > maxId {
|
||||||
|
+ if gid < minId || gid >= maxId {
|
||||||
|
return nil, ErrRange
|
||||||
|
}
|
||||||
|
gidMap[gid] = struct{}{}
|
||||||
|
diff --git a/libcontainer/user/user_test.go b/libcontainer/user/user_test.go
|
||||||
|
index 24ee559..a4aabdc 100644
|
||||||
|
--- a/libcontainer/user/user_test.go
|
||||||
|
+++ b/libcontainer/user/user_test.go
|
||||||
|
@@ -445,7 +445,7 @@ this is just some garbage data
|
||||||
|
if utils.GetIntSize() > 4 {
|
||||||
|
tests = append(tests, foo{
|
||||||
|
// groups with too large id
|
||||||
|
- groups: []string{strconv.Itoa(1 << 31)},
|
||||||
|
+ groups: []string{strconv.Itoa( 1<<31 -1 )},
|
||||||
|
expected: nil,
|
||||||
|
hasError: true,
|
||||||
|
})
|
|
@ -0,0 +1,48 @@
|
||||||
|
From: Dmitry Smirnov <onlyjob@debian.org>
|
||||||
|
Date: Thu, 28 Jul 2022 16:28:22 +0800
|
||||||
|
Subject: disabled unreliable tests due to random failures on [ppc64el,
|
||||||
|
s390x].
|
||||||
|
|
||||||
|
Last-Update: 2018-09-27
|
||||||
|
Forwarded: not-needed
|
||||||
|
Bug-Upstream: https://github.com/opencontainers/runc/issues/1822
|
||||||
|
---
|
||||||
|
libcontainer/cgroups/fs/hugetlb_test.go | 4 ++++
|
||||||
|
1 file changed, 4 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/cgroups/fs/hugetlb_test.go b/libcontainer/cgroups/fs/hugetlb_test.go
|
||||||
|
index 9ddacfe..9b60650 100644
|
||||||
|
--- a/libcontainer/cgroups/fs/hugetlb_test.go
|
||||||
|
+++ b/libcontainer/cgroups/fs/hugetlb_test.go
|
||||||
|
@@ -89,6 +89,7 @@ func TestHugetlbStats(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||||
|
+t.Skip("Disabled unreliable test")
|
||||||
|
helper := NewCgroupTestUtil("hugetlb", t)
|
||||||
|
defer helper.cleanup()
|
||||||
|
helper.writeFileContents(map[string]string{
|
||||||
|
@@ -104,6 +105,7 @@ func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||||
|
+t.Skip("Disabled unreliable test")
|
||||||
|
helper := NewCgroupTestUtil("hugetlb", t)
|
||||||
|
defer helper.cleanup()
|
||||||
|
for _, pageSize := range HugePageSizes {
|
||||||
|
@@ -121,6 +123,7 @@ func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||||
|
+t.Skip("Disabled unreliable test")
|
||||||
|
helper := NewCgroupTestUtil("hugetlb", t)
|
||||||
|
defer helper.cleanup()
|
||||||
|
for _, pageSize := range HugePageSizes {
|
||||||
|
@@ -139,6 +142,7 @@ func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
|
||||||
|
+t.Skip("Disabled unreliable test")
|
||||||
|
helper := NewCgroupTestUtil("hugetlb", t)
|
||||||
|
defer helper.cleanup()
|
||||||
|
helper.writeFileContents(map[string]string{
|
|
@ -0,0 +1,22 @@
|
||||||
|
From: Dmitry Smirnov <onlyjob@debian.org>
|
||||||
|
Date: Thu, 28 Jul 2022 16:28:22 +0800
|
||||||
|
Subject: disable test (requires root)
|
||||||
|
|
||||||
|
Last-Update: 2018-06-15
|
||||||
|
Forwarded: not-needed
|
||||||
|
---
|
||||||
|
libcontainer/factory_linux_test.go | 1 +
|
||||||
|
1 file changed, 1 insertion(+)
|
||||||
|
|
||||||
|
diff --git a/libcontainer/factory_linux_test.go b/libcontainer/factory_linux_test.go
|
||||||
|
index 8d0ca8a..1dc0180 100644
|
||||||
|
--- a/libcontainer/factory_linux_test.go
|
||||||
|
+++ b/libcontainer/factory_linux_test.go
|
||||||
|
@@ -78,6 +78,7 @@ func TestFactoryNewIntelRdt(t *testing.T) {
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFactoryNewTmpfs(t *testing.T) {
|
||||||
|
+t.Skip("DM - skipping privileged test")
|
||||||
|
root, rerr := newTestRoot()
|
||||||
|
if rerr != nil {
|
||||||
|
t.Fatal(rerr)
|
12
delete.go
12
delete.go
|
@ -1,12 +1,10 @@
|
||||||
// +build !solaris
|
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"syscall"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer"
|
"github.com/opencontainers/runc/libcontainer"
|
||||||
|
@ -19,12 +17,12 @@ func killContainer(container libcontainer.Container) error {
|
||||||
_ = container.Signal(unix.SIGKILL, false)
|
_ = container.Signal(unix.SIGKILL, false)
|
||||||
for i := 0; i < 100; i++ {
|
for i := 0; i < 100; i++ {
|
||||||
time.Sleep(100 * time.Millisecond)
|
time.Sleep(100 * time.Millisecond)
|
||||||
if err := container.Signal(syscall.Signal(0), false); err != nil {
|
if err := container.Signal(unix.Signal(0), false); err != nil {
|
||||||
destroy(container)
|
destroy(container)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return fmt.Errorf("container init still running")
|
return errors.New("container init still running")
|
||||||
}
|
}
|
||||||
|
|
||||||
var deleteCommand = cli.Command{
|
var deleteCommand = cli.Command{
|
||||||
|
@ -55,7 +53,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
|
||||||
force := context.Bool("force")
|
force := context.Bool("force")
|
||||||
container, err := getContainer(context)
|
container, err := getContainer(context)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists {
|
if errors.Is(err, libcontainer.ErrNotExist) {
|
||||||
// if there was an aborted start or something of the sort then the container's directory could exist but
|
// if there was an aborted start or something of the sort then the container's directory could exist but
|
||||||
// libcontainer does not see it because the state.json file inside that directory was never created.
|
// libcontainer does not see it because the state.json file inside that directory was never created.
|
||||||
path := filepath.Join(context.GlobalString("root"), id)
|
path := filepath.Join(context.GlobalString("root"), id)
|
||||||
|
@ -81,7 +79,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
|
||||||
if force {
|
if force {
|
||||||
return killContainer(container)
|
return killContainer(container)
|
||||||
}
|
}
|
||||||
return fmt.Errorf("cannot delete container %s that is not stopped: %s\n", id, s)
|
return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
Binary file not shown.
|
@ -0,0 +1,62 @@
|
||||||
|
# cgroup v2
|
||||||
|
|
||||||
|
runc fully supports cgroup v2 (unified mode) since v1.0.0-rc93.
|
||||||
|
|
||||||
|
To use cgroup v2, you might need to change the configuration of the host init system.
|
||||||
|
Fedora (>= 31) uses cgroup v2 by default and no extra configuration is required.
|
||||||
|
On other systemd-based distros, cgroup v2 can be enabled by adding `systemd.unified_cgroup_hierarchy=1` to the kernel cmdline.
|
||||||
|
|
||||||
|
## Am I using cgroup v2?
|
||||||
|
|
||||||
|
Yes if `/sys/fs/cgroup/cgroup.controllers` is present.
|
||||||
|
|
||||||
|
## Host Requirements
|
||||||
|
### Kernel
|
||||||
|
* Recommended version: 5.2 or later
|
||||||
|
* Minimum version: 4.15
|
||||||
|
|
||||||
|
Kernels older than 5.2 are not recommended due to the lack of the freezer.
|
||||||
|
|
||||||
|
Notably, kernel older than 4.15 MUST NOT be used (unless you are running containers with user namespaces), as it lacks support for controlling permissions of devices.
|
||||||
|
|
||||||
|
### Systemd
|
||||||
|
On cgroup v2 hosts, it is highly recommended to run runc with the systemd cgroup driver (`runc --systemd-cgroup`), though not mandatory.
|
||||||
|
|
||||||
|
The recommended systemd version is 244 or later. Older systemd does not support delegation of `cpuset` controller.
|
||||||
|
|
||||||
|
Make sure you also have the `dbus-user-session` (Debian/Ubuntu) or `dbus-daemon` (CentOS/Fedora) package installed, and that `dbus` is running. On Debian-flavored distros, this can be accomplished like so:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ sudo apt install -y dbus-user-session
|
||||||
|
$ systemctl --user start dbus
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rootless
|
||||||
|
On cgroup v2 hosts, rootless runc can talk to systemd to get cgroup permissions to be delegated.
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ runc spec --rootless
|
||||||
|
$ jq '.linux.cgroupsPath="user.slice:runc:foo"' config.json | sponge config.json
|
||||||
|
$ runc --systemd-cgroup run foo
|
||||||
|
```
|
||||||
|
|
||||||
|
The container processes are executed in a cgroup like `/user.slice/user-$(id -u).slice/user@$(id -u).service/user.slice/runc-foo.scope`.
|
||||||
|
|
||||||
|
### Configuring delegation
|
||||||
|
Typically, only `memory` and `pids` controllers are delegated to non-root users by default.
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ cat /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers
|
||||||
|
memory pids
|
||||||
|
```
|
||||||
|
|
||||||
|
To allow delegation of other controllers, you need to change the systemd configuration as follows:
|
||||||
|
|
||||||
|
```console
|
||||||
|
# mkdir -p /etc/systemd/system/user@.service.d
|
||||||
|
# cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
|
||||||
|
[Service]
|
||||||
|
Delegate=cpu cpuset io memory pids
|
||||||
|
EOF
|
||||||
|
# systemctl daemon-reload
|
||||||
|
```
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Experimental features
|
||||||
|
|
||||||
|
The following features are experimental and subject to change:
|
||||||
|
|
||||||
|
- The `runc features` command (since runc v1.1.0)
|
||||||
|
|
||||||
|
The following features were experimental in the past:
|
||||||
|
|
||||||
|
Feature | Experimental release | Graduation release
|
||||||
|
---------------------------------------- | -------------------- | ------------------
|
||||||
|
cgroup v2 | v1.0.0-rc91 | v1.0.0-rc93
|
|
@ -0,0 +1,130 @@
|
||||||
|
## systemd cgroup driver
|
||||||
|
|
||||||
|
By default, runc creates cgroups and sets cgroup limits on its own (this mode
|
||||||
|
is known as fs cgroup driver). When `--systemd-cgroup` global option is given
|
||||||
|
(as in e.g. `runc --systemd-cgroup run ...`), runc switches to systemd cgroup
|
||||||
|
driver. This document describes its features and peculiarities.
|
||||||
|
|
||||||
|
### systemd unit name and placement
|
||||||
|
|
||||||
|
When creating a container, runc requests systemd (over dbus) to create
|
||||||
|
a transient unit for the container, and place it into a specified slice.
|
||||||
|
|
||||||
|
The name of the unit and the containing slice are derived from the container
|
||||||
|
runtime spec in the following way:
|
||||||
|
|
||||||
|
1. If `Linux.CgroupsPath` is set, it is expected to be in the form
|
||||||
|
`[slice]:[prefix]:[name]`.
|
||||||
|
|
||||||
|
Here `slice` is a systemd slice under which the container is placed.
|
||||||
|
If empty, it defaults to `system.slice`, except when cgroup v2 is
|
||||||
|
used and rootless container is created, in which case it defaults
|
||||||
|
to `user.slice`.
|
||||||
|
|
||||||
|
Note that `slice` can contain dashes to denote a sub-slice
|
||||||
|
(e.g. `user-1000.slice` is a correct notation, meaning a subslice
|
||||||
|
of `user.slice`), but it must not contain slashes (e.g.
|
||||||
|
`user.slice/user-1000.slice` is invalid).
|
||||||
|
|
||||||
|
A `slice` of `-` represents a root slice.
|
||||||
|
|
||||||
|
Next, `prefix` and `name` are used to compose the unit name, which
|
||||||
|
is `<prefix>-<name>.scope`, unless `name` has `.slice` suffix, in
|
||||||
|
which case `prefix` is ignored and the `name` is used as is.
|
||||||
|
|
||||||
|
2. If `Linux.CgroupsPath` is not set or empty, it works the same way as if it
|
||||||
|
were set to `:runc:<container-id>`. See the description above to see
|
||||||
|
what it transforms to.
|
||||||
|
|
||||||
|
As described above, a unit being created can either be a scope or a slice.
|
||||||
|
For a scope, runc specifies its parent slice via a _Slice=_ systemd property,
|
||||||
|
and also sets _Delegate=true_. For a slice, runc specifies a weak dependency on
|
||||||
|
the parent slice via a _Wants=_ property.
|
||||||
|
|
||||||
|
### Resource limits
|
||||||
|
|
||||||
|
runc always enables accounting for all controllers, regardless of any limits
|
||||||
|
being set. This means it unconditionally sets the following properties for the
|
||||||
|
systemd unit being created:
|
||||||
|
|
||||||
|
* _CPUAccounting=true_
|
||||||
|
* _IOAccounting=true_ (_BlockIOAccounting_ for cgroup v1)
|
||||||
|
* _MemoryAccounting=true_
|
||||||
|
* _TasksAccounting=true_
|
||||||
|
|
||||||
|
The resource limits of the systemd unit are set by runc by translating the
|
||||||
|
runtime spec resources to systemd unit properties.
|
||||||
|
|
||||||
|
Such translation is by no means complete, as there are some cgroup properties
|
||||||
|
that can not be set via systemd. Therefore, runc systemd cgroup driver is
|
||||||
|
backed by fs driver (in other words, cgroup limits are first set via systemd
|
||||||
|
unit properties, and then by writing to cgroupfs files).
|
||||||
|
|
||||||
|
The set of runtime spec resources which is translated by runc to systemd unit
|
||||||
|
properties depends on kernel cgroup version being used (v1 or v2), and on the
|
||||||
|
systemd version being run. If an older systemd version (which does not support
|
||||||
|
some resources) is used, runc does not set those resources.
|
||||||
|
|
||||||
|
The following tables summarize which properties are translated.
|
||||||
|
|
||||||
|
#### cgroup v1
|
||||||
|
|
||||||
|
| runtime spec resource | systemd property name | min systemd version |
|
||||||
|
|-----------------------|-----------------------|---------------------|
|
||||||
|
| memory.limit | MemoryLimit | |
|
||||||
|
| cpu.shares | CPUShares | |
|
||||||
|
| blockIO.weight | BlockIOWeight | |
|
||||||
|
| pids.limit | TasksMax | |
|
||||||
|
| cpu.cpus | AllowedCPUs | v244 |
|
||||||
|
| cpu.mems | AllowedMemoryNodes | v244 |
|
||||||
|
|
||||||
|
#### cgroup v2
|
||||||
|
|
||||||
|
| runtime spec resource | systemd property name | min systemd version |
|
||||||
|
|-------------------------|-----------------------|---------------------|
|
||||||
|
| memory.limit | MemoryMax | |
|
||||||
|
| memory.reservation | MemoryLow | |
|
||||||
|
| memory.swap | MemorySwapMax | |
|
||||||
|
| cpu.shares | CPUWeight | |
|
||||||
|
| pids.limit | TasksMax | |
|
||||||
|
| cpu.cpus | AllowedCPUs | v244 |
|
||||||
|
| cpu.mems | AllowedMemoryNodes | v244 |
|
||||||
|
| unified.cpu.max | CPUQuota, CPUQuotaPeriodSec | v242 |
|
||||||
|
| unified.cpu.weight | CPUWeight | |
|
||||||
|
| unified.cpuset.cpus | AllowedCPUs | v244 |
|
||||||
|
| unified.cpuset.mems | AllowedMemoryNodes | v244 |
|
||||||
|
| unified.memory.high | MemoryHigh | |
|
||||||
|
| unified.memory.low | MemoryLow | |
|
||||||
|
| unified.memory.min | MemoryMin | |
|
||||||
|
| unified.memory.max | MemoryMax | |
|
||||||
|
| unified.memory.swap.max | MemorySwapMax | |
|
||||||
|
| unified.pids.max | TasksMax | |
|
||||||
|
|
||||||
|
For documentation on systemd unit resource properties, see
|
||||||
|
`systemd.resource-control(5)` man page.
|
||||||
|
|
||||||
|
### Auxiliary properties
|
||||||
|
|
||||||
|
Auxiliary properties of a systemd unit (as shown by `systemctl show
|
||||||
|
<unit-name>` after the container is created) can be set (or overwritten) by
|
||||||
|
adding annotations to the container runtime spec (`config.json`).
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"annotations": {
|
||||||
|
"org.systemd.property.TimeoutStopUSec": "uint64 123456789",
|
||||||
|
"org.systemd.property.CollectMode":"'inactive-or-failed'"
|
||||||
|
},
|
||||||
|
```
|
||||||
|
|
||||||
|
The above will set the following properties:
|
||||||
|
|
||||||
|
* `TimeoutStopSec` to 2 minutes and 3 seconds;
|
||||||
|
* `CollectMode` to "inactive-or-failed".
|
||||||
|
|
||||||
|
The values must be in the gvariant format (for details, see
|
||||||
|
[gvariant documentation](https://developer.gnome.org/glib/stable/gvariant-text.html)).
|
||||||
|
|
||||||
|
To find out which type systemd expects for a particular parameter, please
|
||||||
|
consult systemd sources.
|
|
@ -113,6 +113,33 @@ interact with pseudo-terminal `stdio`][tty_ioctl(4)].
|
||||||
> means that it is not really possible to uniquely distinguish between `stdout`
|
> means that it is not really possible to uniquely distinguish between `stdout`
|
||||||
> and `stderr` from the caller's perspective.
|
> and `stderr` from the caller's perspective.
|
||||||
|
|
||||||
|
#### Issues
|
||||||
|
|
||||||
|
If you see an error like
|
||||||
|
|
||||||
|
```
|
||||||
|
open /dev/tty: no such device or address
|
||||||
|
```
|
||||||
|
|
||||||
|
from runc, it means it can't open a terminal (because there isn't one). This
|
||||||
|
can happen when stdin (and possibly also stdout and stderr) are redirected,
|
||||||
|
or in some environments that lack a tty (such as GitHub Actions runners).
|
||||||
|
|
||||||
|
The solution to this is to *not* use a terminal for the container, i.e. have
|
||||||
|
`terminal: false` in `config.json`. If the container really needs a terminal
|
||||||
|
(some programs require one), you can provide one, using one of the following
|
||||||
|
methods.
|
||||||
|
|
||||||
|
One way is to use `ssh` with the `-tt` flag. The second `t` forces a terminal
|
||||||
|
allocation even if there's no local one -- and so it is required when stdin is
|
||||||
|
not a terminal (some `ssh` implementations only look for a terminal on stdin).
|
||||||
|
|
||||||
|
Another way is to run runc under the `script` utility, like this:
|
||||||
|
|
||||||
|
```console
|
||||||
|
$ script -e -c 'runc run <container>'
|
||||||
|
```
|
||||||
|
|
||||||
[tty_ioctl(4)]: https://linux.die.net/man/4/tty_ioctl
|
[tty_ioctl(4)]: https://linux.die.net/man/4/tty_ioctl
|
||||||
|
|
||||||
### <a name="pass-through"> Pass-Through ###
|
### <a name="pass-through"> Pass-Through ###
|
||||||
|
@ -124,7 +151,7 @@ passing of file descriptors -- [details below](#runc-modes)). As an example
|
||||||
(assuming that `terminal: false` is set in `config.json`):
|
(assuming that `terminal: false` is set in `config.json`):
|
||||||
|
|
||||||
```
|
```
|
||||||
% echo input | runc run some_container > /tmp/log.out 2>& /tmp/log.err
|
% echo input | runc run some_container > /tmp/log.out 2> /tmp/log.err
|
||||||
```
|
```
|
||||||
|
|
||||||
Here the container's various `stdio` file descriptors will be substituted with
|
Here the container's various `stdio` file descriptors will be substituted with
|
||||||
|
@ -228,6 +255,19 @@ Unfortunately using detached mode is a bit more complicated and requires more
|
||||||
care than the foreground mode -- mainly because it is now up to the caller to
|
care than the foreground mode -- mainly because it is now up to the caller to
|
||||||
handle the `stdio` of the container.
|
handle the `stdio` of the container.
|
||||||
|
|
||||||
|
Another complication is that the parent process is responsible for acting as
|
||||||
|
the subreaper for the container. In short, you need to call
|
||||||
|
`prctl(PR_SET_CHILD_SUBREAPER, 1, ...)` in the parent process and correctly
|
||||||
|
handle the implications of being a subreaper. Failing to do so may result in
|
||||||
|
zombie processes being accumulated on your host.
|
||||||
|
|
||||||
|
These tasks are usually performed by a dedicated (and minimal) monitor process
|
||||||
|
per-container. For the sake of comparison, other runtimes such as LXC do not
|
||||||
|
have an equivalent detached mode and instead integrate this monitor process
|
||||||
|
into the container runtime itself -- this has several tradeoffs, and runc has
|
||||||
|
opted to support delegating the monitoring responsibility to the parent process
|
||||||
|
through this detached mode.
|
||||||
|
|
||||||
#### Detached Pass-Through ####
|
#### Detached Pass-Through ####
|
||||||
|
|
||||||
In detached mode, pass-through actually does what it says on the tin -- the
|
In detached mode, pass-through actually does what it says on the tin -- the
|
||||||
|
|
41
events.go
41
events.go
|
@ -1,9 +1,8 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
|
@ -40,7 +39,7 @@ information is displayed once every 5 seconds.`,
|
||||||
}
|
}
|
||||||
duration := context.Duration("interval")
|
duration := context.Duration("interval")
|
||||||
if duration <= 0 {
|
if duration <= 0 {
|
||||||
return fmt.Errorf("duration interval must be greater than 0")
|
return errors.New("duration interval must be greater than 0")
|
||||||
}
|
}
|
||||||
status, err := container.Status()
|
status, err := container.Status()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -125,10 +124,14 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
|
||||||
s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode
|
s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode
|
||||||
s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage
|
s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage
|
||||||
s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage
|
s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage
|
||||||
|
s.CPU.Usage.PercpuKernel = cg.CpuStats.CpuUsage.PercpuUsageInKernelmode
|
||||||
|
s.CPU.Usage.PercpuUser = cg.CpuStats.CpuUsage.PercpuUsageInUsermode
|
||||||
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
|
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
|
||||||
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
|
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
|
||||||
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
|
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
|
||||||
|
|
||||||
|
s.CPUSet = types.CPUSet(cg.CPUSetStats)
|
||||||
|
|
||||||
s.Memory.Cache = cg.MemoryStats.Cache
|
s.Memory.Cache = cg.MemoryStats.Cache
|
||||||
s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage)
|
s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage)
|
||||||
s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage)
|
s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage)
|
||||||
|
@ -151,16 +154,22 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
|
||||||
}
|
}
|
||||||
|
|
||||||
if is := ls.IntelRdtStats; is != nil {
|
if is := ls.IntelRdtStats; is != nil {
|
||||||
if intelrdt.IsCatEnabled() {
|
if intelrdt.IsCATEnabled() {
|
||||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||||
}
|
}
|
||||||
if intelrdt.IsMbaEnabled() {
|
if intelrdt.IsMBAEnabled() {
|
||||||
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
|
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
|
||||||
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
|
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
|
||||||
s.IntelRdt.MemBwSchema = is.MemBwSchema
|
s.IntelRdt.MemBwSchema = is.MemBwSchema
|
||||||
}
|
}
|
||||||
|
if intelrdt.IsMBMEnabled() {
|
||||||
|
s.IntelRdt.MBMStats = is.MBMStats
|
||||||
|
}
|
||||||
|
if intelrdt.IsCMTEnabled() {
|
||||||
|
s.IntelRdt.CMTStats = is.CMTStats
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s.NetworkInterfaces = ls.Interfaces
|
s.NetworkInterfaces = ls.Interfaces
|
||||||
|
@ -187,29 +196,17 @@ func convertMemoryEntry(c cgroups.MemoryData) types.MemoryEntry {
|
||||||
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []types.BlkioEntry {
|
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []types.BlkioEntry {
|
||||||
var out []types.BlkioEntry
|
var out []types.BlkioEntry
|
||||||
for _, e := range c {
|
for _, e := range c {
|
||||||
out = append(out, types.BlkioEntry{
|
out = append(out, types.BlkioEntry(e))
|
||||||
Major: e.Major,
|
|
||||||
Minor: e.Minor,
|
|
||||||
Op: e.Op,
|
|
||||||
Value: e.Value,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *types.L3CacheInfo {
|
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *types.L3CacheInfo {
|
||||||
return &types.L3CacheInfo{
|
ci := types.L3CacheInfo(*i)
|
||||||
CbmMask: i.CbmMask,
|
return &ci
|
||||||
MinCbmBits: i.MinCbmBits,
|
|
||||||
NumClosids: i.NumClosids,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func convertMemBwInfo(i *intelrdt.MemBwInfo) *types.MemBwInfo {
|
func convertMemBwInfo(i *intelrdt.MemBwInfo) *types.MemBwInfo {
|
||||||
return &types.MemBwInfo{
|
mi := types.MemBwInfo(*i)
|
||||||
BandwidthGran: i.BandwidthGran,
|
return &mi
|
||||||
DelayLinear: i.DelayLinear,
|
|
||||||
MinBandwidth: i.MinBandwidth,
|
|
||||||
NumClosids: i.NumClosids,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
68
exec.go
68
exec.go
|
@ -1,9 +1,8 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
@ -84,15 +83,18 @@ following will output a list of processes running in the container:
|
||||||
Value: &cli.StringSlice{},
|
Value: &cli.StringSlice{},
|
||||||
Usage: "add a capability to the bounding set for the process",
|
Usage: "add a capability to the bounding set for the process",
|
||||||
},
|
},
|
||||||
cli.BoolFlag{
|
|
||||||
Name: "no-subreaper",
|
|
||||||
Usage: "disable the use of the subreaper used to reap reparented processes",
|
|
||||||
Hidden: true,
|
|
||||||
},
|
|
||||||
cli.IntFlag{
|
cli.IntFlag{
|
||||||
Name: "preserve-fds",
|
Name: "preserve-fds",
|
||||||
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
|
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
|
||||||
},
|
},
|
||||||
|
cli.StringSliceFlag{
|
||||||
|
Name: "cgroup",
|
||||||
|
Usage: "run the process in an (existing) sub-cgroup(s). Format is [<controller>:]<cgroup>.",
|
||||||
|
},
|
||||||
|
cli.BoolFlag{
|
||||||
|
Name: "ignore-paused",
|
||||||
|
Usage: "allow exec in a paused container",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
Action: func(context *cli.Context) error {
|
Action: func(context *cli.Context) error {
|
||||||
if err := checkArgs(context, 1, minArgs); err != nil {
|
if err := checkArgs(context, 1, minArgs); err != nil {
|
||||||
|
@ -105,11 +107,38 @@ following will output a list of processes running in the container:
|
||||||
if err == nil {
|
if err == nil {
|
||||||
os.Exit(status)
|
os.Exit(status)
|
||||||
}
|
}
|
||||||
return fmt.Errorf("exec failed: %v", err)
|
fatalWithCode(fmt.Errorf("exec failed: %w", err), 255)
|
||||||
|
return nil // to satisfy the linter
|
||||||
},
|
},
|
||||||
SkipArgReorder: true,
|
SkipArgReorder: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getSubCgroupPaths parses the values of the --cgroup flag into a map from
// cgroup controller name to sub-cgroup path.
//
// Each argument has the form [<controller>[,<controller>...]:]<cgroup>. A
// single argument without a "<controller>:" prefix is stored under the empty
// key "". When more than one argument is given, every argument must carry a
// controller prefix.
//
// It returns nil, nil when args is empty. It returns an error when an
// argument contains more than one colon, omits the prefix while other
// arguments are present, has an empty controller name, or names a controller
// that already has a path assigned.
func getSubCgroupPaths(args []string) (map[string]string, error) {
	if len(args) == 0 {
		return nil, nil
	}
	paths := make(map[string]string, len(args))
	for _, c := range args {
		// Split into controller:path; a third field means a stray extra colon.
		cs := strings.SplitN(c, ":", 3)
		switch len(cs) {
		case 1: // no controller: prefix
			if len(args) != 1 {
				return nil, fmt.Errorf("invalid --cgroup argument: %s (missing <controller>: prefix)", c)
			}
			paths[""] = c
		case 2:
			// There may be a few comma-separated controllers.
			for _, ctrl := range strings.Split(cs[0], ",") {
				// The "" key is reserved for the single un-prefixed path;
				// an empty controller name must not be able to claim it.
				if ctrl == "" {
					return nil, fmt.Errorf("invalid --cgroup argument: %s (empty <controller> prefix)", c)
				}
				// Reject conflicting assignments instead of silently
				// keeping whichever one happened to come last.
				if _, dup := paths[ctrl]; dup {
					return nil, fmt.Errorf("invalid --cgroup argument(s): controller %s specified multiple times", ctrl)
				}
				paths[ctrl] = cs[1]
			}
		default:
			return nil, fmt.Errorf("invalid --cgroup argument: %s", c)
		}
	}
	return paths, nil
}
|
||||||
|
|
||||||
func execProcess(context *cli.Context) (int, error) {
|
func execProcess(context *cli.Context) (int, error) {
|
||||||
container, err := getContainer(context)
|
container, err := getContainer(context)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -120,13 +149,15 @@ func execProcess(context *cli.Context) (int, error) {
|
||||||
return -1, err
|
return -1, err
|
||||||
}
|
}
|
||||||
if status == libcontainer.Stopped {
|
if status == libcontainer.Stopped {
|
||||||
return -1, fmt.Errorf("cannot exec a container that has stopped")
|
return -1, errors.New("cannot exec in a stopped container")
|
||||||
|
}
|
||||||
|
if status == libcontainer.Paused && !context.Bool("ignore-paused") {
|
||||||
|
return -1, errors.New("cannot exec in a paused container (use --ignore-paused to override)")
|
||||||
}
|
}
|
||||||
path := context.String("process")
|
path := context.String("process")
|
||||||
if path == "" && len(context.Args()) == 1 {
|
if path == "" && len(context.Args()) == 1 {
|
||||||
return -1, fmt.Errorf("process args cannot be empty")
|
return -1, errors.New("process args cannot be empty")
|
||||||
}
|
}
|
||||||
detach := context.Bool("detach")
|
|
||||||
state, err := container.State()
|
state, err := container.State()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return -1, err
|
return -1, err
|
||||||
|
@ -137,9 +168,9 @@ func execProcess(context *cli.Context) (int, error) {
|
||||||
return -1, err
|
return -1, err
|
||||||
}
|
}
|
||||||
|
|
||||||
logLevel := "info"
|
cgPaths, err := getSubCgroupPaths(context.StringSlice("cgroup"))
|
||||||
if context.GlobalBool("debug") {
|
if err != nil {
|
||||||
logLevel = "debug"
|
return -1, err
|
||||||
}
|
}
|
||||||
|
|
||||||
r := &runner{
|
r := &runner{
|
||||||
|
@ -147,12 +178,12 @@ func execProcess(context *cli.Context) (int, error) {
|
||||||
shouldDestroy: false,
|
shouldDestroy: false,
|
||||||
container: container,
|
container: container,
|
||||||
consoleSocket: context.String("console-socket"),
|
consoleSocket: context.String("console-socket"),
|
||||||
detach: detach,
|
detach: context.Bool("detach"),
|
||||||
pidFile: context.String("pid-file"),
|
pidFile: context.String("pid-file"),
|
||||||
action: CT_ACT_RUN,
|
action: CT_ACT_RUN,
|
||||||
init: false,
|
init: false,
|
||||||
preserveFDs: context.Int("preserve-fds"),
|
preserveFDs: context.Int("preserve-fds"),
|
||||||
logLevel: logLevel,
|
subCgroupPaths: cgPaths,
|
||||||
}
|
}
|
||||||
return r.run(p)
|
return r.run(p)
|
||||||
}
|
}
|
||||||
|
@ -203,6 +234,7 @@ func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
|
||||||
p.Env = append(p.Env, context.StringSlice("env")...)
|
p.Env = append(p.Env, context.StringSlice("env")...)
|
||||||
|
|
||||||
// set the tty
|
// set the tty
|
||||||
|
p.Terminal = false
|
||||||
if context.IsSet("tty") {
|
if context.IsSet("tty") {
|
||||||
p.Terminal = context.Bool("tty")
|
p.Terminal = context.Bool("tty")
|
||||||
}
|
}
|
||||||
|
@ -215,13 +247,13 @@ func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
|
||||||
if len(u) > 1 {
|
if len(u) > 1 {
|
||||||
gid, err := strconv.Atoi(u[1])
|
gid, err := strconv.Atoi(u[1])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err)
|
return nil, fmt.Errorf("parsing %s as int for gid failed: %w", u[1], err)
|
||||||
}
|
}
|
||||||
p.User.GID = uint32(gid)
|
p.User.GID = uint32(gid)
|
||||||
}
|
}
|
||||||
uid, err := strconv.Atoi(u[0])
|
uid, err := strconv.Atoi(u[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err)
|
return nil, fmt.Errorf("parsing %s as int for uid failed: %w", u[0], err)
|
||||||
}
|
}
|
||||||
p.User.UID = uint32(uid)
|
p.User.UID = uint32(uid)
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/capabilities"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||||
|
"github.com/opencontainers/runc/types/features"
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
"github.com/urfave/cli"
|
||||||
|
)
|
||||||
|
|
||||||
|
var featuresCommand = cli.Command{
|
||||||
|
Name: "features",
|
||||||
|
Usage: "show the enabled features",
|
||||||
|
ArgsUsage: "",
|
||||||
|
Description: `Show the enabled features.
|
||||||
|
The result is parsable as a JSON.
|
||||||
|
See https://pkg.go.dev/github.com/opencontainers/runc/types/features for the type definition.
|
||||||
|
The types are experimental and subject to change.
|
||||||
|
`,
|
||||||
|
Action: func(context *cli.Context) error {
|
||||||
|
if err := checkArgs(context, 0, exactArgs); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
tru := true
|
||||||
|
|
||||||
|
feat := features.Features{
|
||||||
|
OCIVersionMin: "1.0.0",
|
||||||
|
OCIVersionMax: specs.Version,
|
||||||
|
Annotations: map[string]string{
|
||||||
|
features.AnnotationRuncVersion: version,
|
||||||
|
features.AnnotationRuncCommit: gitCommit,
|
||||||
|
features.AnnotationRuncCheckpointEnabled: "true",
|
||||||
|
},
|
||||||
|
Hooks: configs.KnownHookNames(),
|
||||||
|
MountOptions: specconv.KnownMountOptions(),
|
||||||
|
Linux: &features.Linux{
|
||||||
|
Namespaces: specconv.KnownNamespaces(),
|
||||||
|
Capabilities: capabilities.KnownCapabilities(),
|
||||||
|
Cgroup: &features.Cgroup{
|
||||||
|
V1: &tru,
|
||||||
|
V2: &tru,
|
||||||
|
Systemd: &tru,
|
||||||
|
SystemdUser: &tru,
|
||||||
|
},
|
||||||
|
Apparmor: &features.Apparmor{
|
||||||
|
Enabled: &tru,
|
||||||
|
},
|
||||||
|
Selinux: &features.Selinux{
|
||||||
|
Enabled: &tru,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
if seccomp.Enabled {
|
||||||
|
feat.Linux.Seccomp = &features.Seccomp{
|
||||||
|
Enabled: &tru,
|
||||||
|
Actions: seccomp.KnownActions(),
|
||||||
|
Operators: seccomp.KnownOperators(),
|
||||||
|
Archs: seccomp.KnownArchs(),
|
||||||
|
}
|
||||||
|
major, minor, patch := seccomp.Version()
|
||||||
|
feat.Annotations[features.AnnotationLibseccompVersion] = fmt.Sprintf("%d.%d.%d", major, minor, patch)
|
||||||
|
}
|
||||||
|
|
||||||
|
enc := json.NewEncoder(context.App.Writer)
|
||||||
|
enc.SetIndent("", " ")
|
||||||
|
return enc.Encode(feat)
|
||||||
|
},
|
||||||
|
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
module github.com/opencontainers/runc
|
||||||
|
|
||||||
|
go 1.16
|
||||||
|
|
||||||
|
require (
|
||||||
|
github.com/checkpoint-restore/go-criu/v5 v5.3.0
|
||||||
|
github.com/cilium/ebpf v0.7.0
|
||||||
|
github.com/containerd/console v1.0.3
|
||||||
|
github.com/coreos/go-systemd/v22 v22.3.2
|
||||||
|
github.com/cyphar/filepath-securejoin v0.2.3
|
||||||
|
github.com/docker/go-units v0.4.0
|
||||||
|
github.com/godbus/dbus/v5 v5.0.6
|
||||||
|
github.com/moby/sys/mountinfo v0.5.0
|
||||||
|
github.com/mrunalp/fileutils v0.5.0
|
||||||
|
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
|
||||||
|
github.com/opencontainers/selinux v1.10.0
|
||||||
|
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921
|
||||||
|
github.com/sirupsen/logrus v1.8.1
|
||||||
|
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
|
||||||
|
// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
|
||||||
|
github.com/urfave/cli v1.22.1
|
||||||
|
github.com/vishvananda/netlink v1.1.0
|
||||||
|
golang.org/x/net v0.0.0-20201224014010-6772e930b67b
|
||||||
|
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c
|
||||||
|
google.golang.org/protobuf v1.27.1
|
||||||
|
)
|
|
@ -0,0 +1,80 @@
|
||||||
|
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||||
|
github.com/checkpoint-restore/go-criu/v5 v5.3.0 h1:wpFFOoomK3389ue2lAb0Boag6XPht5QYpipxmSNL4d8=
|
||||||
|
github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
|
||||||
|
github.com/cilium/ebpf v0.7.0 h1:1k/q3ATgxSXRdrmPfH8d7YK0GfqVsEKZAX9dQZvs56k=
|
||||||
|
github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
|
||||||
|
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
|
||||||
|
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
|
||||||
|
github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=
|
||||||
|
github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||||
|
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
|
||||||
|
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||||
|
github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI=
|
||||||
|
github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
|
||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
|
||||||
|
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||||
|
github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY=
|
||||||
|
github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
|
||||||
|
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||||
|
github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro=
|
||||||
|
github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||||
|
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||||
|
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
|
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
|
||||||
|
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
|
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
|
||||||
|
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||||
|
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||||
|
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||||
|
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||||
|
github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI=
|
||||||
|
github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
|
||||||
|
github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4=
|
||||||
|
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
|
||||||
|
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
|
||||||
|
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
|
||||||
|
github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU=
|
||||||
|
github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
|
||||||
|
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
|
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 h1:58EBmR2dMNL2n/FnbQewK3D14nXr0V9CObDSvMJLq+Y=
|
||||||
|
github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
|
||||||
|
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
|
||||||
|
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||||
|
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
|
||||||
|
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
|
||||||
|
github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
|
||||||
|
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||||
|
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
||||||
|
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
||||||
|
github.com/urfave/cli v1.22.1 h1:+mkCCcOFKPnCmVYVcURKps1Xe+3zP90gSYGNfRkjoIY=
|
||||||
|
github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
|
||||||
|
github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0=
|
||||||
|
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
|
||||||
|
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k=
|
||||||
|
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
|
||||||
|
golang.org/x/net v0.0.0-20201224014010-6772e930b67b h1:iFwSg7t5GZmB/Q5TjiEAsdoLDrdJRC1RiF2WhuV29Qw=
|
||||||
|
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
|
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c h1:DHcbWVXeY+0Y8HHKR+rbLwnoh2F4tNCY7rTiHJ30RmA=
|
||||||
|
golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||||
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
||||||
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||||
|
google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ=
|
||||||
|
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
31
init.go
31
init.go
|
@ -1,44 +1,37 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"os"
|
"os"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer"
|
"github.com/opencontainers/runc/libcontainer"
|
||||||
"github.com/opencontainers/runc/libcontainer/logs"
|
|
||||||
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
||||||
"github.com/sirupsen/logrus"
|
"github.com/sirupsen/logrus"
|
||||||
"github.com/urfave/cli"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||||
|
// This is the golang entry point for runc init, executed
|
||||||
|
// before main() but after libcontainer/nsenter's nsexec().
|
||||||
runtime.GOMAXPROCS(1)
|
runtime.GOMAXPROCS(1)
|
||||||
runtime.LockOSThread()
|
runtime.LockOSThread()
|
||||||
|
|
||||||
level := os.Getenv("_LIBCONTAINER_LOGLEVEL")
|
level, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGLEVEL"))
|
||||||
logLevel, err := logrus.ParseLevel(level)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Sprintf("libcontainer: failed to parse log level: %q: %v", level, err))
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = logs.ConfigureLogging(logs.Config{
|
logPipeFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE"))
|
||||||
LogPipeFd: os.Getenv("_LIBCONTAINER_LOGPIPE"),
|
|
||||||
LogFormat: "json",
|
|
||||||
LogLevel: logLevel,
|
|
||||||
})
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Sprintf("libcontainer: failed to configure logging: %v", err))
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logrus.SetLevel(logrus.Level(level))
|
||||||
|
logrus.SetOutput(os.NewFile(uintptr(logPipeFd), "logpipe"))
|
||||||
|
logrus.SetFormatter(new(logrus.JSONFormatter))
|
||||||
logrus.Debug("child process in init()")
|
logrus.Debug("child process in init()")
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var initCommand = cli.Command{
|
|
||||||
Name: "init",
|
|
||||||
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
|
|
||||||
Action: func(context *cli.Context) error {
|
|
||||||
factory, _ := libcontainer.New("")
|
factory, _ := libcontainer.New("")
|
||||||
if err := factory.StartInitialization(); err != nil {
|
if err := factory.StartInitialization(); err != nil {
|
||||||
// as the error is sent back to the parent there is no need to log
|
// as the error is sent back to the parent there is no need to log
|
||||||
|
@ -46,5 +39,5 @@ var initCommand = cli.Command{
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
panic("libcontainer: container init failed to exec")
|
panic("libcontainer: container init failed to exec")
|
||||||
},
|
}
|
||||||
}
|
}
|
||||||
|
|
16
kill.go
16
kill.go
|
@ -1,14 +1,12 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
|
||||||
|
|
||||||
"github.com/urfave/cli"
|
"github.com/urfave/cli"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
var killCommand = cli.Command{
|
var killCommand = cli.Command{
|
||||||
|
@ -55,13 +53,17 @@ signal to the init process of the "ubuntu01" container:
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseSignal(rawSignal string) (syscall.Signal, error) {
|
func parseSignal(rawSignal string) (unix.Signal, error) {
|
||||||
s, err := strconv.Atoi(rawSignal)
|
s, err := strconv.Atoi(rawSignal)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return syscall.Signal(s), nil
|
return unix.Signal(s), nil
|
||||||
}
|
}
|
||||||
signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")]
|
sig := strings.ToUpper(rawSignal)
|
||||||
if !ok {
|
if !strings.HasPrefix(sig, "SIG") {
|
||||||
|
sig = "SIG" + sig
|
||||||
|
}
|
||||||
|
signal := unix.SignalNum(sig)
|
||||||
|
if signal == 0 {
|
||||||
return -1, fmt.Errorf("unknown signal %q", rawSignal)
|
return -1, fmt.Errorf("unknown signal %q", rawSignal)
|
||||||
}
|
}
|
||||||
return signal, nil
|
return signal, nil
|
||||||
|
|
|
@ -57,90 +57,94 @@ struct describing how the container is to be created. A sample would look simila
|
||||||
|
|
||||||
```go
|
```go
|
||||||
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||||
|
var devices []*configs.DeviceRule
|
||||||
|
for _, device := range specconv.AllowedDevices {
|
||||||
|
devices = append(devices, &device.Rule)
|
||||||
|
}
|
||||||
config := &configs.Config{
|
config := &configs.Config{
|
||||||
Rootfs: "/your/path/to/rootfs",
|
Rootfs: "/your/path/to/rootfs",
|
||||||
Capabilities: &configs.Capabilities{
|
Capabilities: &configs.Capabilities{
|
||||||
Bounding: []string{
|
Bounding: []string{
|
||||||
"CAP_CHOWN",
|
"CAP_CHOWN",
|
||||||
"CAP_DAC_OVERRIDE",
|
"CAP_DAC_OVERRIDE",
|
||||||
"CAP_FSETID",
|
"CAP_FSETID",
|
||||||
"CAP_FOWNER",
|
"CAP_FOWNER",
|
||||||
"CAP_MKNOD",
|
"CAP_MKNOD",
|
||||||
"CAP_NET_RAW",
|
"CAP_NET_RAW",
|
||||||
"CAP_SETGID",
|
"CAP_SETGID",
|
||||||
"CAP_SETUID",
|
"CAP_SETUID",
|
||||||
"CAP_SETFCAP",
|
"CAP_SETFCAP",
|
||||||
"CAP_SETPCAP",
|
"CAP_SETPCAP",
|
||||||
"CAP_NET_BIND_SERVICE",
|
"CAP_NET_BIND_SERVICE",
|
||||||
"CAP_SYS_CHROOT",
|
"CAP_SYS_CHROOT",
|
||||||
"CAP_KILL",
|
"CAP_KILL",
|
||||||
"CAP_AUDIT_WRITE",
|
"CAP_AUDIT_WRITE",
|
||||||
},
|
},
|
||||||
Effective: []string{
|
Effective: []string{
|
||||||
"CAP_CHOWN",
|
"CAP_CHOWN",
|
||||||
"CAP_DAC_OVERRIDE",
|
"CAP_DAC_OVERRIDE",
|
||||||
"CAP_FSETID",
|
"CAP_FSETID",
|
||||||
"CAP_FOWNER",
|
"CAP_FOWNER",
|
||||||
"CAP_MKNOD",
|
"CAP_MKNOD",
|
||||||
"CAP_NET_RAW",
|
"CAP_NET_RAW",
|
||||||
"CAP_SETGID",
|
"CAP_SETGID",
|
||||||
"CAP_SETUID",
|
"CAP_SETUID",
|
||||||
"CAP_SETFCAP",
|
"CAP_SETFCAP",
|
||||||
"CAP_SETPCAP",
|
"CAP_SETPCAP",
|
||||||
"CAP_NET_BIND_SERVICE",
|
"CAP_NET_BIND_SERVICE",
|
||||||
"CAP_SYS_CHROOT",
|
"CAP_SYS_CHROOT",
|
||||||
"CAP_KILL",
|
"CAP_KILL",
|
||||||
"CAP_AUDIT_WRITE",
|
"CAP_AUDIT_WRITE",
|
||||||
},
|
},
|
||||||
Inheritable: []string{
|
Inheritable: []string{
|
||||||
"CAP_CHOWN",
|
"CAP_CHOWN",
|
||||||
"CAP_DAC_OVERRIDE",
|
"CAP_DAC_OVERRIDE",
|
||||||
"CAP_FSETID",
|
"CAP_FSETID",
|
||||||
"CAP_FOWNER",
|
"CAP_FOWNER",
|
||||||
"CAP_MKNOD",
|
"CAP_MKNOD",
|
||||||
"CAP_NET_RAW",
|
"CAP_NET_RAW",
|
||||||
"CAP_SETGID",
|
"CAP_SETGID",
|
||||||
"CAP_SETUID",
|
"CAP_SETUID",
|
||||||
"CAP_SETFCAP",
|
"CAP_SETFCAP",
|
||||||
"CAP_SETPCAP",
|
"CAP_SETPCAP",
|
||||||
"CAP_NET_BIND_SERVICE",
|
"CAP_NET_BIND_SERVICE",
|
||||||
"CAP_SYS_CHROOT",
|
"CAP_SYS_CHROOT",
|
||||||
"CAP_KILL",
|
"CAP_KILL",
|
||||||
"CAP_AUDIT_WRITE",
|
"CAP_AUDIT_WRITE",
|
||||||
},
|
},
|
||||||
Permitted: []string{
|
Permitted: []string{
|
||||||
"CAP_CHOWN",
|
"CAP_CHOWN",
|
||||||
"CAP_DAC_OVERRIDE",
|
"CAP_DAC_OVERRIDE",
|
||||||
"CAP_FSETID",
|
"CAP_FSETID",
|
||||||
"CAP_FOWNER",
|
"CAP_FOWNER",
|
||||||
"CAP_MKNOD",
|
"CAP_MKNOD",
|
||||||
"CAP_NET_RAW",
|
"CAP_NET_RAW",
|
||||||
"CAP_SETGID",
|
"CAP_SETGID",
|
||||||
"CAP_SETUID",
|
"CAP_SETUID",
|
||||||
"CAP_SETFCAP",
|
"CAP_SETFCAP",
|
||||||
"CAP_SETPCAP",
|
"CAP_SETPCAP",
|
||||||
"CAP_NET_BIND_SERVICE",
|
"CAP_NET_BIND_SERVICE",
|
||||||
"CAP_SYS_CHROOT",
|
"CAP_SYS_CHROOT",
|
||||||
"CAP_KILL",
|
"CAP_KILL",
|
||||||
"CAP_AUDIT_WRITE",
|
"CAP_AUDIT_WRITE",
|
||||||
},
|
},
|
||||||
Ambient: []string{
|
Ambient: []string{
|
||||||
"CAP_CHOWN",
|
"CAP_CHOWN",
|
||||||
"CAP_DAC_OVERRIDE",
|
"CAP_DAC_OVERRIDE",
|
||||||
"CAP_FSETID",
|
"CAP_FSETID",
|
||||||
"CAP_FOWNER",
|
"CAP_FOWNER",
|
||||||
"CAP_MKNOD",
|
"CAP_MKNOD",
|
||||||
"CAP_NET_RAW",
|
"CAP_NET_RAW",
|
||||||
"CAP_SETGID",
|
"CAP_SETGID",
|
||||||
"CAP_SETUID",
|
"CAP_SETUID",
|
||||||
"CAP_SETFCAP",
|
"CAP_SETFCAP",
|
||||||
"CAP_SETPCAP",
|
"CAP_SETPCAP",
|
||||||
"CAP_NET_BIND_SERVICE",
|
"CAP_NET_BIND_SERVICE",
|
||||||
"CAP_SYS_CHROOT",
|
"CAP_SYS_CHROOT",
|
||||||
"CAP_KILL",
|
"CAP_KILL",
|
||||||
"CAP_AUDIT_WRITE",
|
"CAP_AUDIT_WRITE",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||||
{Type: configs.NEWNS},
|
{Type: configs.NEWNS},
|
||||||
{Type: configs.NEWUTS},
|
{Type: configs.NEWUTS},
|
||||||
|
@ -155,8 +159,7 @@ config := &configs.Config{
|
||||||
Parent: "system",
|
Parent: "system",
|
||||||
Resources: &configs.Resources{
|
Resources: &configs.Resources{
|
||||||
MemorySwappiness: nil,
|
MemorySwappiness: nil,
|
||||||
AllowAllDevices: nil,
|
Devices: devices,
|
||||||
AllowedDevices: configs.DefaultAllowedDevices,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
MaskPaths: []string{
|
MaskPaths: []string{
|
||||||
|
@ -166,7 +169,7 @@ config := &configs.Config{
|
||||||
ReadonlyPaths: []string{
|
ReadonlyPaths: []string{
|
||||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||||
},
|
},
|
||||||
Devices: configs.DefaultAutoCreatedDevices,
|
Devices: specconv.AllowedDevices,
|
||||||
Hostname: "testing",
|
Hostname: "testing",
|
||||||
Mounts: []*configs.Mount{
|
Mounts: []*configs.Mount{
|
||||||
{
|
{
|
||||||
|
@ -314,7 +317,7 @@ state, err := container.State()
|
||||||
#### Checkpoint & Restore
|
#### Checkpoint & Restore
|
||||||
|
|
||||||
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
||||||
This let's you save the state of a process running inside a container to disk, and then restore
|
This lets you save the state of a process running inside a container to disk, and then restore
|
||||||
that state into a new process, on the same machine or on another machine.
|
that state into a new process, on the same machine or on another machine.
|
||||||
|
|
||||||
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
||||||
|
|
|
@ -1,60 +1,16 @@
|
||||||
// +build apparmor,linux
|
|
||||||
|
|
||||||
package apparmor
|
package apparmor
|
||||||
|
|
||||||
import (
|
import "errors"
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/utils"
|
var (
|
||||||
|
// IsEnabled returns true if apparmor is enabled for the host.
|
||||||
|
IsEnabled = isEnabled
|
||||||
|
|
||||||
|
// ApplyProfile will apply the profile with the specified name to the process after
|
||||||
|
// the next exec. It is only supported on Linux and produces an ErrApparmorNotEnabled
|
||||||
|
// on other platforms.
|
||||||
|
ApplyProfile = applyProfile
|
||||||
|
|
||||||
|
// ErrApparmorNotEnabled indicates that AppArmor is not enabled or not supported.
|
||||||
|
ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
|
||||||
)
|
)
|
||||||
|
|
||||||
// IsEnabled returns true if apparmor is enabled for the host.
|
|
||||||
func IsEnabled() bool {
|
|
||||||
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
|
|
||||||
if _, err = os.Stat("/sbin/apparmor_parser"); err == nil {
|
|
||||||
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
|
|
||||||
return err == nil && len(buf) > 1 && buf[0] == 'Y'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func setProcAttr(attr, value string) error {
|
|
||||||
// Under AppArmor you can only change your own attr, so use /proc/self/
|
|
||||||
// instead of /proc/<tid>/ like libapparmor does
|
|
||||||
path := fmt.Sprintf("/proc/self/attr/%s", attr)
|
|
||||||
|
|
||||||
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
if err := utils.EnsureProcHandle(f); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = fmt.Fprintf(f, "%s", value)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
|
||||||
func changeOnExec(name string) error {
|
|
||||||
value := "exec " + name
|
|
||||||
if err := setProcAttr("exec", value); err != nil {
|
|
||||||
return fmt.Errorf("apparmor failed to apply profile: %s", err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApplyProfile will apply the profile with the specified name to the process after
|
|
||||||
// the next exec.
|
|
||||||
func ApplyProfile(name string) error {
|
|
||||||
if name == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return changeOnExec(name)
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
// +build !apparmor !linux
|
|
||||||
|
|
||||||
package apparmor
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
)
|
|
||||||
|
|
||||||
var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
|
|
||||||
|
|
||||||
func IsEnabled() bool {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func ApplyProfile(name string) error {
|
|
||||||
if name != "" {
|
|
||||||
return ErrApparmorNotEnabled
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
package apparmor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
appArmorEnabled bool
|
||||||
|
checkAppArmor sync.Once
|
||||||
|
)
|
||||||
|
|
||||||
|
// isEnabled returns true if apparmor is enabled for the host.
|
||||||
|
func isEnabled() bool {
|
||||||
|
checkAppArmor.Do(func() {
|
||||||
|
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
|
||||||
|
buf, err := os.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||||
|
appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y'
|
||||||
|
}
|
||||||
|
})
|
||||||
|
return appArmorEnabled
|
||||||
|
}
|
||||||
|
|
||||||
|
func setProcAttr(attr, value string) error {
|
||||||
|
// Under AppArmor you can only change your own attr, so use /proc/self/
|
||||||
|
// instead of /proc/<tid>/ like libapparmor does
|
||||||
|
attrPath := "/proc/self/attr/apparmor/" + attr
|
||||||
|
if _, err := os.Stat(attrPath); errors.Is(err, os.ErrNotExist) {
|
||||||
|
// fall back to the old convention
|
||||||
|
attrPath = "/proc/self/attr/" + attr
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.OpenFile(attrPath, os.O_WRONLY, 0)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
if err := utils.EnsureProcHandle(f); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err = f.WriteString(value)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
||||||
|
func changeOnExec(name string) error {
|
||||||
|
if err := setProcAttr("exec", "exec "+name); err != nil {
|
||||||
|
return fmt.Errorf("apparmor failed to apply profile: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// applyProfile will apply the profile with the specified name to the process after
|
||||||
|
// the next exec. It is only supported on Linux and produces an error on other
|
||||||
|
// platforms.
|
||||||
|
func applyProfile(name string) error {
|
||||||
|
if name == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return changeOnExec(name)
|
||||||
|
}
|
|
@ -0,0 +1,15 @@
|
||||||
|
//go:build !linux
|
||||||
|
// +build !linux
|
||||||
|
|
||||||
|
package apparmor
|
||||||
|
|
||||||
|
func isEnabled() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func applyProfile(name string) error {
|
||||||
|
if name != "" {
|
||||||
|
return ErrApparmorNotEnabled
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -0,0 +1,123 @@
|
||||||
|
//go:build linux
|
||||||
|
// +build linux
|
||||||
|
|
||||||
|
package capabilities
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"github.com/syndtr/gocapability/capability"
|
||||||
|
)
|
||||||
|
|
||||||
|
const allCapabilityTypes = capability.CAPS | capability.BOUNDING | capability.AMBIENT
|
||||||
|
|
||||||
|
var (
|
||||||
|
capabilityMap map[string]capability.Cap
|
||||||
|
capTypes = []capability.CapType{
|
||||||
|
capability.BOUNDING,
|
||||||
|
capability.PERMITTED,
|
||||||
|
capability.INHERITABLE,
|
||||||
|
capability.EFFECTIVE,
|
||||||
|
capability.AMBIENT,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
capabilityMap = make(map[string]capability.Cap, capability.CAP_LAST_CAP+1)
|
||||||
|
for _, c := range capability.List() {
|
||||||
|
if c > capability.CAP_LAST_CAP {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
capabilityMap["CAP_"+strings.ToUpper(c.String())] = c
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// KnownCapabilities returns the list of the known capabilities.
|
||||||
|
// Used by `runc features`.
|
||||||
|
func KnownCapabilities() []string {
|
||||||
|
list := capability.List()
|
||||||
|
res := make([]string, len(list))
|
||||||
|
for i, c := range list {
|
||||||
|
res[i] = "CAP_" + strings.ToUpper(c.String())
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
|
|
||||||
|
// New creates a new Caps from the given Capabilities config. Unknown Capabilities
|
||||||
|
// or Capabilities that are unavailable in the current environment are ignored,
|
||||||
|
// printing a warning instead.
|
||||||
|
func New(capConfig *configs.Capabilities) (*Caps, error) {
|
||||||
|
var (
|
||||||
|
err error
|
||||||
|
c Caps
|
||||||
|
)
|
||||||
|
|
||||||
|
unknownCaps := make(map[string]struct{})
|
||||||
|
c.caps = map[capability.CapType][]capability.Cap{
|
||||||
|
capability.BOUNDING: capSlice(capConfig.Bounding, unknownCaps),
|
||||||
|
capability.EFFECTIVE: capSlice(capConfig.Effective, unknownCaps),
|
||||||
|
capability.INHERITABLE: capSlice(capConfig.Inheritable, unknownCaps),
|
||||||
|
capability.PERMITTED: capSlice(capConfig.Permitted, unknownCaps),
|
||||||
|
capability.AMBIENT: capSlice(capConfig.Ambient, unknownCaps),
|
||||||
|
}
|
||||||
|
if c.pid, err = capability.NewPid2(0); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if err = c.pid.Load(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(unknownCaps) > 0 {
|
||||||
|
logrus.Warn("ignoring unknown or unavailable capabilities: ", mapKeys(unknownCaps))
|
||||||
|
}
|
||||||
|
return &c, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// capSlice converts the slice of capability names in caps, to their numeric
|
||||||
|
// equivalent, and returns them as a slice. Unknown or unavailable capabilities
|
||||||
|
// are not returned, but appended to unknownCaps.
|
||||||
|
func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap {
|
||||||
|
var out []capability.Cap
|
||||||
|
for _, c := range caps {
|
||||||
|
if v, ok := capabilityMap[c]; !ok {
|
||||||
|
unknownCaps[c] = struct{}{}
|
||||||
|
} else {
|
||||||
|
out = append(out, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// mapKeys returns the keys of input in sorted order
|
||||||
|
func mapKeys(input map[string]struct{}) []string {
|
||||||
|
var keys []string
|
||||||
|
for c := range input {
|
||||||
|
keys = append(keys, c)
|
||||||
|
}
|
||||||
|
sort.Strings(keys)
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
|
// Caps holds the capabilities for a container.
|
||||||
|
type Caps struct {
|
||||||
|
pid capability.Capabilities
|
||||||
|
caps map[capability.CapType][]capability.Cap
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
||||||
|
func (c *Caps) ApplyBoundingSet() error {
|
||||||
|
c.pid.Clear(capability.BOUNDING)
|
||||||
|
c.pid.Set(capability.BOUNDING, c.caps[capability.BOUNDING]...)
|
||||||
|
return c.pid.Apply(capability.BOUNDING)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ApplyCaps sets all the capabilities for the current process in the config.
|
||||||
|
func (c *Caps) ApplyCaps() error {
|
||||||
|
c.pid.Clear(allCapabilityTypes)
|
||||||
|
for _, g := range capTypes {
|
||||||
|
c.pid.Set(g, c.caps[g]...)
|
||||||
|
}
|
||||||
|
return c.pid.Apply(allCapabilityTypes)
|
||||||
|
}
|
|
@ -0,0 +1,71 @@
|
||||||
|
package capabilities
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"github.com/sirupsen/logrus/hooks/test"
|
||||||
|
"github.com/syndtr/gocapability/capability"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNew(t *testing.T) {
|
||||||
|
cs := []string{"CAP_CHOWN", "CAP_UNKNOWN", "CAP_UNKNOWN2"}
|
||||||
|
conf := configs.Capabilities{
|
||||||
|
Bounding: cs,
|
||||||
|
Effective: cs,
|
||||||
|
Inheritable: cs,
|
||||||
|
Permitted: cs,
|
||||||
|
Ambient: cs,
|
||||||
|
}
|
||||||
|
|
||||||
|
hook := test.NewGlobal()
|
||||||
|
defer hook.Reset()
|
||||||
|
|
||||||
|
logrus.SetOutput(io.Discard)
|
||||||
|
caps, err := New(&conf)
|
||||||
|
logrus.SetOutput(os.Stderr)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
e := hook.AllEntries()
|
||||||
|
if len(e) != 1 {
|
||||||
|
t.Errorf("expected 1 warning, got %d", len(e))
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedLogs := logrus.Entry{
|
||||||
|
Level: logrus.WarnLevel,
|
||||||
|
Message: "ignoring unknown or unavailable capabilities: [CAP_UNKNOWN CAP_UNKNOWN2]",
|
||||||
|
}
|
||||||
|
|
||||||
|
l := hook.LastEntry()
|
||||||
|
if l == nil {
|
||||||
|
t.Fatal("expected a warning, but got none")
|
||||||
|
}
|
||||||
|
if l.Level != expectedLogs.Level {
|
||||||
|
t.Errorf("expected %q, got %q", expectedLogs.Level, l.Level)
|
||||||
|
}
|
||||||
|
if l.Message != expectedLogs.Message {
|
||||||
|
t.Errorf("expected %q, got %q", expectedLogs.Message, l.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(caps.caps) != len(capTypes) {
|
||||||
|
t.Errorf("expected %d capability types, got %d: %v", len(capTypes), len(caps.caps), caps.caps)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, cType := range capTypes {
|
||||||
|
if i := len(caps.caps[cType]); i != 1 {
|
||||||
|
t.Errorf("expected 1 capability for %s, got %d: %v", cType, i, caps.caps[cType])
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if caps.caps[cType][0] != capability.CAP_CHOWN {
|
||||||
|
t.Errorf("expected CAP_CHOWN, got %s: ", caps.caps[cType][0])
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
hook.Reset()
|
||||||
|
}
|
|
@ -0,0 +1,4 @@
|
||||||
|
//go:build !linux
|
||||||
|
// +build !linux
|
||||||
|
|
||||||
|
package capabilities
|
|
@ -1,117 +0,0 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package libcontainer
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
|
||||||
"github.com/syndtr/gocapability/capability"
|
|
||||||
)
|
|
||||||
|
|
||||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
|
|
||||||
|
|
||||||
var capabilityMap map[string]capability.Cap
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
capabilityMap = make(map[string]capability.Cap)
|
|
||||||
last := capability.CAP_LAST_CAP
|
|
||||||
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
|
|
||||||
if last == capability.Cap(63) {
|
|
||||||
last = capability.CAP_BLOCK_SUSPEND
|
|
||||||
}
|
|
||||||
for _, cap := range capability.List() {
|
|
||||||
if cap > last {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
|
|
||||||
capabilityMap[capKey] = cap
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
|
|
||||||
bounding := []capability.Cap{}
|
|
||||||
for _, c := range capConfig.Bounding {
|
|
||||||
v, ok := capabilityMap[c]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("unknown capability %q", c)
|
|
||||||
}
|
|
||||||
bounding = append(bounding, v)
|
|
||||||
}
|
|
||||||
effective := []capability.Cap{}
|
|
||||||
for _, c := range capConfig.Effective {
|
|
||||||
v, ok := capabilityMap[c]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("unknown capability %q", c)
|
|
||||||
}
|
|
||||||
effective = append(effective, v)
|
|
||||||
}
|
|
||||||
inheritable := []capability.Cap{}
|
|
||||||
for _, c := range capConfig.Inheritable {
|
|
||||||
v, ok := capabilityMap[c]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("unknown capability %q", c)
|
|
||||||
}
|
|
||||||
inheritable = append(inheritable, v)
|
|
||||||
}
|
|
||||||
permitted := []capability.Cap{}
|
|
||||||
for _, c := range capConfig.Permitted {
|
|
||||||
v, ok := capabilityMap[c]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("unknown capability %q", c)
|
|
||||||
}
|
|
||||||
permitted = append(permitted, v)
|
|
||||||
}
|
|
||||||
ambient := []capability.Cap{}
|
|
||||||
for _, c := range capConfig.Ambient {
|
|
||||||
v, ok := capabilityMap[c]
|
|
||||||
if !ok {
|
|
||||||
return nil, fmt.Errorf("unknown capability %q", c)
|
|
||||||
}
|
|
||||||
ambient = append(ambient, v)
|
|
||||||
}
|
|
||||||
pid, err := capability.NewPid2(0)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
err = pid.Load()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
return &containerCapabilities{
|
|
||||||
bounding: bounding,
|
|
||||||
effective: effective,
|
|
||||||
inheritable: inheritable,
|
|
||||||
permitted: permitted,
|
|
||||||
ambient: ambient,
|
|
||||||
pid: pid,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type containerCapabilities struct {
|
|
||||||
pid capability.Capabilities
|
|
||||||
bounding []capability.Cap
|
|
||||||
effective []capability.Cap
|
|
||||||
inheritable []capability.Cap
|
|
||||||
permitted []capability.Cap
|
|
||||||
ambient []capability.Cap
|
|
||||||
}
|
|
||||||
|
|
||||||
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
|
||||||
func (c *containerCapabilities) ApplyBoundingSet() error {
|
|
||||||
c.pid.Clear(capability.BOUNDS)
|
|
||||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
|
||||||
return c.pid.Apply(capability.BOUNDS)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply sets all the capabilities for the current process in the config.
|
|
||||||
func (c *containerCapabilities) ApplyCaps() error {
|
|
||||||
c.pid.Clear(allCapabilityTypes)
|
|
||||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
|
||||||
c.pid.Set(capability.PERMITTED, c.permitted...)
|
|
||||||
c.pid.Set(capability.INHERITABLE, c.inheritable...)
|
|
||||||
c.pid.Set(capability.EFFECTIVE, c.effective...)
|
|
||||||
c.pid.Set(capability.AMBIENT, c.ambient...)
|
|
||||||
return c.pid.Apply(allCapabilityTypes)
|
|
||||||
}
|
|
|
@ -1,74 +1,59 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package cgroups
|
package cgroups
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Manager interface {
|
type Manager interface {
|
||||||
// Applies cgroup configuration to the process with the specified pid
|
// Apply creates a cgroup, if not yet created, and adds a process
|
||||||
|
// with the specified pid into that cgroup. A special value of -1
|
||||||
|
// can be used to merely create a cgroup.
|
||||||
Apply(pid int) error
|
Apply(pid int) error
|
||||||
|
|
||||||
// Returns the PIDs inside the cgroup set
|
// GetPids returns the PIDs of all processes inside the cgroup.
|
||||||
GetPids() ([]int, error)
|
GetPids() ([]int, error)
|
||||||
|
|
||||||
// Returns the PIDs inside the cgroup set & all sub-cgroups
|
// GetAllPids returns the PIDs of all processes inside the cgroup
|
||||||
|
// and all its sub-cgroups.
|
||||||
GetAllPids() ([]int, error)
|
GetAllPids() ([]int, error)
|
||||||
|
|
||||||
// Returns statistics for the cgroup set
|
// GetStats returns cgroups statistics.
|
||||||
GetStats() (*Stats, error)
|
GetStats() (*Stats, error)
|
||||||
|
|
||||||
// Toggles the freezer cgroup according with specified state
|
// Freeze sets the freezer cgroup to the specified state.
|
||||||
Freeze(state configs.FreezerState) error
|
Freeze(state configs.FreezerState) error
|
||||||
|
|
||||||
// Destroys the cgroup set
|
// Destroy removes cgroup.
|
||||||
Destroy() error
|
Destroy() error
|
||||||
|
|
||||||
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
|
// Path returns a cgroup path to the specified controller/subsystem.
|
||||||
// Paths map[string]string
|
// For cgroupv2, the argument is unused and can be empty.
|
||||||
// Cgroups *configs.Cgroup
|
Path(string) string
|
||||||
// Paths maps cgroup subsystem to path at which it is mounted.
|
|
||||||
// Cgroups specifies specific cgroup settings for the various subsystems
|
|
||||||
|
|
||||||
// Returns cgroup paths to save in a state file and to be able to
|
// Set sets cgroup resources parameters/limits. If the argument is nil,
|
||||||
// restore the object later.
|
// the resources specified during Manager creation (or the previous call
|
||||||
|
// to Set) are used.
|
||||||
|
Set(r *configs.Resources) error
|
||||||
|
|
||||||
|
// GetPaths returns cgroup path(s) to save in a state file in order to
|
||||||
|
// restore later.
|
||||||
|
//
|
||||||
|
// For cgroup v1, a key is cgroup subsystem name, and the value is the
|
||||||
|
// path to the cgroup for this subsystem.
|
||||||
|
//
|
||||||
|
// For cgroup v2 unified hierarchy, a key is "", and the value is the
|
||||||
|
// unified path.
|
||||||
GetPaths() map[string]string
|
GetPaths() map[string]string
|
||||||
|
|
||||||
// GetUnifiedPath returns the unified path when running in unified mode.
|
// GetCgroups returns the cgroup data as configured.
|
||||||
// The value corresponds to the all values of GetPaths() map.
|
|
||||||
//
|
|
||||||
// GetUnifiedPath returns error when running in hybrid mode as well as
|
|
||||||
// in legacy mode.
|
|
||||||
GetUnifiedPath() (string, error)
|
|
||||||
|
|
||||||
// Sets the cgroup as configured.
|
|
||||||
Set(container *configs.Config) error
|
|
||||||
|
|
||||||
// Gets the cgroup as configured.
|
|
||||||
GetCgroups() (*configs.Cgroup, error)
|
GetCgroups() (*configs.Cgroup, error)
|
||||||
}
|
|
||||||
|
|
||||||
type NotFoundError struct {
|
// GetFreezerState retrieves the current FreezerState of the cgroup.
|
||||||
Subsystem string
|
GetFreezerState() (configs.FreezerState, error)
|
||||||
}
|
|
||||||
|
|
||||||
func (e *NotFoundError) Error() string {
|
// Exists returns whether the cgroup path exists or not.
|
||||||
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
Exists() bool
|
||||||
}
|
|
||||||
|
|
||||||
func NewNotFoundError(sub string) error {
|
// OOMKillCount reports OOM kill count for the cgroup.
|
||||||
return &NotFoundError{
|
OOMKillCount() (uint64, error)
|
||||||
Subsystem: sub,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func IsNotFound(err error) bool {
|
|
||||||
if err == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
_, ok := err.(*NotFoundError)
|
|
||||||
return ok
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package cgroups
|
package cgroups
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
// +build !linux
|
|
||||||
|
|
||||||
package cgroups
|
|
|
@ -0,0 +1,386 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
|
||||||
|
* Copyright (C) 2020 SUSE LLC
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package devices
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
)
|
||||||
|
|
||||||
|
// deviceMeta is a Rule without the Allow or Permissions fields, and no
|
||||||
|
// wildcard-type support. It's effectively the "match" portion of a metadata
|
||||||
|
// rule, for the purposes of our emulation.
|
||||||
|
type deviceMeta struct {
|
||||||
|
node devices.Type
|
||||||
|
major int64
|
||||||
|
minor int64
|
||||||
|
}
|
||||||
|
|
||||||
|
// deviceRule is effectively the tuple (deviceMeta, Permissions).
|
||||||
|
type deviceRule struct {
|
||||||
|
meta deviceMeta
|
||||||
|
perms devices.Permissions
|
||||||
|
}
|
||||||
|
|
||||||
|
// deviceRules is a mapping of device metadata rules to the associated
|
||||||
|
// permissions in the ruleset.
|
||||||
|
type deviceRules map[deviceMeta]devices.Permissions
|
||||||
|
|
||||||
|
func (r deviceRules) orderedEntries() []deviceRule {
|
||||||
|
var rules []deviceRule
|
||||||
|
for meta, perms := range r {
|
||||||
|
rules = append(rules, deviceRule{meta: meta, perms: perms})
|
||||||
|
}
|
||||||
|
sort.Slice(rules, func(i, j int) bool {
|
||||||
|
// Sort by (major, minor, type).
|
||||||
|
a, b := rules[i].meta, rules[j].meta
|
||||||
|
return a.major < b.major ||
|
||||||
|
(a.major == b.major && a.minor < b.minor) ||
|
||||||
|
(a.major == b.major && a.minor == b.minor && a.node < b.node)
|
||||||
|
})
|
||||||
|
return rules
|
||||||
|
}
|
||||||
|
|
||||||
|
type Emulator struct {
|
||||||
|
defaultAllow bool
|
||||||
|
rules deviceRules
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) IsBlacklist() bool {
|
||||||
|
return e.defaultAllow
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) IsAllowAll() bool {
|
||||||
|
return e.IsBlacklist() && len(e.rules) == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseLine(line string) (*deviceRule, error) {
|
||||||
|
// Input: node major:minor perms.
|
||||||
|
fields := strings.FieldsFunc(line, func(r rune) bool {
|
||||||
|
return r == ' ' || r == ':'
|
||||||
|
})
|
||||||
|
if len(fields) != 4 {
|
||||||
|
return nil, fmt.Errorf("malformed devices.list rule %s", line)
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
rule deviceRule
|
||||||
|
node = fields[0]
|
||||||
|
major = fields[1]
|
||||||
|
minor = fields[2]
|
||||||
|
perms = fields[3]
|
||||||
|
)
|
||||||
|
|
||||||
|
// Parse the node type.
|
||||||
|
switch node {
|
||||||
|
case "a":
|
||||||
|
// Super-special case -- "a" always means every device with every
|
||||||
|
// access mode. In fact, for devices.list this actually indicates that
|
||||||
|
// the cgroup is in black-list mode.
|
||||||
|
// TODO: Double-check that the entire file is "a *:* rwm".
|
||||||
|
return nil, nil
|
||||||
|
case "b":
|
||||||
|
rule.meta.node = devices.BlockDevice
|
||||||
|
case "c":
|
||||||
|
rule.meta.node = devices.CharDevice
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("unknown device type %q", node)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the major number.
|
||||||
|
if major == "*" {
|
||||||
|
rule.meta.major = devices.Wildcard
|
||||||
|
} else {
|
||||||
|
val, err := strconv.ParseUint(major, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid major number: %w", err)
|
||||||
|
}
|
||||||
|
rule.meta.major = int64(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the minor number.
|
||||||
|
if minor == "*" {
|
||||||
|
rule.meta.minor = devices.Wildcard
|
||||||
|
} else {
|
||||||
|
val, err := strconv.ParseUint(minor, 10, 32)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid minor number: %w", err)
|
||||||
|
}
|
||||||
|
rule.meta.minor = int64(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the access permissions.
|
||||||
|
rule.perms = devices.Permissions(perms)
|
||||||
|
if !rule.perms.IsValid() || rule.perms.IsEmpty() {
|
||||||
|
return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", perms)
|
||||||
|
}
|
||||||
|
return &rule, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
|
||||||
|
if e.rules == nil {
|
||||||
|
e.rules = make(map[deviceMeta]devices.Permissions)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Merge with any pre-existing permissions.
|
||||||
|
oldPerms := e.rules[rule.meta]
|
||||||
|
newPerms := rule.perms.Union(oldPerms)
|
||||||
|
e.rules[rule.meta] = newPerms
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) rmRule(rule deviceRule) error {
|
||||||
|
// Give an error if any of the permissions requested to be removed are
|
||||||
|
// present in a partially-matching wildcard rule, because such rules will
|
||||||
|
// be ignored by cgroupv1.
|
||||||
|
//
|
||||||
|
// This is a divergence from cgroupv1, but is necessary to avoid leading
|
||||||
|
// users into a false sense of security. cgroupv1 will silently(!) ignore
|
||||||
|
// requests to remove partial exceptions, but we really shouldn't do that.
|
||||||
|
//
|
||||||
|
// It may seem like we could just "split" wildcard rules which hit this
|
||||||
|
// issue, but unfortunately there are 2^32 possible major and minor
|
||||||
|
// numbers, which would exhaust kernel memory quickly if we did this. Not
|
||||||
|
// to mention it'd be really slow (the kernel side is implemented as a
|
||||||
|
// linked-list of exceptions).
|
||||||
|
for _, partialMeta := range []deviceMeta{
|
||||||
|
{node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor},
|
||||||
|
{node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard},
|
||||||
|
{node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard},
|
||||||
|
} {
|
||||||
|
// This wildcard rule is equivalent to the requested rule, so skip it.
|
||||||
|
if rule.meta == partialMeta {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Only give an error if the set of permissions overlap.
|
||||||
|
partialPerms := e.rules[partialMeta]
|
||||||
|
if !partialPerms.Intersection(rule.perms).IsEmpty() {
|
||||||
|
return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subtract all of the permissions listed from the full match rule. If the
|
||||||
|
// rule didn't exist, all of this is a no-op.
|
||||||
|
newPerms := e.rules[rule.meta].Difference(rule.perms)
|
||||||
|
if newPerms.IsEmpty() {
|
||||||
|
delete(e.rules, rule.meta)
|
||||||
|
} else {
|
||||||
|
e.rules[rule.meta] = newPerms
|
||||||
|
}
|
||||||
|
// TODO: The actual cgroup code doesn't care if an exception didn't exist
|
||||||
|
// during removal, so not erroring out here is /accurate/ but quite
|
||||||
|
// worrying. Maybe we should do additional validation, but again we
|
||||||
|
// have to worry about backwards-compatibility.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) allow(rule *deviceRule) error {
|
||||||
|
// This cgroup is configured as a black-list. Reset the entire emulator,
|
||||||
|
// and put it into black-list mode.
|
||||||
|
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||||
|
*e = Emulator{
|
||||||
|
defaultAllow: true,
|
||||||
|
rules: nil,
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
if e.defaultAllow {
|
||||||
|
err = wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
|
||||||
|
} else {
|
||||||
|
err = wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) deny(rule *deviceRule) error {
|
||||||
|
// This cgroup is configured as a white-list. Reset the entire emulator,
|
||||||
|
// and put it into white-list mode.
|
||||||
|
if rule == nil || rule.meta.node == devices.WildcardDevice {
|
||||||
|
*e = Emulator{
|
||||||
|
defaultAllow: false,
|
||||||
|
rules: nil,
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var err error
|
||||||
|
if e.defaultAllow {
|
||||||
|
err = wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
|
||||||
|
} else {
|
||||||
|
err = wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Emulator) Apply(rule devices.Rule) error {
|
||||||
|
if !rule.Type.CanCgroup() {
|
||||||
|
return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
innerRule := &deviceRule{
|
||||||
|
meta: deviceMeta{
|
||||||
|
node: rule.Type,
|
||||||
|
major: rule.Major,
|
||||||
|
minor: rule.Minor,
|
||||||
|
},
|
||||||
|
perms: rule.Permissions,
|
||||||
|
}
|
||||||
|
if innerRule.meta.node == devices.WildcardDevice {
|
||||||
|
innerRule = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if rule.Allow {
|
||||||
|
return e.allow(innerRule)
|
||||||
|
}
|
||||||
|
|
||||||
|
return e.deny(innerRule)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EmulatorFromList takes a reader to a "devices.list"-like source, and returns
|
||||||
|
// a new Emulator that represents the state of the devices cgroup. Note that
|
||||||
|
// black-list devices cgroups cannot be fully reconstructed, due to limitations
|
||||||
|
// in the devices cgroup API. Instead, such cgroups are always treated as
|
||||||
|
// "allow all" cgroups.
|
||||||
|
func EmulatorFromList(list io.Reader) (*Emulator, error) {
|
||||||
|
// Normally cgroups are in black-list mode by default, but the way we
|
||||||
|
// figure out the current mode is whether or not devices.list has an
|
||||||
|
// allow-all rule. So we default to a white-list, and the existence of an
|
||||||
|
// "a *:* rwm" entry will tell us otherwise.
|
||||||
|
e := &Emulator{
|
||||||
|
defaultAllow: false,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the "devices.list".
|
||||||
|
s := bufio.NewScanner(list)
|
||||||
|
for s.Scan() {
|
||||||
|
line := s.Text()
|
||||||
|
deviceRule, err := parseLine(line)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("error parsing line %q: %w", line, err)
|
||||||
|
}
|
||||||
|
// "devices.list" is an allow list. Note that this means that in
|
||||||
|
// black-list mode, we have no idea what rules are in play. As a
|
||||||
|
// result, we need to be very careful in Transition().
|
||||||
|
if err := e.allow(deviceRule); err != nil {
|
||||||
|
return nil, fmt.Errorf("error adding devices.list rule: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := s.Err(); err != nil {
|
||||||
|
return nil, fmt.Errorf("error reading devices.list lines: %w", err)
|
||||||
|
}
|
||||||
|
return e, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Transition calculates what is the minimally-disruptive set of rules need to
|
||||||
|
// be applied to a devices cgroup in order to transition to the given target.
|
||||||
|
// This means that any already-existing rules will not be applied, and
|
||||||
|
// disruptive rules (like denying all device access) will only be applied if
|
||||||
|
// necessary.
|
||||||
|
//
|
||||||
|
// This function is the sole reason for all of Emulator -- to allow us
|
||||||
|
// to figure out how to update a containers' cgroups without causing spurious
|
||||||
|
// device errors (if possible).
|
||||||
|
func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
|
||||||
|
var transitionRules []*devices.Rule
|
||||||
|
oldRules := source.rules
|
||||||
|
|
||||||
|
// If the default policy doesn't match, we need to include a "disruptive"
|
||||||
|
// rule (either allow-all or deny-all) in order to switch the cgroup to the
|
||||||
|
// correct default policy.
|
||||||
|
//
|
||||||
|
// However, due to a limitation in "devices.list" we cannot be sure what
|
||||||
|
// deny rules are in place in a black-list cgroup. Thus if the source is a
|
||||||
|
// black-list we also have to include a disruptive rule.
|
||||||
|
if source.IsBlacklist() || source.defaultAllow != target.defaultAllow {
|
||||||
|
transitionRules = append(transitionRules, &devices.Rule{
|
||||||
|
Type: 'a',
|
||||||
|
Major: -1,
|
||||||
|
Minor: -1,
|
||||||
|
Permissions: devices.Permissions("rwm"),
|
||||||
|
Allow: target.defaultAllow,
|
||||||
|
})
|
||||||
|
// The old rules are only relevant if we aren't starting out with a
|
||||||
|
// disruptive rule.
|
||||||
|
oldRules = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: We traverse through the rules in a sorted order so we always write
|
||||||
|
// the same set of rules (this is to aid testing).
|
||||||
|
|
||||||
|
// First, we create inverse rules for any old rules not in the new set.
|
||||||
|
// This includes partial-inverse rules for specific permissions. This is a
|
||||||
|
// no-op if we added a disruptive rule, since oldRules will be empty.
|
||||||
|
for _, rule := range oldRules.orderedEntries() {
|
||||||
|
meta, oldPerms := rule.meta, rule.perms
|
||||||
|
newPerms := target.rules[meta]
|
||||||
|
droppedPerms := oldPerms.Difference(newPerms)
|
||||||
|
if !droppedPerms.IsEmpty() {
|
||||||
|
transitionRules = append(transitionRules, &devices.Rule{
|
||||||
|
Type: meta.node,
|
||||||
|
Major: meta.major,
|
||||||
|
Minor: meta.minor,
|
||||||
|
Permissions: droppedPerms,
|
||||||
|
Allow: target.defaultAllow,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add any additional rules which weren't in the old set. We happen to
|
||||||
|
// filter out rules which are present in both sets, though this isn't
|
||||||
|
// strictly necessary.
|
||||||
|
for _, rule := range target.rules.orderedEntries() {
|
||||||
|
meta, newPerms := rule.meta, rule.perms
|
||||||
|
oldPerms := oldRules[meta]
|
||||||
|
gainedPerms := newPerms.Difference(oldPerms)
|
||||||
|
if !gainedPerms.IsEmpty() {
|
||||||
|
transitionRules = append(transitionRules, &devices.Rule{
|
||||||
|
Type: meta.node,
|
||||||
|
Major: meta.major,
|
||||||
|
Minor: meta.minor,
|
||||||
|
Permissions: gainedPerms,
|
||||||
|
Allow: !target.defaultAllow,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return transitionRules, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Rules returns the minimum set of rules necessary to convert a *deny-all*
|
||||||
|
// cgroup to the emulated filter state (note that this is not the same as a
|
||||||
|
// default cgroupv1 cgroup -- which is allow-all). This is effectively just a
|
||||||
|
// wrapper around Transition() with the source emulator being an empty cgroup.
|
||||||
|
func (e *Emulator) Rules() ([]*devices.Rule, error) {
|
||||||
|
defaultCgroup := &Emulator{defaultAllow: false}
|
||||||
|
return defaultCgroup.Transition(e)
|
||||||
|
}
|
||||||
|
|
||||||
|
// wrapErr prefixes err with text, producing an error of the form
// "<text>: <err>" that still wraps err (so errors.Is / errors.As keep
// working). A nil err is passed through as nil, which lets callers wrap the
// result of a call unconditionally.
func wrapErr(err error, text string) error {
	if err == nil {
		return nil
	}
	// text is passed as an argument rather than spliced into the format
	// string, so a '%' inside text cannot be misread as a formatting verb
	// (which would garble the message and break the %w wrapping).
	return fmt.Errorf("%s: %w", text, err)
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
// Package devicefilter containes eBPF device filter program
|
// Package devicefilter contains eBPF device filter program
|
||||||
//
|
//
|
||||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||||
//
|
//
|
||||||
|
@ -7,12 +7,14 @@
|
||||||
package devicefilter
|
package devicefilter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
"github.com/cilium/ebpf/asm"
|
"github.com/cilium/ebpf/asm"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||||
"github.com/pkg/errors"
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -22,22 +24,54 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
// DeviceFilter returns eBPF device filter program and its license string
|
// DeviceFilter returns eBPF device filter program and its license string
|
||||||
func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
|
func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
|
||||||
p := &program{}
|
// Generate the minimum ruleset for the device rules we are given. While we
|
||||||
p.init()
|
// don't care about minimum transitions in cgroupv2, using the emulator
|
||||||
for i := len(devices) - 1; i >= 0; i-- {
|
// gives us a guarantee that the behaviour of devices filtering is the same
|
||||||
if err := p.appendDevice(devices[i]); err != nil {
|
// as cgroupv1, including security hardenings to avoid misconfiguration
|
||||||
|
// (such as punching holes in wildcard rules).
|
||||||
|
emu := new(devicesemulator.Emulator)
|
||||||
|
for _, rule := range rules {
|
||||||
|
if err := emu.Apply(*rule); err != nil {
|
||||||
return nil, "", err
|
return nil, "", err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
insts, err := p.finalize()
|
cleanRules, err := emu.Rules()
|
||||||
return insts, license, err
|
if err != nil {
|
||||||
|
return nil, "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
p := &program{
|
||||||
|
defaultAllow: emu.IsBlacklist(),
|
||||||
|
}
|
||||||
|
p.init()
|
||||||
|
|
||||||
|
for idx, rule := range cleanRules {
|
||||||
|
if rule.Type == devices.WildcardDevice {
|
||||||
|
// We can safely skip over wildcard entries because there should
|
||||||
|
// only be one (at most) at the very start to instruct cgroupv1 to
|
||||||
|
// go into allow-list mode. However we do double-check this here.
|
||||||
|
if idx != 0 || rule.Allow != emu.IsBlacklist() {
|
||||||
|
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if rule.Allow == p.defaultAllow {
|
||||||
|
// There should be no rules which have an action equal to the
|
||||||
|
// default action, the emulator removes those.
|
||||||
|
return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
|
||||||
|
}
|
||||||
|
if err := p.appendRule(rule); err != nil {
|
||||||
|
return nil, "", err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p.finalize(), license, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type program struct {
|
type program struct {
|
||||||
insts asm.Instructions
|
insts asm.Instructions
|
||||||
hasWildCard bool
|
defaultAllow bool
|
||||||
blockID int
|
blockID int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *program) init() {
|
func (p *program) init() {
|
||||||
|
@ -49,7 +83,8 @@ func (p *program) init() {
|
||||||
*/
|
*/
|
||||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||||
p.insts = append(p.insts,
|
p.insts = append(p.insts,
|
||||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
|
||||||
|
asm.And.Imm32(asm.R2, 0xFFFF))
|
||||||
|
|
||||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||||
p.insts = append(p.insts,
|
p.insts = append(p.insts,
|
||||||
|
@ -66,39 +101,35 @@ func (p *program) init() {
|
||||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||||
}
|
}
|
||||||
|
|
||||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
// appendRule rule converts an OCI rule to the relevant eBPF block and adds it
|
||||||
func (p *program) appendDevice(dev *configs.Device) error {
|
// to the in-progress filter program. In order to operate properly, it must be
|
||||||
|
// called with a "clean" rule list (generated by devices.Emulator.Rules() --
|
||||||
|
// with any "a" rules removed).
|
||||||
|
func (p *program) appendRule(rule *devices.Rule) error {
|
||||||
if p.blockID < 0 {
|
if p.blockID < 0 {
|
||||||
return errors.New("the program is finalized")
|
return errors.New("the program is finalized")
|
||||||
}
|
}
|
||||||
if p.hasWildCard {
|
|
||||||
// All entries after wildcard entry are ignored
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
bpfType := int32(-1)
|
var bpfType int32
|
||||||
hasType := true
|
switch rule.Type {
|
||||||
switch dev.Type {
|
case devices.CharDevice:
|
||||||
case 'c':
|
|
||||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||||
case 'b':
|
case devices.BlockDevice:
|
||||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||||
case 'a':
|
|
||||||
hasType = false
|
|
||||||
default:
|
default:
|
||||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
// We do not permit 'a', nor any other types we don't know about.
|
||||||
return errors.Errorf("invalid DeviceType %q", string(dev.Type))
|
return fmt.Errorf("invalid type %q", string(rule.Type))
|
||||||
}
|
}
|
||||||
if dev.Major > math.MaxUint32 {
|
if rule.Major > math.MaxUint32 {
|
||||||
return errors.Errorf("invalid major %d", dev.Major)
|
return fmt.Errorf("invalid major %d", rule.Major)
|
||||||
}
|
}
|
||||||
if dev.Minor > math.MaxUint32 {
|
if rule.Minor > math.MaxUint32 {
|
||||||
return errors.Errorf("invalid minor %d", dev.Major)
|
return fmt.Errorf("invalid minor %d", rule.Major)
|
||||||
}
|
}
|
||||||
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||||
hasMinor := dev.Minor >= 0
|
hasMinor := rule.Minor >= 0
|
||||||
bpfAccess := int32(0)
|
bpfAccess := int32(0)
|
||||||
for _, r := range dev.Permissions {
|
for _, r := range rule.Permissions {
|
||||||
switch r {
|
switch r {
|
||||||
case 'r':
|
case 'r':
|
||||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||||
|
@ -107,68 +138,65 @@ func (p *program) appendDevice(dev *configs.Device) error {
|
||||||
case 'm':
|
case 'm':
|
||||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||||
default:
|
default:
|
||||||
return errors.Errorf("unknown device access %v", r)
|
return fmt.Errorf("unknown device access %v", r)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If the access is rwm, skip the check.
|
// If the access is rwm, skip the check.
|
||||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||||
|
|
||||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
var (
|
||||||
nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
blockSym = "block-" + strconv.Itoa(p.blockID)
|
||||||
prevBlockLastIdx := len(p.insts) - 1
|
nextBlockSym = "block-" + strconv.Itoa(p.blockID+1)
|
||||||
if hasType {
|
prevBlockLastIdx = len(p.insts) - 1
|
||||||
p.insts = append(p.insts,
|
)
|
||||||
// if (R2 != bpfType) goto next
|
p.insts = append(p.insts,
|
||||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
// if (R2 != bpfType) goto next
|
||||||
)
|
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||||
}
|
)
|
||||||
if hasAccess {
|
if hasAccess {
|
||||||
p.insts = append(p.insts,
|
p.insts = append(p.insts,
|
||||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
|
||||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||||
asm.And.Imm32(asm.R1, bpfAccess),
|
asm.And.Imm32(asm.R1, bpfAccess),
|
||||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
if hasMajor {
|
if hasMajor {
|
||||||
p.insts = append(p.insts,
|
p.insts = append(p.insts,
|
||||||
// if (R4 != major) goto next
|
// if (R4 != major) goto next
|
||||||
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
|
asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
if hasMinor {
|
if hasMinor {
|
||||||
p.insts = append(p.insts,
|
p.insts = append(p.insts,
|
||||||
// if (R5 != minor) goto next
|
// if (R5 != minor) goto next
|
||||||
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
|
asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
p.insts = append(p.insts, acceptBlock(rule.Allow)...)
|
||||||
p.hasWildCard = true
|
|
||||||
}
|
|
||||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
|
||||||
// set blockSym to the first instruction we added in this iteration
|
// set blockSym to the first instruction we added in this iteration
|
||||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||||
p.blockID++
|
p.blockID++
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *program) finalize() (asm.Instructions, error) {
|
func (p *program) finalize() asm.Instructions {
|
||||||
if p.hasWildCard {
|
var v int32
|
||||||
// acceptBlock with asm.Return() is already inserted
|
if p.defaultAllow {
|
||||||
return p.insts, nil
|
v = 1
|
||||||
}
|
}
|
||||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
blockSym := "block-" + strconv.Itoa(p.blockID)
|
||||||
p.insts = append(p.insts,
|
p.insts = append(p.insts,
|
||||||
// R0 <- 0
|
// R0 <- v
|
||||||
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
|
asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
|
||||||
asm.Return(),
|
asm.Return(),
|
||||||
)
|
)
|
||||||
p.blockID = -1
|
p.blockID = -1
|
||||||
return p.insts, nil
|
return p.insts
|
||||||
}
|
}
|
||||||
|
|
||||||
func acceptBlock(accept bool) asm.Instructions {
|
func acceptBlock(accept bool) asm.Instructions {
|
||||||
v := int32(0)
|
var v int32
|
||||||
if accept {
|
if accept {
|
||||||
v = 1
|
v = 1
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -20,13 +20,12 @@ func hash(s, comm string) string {
|
||||||
return strings.Join(res, "\n")
|
return strings.Join(res, "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr string) {
|
func testDeviceFilter(t testing.TB, devices []*devices.Rule, expectedStr string) {
|
||||||
insts, _, err := DeviceFilter(devices)
|
insts, _, err := DeviceFilter(devices)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices)
|
t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices)
|
||||||
}
|
}
|
||||||
s := insts.String()
|
s := insts.String()
|
||||||
t.Logf("%s: devices: %+v\n%s", t.Name(), devices, s)
|
|
||||||
if expectedStr != "" {
|
if expectedStr != "" {
|
||||||
hashed := hash(s, "//")
|
hashed := hash(s, "//")
|
||||||
expectedHashed := hash(expectedStr, "//")
|
expectedHashed := hash(expectedStr, "//")
|
||||||
|
@ -39,15 +38,16 @@ func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr strin
|
||||||
func TestDeviceFilter_Nil(t *testing.T) {
|
func TestDeviceFilter_Nil(t *testing.T) {
|
||||||
expected := `
|
expected := `
|
||||||
// load parameters into registers
|
// load parameters into registers
|
||||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
1: And32Imm dst: r2 imm: 65535
|
||||||
2: RSh32Imm dst: r3 imm: 16
|
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
3: RSh32Imm dst: r3 imm: 16
|
||||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||||
|
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||||
block-0:
|
block-0:
|
||||||
// return 0 (reject)
|
// return 0 (reject)
|
||||||
5: Mov32Imm dst: r0 imm: 0
|
6: Mov32Imm dst: r0 imm: 0
|
||||||
6: Exit
|
7: Exit
|
||||||
`
|
`
|
||||||
testDeviceFilter(t, nil, expected)
|
testDeviceFilter(t, nil, expected)
|
||||||
}
|
}
|
||||||
|
@ -55,97 +55,96 @@ block-0:
|
||||||
func TestDeviceFilter_BuiltInAllowList(t *testing.T) {
|
func TestDeviceFilter_BuiltInAllowList(t *testing.T) {
|
||||||
expected := `
|
expected := `
|
||||||
// load parameters into registers
|
// load parameters into registers
|
||||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
1: And32Imm dst: r2 imm: 65535
|
||||||
2: RSh32Imm dst: r3 imm: 16
|
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
3: RSh32Imm dst: r3 imm: 16
|
||||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||||
|
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||||
block-0:
|
block-0:
|
||||||
// tuntap (c, 10, 200, rwm, allow)
|
|
||||||
5: JNEImm dst: r2 off: -1 imm: 2 <block-1>
|
|
||||||
6: JNEImm dst: r4 off: -1 imm: 10 <block-1>
|
|
||||||
7: JNEImm dst: r5 off: -1 imm: 200 <block-1>
|
|
||||||
8: Mov32Imm dst: r0 imm: 1
|
|
||||||
9: Exit
|
|
||||||
block-1:
|
|
||||||
10: JNEImm dst: r2 off: -1 imm: 2 <block-2>
|
|
||||||
11: JNEImm dst: r4 off: -1 imm: 5 <block-2>
|
|
||||||
12: JNEImm dst: r5 off: -1 imm: 2 <block-2>
|
|
||||||
13: Mov32Imm dst: r0 imm: 1
|
|
||||||
14: Exit
|
|
||||||
block-2:
|
|
||||||
// /dev/pts (c, 136, wildcard, rwm, true)
|
|
||||||
15: JNEImm dst: r2 off: -1 imm: 2 <block-3>
|
|
||||||
16: JNEImm dst: r4 off: -1 imm: 136 <block-3>
|
|
||||||
17: Mov32Imm dst: r0 imm: 1
|
|
||||||
18: Exit
|
|
||||||
block-3:
|
|
||||||
19: JNEImm dst: r2 off: -1 imm: 2 <block-4>
|
|
||||||
20: JNEImm dst: r4 off: -1 imm: 5 <block-4>
|
|
||||||
21: JNEImm dst: r5 off: -1 imm: 1 <block-4>
|
|
||||||
22: Mov32Imm dst: r0 imm: 1
|
|
||||||
23: Exit
|
|
||||||
block-4:
|
|
||||||
24: JNEImm dst: r2 off: -1 imm: 2 <block-5>
|
|
||||||
25: JNEImm dst: r4 off: -1 imm: 1 <block-5>
|
|
||||||
26: JNEImm dst: r5 off: -1 imm: 9 <block-5>
|
|
||||||
27: Mov32Imm dst: r0 imm: 1
|
|
||||||
28: Exit
|
|
||||||
block-5:
|
|
||||||
29: JNEImm dst: r2 off: -1 imm: 2 <block-6>
|
|
||||||
30: JNEImm dst: r4 off: -1 imm: 1 <block-6>
|
|
||||||
31: JNEImm dst: r5 off: -1 imm: 5 <block-6>
|
|
||||||
32: Mov32Imm dst: r0 imm: 1
|
|
||||||
33: Exit
|
|
||||||
block-6:
|
|
||||||
34: JNEImm dst: r2 off: -1 imm: 2 <block-7>
|
|
||||||
35: JNEImm dst: r4 off: -1 imm: 5 <block-7>
|
|
||||||
36: JNEImm dst: r5 off: -1 imm: 0 <block-7>
|
|
||||||
37: Mov32Imm dst: r0 imm: 1
|
|
||||||
38: Exit
|
|
||||||
block-7:
|
|
||||||
39: JNEImm dst: r2 off: -1 imm: 2 <block-8>
|
|
||||||
40: JNEImm dst: r4 off: -1 imm: 1 <block-8>
|
|
||||||
41: JNEImm dst: r5 off: -1 imm: 7 <block-8>
|
|
||||||
42: Mov32Imm dst: r0 imm: 1
|
|
||||||
43: Exit
|
|
||||||
block-8:
|
|
||||||
44: JNEImm dst: r2 off: -1 imm: 2 <block-9>
|
|
||||||
45: JNEImm dst: r4 off: -1 imm: 1 <block-9>
|
|
||||||
46: JNEImm dst: r5 off: -1 imm: 8 <block-9>
|
|
||||||
47: Mov32Imm dst: r0 imm: 1
|
|
||||||
48: Exit
|
|
||||||
block-9:
|
|
||||||
49: JNEImm dst: r2 off: -1 imm: 2 <block-10>
|
|
||||||
50: JNEImm dst: r4 off: -1 imm: 1 <block-10>
|
|
||||||
51: JNEImm dst: r5 off: -1 imm: 3 <block-10>
|
|
||||||
52: Mov32Imm dst: r0 imm: 1
|
|
||||||
53: Exit
|
|
||||||
block-10:
|
|
||||||
// (b, wildcard, wildcard, m, true)
|
// (b, wildcard, wildcard, m, true)
|
||||||
54: JNEImm dst: r2 off: -1 imm: 1 <block-11>
|
6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||||
55: Mov32Reg dst: r1 src: r3
|
7: Mov32Reg dst: r1 src: r3
|
||||||
56: And32Imm dst: r1 imm: 1
|
8: And32Imm dst: r1 imm: 1
|
||||||
57: JEqImm dst: r1 off: -1 imm: 0 <block-11>
|
9: JNEReg dst: r1 off: -1 src: r3 <block-1>
|
||||||
58: Mov32Imm dst: r0 imm: 1
|
10: Mov32Imm dst: r0 imm: 1
|
||||||
59: Exit
|
11: Exit
|
||||||
block-11:
|
block-1:
|
||||||
// (c, wildcard, wildcard, m, true)
|
// (c, wildcard, wildcard, m, true)
|
||||||
60: JNEImm dst: r2 off: -1 imm: 2 <block-12>
|
12: JNEImm dst: r2 off: -1 imm: 2 <block-2>
|
||||||
61: Mov32Reg dst: r1 src: r3
|
13: Mov32Reg dst: r1 src: r3
|
||||||
62: And32Imm dst: r1 imm: 1
|
14: And32Imm dst: r1 imm: 1
|
||||||
63: JEqImm dst: r1 off: -1 imm: 0 <block-12>
|
15: JNEReg dst: r1 off: -1 src: r3 <block-2>
|
||||||
64: Mov32Imm dst: r0 imm: 1
|
16: Mov32Imm dst: r0 imm: 1
|
||||||
65: Exit
|
17: Exit
|
||||||
block-12:
|
block-2:
|
||||||
66: Mov32Imm dst: r0 imm: 0
|
18: JNEImm dst: r2 off: -1 imm: 2 <block-3>
|
||||||
67: Exit
|
19: JNEImm dst: r4 off: -1 imm: 1 <block-3>
|
||||||
|
20: JNEImm dst: r5 off: -1 imm: 3 <block-3>
|
||||||
|
21: Mov32Imm dst: r0 imm: 1
|
||||||
|
22: Exit
|
||||||
|
block-3:
|
||||||
|
23: JNEImm dst: r2 off: -1 imm: 2 <block-4>
|
||||||
|
24: JNEImm dst: r4 off: -1 imm: 1 <block-4>
|
||||||
|
25: JNEImm dst: r5 off: -1 imm: 5 <block-4>
|
||||||
|
26: Mov32Imm dst: r0 imm: 1
|
||||||
|
27: Exit
|
||||||
|
block-4:
|
||||||
|
28: JNEImm dst: r2 off: -1 imm: 2 <block-5>
|
||||||
|
29: JNEImm dst: r4 off: -1 imm: 1 <block-5>
|
||||||
|
30: JNEImm dst: r5 off: -1 imm: 7 <block-5>
|
||||||
|
31: Mov32Imm dst: r0 imm: 1
|
||||||
|
32: Exit
|
||||||
|
block-5:
|
||||||
|
33: JNEImm dst: r2 off: -1 imm: 2 <block-6>
|
||||||
|
34: JNEImm dst: r4 off: -1 imm: 1 <block-6>
|
||||||
|
35: JNEImm dst: r5 off: -1 imm: 8 <block-6>
|
||||||
|
36: Mov32Imm dst: r0 imm: 1
|
||||||
|
37: Exit
|
||||||
|
block-6:
|
||||||
|
38: JNEImm dst: r2 off: -1 imm: 2 <block-7>
|
||||||
|
39: JNEImm dst: r4 off: -1 imm: 1 <block-7>
|
||||||
|
40: JNEImm dst: r5 off: -1 imm: 9 <block-7>
|
||||||
|
41: Mov32Imm dst: r0 imm: 1
|
||||||
|
42: Exit
|
||||||
|
block-7:
|
||||||
|
43: JNEImm dst: r2 off: -1 imm: 2 <block-8>
|
||||||
|
44: JNEImm dst: r4 off: -1 imm: 5 <block-8>
|
||||||
|
45: JNEImm dst: r5 off: -1 imm: 0 <block-8>
|
||||||
|
46: Mov32Imm dst: r0 imm: 1
|
||||||
|
47: Exit
|
||||||
|
block-8:
|
||||||
|
48: JNEImm dst: r2 off: -1 imm: 2 <block-9>
|
||||||
|
49: JNEImm dst: r4 off: -1 imm: 5 <block-9>
|
||||||
|
50: JNEImm dst: r5 off: -1 imm: 2 <block-9>
|
||||||
|
51: Mov32Imm dst: r0 imm: 1
|
||||||
|
52: Exit
|
||||||
|
block-9:
|
||||||
|
// tuntap (c, 10, 200, rwm, allow)
|
||||||
|
53: JNEImm dst: r2 off: -1 imm: 2 <block-10>
|
||||||
|
54: JNEImm dst: r4 off: -1 imm: 10 <block-10>
|
||||||
|
55: JNEImm dst: r5 off: -1 imm: 200 <block-10>
|
||||||
|
56: Mov32Imm dst: r0 imm: 1
|
||||||
|
57: Exit
|
||||||
|
block-10:
|
||||||
|
// /dev/pts (c, 136, wildcard, rwm, true)
|
||||||
|
58: JNEImm dst: r2 off: -1 imm: 2 <block-11>
|
||||||
|
59: JNEImm dst: r4 off: -1 imm: 136 <block-11>
|
||||||
|
60: Mov32Imm dst: r0 imm: 1
|
||||||
|
61: Exit
|
||||||
|
block-11:
|
||||||
|
62: Mov32Imm dst: r0 imm: 0
|
||||||
|
63: Exit
|
||||||
`
|
`
|
||||||
testDeviceFilter(t, specconv.AllowedDevices, expected)
|
var devices []*devices.Rule
|
||||||
|
for _, device := range specconv.AllowedDevices {
|
||||||
|
devices = append(devices, &device.Rule)
|
||||||
|
}
|
||||||
|
testDeviceFilter(t, devices, expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDeviceFilter_Privileged(t *testing.T) {
|
func TestDeviceFilter_Privileged(t *testing.T) {
|
||||||
devices := []*configs.Device{
|
devices := []*devices.Rule{
|
||||||
{
|
{
|
||||||
Type: 'a',
|
Type: 'a',
|
||||||
Major: -1,
|
Major: -1,
|
||||||
|
@ -157,21 +156,22 @@ func TestDeviceFilter_Privileged(t *testing.T) {
|
||||||
expected :=
|
expected :=
|
||||||
`
|
`
|
||||||
// load parameters into registers
|
// load parameters into registers
|
||||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
1: And32Imm dst: r2 imm: 65535
|
||||||
2: RSh32Imm dst: r3 imm: 16
|
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
3: RSh32Imm dst: r3 imm: 16
|
||||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||||
|
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||||
block-0:
|
block-0:
|
||||||
// return 1 (accept)
|
// return 1 (accept)
|
||||||
5: Mov32Imm dst: r0 imm: 1
|
6: Mov32Imm dst: r0 imm: 1
|
||||||
6: Exit
|
7: Exit
|
||||||
`
|
`
|
||||||
testDeviceFilter(t, devices, expected)
|
testDeviceFilter(t, devices, expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
|
func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
|
||||||
devices := []*configs.Device{
|
devices := []*devices.Rule{
|
||||||
{
|
{
|
||||||
Type: 'a',
|
Type: 'a',
|
||||||
Major: -1,
|
Major: -1,
|
||||||
|
@ -189,28 +189,29 @@ func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
|
||||||
}
|
}
|
||||||
expected := `
|
expected := `
|
||||||
// load parameters into registers
|
// load parameters into registers
|
||||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
1: And32Imm dst: r2 imm: 65535
|
||||||
2: RSh32Imm dst: r3 imm: 16
|
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
3: RSh32Imm dst: r3 imm: 16
|
||||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||||
|
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||||
block-0:
|
block-0:
|
||||||
// return 0 (reject) if type==b && major == 8 && minor == 0
|
// return 0 (reject) if type==b && major == 8 && minor == 0
|
||||||
5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||||
6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
7: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||||
7: JNEImm dst: r5 off: -1 imm: 0 <block-1>
|
8: JNEImm dst: r5 off: -1 imm: 0 <block-1>
|
||||||
8: Mov32Imm dst: r0 imm: 0
|
9: Mov32Imm dst: r0 imm: 0
|
||||||
9: Exit
|
10: Exit
|
||||||
block-1:
|
block-1:
|
||||||
// return 1 (accept)
|
// return 1 (accept)
|
||||||
10: Mov32Imm dst: r0 imm: 1
|
11: Mov32Imm dst: r0 imm: 1
|
||||||
11: Exit
|
12: Exit
|
||||||
`
|
`
|
||||||
testDeviceFilter(t, devices, expected)
|
testDeviceFilter(t, devices, expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDeviceFilter_Weird(t *testing.T) {
|
func TestDeviceFilter_Weird(t *testing.T) {
|
||||||
devices := []*configs.Device{
|
devices := []*devices.Rule{
|
||||||
{
|
{
|
||||||
Type: 'b',
|
Type: 'b',
|
||||||
Major: 8,
|
Major: 8,
|
||||||
|
@ -237,22 +238,23 @@ func TestDeviceFilter_Weird(t *testing.T) {
|
||||||
// This conforms to runc v1.0.0-rc.9 (cgroup1) behavior.
|
// This conforms to runc v1.0.0-rc.9 (cgroup1) behavior.
|
||||||
expected := `
|
expected := `
|
||||||
// load parameters into registers
|
// load parameters into registers
|
||||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
|
||||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
1: And32Imm dst: r2 imm: 65535
|
||||||
2: RSh32Imm dst: r3 imm: 16
|
2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
3: RSh32Imm dst: r3 imm: 16
|
||||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||||
|
5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||||
block-0:
|
block-0:
|
||||||
// return 0 (reject) if type==b && major == 8 && minor == 2
|
// return 0 (reject) if type==b && major == 8 && minor == 2
|
||||||
5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||||
6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
7: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||||
7: JNEImm dst: r5 off: -1 imm: 2 <block-1>
|
8: JNEImm dst: r5 off: -1 imm: 2 <block-1>
|
||||||
8: Mov32Imm dst: r0 imm: 0
|
9: Mov32Imm dst: r0 imm: 0
|
||||||
9: Exit
|
10: Exit
|
||||||
block-1:
|
block-1:
|
||||||
// return 1 (accept)
|
// return 1 (accept)
|
||||||
10: Mov32Imm dst: r0 imm: 1
|
11: Mov32Imm dst: r0 imm: 1
|
||||||
11: Exit
|
12: Exit
|
||||||
`
|
`
|
||||||
testDeviceFilter(t, devices, expected)
|
testDeviceFilter(t, devices, expected)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,45 +0,0 @@
|
||||||
package ebpf
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/cilium/ebpf"
|
|
||||||
"github.com/cilium/ebpf/asm"
|
|
||||||
"github.com/pkg/errors"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
|
||||||
//
|
|
||||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
|
||||||
//
|
|
||||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
|
||||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
|
||||||
nilCloser := func() error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
|
||||||
// This limit is not inherited into the container.
|
|
||||||
memlockLimit := &unix.Rlimit{
|
|
||||||
Cur: unix.RLIM_INFINITY,
|
|
||||||
Max: unix.RLIM_INFINITY,
|
|
||||||
}
|
|
||||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
|
||||||
spec := &ebpf.ProgramSpec{
|
|
||||||
Type: ebpf.CGroupDevice,
|
|
||||||
Instructions: insts,
|
|
||||||
License: license,
|
|
||||||
}
|
|
||||||
prog, err := ebpf.NewProgram(spec)
|
|
||||||
if err != nil {
|
|
||||||
return nilCloser, err
|
|
||||||
}
|
|
||||||
if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
|
||||||
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
|
||||||
}
|
|
||||||
closer := func() error {
|
|
||||||
if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
|
||||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return closer, nil
|
|
||||||
}
|
|
|
@ -0,0 +1,253 @@
|
||||||
|
package ebpf
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"runtime"
|
||||||
|
"sync"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/cilium/ebpf"
|
||||||
|
"github.com/cilium/ebpf/asm"
|
||||||
|
"github.com/cilium/ebpf/link"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// nilCloser is a no-op closer: it does nothing and always reports success.
func nilCloser() error { return nil }
|
||||||
|
|
||||||
|
func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
|
||||||
|
type bpfAttrQuery struct {
|
||||||
|
TargetFd uint32
|
||||||
|
AttachType uint32
|
||||||
|
QueryType uint32
|
||||||
|
AttachFlags uint32
|
||||||
|
ProgIds uint64 // __aligned_u64
|
||||||
|
ProgCnt uint32
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently you can only have 64 eBPF programs attached to a cgroup.
|
||||||
|
size := 64
|
||||||
|
retries := 0
|
||||||
|
for retries < 10 {
|
||||||
|
progIds := make([]uint32, size)
|
||||||
|
query := bpfAttrQuery{
|
||||||
|
TargetFd: uint32(dirFd),
|
||||||
|
AttachType: uint32(unix.BPF_CGROUP_DEVICE),
|
||||||
|
ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))),
|
||||||
|
ProgCnt: uint32(len(progIds)),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the list of program ids.
|
||||||
|
_, _, errno := unix.Syscall(unix.SYS_BPF,
|
||||||
|
uintptr(unix.BPF_PROG_QUERY),
|
||||||
|
uintptr(unsafe.Pointer(&query)),
|
||||||
|
unsafe.Sizeof(query))
|
||||||
|
size = int(query.ProgCnt)
|
||||||
|
runtime.KeepAlive(query)
|
||||||
|
if errno != 0 {
|
||||||
|
// On ENOSPC we get the correct number of programs.
|
||||||
|
if errno == unix.ENOSPC {
|
||||||
|
retries++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert the ids to program handles.
|
||||||
|
progIds = progIds[:size]
|
||||||
|
programs := make([]*ebpf.Program, 0, len(progIds))
|
||||||
|
for _, progId := range progIds {
|
||||||
|
program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
|
||||||
|
if err != nil {
|
||||||
|
// We skip over programs that give us -EACCES or -EPERM. This
|
||||||
|
// is necessary because there may be BPF programs that have
|
||||||
|
// been attached (such as with --systemd-cgroup) which have an
|
||||||
|
// LSM label that blocks us from interacting with the program.
|
||||||
|
//
|
||||||
|
// Because additional BPF_CGROUP_DEVICE programs only can add
|
||||||
|
// restrictions, there's no real issue with just ignoring these
|
||||||
|
// programs (and stops runc from breaking on distributions with
|
||||||
|
// very strict SELinux policies).
|
||||||
|
if errors.Is(err, os.ErrPermission) {
|
||||||
|
logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("cannot fetch program from id: %w", err)
|
||||||
|
}
|
||||||
|
programs = append(programs, program)
|
||||||
|
}
|
||||||
|
runtime.KeepAlive(progIds)
|
||||||
|
return programs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
haveBpfProgReplaceBool bool
|
||||||
|
haveBpfProgReplaceOnce sync.Once
|
||||||
|
)
|
||||||
|
|
||||||
|
// Loosely based on the BPF_F_REPLACE support check in
|
||||||
|
// <https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go>.
|
||||||
|
//
|
||||||
|
// TODO: move this logic to cilium/ebpf
|
||||||
|
func haveBpfProgReplace() bool {
|
||||||
|
haveBpfProgReplaceOnce.Do(func() {
|
||||||
|
prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
|
||||||
|
Type: ebpf.CGroupDevice,
|
||||||
|
License: "MIT",
|
||||||
|
Instructions: asm.Instructions{
|
||||||
|
asm.Mov.Imm(asm.R0, 0),
|
||||||
|
asm.Return(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer prog.Close()
|
||||||
|
|
||||||
|
devnull, err := os.Open("/dev/null")
|
||||||
|
if err != nil {
|
||||||
|
logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer devnull.Close()
|
||||||
|
|
||||||
|
// We know that we have BPF_PROG_ATTACH since we can load
|
||||||
|
// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
|
||||||
|
// we know that the feature isn't present.
|
||||||
|
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||||
|
// We rely on this fd being checked after attachFlags.
|
||||||
|
Target: int(devnull.Fd()),
|
||||||
|
// Attempt to "replace" bad fds with this program.
|
||||||
|
Program: prog,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
|
||||||
|
})
|
||||||
|
if errors.Is(err, unix.EINVAL) {
|
||||||
|
// not supported
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// attach_flags test succeeded.
|
||||||
|
if !errors.Is(err, unix.EBADF) {
|
||||||
|
logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
|
||||||
|
}
|
||||||
|
haveBpfProgReplaceBool = true
|
||||||
|
})
|
||||||
|
return haveBpfProgReplaceBool
|
||||||
|
}
|
||||||
|
|
||||||
|
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||||
|
//
|
||||||
|
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||||
|
//
|
||||||
|
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||||
|
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
|
||||||
|
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||||
|
// This limit is not inherited into the container.
|
||||||
|
memlockLimit := &unix.Rlimit{
|
||||||
|
Cur: unix.RLIM_INFINITY,
|
||||||
|
Max: unix.RLIM_INFINITY,
|
||||||
|
}
|
||||||
|
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||||
|
|
||||||
|
// Get the list of existing programs.
|
||||||
|
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
|
||||||
|
if err != nil {
|
||||||
|
return nilCloser, err
|
||||||
|
}
|
||||||
|
useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1
|
||||||
|
|
||||||
|
// Generate new program.
|
||||||
|
spec := &ebpf.ProgramSpec{
|
||||||
|
Type: ebpf.CGroupDevice,
|
||||||
|
Instructions: insts,
|
||||||
|
License: license,
|
||||||
|
}
|
||||||
|
prog, err := ebpf.NewProgram(spec)
|
||||||
|
if err != nil {
|
||||||
|
return nilCloser, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there is only one old program, we can just replace it directly.
|
||||||
|
var (
|
||||||
|
replaceProg *ebpf.Program
|
||||||
|
attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
|
||||||
|
)
|
||||||
|
if useReplaceProg {
|
||||||
|
replaceProg = oldProgs[0]
|
||||||
|
attachFlags |= unix.BPF_F_REPLACE
|
||||||
|
}
|
||||||
|
err = link.RawAttachProgram(link.RawAttachProgramOptions{
|
||||||
|
Target: dirFd,
|
||||||
|
Program: prog,
|
||||||
|
Replace: replaceProg,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
Flags: attachFlags,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
|
||||||
|
}
|
||||||
|
closer := func() error {
|
||||||
|
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||||
|
Target: dirFd,
|
||||||
|
Program: prog,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
|
||||||
|
}
|
||||||
|
// TODO: Should we attach the old filters back in this case? Otherwise
|
||||||
|
// we fail-open on a security feature, which is a bit scary.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if !useReplaceProg {
|
||||||
|
logLevel := logrus.DebugLevel
|
||||||
|
// If there was more than one old program, give a warning (since this
|
||||||
|
// really shouldn't happen with runc-managed cgroups) and then detach
|
||||||
|
// all the old programs.
|
||||||
|
if len(oldProgs) > 1 {
|
||||||
|
// NOTE: Ideally this should be a warning but it turns out that
|
||||||
|
// systemd-managed cgroups trigger this warning (apparently
|
||||||
|
// systemd doesn't delete old non-systemd programs when
|
||||||
|
// setting properties).
|
||||||
|
logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
|
||||||
|
logLevel = logrus.InfoLevel
|
||||||
|
}
|
||||||
|
for idx, oldProg := range oldProgs {
|
||||||
|
// Output some extra debug info.
|
||||||
|
if info, err := oldProg.Info(); err == nil {
|
||||||
|
fields := logrus.Fields{
|
||||||
|
"type": info.Type.String(),
|
||||||
|
"tag": info.Tag,
|
||||||
|
"name": info.Name,
|
||||||
|
}
|
||||||
|
if id, ok := info.ID(); ok {
|
||||||
|
fields["id"] = id
|
||||||
|
}
|
||||||
|
if runCount, ok := info.RunCount(); ok {
|
||||||
|
fields["run_count"] = runCount
|
||||||
|
}
|
||||||
|
if runtime, ok := info.Runtime(); ok {
|
||||||
|
fields["runtime"] = runtime.String()
|
||||||
|
}
|
||||||
|
logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
|
||||||
|
}
|
||||||
|
err = link.RawDetachProgram(link.RawDetachProgramOptions{
|
||||||
|
Target: dirFd,
|
||||||
|
Program: oldProg,
|
||||||
|
Attach: ebpf.AttachCGroupDevice,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return closer, nil
|
||||||
|
}
|
|
@ -0,0 +1,190 @@
|
||||||
|
package cgroups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenFile opens a cgroup file in a given dir with given flags.
|
||||||
|
// It is supposed to be used for cgroup files only, and returns
|
||||||
|
// an error if the file is not a cgroup file.
|
||||||
|
//
|
||||||
|
// Arguments dir and file are joined together to form an absolute path
|
||||||
|
// to a file being opened.
|
||||||
|
func OpenFile(dir, file string, flags int) (*os.File, error) {
|
||||||
|
if dir == "" {
|
||||||
|
return nil, fmt.Errorf("no directory specified for %s", file)
|
||||||
|
}
|
||||||
|
return openFile(dir, file, flags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ReadFile reads data from a cgroup file in dir.
|
||||||
|
// It is supposed to be used for cgroup files only.
|
||||||
|
func ReadFile(dir, file string) (string, error) {
|
||||||
|
fd, err := OpenFile(dir, file, unix.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
_, err = buf.ReadFrom(fd)
|
||||||
|
return buf.String(), err
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteFile writes data to a cgroup file in dir.
|
||||||
|
// It is supposed to be used for cgroup files only.
|
||||||
|
func WriteFile(dir, file, data string) error {
|
||||||
|
fd, err := OpenFile(dir, file, unix.O_WRONLY)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
if err := retryingWriteFile(fd, data); err != nil {
|
||||||
|
// Having data in the error message helps in debugging.
|
||||||
|
return fmt.Errorf("failed to write %q: %w", data, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func retryingWriteFile(fd *os.File, data string) error {
|
||||||
|
for {
|
||||||
|
_, err := fd.Write([]byte(data))
|
||||||
|
if errors.Is(err, unix.EINTR) {
|
||||||
|
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
	// cgroupfsDir is the directory that prepareOpenat2 opens and that
	// openFile resolves paths against.
	cgroupfsDir = "/sys/fs/cgroup"
	// cgroupfsPrefix is cgroupfsDir with a trailing slash; openFile strips
	// it from absolute paths to obtain a path relative to cgroupFd.
	cgroupfsPrefix = cgroupfsDir + "/"
)

var (
	// TestMode is set to true by unit tests that need "fake" cgroupfs.
	TestMode bool

	// cgroupFd is the fd opened to cgroupfsDir by prepareOpenat2
	// (-1 until it succeeds).
	cgroupFd int = -1
	// prepOnce guards the one-time setup done by prepareOpenat2.
	prepOnce sync.Once
	// prepErr is the result of that setup; non-nil means openat2 cannot
	// be used and callers fall back.
	prepErr error
	// resolveFlags are the openat2 RESOLVE_* flags chosen by prepareOpenat2.
	resolveFlags uint64
)
|
||||||
|
|
||||||
|
func prepareOpenat2() error {
|
||||||
|
prepOnce.Do(func() {
|
||||||
|
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
|
||||||
|
Flags: unix.O_DIRECTORY | unix.O_PATH,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
|
||||||
|
if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
|
||||||
|
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||||
|
} else {
|
||||||
|
logrus.Debug("openat2 not available, falling back to securejoin")
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var st unix.Statfs_t
|
||||||
|
if err = unix.Fstatfs(fd, &st); err != nil {
|
||||||
|
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
|
||||||
|
logrus.Warnf("falling back to securejoin: %s", prepErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
cgroupFd = fd
|
||||||
|
|
||||||
|
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
|
||||||
|
if st.Type == unix.CGROUP2_SUPER_MAGIC {
|
||||||
|
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
|
||||||
|
resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
return prepErr
|
||||||
|
}
|
||||||
|
|
||||||
|
func openFile(dir, file string, flags int) (*os.File, error) {
|
||||||
|
mode := os.FileMode(0)
|
||||||
|
if TestMode && flags&os.O_WRONLY != 0 {
|
||||||
|
// "emulate" cgroup fs for unit tests
|
||||||
|
flags |= os.O_TRUNC | os.O_CREATE
|
||||||
|
mode = 0o600
|
||||||
|
}
|
||||||
|
path := path.Join(dir, file)
|
||||||
|
if prepareOpenat2() != nil {
|
||||||
|
return openFallback(path, flags, mode)
|
||||||
|
}
|
||||||
|
relPath := strings.TrimPrefix(path, cgroupfsPrefix)
|
||||||
|
if len(relPath) == len(path) { // non-standard path, old system?
|
||||||
|
return openFallback(path, flags, mode)
|
||||||
|
}
|
||||||
|
|
||||||
|
fd, err := unix.Openat2(cgroupFd, relPath,
|
||||||
|
&unix.OpenHow{
|
||||||
|
Resolve: resolveFlags,
|
||||||
|
Flags: uint64(flags) | unix.O_CLOEXEC,
|
||||||
|
Mode: uint64(mode),
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
err = &os.PathError{Op: "openat2", Path: path, Err: err}
|
||||||
|
// Check if cgroupFd is still opened to cgroupfsDir
|
||||||
|
// (happens when this package is incorrectly used
|
||||||
|
// across the chroot/pivot_root/mntns boundary, or
|
||||||
|
// when /sys/fs/cgroup is remounted).
|
||||||
|
//
|
||||||
|
// TODO: if such usage will ever be common, amend this
|
||||||
|
// to reopen cgroupFd and retry openat2.
|
||||||
|
fdStr := strconv.Itoa(cgroupFd)
|
||||||
|
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
|
||||||
|
if fdDest != cgroupfsDir {
|
||||||
|
// Wrap the error so it is clear that cgroupFd
|
||||||
|
// is opened to an unexpected/wrong directory.
|
||||||
|
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
|
||||||
|
fdStr, fdDest, cgroupfsDir, err)
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return os.NewFile(uintptr(fd), path), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var errNotCgroupfs = errors.New("not a cgroup file")
|
||||||
|
|
||||||
|
// Can be changed by unit tests.
|
||||||
|
var openFallback = openAndCheck
|
||||||
|
|
||||||
|
// openAndCheck is used when openat2(2) is not available. It checks the opened
|
||||||
|
// file is on cgroupfs, returning an error otherwise.
|
||||||
|
func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
|
||||||
|
fd, err := os.OpenFile(path, flags, mode)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if TestMode {
|
||||||
|
return fd, nil
|
||||||
|
}
|
||||||
|
// Check this is a cgroupfs file.
|
||||||
|
var st unix.Statfs_t
|
||||||
|
if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil {
|
||||||
|
_ = fd.Close()
|
||||||
|
return nil, &os.PathError{Op: "statfs", Path: path, Err: err}
|
||||||
|
}
|
||||||
|
if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC {
|
||||||
|
_ = fd.Close()
|
||||||
|
return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
|
||||||
|
}
|
||||||
|
|
||||||
|
return fd, nil
|
||||||
|
}
|
|
@ -0,0 +1,73 @@
|
||||||
|
package cgroups
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestWriteCgroupFileHandlesInterrupt(t *testing.T) {
|
||||||
|
const (
|
||||||
|
memoryCgroupMount = "/sys/fs/cgroup/memory"
|
||||||
|
memoryLimit = "memory.limit_in_bytes"
|
||||||
|
)
|
||||||
|
if _, err := os.Stat(memoryCgroupMount); err != nil {
|
||||||
|
// most probably cgroupv2
|
||||||
|
t.Skip(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cgroupName := fmt.Sprintf("test-eint-%d", time.Now().Nanosecond())
|
||||||
|
cgroupPath := filepath.Join(memoryCgroupMount, cgroupName)
|
||||||
|
if err := os.MkdirAll(cgroupPath, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer os.RemoveAll(cgroupPath)
|
||||||
|
|
||||||
|
if _, err := os.Stat(filepath.Join(cgroupPath, memoryLimit)); err != nil {
|
||||||
|
// either cgroupv2, or memory controller is not available
|
||||||
|
t.Skip(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < 100000; i++ {
|
||||||
|
limit := 1024*1024 + i
|
||||||
|
if err := WriteFile(cgroupPath, memoryLimit, strconv.Itoa(limit)); err != nil {
|
||||||
|
t.Fatalf("Failed to write %d on attempt %d: %+v", limit, i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOpenat2(t *testing.T) {
|
||||||
|
if !IsCgroup2UnifiedMode() {
|
||||||
|
// The reason is many test cases below test opening files from
|
||||||
|
// the top-level directory, where cgroup v1 has no files.
|
||||||
|
t.Skip("test requires cgroup v2")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure we test openat2, not its fallback.
|
||||||
|
openFallback = func(_ string, _ int, _ os.FileMode) (*os.File, error) {
|
||||||
|
return nil, errors.New("fallback")
|
||||||
|
}
|
||||||
|
defer func() { openFallback = openAndCheck }()
|
||||||
|
|
||||||
|
for _, tc := range []struct{ dir, file string }{
|
||||||
|
{"/sys/fs/cgroup", "cgroup.controllers"},
|
||||||
|
{"/sys/fs/cgroup", "/cgroup.controllers"},
|
||||||
|
{"/sys/fs/cgroup/", "cgroup.controllers"},
|
||||||
|
{"/sys/fs/cgroup/", "/cgroup.controllers"},
|
||||||
|
{"/sys/fs/cgroup/user.slice", "cgroup.controllers"},
|
||||||
|
{"/sys/fs/cgroup/user.slice/", "/cgroup.controllers"},
|
||||||
|
{"/", "/sys/fs/cgroup/cgroup.controllers"},
|
||||||
|
{"/", "sys/fs/cgroup/cgroup.controllers"},
|
||||||
|
{"/sys/fs/cgroup/cgroup.controllers", ""},
|
||||||
|
} {
|
||||||
|
fd, err := OpenFile(tc.dir, tc.file, os.O_RDONLY)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("case %+v: %v", tc, err)
|
||||||
|
}
|
||||||
|
fd.Close()
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,411 +0,0 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"sync"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
|
||||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
|
||||||
"github.com/pkg/errors"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
subsystemsLegacy = subsystemSet{
|
|
||||||
&CpusetGroup{},
|
|
||||||
&DevicesGroup{},
|
|
||||||
&MemoryGroup{},
|
|
||||||
&CpuGroup{},
|
|
||||||
&CpuacctGroup{},
|
|
||||||
&PidsGroup{},
|
|
||||||
&BlkioGroup{},
|
|
||||||
&HugetlbGroup{},
|
|
||||||
&NetClsGroup{},
|
|
||||||
&NetPrioGroup{},
|
|
||||||
&PerfEventGroup{},
|
|
||||||
&FreezerGroup{},
|
|
||||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
|
||||||
}
|
|
||||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
|
||||||
)
|
|
||||||
|
|
||||||
var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
|
|
||||||
|
|
||||||
type subsystemSet []subsystem
|
|
||||||
|
|
||||||
func (s subsystemSet) Get(name string) (subsystem, error) {
|
|
||||||
for _, ss := range s {
|
|
||||||
if ss.Name() == name {
|
|
||||||
return ss, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil, errSubsystemDoesNotExist
|
|
||||||
}
|
|
||||||
|
|
||||||
type subsystem interface {
|
|
||||||
// Name returns the name of the subsystem.
|
|
||||||
Name() string
|
|
||||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
|
||||||
GetStats(path string, stats *cgroups.Stats) error
|
|
||||||
// Removes the cgroup represented by 'cgroupData'.
|
|
||||||
Remove(*cgroupData) error
|
|
||||||
// Creates and joins the cgroup represented by 'cgroupData'.
|
|
||||||
Apply(*cgroupData) error
|
|
||||||
// Set the cgroup represented by cgroup.
|
|
||||||
Set(path string, cgroup *configs.Cgroup) error
|
|
||||||
}
|
|
||||||
|
|
||||||
type Manager struct {
|
|
||||||
mu sync.Mutex
|
|
||||||
Cgroups *configs.Cgroup
|
|
||||||
Rootless bool // ignore permission-related errors
|
|
||||||
Paths map[string]string
|
|
||||||
}
|
|
||||||
|
|
||||||
// The absolute path to the root of the cgroup hierarchies.
|
|
||||||
var cgroupRootLock sync.Mutex
|
|
||||||
var cgroupRoot string
|
|
||||||
|
|
||||||
// Gets the cgroupRoot.
|
|
||||||
func getCgroupRoot() (string, error) {
|
|
||||||
cgroupRootLock.Lock()
|
|
||||||
defer cgroupRootLock.Unlock()
|
|
||||||
|
|
||||||
if cgroupRoot != "" {
|
|
||||||
return cgroupRoot, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
root, err := cgroups.FindCgroupMountpointDir()
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, err := os.Stat(root); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
cgroupRoot = root
|
|
||||||
return cgroupRoot, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type cgroupData struct {
|
|
||||||
root string
|
|
||||||
innerPath string
|
|
||||||
config *configs.Cgroup
|
|
||||||
pid int
|
|
||||||
}
|
|
||||||
|
|
||||||
// isIgnorableError returns whether err is a permission error (in the loose
|
|
||||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
|
||||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
|
||||||
// the normal EPERM.
|
|
||||||
func isIgnorableError(rootless bool, err error) bool {
|
|
||||||
// We do not ignore errors if we are root.
|
|
||||||
if !rootless {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
// Is it an ordinary EPERM?
|
|
||||||
if os.IsPermission(errors.Cause(err)) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to handle other errnos.
|
|
||||||
var errno error
|
|
||||||
switch err := errors.Cause(err).(type) {
|
|
||||||
case *os.PathError:
|
|
||||||
errno = err.Err
|
|
||||||
case *os.LinkError:
|
|
||||||
errno = err.Err
|
|
||||||
case *os.SyscallError:
|
|
||||||
errno = err.Err
|
|
||||||
}
|
|
||||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) getSubsystems() subsystemSet {
|
|
||||||
return subsystemsLegacy
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) Apply(pid int) (err error) {
|
|
||||||
if m.Cgroups == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
m.mu.Lock()
|
|
||||||
defer m.mu.Unlock()
|
|
||||||
|
|
||||||
var c = m.Cgroups
|
|
||||||
|
|
||||||
d, err := getCgroupData(m.Cgroups, pid)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
m.Paths = make(map[string]string)
|
|
||||||
if c.Paths != nil {
|
|
||||||
for name, path := range c.Paths {
|
|
||||||
_, err := d.path(name)
|
|
||||||
if err != nil {
|
|
||||||
if cgroups.IsNotFound(err) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Paths[name] = path
|
|
||||||
}
|
|
||||||
return cgroups.EnterPid(m.Paths, pid)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, sys := range m.getSubsystems() {
|
|
||||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
|
||||||
// create and join phase so that the cgroup hierarchy for a container can be
|
|
||||||
// created then join consists of writing the process pids to cgroup.procs
|
|
||||||
p, err := d.path(sys.Name())
|
|
||||||
if err != nil {
|
|
||||||
// The non-presence of the devices subsystem is
|
|
||||||
// considered fatal for security reasons.
|
|
||||||
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Paths[sys.Name()] = p
|
|
||||||
|
|
||||||
if err := sys.Apply(d); err != nil {
|
|
||||||
// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
|
|
||||||
// been set, we don't bail on error in case of permission problems.
|
|
||||||
// Cases where limits have been set (and we couldn't create our own
|
|
||||||
// cgroup) are handled by Set.
|
|
||||||
if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" {
|
|
||||||
delete(m.Paths, sys.Name())
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) Destroy() error {
|
|
||||||
if m.Cgroups == nil || m.Cgroups.Paths != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
m.mu.Lock()
|
|
||||||
defer m.mu.Unlock()
|
|
||||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
m.Paths = make(map[string]string)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) GetPaths() map[string]string {
|
|
||||||
m.mu.Lock()
|
|
||||||
paths := m.Paths
|
|
||||||
m.mu.Unlock()
|
|
||||||
return paths
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
|
||||||
return "", errors.New("unified path is only supported when running in unified mode")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
|
||||||
m.mu.Lock()
|
|
||||||
defer m.mu.Unlock()
|
|
||||||
stats := cgroups.NewStats()
|
|
||||||
for name, path := range m.Paths {
|
|
||||||
sys, err := m.getSubsystems().Get(name)
|
|
||||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if err := sys.GetStats(path, stats); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return stats, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) Set(container *configs.Config) error {
|
|
||||||
if container.Cgroups == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// If Paths are set, then we are just joining cgroups paths
|
|
||||||
// and there is no need to set any values.
|
|
||||||
if m.Cgroups != nil && m.Cgroups.Paths != nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
paths := m.GetPaths()
|
|
||||||
for _, sys := range m.getSubsystems() {
|
|
||||||
path := paths[sys.Name()]
|
|
||||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
|
||||||
if m.Rootless && sys.Name() == "devices" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
|
||||||
// However, errors from other subsystems are not ignored.
|
|
||||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
|
||||||
if path == "" {
|
|
||||||
// We never created a path for this cgroup, so we cannot set
|
|
||||||
// limits for it (though we have already tried at this point).
|
|
||||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if m.Paths["cpu"] != "" {
|
|
||||||
if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Freeze toggles the container's freezer cgroup depending on the state
|
|
||||||
// provided
|
|
||||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
|
||||||
if m.Cgroups == nil {
|
|
||||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
|
||||||
}
|
|
||||||
|
|
||||||
paths := m.GetPaths()
|
|
||||||
dir := paths["freezer"]
|
|
||||||
prevState := m.Cgroups.Resources.Freezer
|
|
||||||
m.Cgroups.Resources.Freezer = state
|
|
||||||
freezer, err := m.getSubsystems().Get("freezer")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
err = freezer.Set(dir, m.Cgroups)
|
|
||||||
if err != nil {
|
|
||||||
m.Cgroups.Resources.Freezer = prevState
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) GetPids() ([]int, error) {
|
|
||||||
paths := m.GetPaths()
|
|
||||||
return cgroups.GetPids(paths["devices"])
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Manager) GetAllPids() ([]int, error) {
|
|
||||||
paths := m.GetPaths()
|
|
||||||
return cgroups.GetAllPids(paths["devices"])
|
|
||||||
}
|
|
||||||
|
|
||||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
|
||||||
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
|
||||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
|
||||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
|
||||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
|
||||||
|
|
||||||
innerPath := cgPath
|
|
||||||
if innerPath == "" {
|
|
||||||
innerPath = filepath.Join(cgParent, cgName)
|
|
||||||
}
|
|
||||||
|
|
||||||
return &cgroupData{
|
|
||||||
root: root,
|
|
||||||
innerPath: innerPath,
|
|
||||||
config: c,
|
|
||||||
pid: pid,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
|
||||||
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
|
|
||||||
// If we didn't mount the subsystem, there is no point we make the path.
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
|
||||||
if filepath.IsAbs(raw.innerPath) {
|
|
||||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
|
||||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
|
||||||
// process could in container and shared pid namespace with host, and
|
|
||||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
|
||||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
return filepath.Join(parentPath, raw.innerPath), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (raw *cgroupData) join(subsystem string) (string, error) {
|
|
||||||
path, err := raw.path(subsystem)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
if err := os.MkdirAll(path, 0755); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
return path, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// removePath deletes p and everything beneath it.
//
// The (p, err) parameter pair lets the result of a path lookup be passed
// straight in: a non-nil err is returned unchanged and nothing is removed.
// An empty p is a no-op that returns nil.
func removePath(p string, err error) error {
	switch {
	case err != nil:
		return err
	case p == "":
		return nil
	default:
		return os.RemoveAll(p)
	}
}
|
|
||||||
|
|
||||||
// CheckCpushares compares the requested cpu shares c with the value found in
// the "cpu.shares" file inside path.
//
// A request of 0 is accepted without touching the file. Otherwise the file is
// opened and one decimal integer is scanned from it; an open error, or a scan
// error other than io.EOF, is returned. If c is larger than the value read,
// an error naming that value as the maximum is returned; if c is smaller, an
// error naming it as the minimum is returned. Equal values yield nil.
func CheckCpushares(path string, c uint64) error {
	if c == 0 {
		return nil
	}

	sharesFile, err := os.Open(filepath.Join(path, "cpu.shares"))
	if err != nil {
		return err
	}
	defer sharesFile.Close()

	var effective uint64
	if _, scanErr := fmt.Fscanf(sharesFile, "%d", &effective); scanErr != nil && scanErr != io.EOF {
		return scanErr
	}

	switch {
	case c > effective:
		return fmt.Errorf("The maximum allowed cpu-shares is %d", effective)
	case c < effective:
		return fmt.Errorf("The minimum allowed cpu-shares is %d", effective)
	}
	return nil
}
|
|
||||||
|
|
||||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
|
||||||
return m.Cgroups, nil
|
|
||||||
}
|
|
|
@ -1,297 +0,0 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
|
||||||
|
|
||||||
import (
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestInvalidCgroupPath(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Path: "../../../../../../../../../../some/path",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestInvalidAbsoluteCgroupPath(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Path: "/../../../../../../../../../../some/path",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
|
||||||
func TestInvalidCgroupParent(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Parent: "../../../../../../../../../../some/path",
|
|
||||||
Name: "name",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
|
||||||
func TestInvalidAbsoluteCgroupParent(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Parent: "/../../../../../../../../../../some/path",
|
|
||||||
Name: "name",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
|
||||||
func TestInvalidCgroupName(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Parent: "parent",
|
|
||||||
Name: "../../../../../../../../../../some/path",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
|
||||||
func TestInvalidAbsoluteCgroupName(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Parent: "parent",
|
|
||||||
Name: "/../../../../../../../../../../some/path",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
|
||||||
func TestInvalidCgroupNameAndParent(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Parent: "../../../../../../../../../../some/path",
|
|
||||||
Name: "../../../../../../../../../../some/path",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
|
||||||
func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) {
|
|
||||||
if cgroups.IsCgroup2UnifiedMode() {
|
|
||||||
t.Skip("cgroup v1 is not supported")
|
|
||||||
}
|
|
||||||
root, err := getCgroupRoot()
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
config := &configs.Cgroup{
|
|
||||||
Parent: "/../../../../../../../../../../some/path",
|
|
||||||
Name: "/../../../../../../../../../../some/path",
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := getCgroupData(config, 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup data: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
|
||||||
if strings.HasPrefix(data.innerPath, "..") {
|
|
||||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check, using an actual cgroup.
|
|
||||||
deviceRoot := filepath.Join(root, "devices")
|
|
||||||
devicePath, err := data.path("devices")
|
|
||||||
if err != nil {
|
|
||||||
t.Errorf("couldn't get cgroup path: %v", err)
|
|
||||||
}
|
|
||||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
|
||||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,72 +1,71 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"fmt"
|
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type BlkioGroup struct {
|
type BlkioGroup struct {
|
||||||
|
weightFilename string
|
||||||
|
weightDeviceFilename string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BlkioGroup) Name() string {
|
func (s *BlkioGroup) Name() string {
|
||||||
return "blkio"
|
return "blkio"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BlkioGroup) Apply(d *cgroupData) error {
|
func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
_, err := d.join("blkio")
|
return apply(path, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
|
||||||
if cgroup.Resources.BlkioWeight != 0 {
|
s.detectWeightFilenames(path)
|
||||||
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
if r.BlkioWeight != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
if r.BlkioLeafWeight != 0 {
|
||||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
for _, wd := range r.BlkioWeightDevice {
|
||||||
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
if wd.Weight != 0 {
|
||||||
return err
|
if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
if wd.LeafWeight != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, td := range r.BlkioThrottleReadBpsDevice {
|
||||||
|
if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
for _, td := range r.BlkioThrottleWriteBpsDevice {
|
||||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
for _, td := range r.BlkioThrottleReadIOPSDevice {
|
||||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
for _, td := range r.BlkioThrottleWriteIOPSDevice {
|
||||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
|
||||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -74,10 +73,6 @@ func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BlkioGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("blkio"))
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
examples:
|
examples:
|
||||||
|
|
||||||
|
@ -113,9 +108,9 @@ func splitBlkioStatLine(r rune) bool {
|
||||||
return r == ' ' || r == ':'
|
return r == ' ' || r == ':'
|
||||||
}
|
}
|
||||||
|
|
||||||
func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
|
||||||
var blkioStats []cgroups.BlkioStatEntry
|
var blkioStats []cgroups.BlkioStatEntry
|
||||||
f, err := os.Open(path)
|
f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return blkioStats, nil
|
return blkioStats, nil
|
||||||
|
@ -133,19 +128,19 @@ func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
||||||
// skip total line
|
// skip total line
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
|
return nil, malformedLine(dir, file, sc.Text())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
}
|
}
|
||||||
major := v
|
major := v
|
||||||
|
|
||||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
}
|
}
|
||||||
minor := v
|
minor := v
|
||||||
|
|
||||||
|
@ -157,82 +152,160 @@ func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
||||||
}
|
}
|
||||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
}
|
}
|
||||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||||
}
|
}
|
||||||
|
if err := sc.Err(); err != nil {
|
||||||
|
return nil, &parseError{Path: dir, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
return blkioStats, nil
|
return blkioStats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
// Try to read CFQ stats available on all CFQ enabled kernels first
|
type blkioStatInfo struct {
|
||||||
if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil {
|
filename string
|
||||||
return getCFQStats(path, stats)
|
blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
|
||||||
|
}
|
||||||
|
bfqDebugStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.sectors_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_service_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_wait_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_merged_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_queued_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
bfqStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.bfq.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
cfqStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.sectors_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_service_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_wait_time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_merged_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_queued_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.time_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
throttleRecursiveStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_serviced_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_service_bytes_recursive",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
baseStats := []blkioStatInfo{
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_serviced",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
filename: "blkio.throttle.io_service_bytes",
|
||||||
|
blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
orderedStats := [][]blkioStatInfo{
|
||||||
|
bfqDebugStats,
|
||||||
|
bfqStats,
|
||||||
|
cfqStats,
|
||||||
|
throttleRecursiveStats,
|
||||||
|
baseStats,
|
||||||
}
|
}
|
||||||
return getStats(path, stats) // Use generic stats as fallback
|
|
||||||
}
|
|
||||||
|
|
||||||
func getCFQStats(path string, stats *cgroups.Stats) error {
|
|
||||||
var blkioStats []cgroups.BlkioStatEntry
|
var blkioStats []cgroups.BlkioStatEntry
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
|
for _, statGroup := range orderedStats {
|
||||||
return err
|
for i, statInfo := range statGroup {
|
||||||
|
if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
|
||||||
|
// if error occurs on first file, move to next group
|
||||||
|
if i == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
*statInfo.blkioStatEntriesPtr = blkioStats
|
||||||
|
// finish if all stats are gathered
|
||||||
|
if i == len(statGroup)-1 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
stats.BlkioStats.SectorsRecursive = blkioStats
|
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
stats.BlkioStats.IoQueuedRecursive = blkioStats
|
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
stats.BlkioStats.IoServiceTimeRecursive = blkioStats
|
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
stats.BlkioStats.IoWaitTimeRecursive = blkioStats
|
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
stats.BlkioStats.IoMergedRecursive = blkioStats
|
|
||||||
|
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
stats.BlkioStats.IoTimeRecursive = blkioStats
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getStats(path string, stats *cgroups.Stats) error {
|
func (s *BlkioGroup) detectWeightFilenames(path string) {
|
||||||
var blkioStats []cgroups.BlkioStatEntry
|
if s.weightFilename != "" {
|
||||||
var err error
|
// Already detected.
|
||||||
|
return
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
|
||||||
|
s.weightFilename = "blkio.weight"
|
||||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil {
|
s.weightDeviceFilename = "blkio.weight_device"
|
||||||
return err
|
} else {
|
||||||
|
s.weightFilename = "blkio.bfq.weight"
|
||||||
|
s.weightDeviceFilename = "blkio.bfq.weight_device"
|
||||||
}
|
}
|
||||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,94 +1,105 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CpuGroup struct {
|
type CpuGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CpuGroup) Name() string {
|
func (s *CpuGroup) Name() string {
|
||||||
return "cpu"
|
return "cpu"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpuGroup) Apply(d *cgroupData) error {
|
func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
// on a container basis
|
|
||||||
path, err := d.path("cpu")
|
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return s.ApplyDir(path, d.config, d.pid)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
|
|
||||||
// This might happen if we have no cpu cgroup mounted.
|
|
||||||
// Just do nothing and don't fail.
|
|
||||||
if path == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if err := os.MkdirAll(path, 0755); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// We should set the real-Time group scheduling settings before moving
|
// We should set the real-Time group scheduling settings before moving
|
||||||
// in the process because if the process is already in SCHED_RR mode
|
// in the process because if the process is already in SCHED_RR mode
|
||||||
// and no RT bandwidth is set, adding it will fail.
|
// and no RT bandwidth is set, adding it will fail.
|
||||||
if err := s.SetRtSched(path, cgroup); err != nil {
|
if err := s.SetRtSched(path, r); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// because we are not using d.join we need to place the pid into the procs file
|
// Since we are not using apply(), we need to place the pid
|
||||||
// unlike the other subsystems
|
// into the procs file.
|
||||||
return cgroups.WriteCgroupProc(path, pid)
|
return cgroups.WriteCgroupProc(path, pid)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
|
||||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
if r.CpuRtPeriod != 0 {
|
||||||
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
if r.CpuRtRuntime != 0 {
|
||||||
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *CpuGroup) Set(path string, r *configs.Resources) error {
|
||||||
if cgroup.Resources.CpuShares != 0 {
|
if r.CpuShares != 0 {
|
||||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
shares := r.CpuShares
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
// read it back
|
||||||
if cgroup.Resources.CpuPeriod != 0 {
|
sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
// ... and check
|
||||||
if cgroup.Resources.CpuQuota != 0 {
|
if shares > sharesRead {
|
||||||
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
|
||||||
return err
|
} else if shares < sharesRead {
|
||||||
|
return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return s.SetRtSched(path, cgroup)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CpuGroup) Remove(d *cgroupData) error {
|
var period string
|
||||||
return removePath(d.path("cpu"))
|
if r.CpuPeriod != 0 {
|
||||||
|
period = strconv.FormatUint(r.CpuPeriod, 10)
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||||
|
// Sometimes when the period to be set is smaller
|
||||||
|
// than the current one, it is rejected by the kernel
|
||||||
|
// (EINVAL) as old_quota/new_period exceeds the parent
|
||||||
|
// cgroup quota limit. If this happens and the quota is
|
||||||
|
// going to be set, ignore the error for now and retry
|
||||||
|
// after setting the quota.
|
||||||
|
if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
period = ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if r.CpuQuota != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if period != "" {
|
||||||
|
if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s.SetRtSched(path, r)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
f, err := os.Open(filepath.Join(path, "cpu.stat"))
|
const file = "cpu.stat"
|
||||||
|
f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return nil
|
return nil
|
||||||
|
@ -99,9 +110,9 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
|
||||||
sc := bufio.NewScanner(f)
|
sc := bufio.NewScanner(f)
|
||||||
for sc.Scan() {
|
for sc.Scan() {
|
||||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return &parseError{Path: path, File: file, Err: err}
|
||||||
}
|
}
|
||||||
switch t {
|
switch t {
|
||||||
case "nr_periods":
|
case "nr_periods":
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -9,40 +7,40 @@ import (
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestCpuSetShares(t *testing.T) {
|
func TestCpuSetShares(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("cpu", t)
|
path := tempDir(t, "cpu")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
sharesBefore = 1024
|
sharesBefore = 1024
|
||||||
sharesAfter = 512
|
sharesAfter = 512
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"cpu.shares": strconv.Itoa(sharesBefore),
|
"cpu.shares": strconv.Itoa(sharesBefore),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.CpuShares = sharesAfter
|
r := &configs.Resources{
|
||||||
|
CpuShares: sharesAfter,
|
||||||
|
}
|
||||||
cpu := &CpuGroup{}
|
cpu := &CpuGroup{}
|
||||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := cpu.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.shares")
|
value, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpu.shares - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if value != sharesAfter {
|
if value != sharesAfter {
|
||||||
t.Fatal("Got the wrong value, set cpu.shares failed.")
|
t.Fatal("Got the wrong value, set cpu.shares failed.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCpuSetBandWidth(t *testing.T) {
|
func TestCpuSetBandWidth(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("cpu", t)
|
path := tempDir(t, "cpu")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
quotaBefore = 8000
|
quotaBefore = 8000
|
||||||
|
@ -55,47 +53,51 @@ func TestCpuSetBandWidth(t *testing.T) {
|
||||||
rtPeriodAfter = 7000
|
rtPeriodAfter = 7000
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
|
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
|
||||||
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
|
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
|
||||||
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
||||||
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.CpuQuota = quotaAfter
|
r := &configs.Resources{
|
||||||
helper.CgroupData.config.Resources.CpuPeriod = periodAfter
|
CpuQuota: quotaAfter,
|
||||||
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
|
CpuPeriod: periodAfter,
|
||||||
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
|
CpuRtRuntime: rtRuntimeAfter,
|
||||||
|
CpuRtPeriod: rtPeriodAfter,
|
||||||
|
}
|
||||||
cpu := &CpuGroup{}
|
cpu := &CpuGroup{}
|
||||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := cpu.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
quota, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
|
quota, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_quota_us")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if quota != quotaAfter {
|
if quota != quotaAfter {
|
||||||
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
|
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
|
||||||
}
|
}
|
||||||
|
|
||||||
period, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
|
period, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_period_us")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if period != periodAfter {
|
if period != periodAfter {
|
||||||
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
|
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
|
||||||
}
|
}
|
||||||
rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
|
|
||||||
|
rtRuntime, err := fscommon.GetCgroupParamUint(path, "cpu.rt_runtime_us")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if rtRuntime != rtRuntimeAfter {
|
if rtRuntime != rtRuntimeAfter {
|
||||||
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
||||||
}
|
}
|
||||||
rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
|
|
||||||
|
rtPeriod, err := fscommon.GetCgroupParamUint(path, "cpu.rt_period_us")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if rtPeriod != rtPeriodAfter {
|
if rtPeriod != rtPeriodAfter {
|
||||||
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
||||||
|
@ -103,8 +105,7 @@ func TestCpuSetBandWidth(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCpuStats(t *testing.T) {
|
func TestCpuStats(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("cpu", t)
|
path := tempDir(t, "cpu")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
nrPeriods = 2000
|
nrPeriods = 2000
|
||||||
|
@ -112,15 +113,15 @@ func TestCpuStats(t *testing.T) {
|
||||||
throttledTime = uint64(18446744073709551615)
|
throttledTime = uint64(18446744073709551615)
|
||||||
)
|
)
|
||||||
|
|
||||||
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
|
cpuStatContent := fmt.Sprintf("nr_periods %d\nnr_throttled %d\nthrottled_time %d\n",
|
||||||
nrPeriods, nrThrottled, throttledTime)
|
nrPeriods, nrThrottled, throttledTime)
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"cpu.stat": cpuStatContent,
|
"cpu.stat": cpuStatContent,
|
||||||
})
|
})
|
||||||
|
|
||||||
cpu := &CpuGroup{}
|
cpu := &CpuGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
err := cpu.GetStats(path, &actualStats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -128,44 +129,43 @@ func TestCpuStats(t *testing.T) {
|
||||||
expectedStats := cgroups.ThrottlingData{
|
expectedStats := cgroups.ThrottlingData{
|
||||||
Periods: nrPeriods,
|
Periods: nrPeriods,
|
||||||
ThrottledPeriods: nrThrottled,
|
ThrottledPeriods: nrThrottled,
|
||||||
ThrottledTime: throttledTime}
|
ThrottledTime: throttledTime,
|
||||||
|
}
|
||||||
|
|
||||||
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
|
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestNoCpuStatFile(t *testing.T) {
|
func TestNoCpuStatFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("cpu", t)
|
path := tempDir(t, "cpu")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
cpu := &CpuGroup{}
|
cpu := &CpuGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
err := cpu.GetStats(path, &actualStats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal("Expected not to fail, but did")
|
t.Fatal("Expected not to fail, but did")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestInvalidCpuStat(t *testing.T) {
|
func TestInvalidCpuStat(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("cpu", t)
|
path := tempDir(t, "cpu")
|
||||||
defer helper.cleanup()
|
|
||||||
cpuStatContent := `nr_periods 2000
|
cpuStatContent := `nr_periods 2000
|
||||||
nr_throttled 200
|
nr_throttled 200
|
||||||
throttled_time fortytwo`
|
throttled_time fortytwo`
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"cpu.stat": cpuStatContent,
|
"cpu.stat": cpuStatContent,
|
||||||
})
|
})
|
||||||
|
|
||||||
cpu := &CpuGroup{}
|
cpu := &CpuGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
err := cpu.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failed stat parsing.")
|
t.Fatal("Expected failed stat parsing.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCpuSetRtSchedAtApply(t *testing.T) {
|
func TestCpuSetRtSchedAtApply(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("cpu", t)
|
path := tempDir(t, "cpu")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
rtRuntimeBefore = 0
|
rtRuntimeBefore = 0
|
||||||
|
@ -174,35 +174,40 @@ func TestCpuSetRtSchedAtApply(t *testing.T) {
|
||||||
rtPeriodAfter = 7000
|
rtPeriodAfter = 7000
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
||||||
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
|
r := &configs.Resources{
|
||||||
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
|
CpuRtRuntime: rtRuntimeAfter,
|
||||||
|
CpuRtPeriod: rtPeriodAfter,
|
||||||
|
}
|
||||||
cpu := &CpuGroup{}
|
cpu := &CpuGroup{}
|
||||||
if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != nil {
|
|
||||||
|
if err := cpu.Apply(path, r, 1234); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
|
rtRuntime, err := fscommon.GetCgroupParamUint(path, "cpu.rt_runtime_us")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if rtRuntime != rtRuntimeAfter {
|
if rtRuntime != rtRuntimeAfter {
|
||||||
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
||||||
}
|
}
|
||||||
rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
|
|
||||||
|
rtPeriod, err := fscommon.GetCgroupParamUint(path, "cpu.rt_period_us")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if rtPeriod != rtPeriodAfter {
|
if rtPeriod != rtPeriodAfter {
|
||||||
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
||||||
}
|
}
|
||||||
pid, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cgroup.procs")
|
|
||||||
|
pid, err := fscommon.GetCgroupParamUint(path, "cgroup.procs")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cgroup.procs - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if pid != 1234 {
|
if pid != 1234 {
|
||||||
t.Fatal("Got the wrong value, set cgroup.procs failed.")
|
t.Fatal("Got the wrong value, set cgroup.procs failed.")
|
||||||
|
|
|
@ -1,52 +1,51 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"bufio"
|
||||||
"io/ioutil"
|
"os"
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
cgroupCpuacctStat = "cpuacct.stat"
|
cgroupCpuacctStat = "cpuacct.stat"
|
||||||
|
cgroupCpuacctUsageAll = "cpuacct.usage_all"
|
||||||
|
|
||||||
nanosecondsInSecond = 1000000000
|
nanosecondsInSecond = 1000000000
|
||||||
|
|
||||||
|
userModeColumn = 1
|
||||||
|
kernelModeColumn = 2
|
||||||
|
cuacctUsageAllColumnsNumber = 3
|
||||||
|
|
||||||
|
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
|
||||||
|
// on Linux it's a constant which is safe to be hard coded,
|
||||||
|
// so we can avoid using cgo here. For details, see:
|
||||||
|
// https://github.com/containerd/cgroups/pull/12
|
||||||
|
clockTicks uint64 = 100
|
||||||
)
|
)
|
||||||
|
|
||||||
var clockTicks = uint64(system.GetClockTicks())
|
type CpuacctGroup struct{}
|
||||||
|
|
||||||
type CpuacctGroup struct {
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CpuacctGroup) Name() string {
|
func (s *CpuacctGroup) Name() string {
|
||||||
return "cpuacct"
|
return "cpuacct"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpuacctGroup) Apply(d *cgroupData) error {
|
func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
// we just want to join this group even though we don't set anything
|
return apply(path, pid)
|
||||||
if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpuacctGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("cpuacct"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
if !cgroups.PathExists(path) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -62,8 +61,15 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||||
|
stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
|
||||||
|
stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
|
||||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||||
return nil
|
return nil
|
||||||
|
@ -71,52 +77,90 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
|
||||||
// Returns user and kernel usage breakdown in nanoseconds.
|
// Returns user and kernel usage breakdown in nanoseconds.
|
||||||
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||||
userModeUsage := uint64(0)
|
var userModeUsage, kernelModeUsage uint64
|
||||||
kernelModeUsage := uint64(0)
|
|
||||||
const (
|
const (
|
||||||
userField = "user"
|
userField = "user"
|
||||||
systemField = "system"
|
systemField = "system"
|
||||||
|
file = cgroupCpuacctStat
|
||||||
)
|
)
|
||||||
|
|
||||||
// Expected format:
|
// Expected format:
|
||||||
// user <usage in ticks>
|
// user <usage in ticks>
|
||||||
// system <usage in ticks>
|
// system <usage in ticks>
|
||||||
data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat))
|
data, err := cgroups.ReadFile(path, file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, 0, err
|
return 0, 0, err
|
||||||
}
|
}
|
||||||
fields := strings.Fields(string(data))
|
// TODO: use strings.SplitN instead.
|
||||||
if len(fields) < 4 {
|
fields := strings.Fields(data)
|
||||||
return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
|
||||||
}
|
return 0, 0, malformedLine(path, file, data)
|
||||||
if fields[0] != userField {
|
|
||||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
|
|
||||||
}
|
|
||||||
if fields[2] != systemField {
|
|
||||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
|
|
||||||
}
|
}
|
||||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||||
return 0, 0, err
|
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||||
}
|
}
|
||||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||||
return 0, 0, err
|
return 0, 0, &parseError{Path: path, File: file, Err: err}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getPercpuUsage(path string) ([]uint64, error) {
|
func getPercpuUsage(path string) ([]uint64, error) {
|
||||||
|
const file = "cpuacct.usage_percpu"
|
||||||
percpuUsage := []uint64{}
|
percpuUsage := []uint64{}
|
||||||
data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
|
data, err := cgroups.ReadFile(path, file)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return percpuUsage, err
|
return percpuUsage, err
|
||||||
}
|
}
|
||||||
for _, value := range strings.Fields(string(data)) {
|
// TODO: use strings.SplitN instead.
|
||||||
|
for _, value := range strings.Fields(data) {
|
||||||
value, err := strconv.ParseUint(value, 10, 64)
|
value, err := strconv.ParseUint(value, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
|
return percpuUsage, &parseError{Path: path, File: file, Err: err}
|
||||||
}
|
}
|
||||||
percpuUsage = append(percpuUsage, value)
|
percpuUsage = append(percpuUsage, value)
|
||||||
}
|
}
|
||||||
return percpuUsage, nil
|
return percpuUsage, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
|
||||||
|
usageKernelMode := []uint64{}
|
||||||
|
usageUserMode := []uint64{}
|
||||||
|
const file = cgroupCpuacctUsageAll
|
||||||
|
|
||||||
|
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return usageKernelMode, usageUserMode, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(fd)
|
||||||
|
scanner.Scan() // skipping header line
|
||||||
|
|
||||||
|
for scanner.Scan() {
|
||||||
|
lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1)
|
||||||
|
if len(lineFields) != cuacctUsageAllColumnsNumber {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
usageKernelMode = append(usageKernelMode, usageInKernelMode)
|
||||||
|
|
||||||
|
usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
usageUserMode = append(usageUserMode, usageInUserMode)
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return nil, nil, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
return usageKernelMode, usageUserMode, nil
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
cpuAcctUsageContents = "12262454190222160"
|
||||||
|
cpuAcctUsagePerCPUContents = "1564936537989058 1583937096487821 1604195415465681 1596445226820187 1481069084155629 1478735613864327 1477610593414743 1476362015778086"
|
||||||
|
cpuAcctStatContents = "user 452278264\nsystem 291429664"
|
||||||
|
cpuAcctUsageAll = `cpu user system
|
||||||
|
0 962250696038415 637727786389114
|
||||||
|
1 981956408513304 638197595421064
|
||||||
|
2 1002658817529022 638956774598358
|
||||||
|
3 994937703492523 637985531181620
|
||||||
|
4 874843781648690 638837766495476
|
||||||
|
5 872544369885276 638763309884944
|
||||||
|
6 870104915696359 640081778921247
|
||||||
|
7 870202363887496 638716766259495
|
||||||
|
`
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCpuacctStats(t *testing.T) {
|
||||||
|
path := tempDir(t, "cpuacct")
|
||||||
|
writeFileContents(t, path, map[string]string{
|
||||||
|
"cpuacct.usage": cpuAcctUsageContents,
|
||||||
|
"cpuacct.usage_percpu": cpuAcctUsagePerCPUContents,
|
||||||
|
"cpuacct.stat": cpuAcctStatContents,
|
||||||
|
"cpuacct.usage_all": cpuAcctUsageAll,
|
||||||
|
})
|
||||||
|
|
||||||
|
cpuacct := &CpuacctGroup{}
|
||||||
|
actualStats := *cgroups.NewStats()
|
||||||
|
err := cpuacct.GetStats(path, &actualStats)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedStats := cgroups.CpuUsage{
|
||||||
|
TotalUsage: uint64(12262454190222160),
|
||||||
|
PercpuUsage: []uint64{
|
||||||
|
1564936537989058, 1583937096487821, 1604195415465681, 1596445226820187,
|
||||||
|
1481069084155629, 1478735613864327, 1477610593414743, 1476362015778086,
|
||||||
|
},
|
||||||
|
PercpuUsageInKernelmode: []uint64{
|
||||||
|
637727786389114, 638197595421064, 638956774598358, 637985531181620,
|
||||||
|
638837766495476, 638763309884944, 640081778921247, 638716766259495,
|
||||||
|
},
|
||||||
|
PercpuUsageInUsermode: []uint64{
|
||||||
|
962250696038415, 981956408513304, 1002658817529022, 994937703492523,
|
||||||
|
874843781648690, 872544369885276, 870104915696359, 870202363887496,
|
||||||
|
},
|
||||||
|
UsageInKernelmode: (uint64(291429664) * nanosecondsInSecond) / clockTicks,
|
||||||
|
UsageInUsermode: (uint64(452278264) * nanosecondsInSecond) / clockTicks,
|
||||||
|
}
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(expectedStats, actualStats.CpuStats.CpuUsage) {
|
||||||
|
t.Errorf("Expected CPU usage %#v but found %#v\n",
|
||||||
|
expectedStats, actualStats.CpuStats.CpuUsage)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCpuacctStatsWithoutUsageAll(t *testing.T) {
|
||||||
|
path := tempDir(t, "cpuacct")
|
||||||
|
writeFileContents(t, path, map[string]string{
|
||||||
|
"cpuacct.usage": cpuAcctUsageContents,
|
||||||
|
"cpuacct.usage_percpu": cpuAcctUsagePerCPUContents,
|
||||||
|
"cpuacct.stat": cpuAcctStatContents,
|
||||||
|
})
|
||||||
|
|
||||||
|
cpuacct := &CpuacctGroup{}
|
||||||
|
actualStats := *cgroups.NewStats()
|
||||||
|
err := cpuacct.GetStats(path, &actualStats)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedStats := cgroups.CpuUsage{
|
||||||
|
TotalUsage: uint64(12262454190222160),
|
||||||
|
PercpuUsage: []uint64{
|
||||||
|
1564936537989058, 1583937096487821, 1604195415465681, 1596445226820187,
|
||||||
|
1481069084155629, 1478735613864327, 1477610593414743, 1476362015778086,
|
||||||
|
},
|
||||||
|
PercpuUsageInKernelmode: []uint64{},
|
||||||
|
PercpuUsageInUsermode: []uint64{},
|
||||||
|
UsageInKernelmode: (uint64(291429664) * nanosecondsInSecond) / clockTicks,
|
||||||
|
UsageInUsermode: (uint64(452278264) * nanosecondsInSecond) / clockTicks,
|
||||||
|
}
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(expectedStats, actualStats.CpuStats.CpuUsage) {
|
||||||
|
t.Errorf("Expected CPU usage %#v but found %#v\n",
|
||||||
|
expectedStats, actualStats.CpuStats.CpuUsage)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,75 +1,159 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"errors"
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type CpusetGroup struct {
|
type CpusetGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *CpusetGroup) Name() string {
|
func (s *CpusetGroup) Name() string {
|
||||||
return "cpuset"
|
return "cpuset"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) Apply(d *cgroupData) error {
|
func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||||
dir, err := d.path("cpuset")
|
return s.ApplyDir(path, r, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return s.ApplyDir(dir, d.config, d.pid)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
|
||||||
if cgroup.Resources.CpusetCpus != "" {
|
if r.CpusetCpus != "" {
|
||||||
if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if cgroup.Resources.CpusetMems != "" {
|
if r.CpusetMems != "" {
|
||||||
if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) Remove(d *cgroupData) error {
|
func getCpusetStat(path string, file string) ([]uint16, error) {
|
||||||
return removePath(d.path("cpuset"))
|
var extracted []uint16
|
||||||
|
fileContent, err := fscommon.GetCgroupParamString(path, file)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, err
|
||||||
|
}
|
||||||
|
if len(fileContent) == 0 {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s := range strings.Split(fileContent, ",") {
|
||||||
|
sp := strings.SplitN(s, "-", 3)
|
||||||
|
switch len(sp) {
|
||||||
|
case 3:
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
|
||||||
|
case 2:
|
||||||
|
min, err := strconv.ParseUint(sp[0], 10, 16)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
max, err := strconv.ParseUint(sp[1], 10, 16)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
if min > max {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
|
||||||
|
}
|
||||||
|
for i := min; i <= max; i++ {
|
||||||
|
extracted = append(extracted, uint16(i))
|
||||||
|
}
|
||||||
|
case 1:
|
||||||
|
value, err := strconv.ParseUint(s, 10, 16)
|
||||||
|
if err != nil {
|
||||||
|
return extracted, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
extracted = append(extracted, uint16(value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return extracted, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
|
||||||
|
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
|
func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
|
||||||
// This might happen if we have no cpuset cgroup mounted.
|
// This might happen if we have no cpuset cgroup mounted.
|
||||||
// Just do nothing and don't fail.
|
// Just do nothing and don't fail.
|
||||||
if dir == "" {
|
if dir == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
|
|
||||||
// 'ensureParent' start with parent because we don't want to
|
// 'ensureParent' start with parent because we don't want to
|
||||||
// explicitly inherit from parent, it could conflict with
|
// explicitly inherit from parent, it could conflict with
|
||||||
// 'cpuset.cpu_exclusive'.
|
// 'cpuset.cpu_exclusive'.
|
||||||
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
|
if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// We didn't inherit cpuset configs from parent, but we have
|
// We didn't inherit cpuset configs from parent, but we have
|
||||||
|
@ -79,82 +163,83 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
|
||||||
// specified configs, otherwise, inherit from parent. This makes
|
// specified configs, otherwise, inherit from parent. This makes
|
||||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||||
// keep backward compatibility.
|
// keep backward compatibility.
|
||||||
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
|
if err := s.ensureCpusAndMems(dir, r); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
// Since we are not using apply(), we need to place the pid
|
||||||
// because we are not using d.join we need to place the pid into the procs file
|
// into the procs file.
|
||||||
// unlike the other subsystems
|
|
||||||
return cgroups.WriteCgroupProc(dir, pid)
|
return cgroups.WriteCgroupProc(dir, pid)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
|
func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
|
||||||
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
|
if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
|
if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
return cpus, mems, nil
|
return cpus, mems, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ensureParent makes sure that the parent directory of current is created
|
// cpusetEnsureParent makes sure that the parent directories of current
|
||||||
// and populated with the proper cpus and mems files copied from
|
// are created and populated with the proper cpus and mems files copied
|
||||||
// it's parent.
|
// from their respective parent. It does that recursively, starting from
|
||||||
func (s *CpusetGroup) ensureParent(current, root string) error {
|
// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
|
||||||
|
func cpusetEnsureParent(current string) error {
|
||||||
|
var st unix.Statfs_t
|
||||||
|
|
||||||
parent := filepath.Dir(current)
|
parent := filepath.Dir(current)
|
||||||
if libcontainerUtils.CleanPath(parent) == root {
|
err := unix.Statfs(parent, &st)
|
||||||
|
if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// Avoid infinite recursion.
|
// Treat non-existing directory as cgroupfs as it will be created,
|
||||||
if parent == current {
|
// and the root cpuset directory obviously exists.
|
||||||
return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
|
if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
|
||||||
|
return &os.PathError{Op: "statfs", Path: parent, Err: err}
|
||||||
}
|
}
|
||||||
if err := s.ensureParent(parent, root); err != nil {
|
|
||||||
|
if err := cpusetEnsureParent(parent); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := os.MkdirAll(current, 0755); err != nil {
|
if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return s.copyIfNeeded(current, parent)
|
return cpusetCopyIfNeeded(current, parent)
|
||||||
}
|
}
|
||||||
|
|
||||||
// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||||
// directory to the current directory if the file's contents are 0
|
// directory to the current directory if the file's contents are 0
|
||||||
func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
|
func cpusetCopyIfNeeded(current, parent string) error {
|
||||||
var (
|
currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
|
||||||
err error
|
if err != nil {
|
||||||
currentCpus, currentMems []byte
|
|
||||||
parentCpus, parentMems []byte
|
|
||||||
)
|
|
||||||
|
|
||||||
if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
|
parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.isEmpty(currentCpus) {
|
if isEmptyCpuset(currentCpus) {
|
||||||
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if s.isEmpty(currentMems) {
|
if isEmptyCpuset(currentMems) {
|
||||||
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) isEmpty(b []byte) bool {
|
func isEmptyCpuset(str string) bool {
|
||||||
return len(bytes.Trim(b, "\n")) == 0
|
return str == "" || str == "\n"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
|
func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
|
||||||
if err := s.Set(path, cgroup); err != nil {
|
if err := s.Set(path, r); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return s.copyIfNeeded(path, filepath.Dir(path))
|
return cpusetCopyIfNeeded(path, filepath.Dir(path))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,67 +1,242 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestCpusetSetCpus(t *testing.T) {
|
const (
|
||||||
helper := NewCgroupTestUtil("cpuset", t)
|
cpus = "0-2,7,12-14\n"
|
||||||
defer helper.cleanup()
|
cpuExclusive = "1\n"
|
||||||
|
mems = "1-4,6,9\n"
|
||||||
|
memHardwall = "0\n"
|
||||||
|
memExclusive = "0\n"
|
||||||
|
memoryMigrate = "1\n"
|
||||||
|
memorySpreadPage = "0\n"
|
||||||
|
memorySpeadSlab = "1\n"
|
||||||
|
memoryPressure = "34377\n"
|
||||||
|
schedLoadBalance = "1\n"
|
||||||
|
schedRelaxDomainLevel = "-1\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
var cpusetTestFiles = map[string]string{
|
||||||
|
"cpuset.cpus": cpus,
|
||||||
|
"cpuset.cpu_exclusive": cpuExclusive,
|
||||||
|
"cpuset.mems": mems,
|
||||||
|
"cpuset.mem_hardwall": memHardwall,
|
||||||
|
"cpuset.mem_exclusive": memExclusive,
|
||||||
|
"cpuset.memory_migrate": memoryMigrate,
|
||||||
|
"cpuset.memory_spread_page": memorySpreadPage,
|
||||||
|
"cpuset.memory_spread_slab": memorySpeadSlab,
|
||||||
|
"cpuset.memory_pressure": memoryPressure,
|
||||||
|
"cpuset.sched_load_balance": schedLoadBalance,
|
||||||
|
"cpuset.sched_relax_domain_level": schedRelaxDomainLevel,
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCPUSetSetCpus(t *testing.T) {
|
||||||
|
path := tempDir(t, "cpuset")
|
||||||
|
|
||||||
const (
|
const (
|
||||||
cpusBefore = "0"
|
cpusBefore = "0"
|
||||||
cpusAfter = "1-3"
|
cpusAfter = "1-3"
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"cpuset.cpus": cpusBefore,
|
"cpuset.cpus": cpusBefore,
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
|
r := &configs.Resources{
|
||||||
|
CpusetCpus: cpusAfter,
|
||||||
|
}
|
||||||
cpuset := &CpusetGroup{}
|
cpuset := &CpusetGroup{}
|
||||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := cpuset.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.cpus")
|
value, err := fscommon.GetCgroupParamString(path, "cpuset.cpus")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpuset.cpus - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if value != cpusAfter {
|
if value != cpusAfter {
|
||||||
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
|
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCpusetSetMems(t *testing.T) {
|
func TestCPUSetSetMems(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("cpuset", t)
|
path := tempDir(t, "cpuset")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
memsBefore = "0"
|
memsBefore = "0"
|
||||||
memsAfter = "1"
|
memsAfter = "1"
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"cpuset.mems": memsBefore,
|
"cpuset.mems": memsBefore,
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.CpusetMems = memsAfter
|
r := &configs.Resources{
|
||||||
|
CpusetMems: memsAfter,
|
||||||
|
}
|
||||||
cpuset := &CpusetGroup{}
|
cpuset := &CpusetGroup{}
|
||||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := cpuset.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.mems")
|
value, err := fscommon.GetCgroupParamString(path, "cpuset.mems")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse cpuset.mems - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if value != memsAfter {
|
if value != memsAfter {
|
||||||
t.Fatal("Got the wrong value, set cpuset.mems failed.")
|
t.Fatal("Got the wrong value, set cpuset.mems failed.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCPUSetStatsCorrect(t *testing.T) {
|
||||||
|
path := tempDir(t, "cpuset")
|
||||||
|
writeFileContents(t, path, cpusetTestFiles)
|
||||||
|
|
||||||
|
cpuset := &CpusetGroup{}
|
||||||
|
actualStats := *cgroups.NewStats()
|
||||||
|
err := cpuset.GetStats(path, &actualStats)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expectedStats := cgroups.CPUSetStats{
|
||||||
|
CPUs: []uint16{0, 1, 2, 7, 12, 13, 14},
|
||||||
|
CPUExclusive: 1,
|
||||||
|
Mems: []uint16{1, 2, 3, 4, 6, 9},
|
||||||
|
MemoryMigrate: 1,
|
||||||
|
MemHardwall: 0,
|
||||||
|
MemExclusive: 0,
|
||||||
|
MemorySpreadPage: 0,
|
||||||
|
MemorySpreadSlab: 1,
|
||||||
|
MemoryPressure: 34377,
|
||||||
|
SchedLoadBalance: 1,
|
||||||
|
SchedRelaxDomainLevel: -1,
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(expectedStats, actualStats.CPUSetStats) {
|
||||||
|
t.Fatalf("Expected Cpuset stats usage %#v but found %#v",
|
||||||
|
expectedStats, actualStats.CPUSetStats)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCPUSetStatsMissingFiles(t *testing.T) {
|
||||||
|
for _, testCase := range []struct {
|
||||||
|
desc string
|
||||||
|
filename, contents string
|
||||||
|
removeFile bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
desc: "empty cpus file",
|
||||||
|
filename: "cpuset.cpus",
|
||||||
|
contents: "",
|
||||||
|
removeFile: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "empty mems file",
|
||||||
|
filename: "cpuset.mems",
|
||||||
|
contents: "",
|
||||||
|
removeFile: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "corrupted cpus file",
|
||||||
|
filename: "cpuset.cpus",
|
||||||
|
contents: "0-3,*4^2",
|
||||||
|
removeFile: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "corrupted mems file",
|
||||||
|
filename: "cpuset.mems",
|
||||||
|
contents: "0,1,2-5,8-7",
|
||||||
|
removeFile: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing cpu_exclusive file",
|
||||||
|
filename: "cpuset.cpu_exclusive",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing memory_migrate file",
|
||||||
|
filename: "cpuset.memory_migrate",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing mem_hardwall file",
|
||||||
|
filename: "cpuset.mem_hardwall",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing mem_exclusive file",
|
||||||
|
filename: "cpuset.mem_exclusive",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing memory_spread_page file",
|
||||||
|
filename: "cpuset.memory_spread_page",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing memory_spread_slab file",
|
||||||
|
filename: "cpuset.memory_spread_slab",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing memory_pressure file",
|
||||||
|
filename: "cpuset.memory_pressure",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing sched_load_balance file",
|
||||||
|
filename: "cpuset.sched_load_balance",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
desc: "missing sched_relax_domain_level file",
|
||||||
|
filename: "cpuset.sched_relax_domain_level",
|
||||||
|
contents: "",
|
||||||
|
removeFile: true,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run(testCase.desc, func(t *testing.T) {
|
||||||
|
path := tempDir(t, "cpuset")
|
||||||
|
|
||||||
|
tempCpusetTestFiles := map[string]string{}
|
||||||
|
for i, v := range cpusetTestFiles {
|
||||||
|
tempCpusetTestFiles[i] = v
|
||||||
|
}
|
||||||
|
|
||||||
|
if testCase.removeFile {
|
||||||
|
delete(tempCpusetTestFiles, testCase.filename)
|
||||||
|
writeFileContents(t, path, tempCpusetTestFiles)
|
||||||
|
cpuset := &CpusetGroup{}
|
||||||
|
actualStats := *cgroups.NewStats()
|
||||||
|
err := cpuset.GetStats(path, &actualStats)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("failed unexpectedly: %q", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tempCpusetTestFiles[testCase.filename] = testCase.contents
|
||||||
|
writeFileContents(t, path, tempCpusetTestFiles)
|
||||||
|
cpuset := &CpusetGroup{}
|
||||||
|
actualStats := *cgroups.NewStats()
|
||||||
|
err := cpuset.GetStats(path, &actualStats)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
t.Error("failed to return expected error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1,81 +1,109 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"errors"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
"github.com/opencontainers/runc/libcontainer/system"
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/userns"
|
||||||
)
|
)
|
||||||
|
|
||||||
type DevicesGroup struct {
|
type DevicesGroup struct {
|
||||||
|
TestingSkipFinalCheck bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *DevicesGroup) Name() string {
|
func (s *DevicesGroup) Name() string {
|
||||||
return "devices"
|
return "devices"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *DevicesGroup) Apply(d *cgroupData) error {
|
func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
|
||||||
_, err := d.join("devices")
|
if r.SkipDevices {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if path == "" {
|
||||||
|
// Return error here, since devices cgroup
|
||||||
|
// is a hard requirement for container's security.
|
||||||
|
return errSubsystemDoesNotExist
|
||||||
|
}
|
||||||
|
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
|
||||||
|
list, err := cgroups.ReadFile(path, "devices.list")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list))
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) {
|
||||||
|
// This defaults to a white-list -- which is what we want!
|
||||||
|
emu := &cgroupdevices.Emulator{}
|
||||||
|
for _, rule := range rules {
|
||||||
|
if err := emu.Apply(*rule); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return emu, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if userns.RunningInUserNS() || r.SkipDevices {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate two emulators, one for the current state of the cgroup and one
|
||||||
|
// for the requested state by the user.
|
||||||
|
current, err := loadEmulator(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// We will return error even it's `not found` error, devices
|
|
||||||
// cgroup is hard requirement for container's security.
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
target, err := buildEmulator(r.Devices)
|
||||||
}
|
if err != nil {
|
||||||
|
return err
|
||||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
|
||||||
if system.RunningInUserNS() {
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
devices := cgroup.Resources.Devices
|
// Compute the minimal set of transition rules needed to achieve the
|
||||||
if len(devices) > 0 {
|
// requested state.
|
||||||
for _, dev := range devices {
|
transitionRules, err := current.Transition(target)
|
||||||
file := "devices.deny"
|
if err != nil {
|
||||||
if dev.Allow {
|
return err
|
||||||
file = "devices.allow"
|
|
||||||
}
|
|
||||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
if cgroup.Resources.AllowAllDevices != nil {
|
for _, rule := range transitionRules {
|
||||||
if *cgroup.Resources.AllowAllDevices == false {
|
file := "devices.deny"
|
||||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
if rule.Allow {
|
||||||
return err
|
file = "devices.allow"
|
||||||
}
|
|
||||||
|
|
||||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
|
||||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
|
||||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
// Final safety check -- ensure that the resulting state is what was
|
||||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
// requested. This is only really correct for white-lists, but for
|
||||||
|
// black-lists we can at least check that the cgroup is in the right mode.
|
||||||
|
//
|
||||||
|
// This safety-check is skipped for the unit tests because we cannot
|
||||||
|
// currently mock devices.list correctly.
|
||||||
|
if !s.TestingSkipFinalCheck {
|
||||||
|
currentAfter, err := loadEmulator(path)
|
||||||
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
|
||||||
|
return errors.New("resulting devices cgroup doesn't precisely match target")
|
||||||
|
} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
|
||||||
|
return errors.New("resulting devices cgroup doesn't match target mode")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *DevicesGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("devices"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -7,93 +5,48 @@ import (
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
"github.com/opencontainers/runc/libcontainer/devices"
|
||||||
|
|
||||||
var (
|
|
||||||
allowedDevices = []*configs.Device{
|
|
||||||
{
|
|
||||||
Path: "/dev/zero",
|
|
||||||
Type: 'c',
|
|
||||||
Major: 1,
|
|
||||||
Minor: 5,
|
|
||||||
Permissions: "rwm",
|
|
||||||
FileMode: 0666,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
allowedList = "c 1:5 rwm"
|
|
||||||
deniedDevices = []*configs.Device{
|
|
||||||
{
|
|
||||||
Path: "/dev/null",
|
|
||||||
Type: 'c',
|
|
||||||
Major: 1,
|
|
||||||
Minor: 3,
|
|
||||||
Permissions: "rwm",
|
|
||||||
FileMode: 0666,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
deniedList = "c 1:3 rwm"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestDevicesSetAllow(t *testing.T) {
|
func TestDevicesSetAllow(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("devices", t)
|
path := tempDir(t, "devices")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"devices.deny": "a",
|
"devices.allow": "",
|
||||||
})
|
"devices.deny": "",
|
||||||
allowAllDevices := false
|
"devices.list": "a *:* rwm",
|
||||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
|
||||||
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
|
|
||||||
devices := &DevicesGroup{}
|
|
||||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if value != allowedList {
|
|
||||||
t.Fatal("Got the wrong value, set devices.allow failed.")
|
|
||||||
}
|
|
||||||
|
|
||||||
// When AllowAllDevices is nil, devices.allow file should not be modified.
|
|
||||||
helper.CgroupData.config.Resources.AllowAllDevices = nil
|
|
||||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
|
||||||
}
|
|
||||||
if value != allowedList {
|
|
||||||
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDevicesSetDeny(t *testing.T) {
|
|
||||||
helper := NewCgroupTestUtil("devices", t)
|
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"devices.allow": "a",
|
|
||||||
})
|
})
|
||||||
|
|
||||||
allowAllDevices := true
|
r := &configs.Resources{
|
||||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
Devices: []*devices.Rule{
|
||||||
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
|
{
|
||||||
devices := &DevicesGroup{}
|
Type: devices.CharDevice,
|
||||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
Major: 1,
|
||||||
|
Minor: 5,
|
||||||
|
Permissions: devices.Permissions("rwm"),
|
||||||
|
Allow: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
d := &DevicesGroup{TestingSkipFinalCheck: true}
|
||||||
|
if err := d.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny")
|
// The default deny rule must be written.
|
||||||
|
value, err := fscommon.GetCgroupParamString(path, "devices.deny")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse devices.deny - %s", err)
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if value[0] != 'a' {
|
||||||
|
t.Errorf("Got the wrong value (%q), set devices.deny failed.", value)
|
||||||
}
|
}
|
||||||
|
|
||||||
if value != deniedList {
|
// Permitted rule must be written.
|
||||||
t.Fatal("Got the wrong value, set devices.deny failed.")
|
if value, err := fscommon.GetCgroupParamString(path, "devices.allow"); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
} else if value != "c 1:5 rwm" {
|
||||||
|
t.Errorf("Got the wrong value (%q), set devices.allow failed.", value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
)
|
||||||
|
|
||||||
|
type parseError = fscommon.ParseError
|
||||||
|
|
||||||
|
// malformedLine is used by all cgroupfs file parsers that expect a line
|
||||||
|
// in a particular format but get some garbage instead.
|
||||||
|
func malformedLine(path, file, line string) error {
|
||||||
|
return &parseError{Path: path, File: file, Err: fmt.Errorf("malformed line: %s", line)}
|
||||||
|
}
|
|
@ -1,67 +1,158 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/sirupsen/logrus"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
|
|
||||||
type FreezerGroup struct {
|
type FreezerGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *FreezerGroup) Name() string {
|
func (s *FreezerGroup) Name() string {
|
||||||
return "freezer"
|
return "freezer"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *FreezerGroup) Apply(d *cgroupData) error {
|
func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
_, err := d.join("freezer")
|
return apply(path, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
|
||||||
switch cgroup.Resources.Freezer {
|
switch r.Freezer {
|
||||||
case configs.Frozen, configs.Thawed:
|
case configs.Frozen:
|
||||||
for {
|
defer func() {
|
||||||
// In case this loop does not exit because it doesn't get the expected
|
if Err != nil {
|
||||||
// state, let's write again this state, hoping it's going to be properly
|
// Freezing failed, and it is bad and dangerous
|
||||||
// set this time. Otherwise, this loop could run infinitely, waiting for
|
// to leave the cgroup in FROZEN or FREEZING
|
||||||
// a state change that would never happen.
|
// state, so (try to) thaw it back.
|
||||||
if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// As per older kernel docs (freezer-subsystem.txt before
|
||||||
|
// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
|
||||||
|
// userspace should either retry or thaw. While current
|
||||||
|
// kernel cgroup v1 docs no longer mention a need to retry,
|
||||||
|
// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
|
||||||
|
// freeze a cgroup v1 while new processes keep appearing in it
|
||||||
|
// (either via fork/clone or by writing new PIDs to
|
||||||
|
// cgroup.procs).
|
||||||
|
//
|
||||||
|
// The numbers below are empirically chosen to have a decent
|
||||||
|
// chance to succeed in various scenarios ("runc pause/unpause
|
||||||
|
// with parallel runc exec" and "bare freeze/unfreeze on a very
|
||||||
|
// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
|
||||||
|
//
|
||||||
|
// Adding any amount of sleep in between retries did not
|
||||||
|
// increase the chances of successful freeze in "pause/unpause
|
||||||
|
// with parallel exec" reproducer. OTOH, adding an occasional
|
||||||
|
// sleep helped for the case where the system is extremely slow
|
||||||
|
// (CentOS 7 VM on GHA CI).
|
||||||
|
//
|
||||||
|
// Alas, this is still a game of chances, since the real fix
|
||||||
|
// belongs in the kernel (cgroup v2 does not have this bug).
|
||||||
|
|
||||||
|
for i := 0; i < 1000; i++ {
|
||||||
|
if i%50 == 49 {
|
||||||
|
// Occasional thaw and sleep improves
|
||||||
|
// the chances to succeed in freezing
|
||||||
|
// in case new processes keep appearing
|
||||||
|
// in the cgroup.
|
||||||
|
_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||||
|
time.Sleep(10 * time.Millisecond)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
if i%25 == 24 {
|
||||||
|
// Occasional short sleep before reading
|
||||||
|
// the state back also improves the chances to
|
||||||
|
// succeed in freezing in case of a very slow
|
||||||
|
// system.
|
||||||
|
time.Sleep(10 * time.Microsecond)
|
||||||
|
}
|
||||||
|
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
|
state = strings.TrimSpace(state)
|
||||||
break
|
switch state {
|
||||||
|
case "FREEZING":
|
||||||
|
continue
|
||||||
|
case string(configs.Frozen):
|
||||||
|
if i > 1 {
|
||||||
|
logrus.Debugf("frozen after %d retries", i)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
// should never happen
|
||||||
|
return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(1 * time.Millisecond)
|
|
||||||
}
|
}
|
||||||
|
// Despite our best efforts, it got stuck in FREEZING.
|
||||||
|
return errors.New("unable to freeze")
|
||||||
|
case configs.Thawed:
|
||||||
|
return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
|
||||||
case configs.Undefined:
|
case configs.Undefined:
|
||||||
return nil
|
return nil
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
|
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *FreezerGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("freezer"))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
|
||||||
|
for {
|
||||||
|
state, err := cgroups.ReadFile(path, "freezer.state")
|
||||||
|
if err != nil {
|
||||||
|
// If the kernel is too old, then we just treat the freezer as
|
||||||
|
// being in an "undefined" state.
|
||||||
|
if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return configs.Undefined, err
|
||||||
|
}
|
||||||
|
switch strings.TrimSpace(state) {
|
||||||
|
case "THAWED":
|
||||||
|
return configs.Thawed, nil
|
||||||
|
case "FROZEN":
|
||||||
|
// Find out whether the cgroup is frozen directly,
|
||||||
|
// or indirectly via an ancestor.
|
||||||
|
self, err := cgroups.ReadFile(path, "freezer.self_freezing")
|
||||||
|
if err != nil {
|
||||||
|
// If the kernel is too old, then we just treat
|
||||||
|
// it as being frozen.
|
||||||
|
if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
return configs.Frozen, err
|
||||||
|
}
|
||||||
|
switch self {
|
||||||
|
case "0\n":
|
||||||
|
return configs.Thawed, nil
|
||||||
|
case "1\n":
|
||||||
|
return configs.Frozen, nil
|
||||||
|
default:
|
||||||
|
return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
|
||||||
|
}
|
||||||
|
case "FREEZING":
|
||||||
|
// Make sure we get a stable freezer state, so retry if the cgroup
|
||||||
|
// is still undergoing freezing. This should be a temporary delay.
|
||||||
|
time.Sleep(1 * time.Millisecond)
|
||||||
|
continue
|
||||||
|
default:
|
||||||
|
return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -10,22 +8,23 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestFreezerSetState(t *testing.T) {
|
func TestFreezerSetState(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("freezer", t)
|
path := tempDir(t, "freezer")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"freezer.state": string(configs.Frozen),
|
"freezer.state": string(configs.Frozen),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.Freezer = configs.Thawed
|
r := &configs.Resources{
|
||||||
|
Freezer: configs.Thawed,
|
||||||
|
}
|
||||||
freezer := &FreezerGroup{}
|
freezer := &FreezerGroup{}
|
||||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := freezer.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "freezer.state")
|
value, err := fscommon.GetCgroupParamString(path, "freezer.state")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse freezer.state - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != string(configs.Thawed) {
|
if value != string(configs.Thawed) {
|
||||||
t.Fatal("Got the wrong value, set freezer.state failed.")
|
t.Fatal("Got the wrong value, set freezer.state failed.")
|
||||||
|
@ -33,16 +32,15 @@ func TestFreezerSetState(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFreezerSetInvalidState(t *testing.T) {
|
func TestFreezerSetInvalidState(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("freezer", t)
|
path := tempDir(t, "freezer")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const invalidArg configs.FreezerState = "Invalid"
|
||||||
invalidArg configs.FreezerState = "Invalid"
|
|
||||||
)
|
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.Freezer = invalidArg
|
r := &configs.Resources{
|
||||||
|
Freezer: invalidArg,
|
||||||
|
}
|
||||||
freezer := &FreezerGroup{}
|
freezer := &FreezerGroup{}
|
||||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
|
if err := freezer.Set(path, r); err == nil {
|
||||||
t.Fatal("Failed to return invalid argument error")
|
t.Fatal("Failed to return invalid argument error")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,264 @@
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
var subsystems = []subsystem{
|
||||||
|
&CpusetGroup{},
|
||||||
|
&DevicesGroup{},
|
||||||
|
&MemoryGroup{},
|
||||||
|
&CpuGroup{},
|
||||||
|
&CpuacctGroup{},
|
||||||
|
&PidsGroup{},
|
||||||
|
&BlkioGroup{},
|
||||||
|
&HugetlbGroup{},
|
||||||
|
&NetClsGroup{},
|
||||||
|
&NetPrioGroup{},
|
||||||
|
&PerfEventGroup{},
|
||||||
|
&FreezerGroup{},
|
||||||
|
&RdmaGroup{},
|
||||||
|
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||||
|
}
|
||||||
|
|
||||||
|
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// If using cgroups-hybrid mode then add a "" controller indicating
|
||||||
|
// it should join the cgroups v2.
|
||||||
|
if cgroups.IsCgroup2HybridMode() {
|
||||||
|
subsystems = append(subsystems, &NameGroup{GroupName: "", Join: true})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type subsystem interface {
|
||||||
|
// Name returns the name of the subsystem.
|
||||||
|
Name() string
|
||||||
|
// GetStats fills in the stats for the subsystem.
|
||||||
|
GetStats(path string, stats *cgroups.Stats) error
|
||||||
|
// Apply creates and joins a cgroup, adding pid into it. Some
|
||||||
|
// subsystems use resources to pre-configure the cgroup parents
|
||||||
|
// before creating or joining it.
|
||||||
|
Apply(path string, r *configs.Resources, pid int) error
|
||||||
|
// Set sets the cgroup resources.
|
||||||
|
Set(path string, r *configs.Resources) error
|
||||||
|
}
|
||||||
|
|
||||||
|
type manager struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
cgroups *configs.Cgroup
|
||||||
|
paths map[string]string
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
|
||||||
|
// Some v1 controllers (cpu, cpuset, and devices) expect
|
||||||
|
// cgroups.Resources to not be nil in Apply.
|
||||||
|
if cg.Resources == nil {
|
||||||
|
return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
|
||||||
|
}
|
||||||
|
if cg.Resources.Unified != nil {
|
||||||
|
return nil, cgroups.ErrV1NoUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
if paths == nil {
|
||||||
|
var err error
|
||||||
|
paths, err = initPaths(cg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &manager{
|
||||||
|
cgroups: cg,
|
||||||
|
paths: paths,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isIgnorableError returns whether err is a permission error (in the loose
|
||||||
|
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||||
|
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||||
|
// the normal EPERM.
|
||||||
|
func isIgnorableError(rootless bool, err error) bool {
|
||||||
|
// We do not ignore errors if we are root.
|
||||||
|
if !rootless {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Is it an ordinary EPERM?
|
||||||
|
if errors.Is(err, os.ErrPermission) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// Handle some specific syscall errors.
|
||||||
|
var errno unix.Errno
|
||||||
|
if errors.As(err, &errno) {
|
||||||
|
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Apply(pid int) (err error) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
|
||||||
|
c := m.cgroups
|
||||||
|
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
name := sys.Name()
|
||||||
|
p, ok := m.paths[name]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := sys.Apply(p, c.Resources, pid); err != nil {
|
||||||
|
// In the case of rootless (including euid=0 in userns), where an
|
||||||
|
// explicit cgroup path hasn't been set, we don't bail on error in
|
||||||
|
// case of permission problems here, but do delete the path from
|
||||||
|
// the m.paths map, since it is either non-existent and could not
|
||||||
|
// be created, or the pid could not be added to it.
|
||||||
|
//
|
||||||
|
// Cases where limits for the subsystem have been set are handled
|
||||||
|
// later by Set, which fails with a friendly error (see
|
||||||
|
// if path == "" in Set).
|
||||||
|
if isIgnorableError(c.Rootless, err) && c.Path == "" {
|
||||||
|
delete(m.paths, name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Destroy() error {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return cgroups.RemovePaths(m.paths)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Path(subsys string) string {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.paths[subsys]
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
stats := cgroups.NewStats()
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
path := m.paths[sys.Name()]
|
||||||
|
if path == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err := sys.GetStats(path, stats); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Set(r *configs.Resources) error {
|
||||||
|
if r == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if r.Unified != nil {
|
||||||
|
return cgroups.ErrV1NoUnified
|
||||||
|
}
|
||||||
|
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
path := m.paths[sys.Name()]
|
||||||
|
if err := sys.Set(path, r); err != nil {
|
||||||
|
// When rootless is true, errors from the "devices" subsystem
|
||||||
|
// are ignored, as it is really not expected to work.
|
||||||
|
if m.cgroups.Rootless && sys.Name() == "devices" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// However, errors from other subsystems are not ignored.
|
||||||
|
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||||
|
if path == "" {
|
||||||
|
// We never created a path for this cgroup, so we cannot set
|
||||||
|
// limits for it (though we have already tried at this point).
|
||||||
|
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Freeze toggles the container's freezer cgroup depending on the state
|
||||||
|
// provided.
|
||||||
|
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||||
|
path := m.Path("freezer")
|
||||||
|
if path == "" {
|
||||||
|
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||||
|
}
|
||||||
|
|
||||||
|
prevState := m.cgroups.Resources.Freezer
|
||||||
|
m.cgroups.Resources.Freezer = state
|
||||||
|
freezer := &FreezerGroup{}
|
||||||
|
if err := freezer.Set(path, m.cgroups.Resources); err != nil {
|
||||||
|
m.cgroups.Resources.Freezer = prevState
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetPids() ([]int, error) {
|
||||||
|
return cgroups.GetPids(m.Path("devices"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetAllPids() ([]int, error) {
|
||||||
|
return cgroups.GetAllPids(m.Path("devices"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetPaths() map[string]string {
|
||||||
|
m.mu.Lock()
|
||||||
|
defer m.mu.Unlock()
|
||||||
|
return m.paths
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||||
|
return m.cgroups, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) GetFreezerState() (configs.FreezerState, error) {
|
||||||
|
dir := m.Path("freezer")
|
||||||
|
// If the container doesn't have the freezer cgroup, say it's undefined.
|
||||||
|
if dir == "" {
|
||||||
|
return configs.Undefined, nil
|
||||||
|
}
|
||||||
|
freezer := &FreezerGroup{}
|
||||||
|
return freezer.GetState(dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) Exists() bool {
|
||||||
|
return cgroups.PathExists(m.Path("devices"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func OOMKillCount(path string) (uint64, error) {
|
||||||
|
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *manager) OOMKillCount() (uint64, error) {
|
||||||
|
c, err := OOMKillCount(m.Path("memory"))
|
||||||
|
// Ignore ENOENT when rootless as it couldn't create cgroup.
|
||||||
|
if err != nil && m.cgroups.Rootless && os.IsNotExist(err) {
|
||||||
|
err = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return c, err
|
||||||
|
}
|
|
@ -0,0 +1,50 @@
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func BenchmarkGetStats(b *testing.B) {
|
||||||
|
if cgroups.IsCgroup2UnifiedMode() {
|
||||||
|
b.Skip("cgroup v2 is not supported")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unset TestMode as we work with real cgroupfs here,
|
||||||
|
// and we want OpenFile to perform the fstype check.
|
||||||
|
cgroups.TestMode = false
|
||||||
|
defer func() {
|
||||||
|
cgroups.TestMode = true
|
||||||
|
}()
|
||||||
|
|
||||||
|
cg := &configs.Cgroup{
|
||||||
|
Path: "/some/kind/of/a/path/here",
|
||||||
|
Resources: &configs.Resources{},
|
||||||
|
}
|
||||||
|
m, err := NewManager(cg, nil)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
err = m.Apply(-1)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
_ = m.Destroy()
|
||||||
|
}()
|
||||||
|
|
||||||
|
var st *cgroups.Stats
|
||||||
|
|
||||||
|
b.ResetTimer()
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
st, err = m.GetStats()
|
||||||
|
if err != nil {
|
||||||
|
b.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if st.CpuStats.CpuUsage.TotalUsage != 0 {
|
||||||
|
b.Fatalf("stats: %+v", st)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,3 +0,0 @@
|
||||||
// +build !linux
|
|
||||||
|
|
||||||
package fs
|
|
|
@ -1,35 +1,26 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type HugetlbGroup struct {
|
type HugetlbGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *HugetlbGroup) Name() string {
|
func (s *HugetlbGroup) Name() string {
|
||||||
return "hugetlb"
|
return "hugetlb"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
_, err := d.join("hugetlb")
|
return apply(path, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
|
||||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
for _, hugetlb := range r.HugetlbLimit {
|
||||||
if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -37,31 +28,30 @@ func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *HugetlbGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("hugetlb"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
if !cgroups.PathExists(path) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
hugetlbStats := cgroups.HugetlbStats{}
|
hugetlbStats := cgroups.HugetlbStats{}
|
||||||
for _, pageSize := range HugePageSizes {
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
|
usage := "hugetlb." + pageSize + ".usage_in_bytes"
|
||||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse %s - %v", usage, err)
|
return err
|
||||||
}
|
}
|
||||||
hugetlbStats.Usage = value
|
hugetlbStats.Usage = value
|
||||||
|
|
||||||
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
|
maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
|
||||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
return err
|
||||||
}
|
}
|
||||||
hugetlbStats.MaxUsage = value
|
hugetlbStats.MaxUsage = value
|
||||||
|
|
||||||
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
|
failcnt := "hugetlb." + pageSize + ".failcnt"
|
||||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
return err
|
||||||
}
|
}
|
||||||
hugetlbStats.Failcnt = value
|
hugetlbStats.Failcnt = value
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -18,7 +16,7 @@ const (
|
||||||
hugetlbFailcnt = "100\n"
|
hugetlbFailcnt = "100\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
const (
|
||||||
usage = "hugetlb.%s.usage_in_bytes"
|
usage = "hugetlb.%s.usage_in_bytes"
|
||||||
limit = "hugetlb.%s.limit_in_bytes"
|
limit = "hugetlb.%s.limit_in_bytes"
|
||||||
maxUsage = "hugetlb.%s.max_usage_in_bytes"
|
maxUsage = "hugetlb.%s.max_usage_in_bytes"
|
||||||
|
@ -26,38 +24,38 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestHugetlbSetHugetlb(t *testing.T) {
|
func TestHugetlbSetHugetlb(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("hugetlb", t)
|
path := tempDir(t, "hugetlb")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
hugetlbBefore = 256
|
hugetlbBefore = 256
|
||||||
hugetlbAfter = 512
|
hugetlbAfter = 512
|
||||||
)
|
)
|
||||||
|
|
||||||
for _, pageSize := range HugePageSizes {
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
|
fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, pageSize := range HugePageSizes {
|
r := &configs.Resources{}
|
||||||
helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
|
r.HugetlbLimit = []*configs.HugepageLimit{
|
||||||
{
|
{
|
||||||
Pagesize: pageSize,
|
Pagesize: pageSize,
|
||||||
Limit: hugetlbAfter,
|
Limit: hugetlbAfter,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
hugetlb := &HugetlbGroup{}
|
hugetlb := &HugetlbGroup{}
|
||||||
if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := hugetlb.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, pageSize := range HugePageSizes {
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
limit := fmt.Sprintf(limit, pageSize)
|
limit := fmt.Sprintf(limit, pageSize)
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, limit)
|
value, err := fscommon.GetCgroupParamUint(path, limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse %s - %s", limit, err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != hugetlbAfter {
|
if value != hugetlbAfter {
|
||||||
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
|
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
|
||||||
|
@ -66,10 +64,9 @@ func TestHugetlbSetHugetlb(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHugetlbStats(t *testing.T) {
|
func TestHugetlbStats(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("hugetlb", t)
|
path := tempDir(t, "hugetlb")
|
||||||
defer helper.cleanup()
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
for _, pageSize := range HugePageSizes {
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||||
fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
|
fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
|
||||||
fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt,
|
fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt,
|
||||||
|
@ -78,56 +75,50 @@ func TestHugetlbStats(t *testing.T) {
|
||||||
|
|
||||||
hugetlb := &HugetlbGroup{}
|
hugetlb := &HugetlbGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
err := hugetlb.GetStats(path, &actualStats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
|
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
|
||||||
for _, pageSize := range HugePageSizes {
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
|
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||||
t.Skip("Disabled unreliable test")
|
path := tempDir(t, "hugetlb")
|
||||||
helper := NewCgroupTestUtil("hugetlb", t)
|
writeFileContents(t, path, map[string]string{
|
||||||
defer helper.cleanup()
|
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
maxUsage: hugetlbMaxUsageContents,
|
maxUsage: hugetlbMaxUsageContents,
|
||||||
})
|
})
|
||||||
|
|
||||||
hugetlb := &HugetlbGroup{}
|
hugetlb := &HugetlbGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
err := hugetlb.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||||
t.Skip("Disabled unreliable test")
|
path := tempDir(t, "hugetlb")
|
||||||
helper := NewCgroupTestUtil("hugetlb", t)
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
for _, pageSize := range HugePageSizes {
|
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
hugetlb := &HugetlbGroup{}
|
hugetlb := &HugetlbGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
err := hugetlb.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||||
t.Skip("Disabled unreliable test")
|
path := tempDir(t, "hugetlb")
|
||||||
helper := NewCgroupTestUtil("hugetlb", t)
|
for _, pageSize := range cgroups.HugePageSizes() {
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
for _, pageSize := range HugePageSizes {
|
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
fmt.Sprintf(usage, pageSize): "bad",
|
fmt.Sprintf(usage, pageSize): "bad",
|
||||||
maxUsage: hugetlbMaxUsageContents,
|
maxUsage: hugetlbMaxUsageContents,
|
||||||
})
|
})
|
||||||
|
@ -135,24 +126,22 @@ t.Skip("Disabled unreliable test")
|
||||||
|
|
||||||
hugetlb := &HugetlbGroup{}
|
hugetlb := &HugetlbGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
err := hugetlb.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
|
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
|
||||||
t.Skip("Disabled unreliable test")
|
path := tempDir(t, "hugetlb")
|
||||||
helper := NewCgroupTestUtil("hugetlb", t)
|
writeFileContents(t, path, map[string]string{
|
||||||
defer helper.cleanup()
|
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
usage: hugetlbUsageContents,
|
usage: hugetlbUsageContents,
|
||||||
maxUsage: "bad",
|
maxUsage: "bad",
|
||||||
})
|
})
|
||||||
|
|
||||||
hugetlb := &HugetlbGroup{}
|
hugetlb := &HugetlbGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
err := hugetlb.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,62 +0,0 @@
|
||||||
// +build linux,!nokmem
|
|
||||||
|
|
||||||
package fs
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
|
||||||
"syscall" // for Errno type only
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
|
||||||
|
|
||||||
func EnableKernelMemoryAccounting(path string) error {
|
|
||||||
// Ensure that kernel memory is available in this kernel build. If it
|
|
||||||
// isn't, we just ignore it because EnableKernelMemoryAccounting is
|
|
||||||
// automatically called for all memory limits.
|
|
||||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
// We have to limit the kernel memory here as it won't be accounted at all
|
|
||||||
// until a limit is set on the cgroup and limit cannot be set once the
|
|
||||||
// cgroup has children, or if there are already tasks in the cgroup.
|
|
||||||
for _, i := range []int64{1, -1} {
|
|
||||||
if err := setKernelMemory(path, i); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
|
||||||
if path == "" {
|
|
||||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
|
||||||
}
|
|
||||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
|
||||||
// We have specifically been asked to set a kmem limit. If the kernel
|
|
||||||
// doesn't support it we *must* error out.
|
|
||||||
return errors.New("kernel memory accounting not supported by this kernel")
|
|
||||||
}
|
|
||||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
|
||||||
// Check if the error number returned by the syscall is "EBUSY"
|
|
||||||
// The EBUSY error is returned on attempts to write to the
|
|
||||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
|
||||||
// once tasks have been attached to the cgroup
|
|
||||||
if pathErr, ok := err.(*os.PathError); ok {
|
|
||||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
|
||||||
if errNo == unix.EBUSY {
|
|
||||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
|
@ -1,15 +0,0 @@
|
||||||
// +build linux,nokmem
|
|
||||||
|
|
||||||
package fs
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
)
|
|
||||||
|
|
||||||
func EnableKernelMemoryAccounting(path string) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
|
||||||
return errors.New("kernel memory accounting disabled in this runc build")
|
|
||||||
}
|
|
|
@ -1,15 +1,17 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
@ -18,65 +20,66 @@ import (
|
||||||
const (
|
const (
|
||||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||||
|
cgroupMemoryUsage = "memory.usage_in_bytes"
|
||||||
|
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
|
||||||
)
|
)
|
||||||
|
|
||||||
type MemoryGroup struct {
|
type MemoryGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *MemoryGroup) Name() string {
|
func (s *MemoryGroup) Name() string {
|
||||||
return "memory"
|
return "memory"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
path, err := d.path("memory")
|
return apply(path, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
} else if path == "" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if memoryAssigned(d.config) {
|
|
||||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
|
||||||
if err := os.MkdirAll(path, 0755); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// Only enable kernel memory accounting when this cgroup
|
|
||||||
// is created by libcontainer, otherwise we might get
|
|
||||||
// error when people use `cgroupsPath` to join an existing
|
|
||||||
// cgroup whose kernel memory is not initialized.
|
|
||||||
if err := EnableKernelMemoryAccounting(path); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
if err != nil {
|
|
||||||
os.RemoveAll(path)
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// We need to join memory cgroup after set memory limits, because
|
|
||||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
|
||||||
_, err = d.join("memory")
|
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
func setMemory(path string, val int64) error {
|
||||||
// If the memory update is set to -1 we should also
|
if val == 0 {
|
||||||
// set swap to -1, it means unlimited memory.
|
return nil
|
||||||
if cgroup.Resources.Memory == -1 {
|
}
|
||||||
|
|
||||||
|
err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
|
||||||
|
if !errors.Is(err, unix.EBUSY) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// EBUSY means the kernel can't set new limit as it's too low
|
||||||
|
// (lower than the current usage). Return more specific error.
|
||||||
|
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
|
||||||
|
}
|
||||||
|
|
||||||
|
func setSwap(path string, val int64) error {
|
||||||
|
if val == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
|
||||||
|
}
|
||||||
|
|
||||||
|
func setMemoryAndSwap(path string, r *configs.Resources) error {
|
||||||
|
// If the memory update is set to -1 and the swap is not explicitly
|
||||||
|
// set, we should also set swap to -1, it means unlimited memory.
|
||||||
|
if r.Memory == -1 && r.MemorySwap == 0 {
|
||||||
// Only set swap if it's enabled in kernel
|
// Only set swap if it's enabled in kernel
|
||||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||||
cgroup.Resources.MemorySwap = -1
|
r.MemorySwap = -1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// When memory and swap memory are both set, we need to handle the cases
|
// When memory and swap memory are both set, we need to handle the cases
|
||||||
// for updating container.
|
// for updating container.
|
||||||
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
|
if r.Memory != 0 && r.MemorySwap != 0 {
|
||||||
memoryUsage, err := getMemoryData(path, "")
|
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -84,84 +87,61 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||||
// When update memory limit, we should adapt the write sequence
|
// When update memory limit, we should adapt the write sequence
|
||||||
// for memory and swap memory, so it won't fail because the new
|
// for memory and swap memory, so it won't fail because the new
|
||||||
// value and the old value don't fit kernel's validation.
|
// value and the old value don't fit kernel's validation.
|
||||||
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
|
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
|
||||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
if err := setMemory(path, r.Memory); err != nil {
|
||||||
return err
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if cgroup.Resources.Memory != 0 {
|
|
||||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if cgroup.Resources.MemorySwap != 0 {
|
|
||||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
if err := setMemory(path, r.Memory); err != nil {
|
||||||
}
|
return err
|
||||||
|
}
|
||||||
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
if err := setSwap(path, r.MemorySwap); err != nil {
|
||||||
if err := setMemoryAndSwap(path, cgroup); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if cgroup.Resources.KernelMemory != 0 {
|
return nil
|
||||||
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
|
}
|
||||||
|
|
||||||
|
func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
if err := setMemoryAndSwap(path, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ignore KernelMemory and KernelMemoryTCP
|
||||||
|
|
||||||
|
if r.MemoryReservation != 0 {
|
||||||
|
if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cgroup.Resources.MemoryReservation != 0 {
|
if r.OomKillDisable {
|
||||||
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
|
||||||
if cgroup.Resources.KernelMemoryTCP != 0 {
|
|
||||||
if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if cgroup.Resources.OomKillDisable {
|
|
||||||
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
|
||||||
return nil
|
return nil
|
||||||
} else if *cgroup.Resources.MemorySwappiness <= 100 {
|
} else if *r.MemorySwappiness <= 100 {
|
||||||
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
|
return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *MemoryGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("memory"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
// Set stats from memory.stat.
|
const file = "memory.stat"
|
||||||
statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
|
statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return nil
|
return nil
|
||||||
|
@ -172,9 +152,9 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
|
||||||
sc := bufio.NewScanner(statsFile)
|
sc := bufio.NewScanner(statsFile)
|
||||||
for sc.Scan() {
|
for sc.Scan() {
|
||||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
t, v, err := fscommon.ParseKeyValue(sc.Text())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
return &parseError{Path: path, File: file, Err: err}
|
||||||
}
|
}
|
||||||
stats.MemoryStats.Stats[t] = v
|
stats.MemoryStats.Stats[t] = v
|
||||||
}
|
}
|
||||||
|
@ -201,25 +181,21 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
}
|
}
|
||||||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||||
|
|
||||||
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
|
value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
|
||||||
value, err := fscommon.GetCgroupParamUint(path, useHierarchy)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if value == 1 {
|
if value == 1 {
|
||||||
stats.MemoryStats.UseHierarchy = true
|
stats.MemoryStats.UseHierarchy = true
|
||||||
}
|
}
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func memoryAssigned(cgroup *configs.Cgroup) bool {
|
pagesByNUMA, err := getPageUsageByNUMA(path)
|
||||||
return cgroup.Resources.Memory != 0 ||
|
if err != nil {
|
||||||
cgroup.Resources.MemoryReservation != 0 ||
|
return err
|
||||||
cgroup.Resources.MemorySwap > 0 ||
|
}
|
||||||
cgroup.Resources.KernelMemory > 0 ||
|
stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
|
||||||
cgroup.Resources.KernelMemoryTCP > 0 ||
|
|
||||||
cgroup.Resources.OomKillDisable ||
|
return nil
|
||||||
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||||
|
@ -227,45 +203,146 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||||
|
|
||||||
moduleName := "memory"
|
moduleName := "memory"
|
||||||
if name != "" {
|
if name != "" {
|
||||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
moduleName = "memory." + name
|
||||||
}
|
}
|
||||||
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
|
var (
|
||||||
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
|
usage = moduleName + ".usage_in_bytes"
|
||||||
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
|
maxUsage = moduleName + ".max_usage_in_bytes"
|
||||||
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
|
failcnt = moduleName + ".failcnt"
|
||||||
|
limit = moduleName + ".limit_in_bytes"
|
||||||
|
)
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if moduleName != "memory" && os.IsNotExist(err) {
|
if name != "" && os.IsNotExist(err) {
|
||||||
|
// Ignore ENOENT as swap and kmem controllers
|
||||||
|
// are optional in the kernel.
|
||||||
return cgroups.MemoryData{}, nil
|
return cgroups.MemoryData{}, nil
|
||||||
}
|
}
|
||||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
return cgroups.MemoryData{}, err
|
||||||
}
|
}
|
||||||
memoryData.Usage = value
|
memoryData.Usage = value
|
||||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if moduleName != "memory" && os.IsNotExist(err) {
|
return cgroups.MemoryData{}, err
|
||||||
return cgroups.MemoryData{}, nil
|
|
||||||
}
|
|
||||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
|
||||||
}
|
}
|
||||||
memoryData.MaxUsage = value
|
memoryData.MaxUsage = value
|
||||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if moduleName != "memory" && os.IsNotExist(err) {
|
return cgroups.MemoryData{}, err
|
||||||
return cgroups.MemoryData{}, nil
|
|
||||||
}
|
|
||||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
|
||||||
}
|
}
|
||||||
memoryData.Failcnt = value
|
memoryData.Failcnt = value
|
||||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if moduleName != "memory" && os.IsNotExist(err) {
|
return cgroups.MemoryData{}, err
|
||||||
return cgroups.MemoryData{}, nil
|
|
||||||
}
|
|
||||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
|
||||||
}
|
}
|
||||||
memoryData.Limit = value
|
memoryData.Limit = value
|
||||||
|
|
||||||
return memoryData, nil
|
return memoryData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
|
||||||
|
const (
|
||||||
|
maxColumns = math.MaxUint8 + 1
|
||||||
|
file = "memory.numa_stat"
|
||||||
|
)
|
||||||
|
stats := cgroups.PageUsageByNUMA{}
|
||||||
|
|
||||||
|
fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return stats, nil
|
||||||
|
} else if err != nil {
|
||||||
|
return stats, err
|
||||||
|
}
|
||||||
|
defer fd.Close()
|
||||||
|
|
||||||
|
// File format is documented in linux/Documentation/cgroup-v1/memory.txt
|
||||||
|
// and it looks like this:
|
||||||
|
//
|
||||||
|
// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(fd)
|
||||||
|
for scanner.Scan() {
|
||||||
|
var field *cgroups.PageStats
|
||||||
|
|
||||||
|
line := scanner.Text()
|
||||||
|
columns := strings.SplitN(line, " ", maxColumns)
|
||||||
|
for i, column := range columns {
|
||||||
|
byNode := strings.SplitN(column, "=", 2)
|
||||||
|
// Some custom kernels have non-standard fields, like
|
||||||
|
// numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||||
|
// numa_exectime 0
|
||||||
|
if len(byNode) < 2 {
|
||||||
|
if i == 0 {
|
||||||
|
// Ignore/skip those.
|
||||||
|
break
|
||||||
|
} else {
|
||||||
|
// The first column was already validated,
|
||||||
|
// so be strict to the rest.
|
||||||
|
return stats, malformedLine(path, file, line)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
key, val := byNode[0], byNode[1]
|
||||||
|
if i == 0 { // First column: key is name, val is total.
|
||||||
|
field = getNUMAField(&stats, key)
|
||||||
|
if field == nil { // unknown field (new kernel?)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
field.Total, err = strconv.ParseUint(val, 0, 64)
|
||||||
|
if err != nil {
|
||||||
|
return stats, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
field.Nodes = map[uint8]uint64{}
|
||||||
|
} else { // Subsequent columns: key is N<id>, val is usage.
|
||||||
|
if len(key) < 2 || key[0] != 'N' {
|
||||||
|
// This is definitely an error.
|
||||||
|
return stats, malformedLine(path, file, line)
|
||||||
|
}
|
||||||
|
|
||||||
|
n, err := strconv.ParseUint(key[1:], 10, 8)
|
||||||
|
if err != nil {
|
||||||
|
return stats, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
usage, err := strconv.ParseUint(val, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return stats, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
field.Nodes[uint8(n)] = usage
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := scanner.Err(); err != nil {
|
||||||
|
return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
|
||||||
|
}
|
||||||
|
|
||||||
|
return stats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
|
||||||
|
switch name {
|
||||||
|
case "total":
|
||||||
|
return &stats.Total
|
||||||
|
case "file":
|
||||||
|
return &stats.File
|
||||||
|
case "anon":
|
||||||
|
return &stats.Anon
|
||||||
|
case "unevictable":
|
||||||
|
return &stats.Unevictable
|
||||||
|
case "hierarchical_total":
|
||||||
|
return &stats.Hierarchical.Total
|
||||||
|
case "hierarchical_file":
|
||||||
|
return &stats.Hierarchical.File
|
||||||
|
case "hierarchical_anon":
|
||||||
|
return &stats.Hierarchical.Anon
|
||||||
|
case "hierarchical_unevictable":
|
||||||
|
return &stats.Hierarchical.Unevictable
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -8,6 +6,7 @@ import (
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -18,11 +17,29 @@ rss 1024`
|
||||||
memoryFailcnt = "100\n"
|
memoryFailcnt = "100\n"
|
||||||
memoryLimitContents = "8192\n"
|
memoryLimitContents = "8192\n"
|
||||||
memoryUseHierarchyContents = "1\n"
|
memoryUseHierarchyContents = "1\n"
|
||||||
|
memoryNUMAStatContents = `total=44611 N0=32631 N1=7501 N2=1982 N3=2497
|
||||||
|
file=44428 N0=32614 N1=7335 N2=1982 N3=2497
|
||||||
|
anon=183 N0=17 N1=166 N2=0 N3=0
|
||||||
|
unevictable=0 N0=0 N1=0 N2=0 N3=0
|
||||||
|
hierarchical_total=768133 N0=509113 N1=138887 N2=20464 N3=99669
|
||||||
|
hierarchical_file=722017 N0=496516 N1=119997 N2=20181 N3=85323
|
||||||
|
hierarchical_anon=46096 N0=12597 N1=18890 N2=283 N3=14326
|
||||||
|
hierarchical_unevictable=20 N0=0 N1=0 N2=0 N3=20
|
||||||
|
`
|
||||||
|
memoryNUMAStatNoHierarchyContents = `total=44611 N0=32631 N1=7501 N2=1982 N3=2497
|
||||||
|
file=44428 N0=32614 N1=7335 N2=1982 N3=2497
|
||||||
|
anon=183 N0=17 N1=166 N2=0 N3=0
|
||||||
|
unevictable=0 N0=0 N1=0 N2=0 N3=0
|
||||||
|
`
|
||||||
|
// Some custom kernels have extra fields that should be ignored
|
||||||
|
memoryNUMAStatExtraContents = `numa_locality 0 0 0 0 0 0 0 0 0 0
|
||||||
|
numa_exectime 0
|
||||||
|
whatever=100 N0=0
|
||||||
|
`
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMemorySetMemory(t *testing.T) {
|
func TestMemorySetMemory(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
memoryBefore = 314572800 // 300M
|
memoryBefore = 314572800 // 300M
|
||||||
|
@ -31,29 +48,31 @@ func TestMemorySetMemory(t *testing.T) {
|
||||||
reservationAfter = 314572800 // 300M
|
reservationAfter = 314572800 // 300M
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||||
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
|
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
r := &configs.Resources{
|
||||||
helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
|
Memory: memoryAfter,
|
||||||
|
MemoryReservation: reservationAfter,
|
||||||
|
}
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := memory.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != memoryAfter {
|
if value != memoryAfter {
|
||||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
|
value, err = fscommon.GetCgroupParamUint(path, "memory.soft_limit_in_bytes")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != reservationAfter {
|
if value != reservationAfter {
|
||||||
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
|
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
|
||||||
|
@ -61,27 +80,28 @@ func TestMemorySetMemory(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemorySetMemoryswap(t *testing.T) {
|
func TestMemorySetMemoryswap(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
memoryswapBefore = 314572800 // 300M
|
memoryswapBefore = 314572800 // 300M
|
||||||
memoryswapAfter = 524288000 // 500M
|
memoryswapAfter = 524288000 // 500M
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
r := &configs.Resources{
|
||||||
|
MemorySwap: memoryswapAfter,
|
||||||
|
}
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := memory.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
value, err := fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != memoryswapAfter {
|
if value != memoryswapAfter {
|
||||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||||
|
@ -89,8 +109,7 @@ func TestMemorySetMemoryswap(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
memoryBefore = 314572800 // 300M
|
memoryBefore = 314572800 // 300M
|
||||||
|
@ -99,7 +118,7 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
||||||
memoryswapAfter = 838860800 // 800M
|
memoryswapAfter = 838860800 // 800M
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||||
// Set will call getMemoryData when memory and swap memory are
|
// Set will call getMemoryData when memory and swap memory are
|
||||||
|
@ -109,23 +128,26 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
||||||
"memory.failcnt": "0",
|
"memory.failcnt": "0",
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
r := &configs.Resources{
|
||||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
Memory: memoryAfter,
|
||||||
|
MemorySwap: memoryswapAfter,
|
||||||
|
}
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := memory.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != memoryAfter {
|
if value != memoryAfter {
|
||||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||||
}
|
}
|
||||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != memoryswapAfter {
|
if value != memoryswapAfter {
|
||||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||||
|
@ -133,8 +155,7 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
|
func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
memoryBefore = 629145600 // 600M
|
memoryBefore = 629145600 // 600M
|
||||||
|
@ -143,115 +164,58 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
|
||||||
memoryswapAfter = 524288000 // 500M
|
memoryswapAfter = 524288000 // 500M
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||||
// Set will call getMemoryData when memory and swap memory are
|
|
||||||
// both set, fake these fields so we don't get error.
|
|
||||||
"memory.usage_in_bytes": "0",
|
|
||||||
"memory.max_usage_in_bytes": "0",
|
|
||||||
"memory.failcnt": "0",
|
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
r := &configs.Resources{
|
||||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
Memory: memoryAfter,
|
||||||
|
MemorySwap: memoryswapAfter,
|
||||||
|
}
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := memory.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != memoryAfter {
|
if value != memoryAfter {
|
||||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
t.Fatalf("Got the wrong value (%d != %d), set memory.limit_in_bytes failed", value, memoryAfter)
|
||||||
}
|
}
|
||||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
|
||||||
|
value, err = fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != memoryswapAfter {
|
if value != memoryswapAfter {
|
||||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
t.Fatalf("Got the wrong value (%d != %d), set memory.memsw.limit_in_bytes failed", value, memoryswapAfter)
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMemorySetKernelMemory(t *testing.T) {
|
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
|
||||||
kernelMemoryBefore = 314572800 // 300M
|
|
||||||
kernelMemoryAfter = 524288000 // 500M
|
|
||||||
)
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
|
|
||||||
})
|
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
|
|
||||||
memory := &MemoryGroup{}
|
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
|
|
||||||
}
|
|
||||||
if value != kernelMemoryAfter {
|
|
||||||
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMemorySetKernelMemoryTCP(t *testing.T) {
|
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
|
||||||
kernelMemoryTCPBefore = 314572800 // 300M
|
|
||||||
kernelMemoryTCPAfter = 524288000 // 500M
|
|
||||||
)
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore),
|
|
||||||
})
|
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter
|
|
||||||
memory := &MemoryGroup{}
|
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes")
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err)
|
|
||||||
}
|
|
||||||
if value != kernelMemoryTCPAfter {
|
|
||||||
t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
|
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
swappinessBefore := 60 //default is 60
|
swappinessBefore := 60 // default is 60
|
||||||
swappinessAfter := uint64(0)
|
swappinessAfter := uint64(0)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"memory.swappiness": strconv.Itoa(swappinessBefore),
|
"memory.swappiness": strconv.Itoa(swappinessBefore),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter
|
r := &configs.Resources{
|
||||||
|
MemorySwappiness: &swappinessAfter,
|
||||||
|
}
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := memory.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.swappiness")
|
value, err := fscommon.GetCgroupParamUint(path, "memory.swappiness")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.swappiness - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != swappinessAfter {
|
if value != swappinessAfter {
|
||||||
t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter)
|
t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter)
|
||||||
|
@ -259,9 +223,8 @@ func TestMemorySetMemorySwappinessDefault(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStats(t *testing.T) {
|
func TestMemoryStats(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": memoryStatContents,
|
"memory.stat": memoryStatContents,
|
||||||
"memory.usage_in_bytes": memoryUsageContents,
|
"memory.usage_in_bytes": memoryUsageContents,
|
||||||
"memory.limit_in_bytes": memoryLimitContents,
|
"memory.limit_in_bytes": memoryLimitContents,
|
||||||
|
@ -276,22 +239,43 @@ func TestMemoryStats(t *testing.T) {
|
||||||
"memory.kmem.failcnt": memoryFailcnt,
|
"memory.kmem.failcnt": memoryFailcnt,
|
||||||
"memory.kmem.limit_in_bytes": memoryLimitContents,
|
"memory.kmem.limit_in_bytes": memoryLimitContents,
|
||||||
"memory.use_hierarchy": memoryUseHierarchyContents,
|
"memory.use_hierarchy": memoryUseHierarchyContents,
|
||||||
|
"memory.numa_stat": memoryNUMAStatContents + memoryNUMAStatExtraContents,
|
||||||
})
|
})
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}, UseHierarchy: true}
|
expectedStats := cgroups.MemoryStats{
|
||||||
|
Cache: 512,
|
||||||
|
Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
|
||||||
|
SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
|
||||||
|
KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
|
||||||
|
Stats: map[string]uint64{"cache": 512, "rss": 1024},
|
||||||
|
UseHierarchy: true,
|
||||||
|
PageUsageByNUMA: cgroups.PageUsageByNUMA{
|
||||||
|
PageUsageByNUMAInner: cgroups.PageUsageByNUMAInner{
|
||||||
|
Total: cgroups.PageStats{Total: 44611, Nodes: map[uint8]uint64{0: 32631, 1: 7501, 2: 1982, 3: 2497}},
|
||||||
|
File: cgroups.PageStats{Total: 44428, Nodes: map[uint8]uint64{0: 32614, 1: 7335, 2: 1982, 3: 2497}},
|
||||||
|
Anon: cgroups.PageStats{Total: 183, Nodes: map[uint8]uint64{0: 17, 1: 166, 2: 0, 3: 0}},
|
||||||
|
Unevictable: cgroups.PageStats{Total: 0, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 0}},
|
||||||
|
},
|
||||||
|
Hierarchical: cgroups.PageUsageByNUMAInner{
|
||||||
|
Total: cgroups.PageStats{Total: 768133, Nodes: map[uint8]uint64{0: 509113, 1: 138887, 2: 20464, 3: 99669}},
|
||||||
|
File: cgroups.PageStats{Total: 722017, Nodes: map[uint8]uint64{0: 496516, 1: 119997, 2: 20181, 3: 85323}},
|
||||||
|
Anon: cgroups.PageStats{Total: 46096, Nodes: map[uint8]uint64{0: 12597, 1: 18890, 2: 283, 3: 14326}},
|
||||||
|
Unevictable: cgroups.PageStats{Total: 20, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 20}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
|
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsNoStatFile(t *testing.T) {
|
func TestMemoryStatsNoStatFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.usage_in_bytes": memoryUsageContents,
|
"memory.usage_in_bytes": memoryUsageContents,
|
||||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||||
"memory.limit_in_bytes": memoryLimitContents,
|
"memory.limit_in_bytes": memoryLimitContents,
|
||||||
|
@ -299,16 +283,15 @@ func TestMemoryStatsNoStatFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsNoUsageFile(t *testing.T) {
|
func TestMemoryStatsNoUsageFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": memoryStatContents,
|
"memory.stat": memoryStatContents,
|
||||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||||
"memory.limit_in_bytes": memoryLimitContents,
|
"memory.limit_in_bytes": memoryLimitContents,
|
||||||
|
@ -316,16 +299,15 @@ func TestMemoryStatsNoUsageFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
|
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": memoryStatContents,
|
"memory.stat": memoryStatContents,
|
||||||
"memory.usage_in_bytes": memoryUsageContents,
|
"memory.usage_in_bytes": memoryUsageContents,
|
||||||
"memory.limit_in_bytes": memoryLimitContents,
|
"memory.limit_in_bytes": memoryLimitContents,
|
||||||
|
@ -333,16 +315,15 @@ func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
|
func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": memoryStatContents,
|
"memory.stat": memoryStatContents,
|
||||||
"memory.usage_in_bytes": memoryUsageContents,
|
"memory.usage_in_bytes": memoryUsageContents,
|
||||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||||
|
@ -350,16 +331,15 @@ func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsBadStatFile(t *testing.T) {
|
func TestMemoryStatsBadStatFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": "rss rss",
|
"memory.stat": "rss rss",
|
||||||
"memory.usage_in_bytes": memoryUsageContents,
|
"memory.usage_in_bytes": memoryUsageContents,
|
||||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||||
|
@ -368,16 +348,15 @@ func TestMemoryStatsBadStatFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsBadUsageFile(t *testing.T) {
|
func TestMemoryStatsBadUsageFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": memoryStatContents,
|
"memory.stat": memoryStatContents,
|
||||||
"memory.usage_in_bytes": "bad",
|
"memory.usage_in_bytes": "bad",
|
||||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||||
|
@ -386,16 +365,15 @@ func TestMemoryStatsBadUsageFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
|
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": memoryStatContents,
|
"memory.stat": memoryStatContents,
|
||||||
"memory.usage_in_bytes": memoryUsageContents,
|
"memory.usage_in_bytes": memoryUsageContents,
|
||||||
"memory.max_usage_in_bytes": "bad",
|
"memory.max_usage_in_bytes": "bad",
|
||||||
|
@ -404,16 +382,15 @@ func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
|
func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
writeFileContents(t, path, map[string]string{
|
||||||
helper.writeFileContents(map[string]string{
|
|
||||||
"memory.stat": memoryStatContents,
|
"memory.stat": memoryStatContents,
|
||||||
"memory.usage_in_bytes": memoryUsageContents,
|
"memory.usage_in_bytes": memoryUsageContents,
|
||||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||||
|
@ -422,35 +399,108 @@ func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
actualStats := *cgroups.NewStats()
|
actualStats := *cgroups.NewStats()
|
||||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
err := memory.GetStats(path, &actualStats)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Fatal("Expected failure")
|
t.Fatal("Expected failure")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestMemorySetOomControl(t *testing.T) {
|
func TestMemorySetOomControl(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("memory", t)
|
path := tempDir(t, "memory")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
oomKillDisable = 1 // disable oom killer, default is 0
|
oomKillDisable = 1 // disable oom killer, default is 0
|
||||||
)
|
)
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"memory.oom_control": strconv.Itoa(oomKillDisable),
|
"memory.oom_control": strconv.Itoa(oomKillDisable),
|
||||||
})
|
})
|
||||||
|
|
||||||
memory := &MemoryGroup{}
|
memory := &MemoryGroup{}
|
||||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
r := &configs.Resources{}
|
||||||
|
if err := memory.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.oom_control")
|
value, err := fscommon.GetCgroupParamUint(path, "memory.oom_control")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse memory.oom_control - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if value != oomKillDisable {
|
if value != oomKillDisable {
|
||||||
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
|
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNoHierarchicalNumaStat(t *testing.T) {
|
||||||
|
path := tempDir(t, "memory")
|
||||||
|
writeFileContents(t, path, map[string]string{
|
||||||
|
"memory.numa_stat": memoryNUMAStatNoHierarchyContents + memoryNUMAStatExtraContents,
|
||||||
|
})
|
||||||
|
|
||||||
|
actualStats, err := getPageUsageByNUMA(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
pageUsageByNUMA := cgroups.PageUsageByNUMA{
|
||||||
|
PageUsageByNUMAInner: cgroups.PageUsageByNUMAInner{
|
||||||
|
Total: cgroups.PageStats{Total: 44611, Nodes: map[uint8]uint64{0: 32631, 1: 7501, 2: 1982, 3: 2497}},
|
||||||
|
File: cgroups.PageStats{Total: 44428, Nodes: map[uint8]uint64{0: 32614, 1: 7335, 2: 1982, 3: 2497}},
|
||||||
|
Anon: cgroups.PageStats{Total: 183, Nodes: map[uint8]uint64{0: 17, 1: 166, 2: 0, 3: 0}},
|
||||||
|
Unevictable: cgroups.PageStats{Total: 0, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 0}},
|
||||||
|
},
|
||||||
|
Hierarchical: cgroups.PageUsageByNUMAInner{},
|
||||||
|
}
|
||||||
|
expectPageUsageByNUMAEquals(t, pageUsageByNUMA, actualStats)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBadNumaStat(t *testing.T) {
|
||||||
|
memoryNUMAStatBadContents := []struct {
|
||||||
|
desc, contents string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
desc: "Nx where x is not a number",
|
||||||
|
contents: `total=44611 N0=44611,
|
||||||
|
file=44428 Nx=0
|
||||||
|
`,
|
||||||
|
}, {
|
||||||
|
desc: "Nx where x > 255",
|
||||||
|
contents: `total=44611 N333=444`,
|
||||||
|
}, {
|
||||||
|
desc: "Nx argument missing",
|
||||||
|
contents: `total=44611 N0=123 N1=`,
|
||||||
|
}, {
|
||||||
|
desc: "Nx argument is not a number",
|
||||||
|
contents: `total=44611 N0=123 N1=a`,
|
||||||
|
}, {
|
||||||
|
desc: "Missing = after Nx",
|
||||||
|
contents: `total=44611 N0=123 N1`,
|
||||||
|
}, {
|
||||||
|
desc: "No Nx at non-first position",
|
||||||
|
contents: `total=44611 N0=32631
|
||||||
|
file=44428 N0=32614
|
||||||
|
anon=183 N0=12 badone
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
path := tempDir(t, "memory")
|
||||||
|
for _, c := range memoryNUMAStatBadContents {
|
||||||
|
writeFileContents(t, path, map[string]string{
|
||||||
|
"memory.numa_stat": c.contents,
|
||||||
|
})
|
||||||
|
|
||||||
|
_, err := getPageUsageByNUMA(path)
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("case %q: expected error, got nil", c.desc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWithoutNumaStat(t *testing.T) {
|
||||||
|
path := tempDir(t, "memory")
|
||||||
|
|
||||||
|
actualStats, err := getPageUsageByNUMA(path)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expectPageUsageByNUMAEquals(t, cgroups.PageUsageByNUMA{}, actualStats)
|
||||||
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -16,22 +14,15 @@ func (s *NameGroup) Name() string {
|
||||||
return s.GroupName
|
return s.GroupName
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NameGroup) Apply(d *cgroupData) error {
|
func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
if s.Join {
|
if s.Join {
|
||||||
// ignore errors if the named cgroup does not exist
|
// Ignore errors if the named cgroup does not exist.
|
||||||
d.join(s.GroupName)
|
_ = apply(path, pid)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *NameGroup) Remove(d *cgroupData) error {
|
|
||||||
if s.Join {
|
|
||||||
removePath(d.path(s.GroupName))
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,33 +1,25 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type NetClsGroup struct {
|
type NetClsGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *NetClsGroup) Name() string {
|
func (s *NetClsGroup) Name() string {
|
||||||
return "net_cls"
|
return "net_cls"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NetClsGroup) Apply(d *cgroupData) error {
|
func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
_, err := d.join("net_cls")
|
return apply(path, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
|
||||||
if cgroup.Resources.NetClsClassid != 0 {
|
if r.NetClsClassid != 0 {
|
||||||
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -35,10 +27,6 @@ func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NetClsGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("net_cls"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -7,6 +5,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -15,25 +14,26 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestNetClsSetClassid(t *testing.T) {
|
func TestNetClsSetClassid(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("net_cls", t)
|
path := tempDir(t, "net_cls")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"net_cls.classid": strconv.FormatUint(classidBefore, 10),
|
"net_cls.classid": strconv.FormatUint(classidBefore, 10),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.NetClsClassid = classidAfter
|
r := &configs.Resources{
|
||||||
|
NetClsClassid: classidAfter,
|
||||||
|
}
|
||||||
netcls := &NetClsGroup{}
|
netcls := &NetClsGroup{}
|
||||||
if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := netcls.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// As we are in mock environment, we can't get correct value of classid from
|
// As we are in mock environment, we can't get correct value of classid from
|
||||||
// net_cls.classid.
|
// net_cls.classid.
|
||||||
// So. we just judge if we successfully write classid into file
|
// So. we just judge if we successfully write classid into file
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "net_cls.classid")
|
value, err := fscommon.GetCgroupParamUint(path, "net_cls.classid")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse net_cls.classid - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if value != classidAfter {
|
if value != classidAfter {
|
||||||
t.Fatal("Got the wrong value, set net_cls.classid failed.")
|
t.Fatal("Got the wrong value, set net_cls.classid failed.")
|
||||||
|
|
|
@ -1,31 +1,23 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type NetPrioGroup struct {
|
type NetPrioGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *NetPrioGroup) Name() string {
|
func (s *NetPrioGroup) Name() string {
|
||||||
return "net_prio"
|
return "net_prio"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
_, err := d.join("net_prio")
|
return apply(path, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
|
||||||
for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
|
for _, prioMap := range r.NetPrioIfpriomap {
|
||||||
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,10 +25,6 @@ func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *NetPrioGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("net_prio"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -10,28 +8,27 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var prioMap = []*configs.IfPrioMap{
|
||||||
prioMap = []*configs.IfPrioMap{
|
{
|
||||||
{
|
Interface: "test",
|
||||||
Interface: "test",
|
Priority: 5,
|
||||||
Priority: 5,
|
},
|
||||||
},
|
}
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestNetPrioSetIfPrio(t *testing.T) {
|
func TestNetPrioSetIfPrio(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("net_prio", t)
|
path := tempDir(t, "net_prio")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
|
r := &configs.Resources{
|
||||||
|
NetPrioIfpriomap: prioMap,
|
||||||
|
}
|
||||||
netPrio := &NetPrioGroup{}
|
netPrio := &NetPrioGroup{}
|
||||||
if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := netPrio.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
|
value, err := fscommon.GetCgroupParamString(path, "net_prio.ifpriomap")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if !strings.Contains(value, "test 5") {
|
if !strings.Contains(value, "test 5") {
|
||||||
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
|
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
|
||||||
|
|
|
@ -0,0 +1,186 @@
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
|
)
|
||||||
|
|
||||||
|
// The absolute path to the root of the cgroup hierarchies.
|
||||||
|
var (
|
||||||
|
cgroupRootLock sync.Mutex
|
||||||
|
cgroupRoot string
|
||||||
|
)
|
||||||
|
|
||||||
|
const defaultCgroupRoot = "/sys/fs/cgroup"
|
||||||
|
|
||||||
|
func initPaths(cg *configs.Cgroup) (map[string]string, error) {
|
||||||
|
root, err := rootPath()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
inner, err := innerPath(cg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
paths := make(map[string]string)
|
||||||
|
for _, sys := range subsystems {
|
||||||
|
name := sys.Name()
|
||||||
|
path, err := subsysPath(root, inner, name)
|
||||||
|
if err != nil {
|
||||||
|
// The non-presence of the devices subsystem
|
||||||
|
// is considered fatal for security reasons.
|
||||||
|
if cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
paths[name] = path
|
||||||
|
}
|
||||||
|
|
||||||
|
return paths, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func tryDefaultCgroupRoot() string {
|
||||||
|
var st, pst unix.Stat_t
|
||||||
|
|
||||||
|
// (1) it should be a directory...
|
||||||
|
err := unix.Lstat(defaultCgroupRoot, &st)
|
||||||
|
if err != nil || st.Mode&unix.S_IFDIR == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (2) ... and a mount point ...
|
||||||
|
err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if st.Dev == pst.Dev {
|
||||||
|
// parent dir has the same dev -- not a mount point
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (3) ... of 'tmpfs' fs type.
|
||||||
|
var fst unix.Statfs_t
|
||||||
|
err = unix.Statfs(defaultCgroupRoot, &fst)
|
||||||
|
if err != nil || fst.Type != unix.TMPFS_MAGIC {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// (4) it should have at least 1 entry ...
|
||||||
|
dir, err := os.Open(defaultCgroupRoot)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
names, err := dir.Readdirnames(1)
|
||||||
|
if err != nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if len(names) < 1 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
// ... which is a cgroup mount point.
|
||||||
|
err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
|
||||||
|
if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
return defaultCgroupRoot
|
||||||
|
}
|
||||||
|
|
||||||
|
// rootPath finds and returns path to the root of the cgroup hierarchies.
|
||||||
|
func rootPath() (string, error) {
|
||||||
|
cgroupRootLock.Lock()
|
||||||
|
defer cgroupRootLock.Unlock()
|
||||||
|
|
||||||
|
if cgroupRoot != "" {
|
||||||
|
return cgroupRoot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fast path
|
||||||
|
cgroupRoot = tryDefaultCgroupRoot()
|
||||||
|
if cgroupRoot != "" {
|
||||||
|
return cgroupRoot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// slow path: parse mountinfo
|
||||||
|
mi, err := cgroups.GetCgroupMounts(false)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
if len(mi) < 1 {
|
||||||
|
return "", errors.New("no cgroup mount found in mountinfo")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"),
|
||||||
|
// use its parent directory.
|
||||||
|
root := filepath.Dir(mi[0].Mountpoint)
|
||||||
|
|
||||||
|
if _, err := os.Stat(root); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
cgroupRoot = root
|
||||||
|
return cgroupRoot, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func innerPath(c *configs.Cgroup) (string, error) {
|
||||||
|
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||||
|
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
|
||||||
|
innerPath := utils.CleanPath(c.Path)
|
||||||
|
if innerPath == "" {
|
||||||
|
cgParent := utils.CleanPath(c.Parent)
|
||||||
|
cgName := utils.CleanPath(c.Name)
|
||||||
|
innerPath = filepath.Join(cgParent, cgName)
|
||||||
|
}
|
||||||
|
|
||||||
|
return innerPath, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func subsysPath(root, inner, subsystem string) (string, error) {
|
||||||
|
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||||
|
if filepath.IsAbs(inner) {
|
||||||
|
mnt, err := cgroups.FindCgroupMountpoint(root, subsystem)
|
||||||
|
// If we didn't mount the subsystem, there is no point we make the path.
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||||
|
return filepath.Join(root, filepath.Base(mnt), inner), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||||
|
// process could in container and shared pid namespace with host, and
|
||||||
|
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||||
|
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return filepath.Join(parentPath, inner), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func apply(path string, pid int) error {
|
||||||
|
if path == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if err := os.MkdirAll(path, 0o755); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return cgroups.WriteCgroupProc(path, pid)
|
||||||
|
}
|
|
@ -0,0 +1,104 @@
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestInvalidCgroupPath(t *testing.T) {
|
||||||
|
if cgroups.IsCgroup2UnifiedMode() {
|
||||||
|
t.Skip("cgroup v2 is not supported")
|
||||||
|
}
|
||||||
|
|
||||||
|
root, err := rootPath()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("couldn't get cgroup root: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
test string
|
||||||
|
path, name, parent string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
test: "invalid cgroup path",
|
||||||
|
path: "../../../../../../../../../../some/path",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
test: "invalid absolute cgroup path",
|
||||||
|
path: "/../../../../../../../../../../some/path",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
test: "invalid cgroup parent",
|
||||||
|
parent: "../../../../../../../../../../some/path",
|
||||||
|
name: "name",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
test: "invalid absolute cgroup parent",
|
||||||
|
parent: "/../../../../../../../../../../some/path",
|
||||||
|
name: "name",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
test: "invalid cgroup name",
|
||||||
|
parent: "parent",
|
||||||
|
name: "../../../../../../../../../../some/path",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
test: "invalid absolute cgroup name",
|
||||||
|
parent: "parent",
|
||||||
|
name: "/../../../../../../../../../../some/path",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
test: "invalid cgroup name and parent",
|
||||||
|
parent: "../../../../../../../../../../some/path",
|
||||||
|
name: "../../../../../../../../../../some/path",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
test: "invalid absolute cgroup name and parent",
|
||||||
|
parent: "/../../../../../../../../../../some/path",
|
||||||
|
name: "/../../../../../../../../../../some/path",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.test, func(t *testing.T) {
|
||||||
|
config := &configs.Cgroup{Path: tc.path, Name: tc.name, Parent: tc.parent}
|
||||||
|
|
||||||
|
inner, err := innerPath(config)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("couldn't get cgroup data: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure the final inner path doesn't go outside the cgroup mountpoint.
|
||||||
|
if strings.HasPrefix(inner, "..") {
|
||||||
|
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Double-check, using an actual cgroup.
|
||||||
|
deviceRoot := filepath.Join(root, "devices")
|
||||||
|
devicePath, err := subsysPath(root, inner, "devices")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("couldn't get cgroup path: %v", err)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||||
|
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTryDefaultCgroupRoot(t *testing.T) {
|
||||||
|
res := tryDefaultCgroupRoot()
|
||||||
|
exp := defaultCgroupRoot
|
||||||
|
if cgroups.IsCgroup2UnifiedMode() {
|
||||||
|
// checking that tryDefaultCgroupRoot does return ""
|
||||||
|
// in case /sys/fs/cgroup is not cgroup v1 root dir.
|
||||||
|
exp = ""
|
||||||
|
}
|
||||||
|
if res != exp {
|
||||||
|
t.Errorf("tryDefaultCgroupRoot: want %q, got %q", exp, res)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -7,29 +5,20 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PerfEventGroup struct {
|
type PerfEventGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *PerfEventGroup) Name() string {
|
func (s *PerfEventGroup) Name() string {
|
||||||
return "perf_event"
|
return "perf_event"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *PerfEventGroup) Apply(d *cgroupData) error {
|
func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
// we just want to join this group even though we don't set anything
|
return apply(path, pid)
|
||||||
if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *PerfEventGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("perf_event"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"math"
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
@ -12,31 +9,26 @@ import (
|
||||||
"github.com/opencontainers/runc/libcontainer/configs"
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PidsGroup struct {
|
type PidsGroup struct{}
|
||||||
}
|
|
||||||
|
|
||||||
func (s *PidsGroup) Name() string {
|
func (s *PidsGroup) Name() string {
|
||||||
return "pids"
|
return "pids"
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *PidsGroup) Apply(d *cgroupData) error {
|
func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
_, err := d.join("pids")
|
return apply(path, pid)
|
||||||
if err != nil && !cgroups.IsNotFound(err) {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
func (s *PidsGroup) Set(path string, r *configs.Resources) error {
|
||||||
if cgroup.Resources.PidsLimit != 0 {
|
if r.PidsLimit != 0 {
|
||||||
// "max" is the fallback value.
|
// "max" is the fallback value.
|
||||||
limit := "max"
|
limit := "max"
|
||||||
|
|
||||||
if cgroup.Resources.PidsLimit > 0 {
|
if r.PidsLimit > 0 {
|
||||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
limit = strconv.FormatInt(r.PidsLimit, 10)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
|
if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -44,28 +36,24 @@ func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *PidsGroup) Remove(d *cgroupData) error {
|
|
||||||
return removePath(d.path("pids"))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
if !cgroups.PathExists(path) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
|
max, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
return err
|
||||||
}
|
}
|
||||||
|
// If no limit is set, read from pids.max returns "max", which is
|
||||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
// converted to MaxUint64 by GetCgroupParamUint. Historically, we
|
||||||
var max uint64
|
// represent "no limit" for pids as 0, thus this conversion.
|
||||||
if maxString != "max" {
|
if max == math.MaxUint64 {
|
||||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
max = 0
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
stats.PidsStats.Current = current
|
stats.PidsStats.Current = current
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
// +build linux
|
|
||||||
|
|
||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -8,6 +6,7 @@ import (
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -16,65 +15,64 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestPidsSetMax(t *testing.T) {
|
func TestPidsSetMax(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("pids", t)
|
path := tempDir(t, "pids")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"pids.max": "max",
|
"pids.max": "max",
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.PidsLimit = maxLimited
|
r := &configs.Resources{
|
||||||
|
PidsLimit: maxLimited,
|
||||||
|
}
|
||||||
pids := &PidsGroup{}
|
pids := &PidsGroup{}
|
||||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := pids.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "pids.max")
|
value, err := fscommon.GetCgroupParamUint(path, "pids.max")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if value != maxLimited {
|
if value != maxLimited {
|
||||||
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
|
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPidsSetUnlimited(t *testing.T) {
|
func TestPidsSetUnlimited(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("pids", t)
|
path := tempDir(t, "pids")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"pids.max": strconv.Itoa(maxLimited),
|
"pids.max": strconv.Itoa(maxLimited),
|
||||||
})
|
})
|
||||||
|
|
||||||
helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
|
r := &configs.Resources{
|
||||||
|
PidsLimit: maxUnlimited,
|
||||||
|
}
|
||||||
pids := &PidsGroup{}
|
pids := &PidsGroup{}
|
||||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
if err := pids.Set(path, r); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "pids.max")
|
value, err := fscommon.GetCgroupParamString(path, "pids.max")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if value != "max" {
|
if value != "max" {
|
||||||
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
|
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPidsStats(t *testing.T) {
|
func TestPidsStats(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("pids", t)
|
path := tempDir(t, "pids")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"pids.current": strconv.Itoa(1337),
|
"pids.current": strconv.Itoa(1337),
|
||||||
"pids.max": strconv.Itoa(maxLimited),
|
"pids.max": strconv.Itoa(maxLimited),
|
||||||
})
|
})
|
||||||
|
|
||||||
pids := &PidsGroup{}
|
pids := &PidsGroup{}
|
||||||
stats := *cgroups.NewStats()
|
stats := *cgroups.NewStats()
|
||||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
if err := pids.GetStats(path, &stats); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,17 +86,16 @@ func TestPidsStats(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPidsStatsUnlimited(t *testing.T) {
|
func TestPidsStatsUnlimited(t *testing.T) {
|
||||||
helper := NewCgroupTestUtil("pids", t)
|
path := tempDir(t, "pids")
|
||||||
defer helper.cleanup()
|
|
||||||
|
|
||||||
helper.writeFileContents(map[string]string{
|
writeFileContents(t, path, map[string]string{
|
||||||
"pids.current": strconv.Itoa(4096),
|
"pids.current": strconv.Itoa(4096),
|
||||||
"pids.max": "max",
|
"pids.max": "max",
|
||||||
})
|
})
|
||||||
|
|
||||||
pids := &PidsGroup{}
|
pids := &PidsGroup{}
|
||||||
stats := *cgroups.NewStats()
|
stats := *cgroups.NewStats()
|
||||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
if err := pids.GetStats(path, &stats); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
package fs
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||||
|
"github.com/opencontainers/runc/libcontainer/configs"
|
||||||
|
)
|
||||||
|
|
||||||
|
type RdmaGroup struct{}
|
||||||
|
|
||||||
|
func (s *RdmaGroup) Name() string {
|
||||||
|
return "rdma"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
|
||||||
|
return apply(path, pid)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
|
||||||
|
return fscommon.RdmaSet(path, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||||
|
return fscommon.RdmaGetStats(path, stats)
|
||||||
|
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue