merge upstream 1.1.0

2022-12-30 11:21:19 +08:00 · 2022-12-30 11:21:19 +08:00 · 1dc29861c3
parent f67506f80e
commit 1dc29861c3
1285 changed files with 161305 additions and 143317 deletions
--- a/.cirrus.yml
+++ b/.cirrus.yml
@ -0,0 +1,158 @@
 ---
 # We use Cirrus for Vagrant tests and native CentOS 7 and 8, because macOS
 # instances of GHA are too slow and flaky, and Linux instances of GHA do not
 # support KVM.
 # NOTE Cirrus execution environments lack a terminal, needed for
 # some integration tests. So we use `ssh -tt` command to fake a terminal.
 task:
  timeout_in: 30m
  env:
    DEBIAN_FRONTEND: noninteractive
    HOME: /root
    # yamllint disable rule:key-duplicates
    matrix:
      DISTRO: fedora
  name: vagrant DISTRO:$DISTRO
  compute_engine_instance:
    image_project: cirrus-images
    image: family/docker-kvm
    platform: linux
    nested_virtualization: true
    # CPU limit: `16 / NTASK`: see https://cirrus-ci.org/faq/#are-there-any-limits
    cpu: 8
    # Memory limit: `4GB * NCPU`
    memory: 32G
  host_info_script: |
    uname -a
    echo "-----"
    cat /etc/os-release
    echo "-----"
    cat /proc/cpuinfo
    echo "-----"
    df -T
  install_libvirt_vagrant_script: |
    apt-get update
    apt-get install -y libvirt-daemon libvirt-daemon-system vagrant vagrant-libvirt
    systemctl enable --now libvirtd
  vagrant_cache:
    fingerprint_script: uname -s ; cat Vagrantfile.$DISTRO
    folder: /root/.vagrant.d
  vagrant_up_script: |
    ln -sf Vagrantfile.$DISTRO Vagrantfile
    # Retry if it fails (download.fedoraproject.org returns 404 sometimes)
    vagrant up --no-tty || vagrant up --no-tty
    mkdir -p -m 0700 /root/.ssh
    vagrant ssh-config >> /root/.ssh/config
  guest_info_script: |
    ssh default 'sh -exc "uname -a && systemctl --version && df -T && cat /etc/os-release"'
  unit_tests_script: |
    ssh default 'sudo -i make -C /vagrant localunittest'
  integration_systemd_script: |
    ssh -tt default "sudo -i make -C /vagrant localintegration RUNC_USE_SYSTEMD=yes"
  integration_fs_script: |
    ssh -tt default "sudo -i make -C /vagrant localintegration"
  integration_systemd_rootless_script: |
    ssh -tt default "sudo -i make -C /vagrant localrootlessintegration RUNC_USE_SYSTEMD=yes"
  integration_fs_rootless_script: |
    ssh -tt default "sudo -i make -C /vagrant localrootlessintegration"
 task:
  timeout_in: 30m
  env:
    HOME: /root
    CIRRUS_WORKING_DIR: /home/runc
    GO_VERSION: "1.17.3"
    BATS_VERSION: "v1.3.0"
    # yamllint disable rule:key-duplicates
    matrix:
      DISTRO: centos-7
      DISTRO: centos-stream-8
  name: ci / $DISTRO
  compute_engine_instance:
    image_project: centos-cloud
    image: family/$DISTRO
    platform: linux
    cpu: 4
    memory: 8G
  install_dependencies_script: |
    case $DISTRO in
    centos-7)
      (cd /etc/yum.repos.d && curl -O https://copr.fedorainfracloud.org/coprs/adrian/criu-el7/repo/epel-7/adrian-criu-el7-epel-7.repo)
      # sysctl
      echo "user.max_user_namespaces=15076" > /etc/sysctl.d/userns.conf
      sysctl --system
      ;;
    centos-stream-8)
      yum config-manager --set-enabled powertools # for glibc-static
      ;;
    esac
    # Work around dnf mirror failures by retrying a few times.
    for i in $(seq 0 2); do
      sleep $i
      yum install -y -q gcc git iptables jq glibc-static libseccomp-devel make criu fuse-sshfs && break
    done
    [ $? -eq 0 ] # fail if yum failed
    # install Go
    curl -fsSL "https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz" | tar Cxz /usr/local
    # install bats
    cd /tmp
    git clone https://github.com/bats-core/bats-core
    cd bats-core
    git checkout $BATS_VERSION
    ./install.sh /usr/local
    cd -
    # Add a user for rootless tests
    useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
    # Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
    ssh-keygen -t ecdsa -N "" -f /root/rootless.key
    mkdir -m 0700 -p /home/rootless/.ssh
    cp /root/rootless.key /home/rootless/.ssh/id_ecdsa
    cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
    chown -R rootless.rootless /home/rootless
    # set PATH
    echo 'export PATH=/usr/local/go/bin:/usr/local/bin:$PATH' >> /root/.bashrc
    # Setup ssh localhost for terminal emulation (script -e did not work)
    ssh-keygen -t ed25519 -f /root/.ssh/id_ed25519 -N ""
    cat /root/.ssh/id_ed25519.pub >> /root/.ssh/authorized_keys
    chmod 400 /root/.ssh/authorized_keys
    ssh-keyscan localhost >> /root/.ssh/known_hosts
    echo -e "Host localhost\n\tStrictHostKeyChecking no\t\nIdentityFile /root/.ssh/id_ed25519\n" >> /root/.ssh/config
    sed -e "s,PermitRootLogin.*,PermitRootLogin prohibit-password,g" -i /etc/ssh/sshd_config
    systemctl restart sshd
  host_info_script: |
    uname -a
    echo "-----"
    cat /etc/os-release
    echo "-----"
    cat /proc/cpuinfo
    echo "-----"
    df -T
    echo "-----"
    systemctl --version
  unit_tests_script: |
    ssh -tt localhost "make -C /home/runc localunittest"
  integration_systemd_script: |
    ssh -tt localhost "make -C /home/runc localintegration RUNC_USE_SYSTEMD=yes"
  integration_fs_script: |
    ssh -tt localhost "make -C /home/runc localintegration"
  integration_systemd_rootless_script: |
    echo "SKIP: integration_systemd_rootless_script requires cgroup v2"
  integration_fs_rootless_script: |
    case $DISTRO in
    centos-7)
      echo "SKIP: FIXME: integration_fs_rootless_script is skipped because of EPERM on writing cgroup.procs"
        ;;
    centos-stream-8)
      ssh -tt localhost "make -C /home/runc localrootlessintegration"
      ;;
    esac
--- a/.codespellrc
+++ b/.codespellrc
@ -0,0 +1,3 @@
 [codespell]
 skip = ./vendor,./.git
 ignore-words-list = clos,creat
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@ -0,0 +1,25 @@
 # Please see the documentation for all configuration options:
 # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 version: 2
 updates:
  # Dependencies listed in go.mod
  - package-ecosystem: "gomod"
    directory: "/" # Location of package manifests
    schedule:
      interval: "daily"
    ignore:
      # a regression in v1.22.2, see https://github.com/urfave/cli/issues/1092
      - dependency-name: "github.com/urfave/cli"
  # Dependencies listed in .github/workflows/*.yml
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "daily"
  # Dependencies listed in Dockerfile
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "daily"
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -0,0 +1,129 @@
 # NOTE GitHub Actions execution environments lack a terminal, needed for
 # some integration tests. So we use `script` command to fake a terminal.
 name: ci
 on:
  push:
    tags:
      - v*
    branches:
      - master
      - release-*
  pull_request:
 env:
  # Don't ignore C warnings. Note that the output of "go env CGO_CFLAGS" by default is "-g -O2", so we keep them.
  CGO_CFLAGS: -g -O2 -Werror
 jobs:
  test:
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        go-version: [1.16.x, 1.17.x]
        rootless: ["rootless", ""]
        race: ["-race", ""]
        criu: [""]
        include:
          # Also test against latest criu-dev
          - go-version: 1.17.x
            rootless: ""
            race: ""
            criu: "criu-dev"
    steps:
    - name: checkout
      uses: actions/checkout@v2
    - name: install deps
      if: matrix.criu == ''
      env:
        REPO: https://download.opensuse.org/repositories/devel:/tools:/criu/xUbuntu_20.04
      run: |
        # criu repo
        curl -fSsl $REPO/Release.key | sudo apt-key add -
        echo "deb $REPO/ /" | sudo tee /etc/apt/sources.list.d/criu.list
        sudo apt update
        sudo apt install libseccomp-dev criu sshfs
    - name: install deps (criu ${{ matrix.criu }})
      if: matrix.criu != ''
      run: |
        sudo apt -q update
        sudo apt -q install libseccomp-dev sshfs \
          libcap-dev libnet1-dev libnl-3-dev \
          libprotobuf-c-dev libprotobuf-dev protobuf-c-compiler protobuf-compiler
        git clone https://github.com/checkpoint-restore/criu.git ~/criu
        (cd ~/criu && git checkout ${{ matrix.criu }} && sudo make install-criu)
        rm -rf ~/criu
    - name: install go ${{ matrix.go-version }}
      uses: actions/setup-go@v2
      with:
        stable: '!contains(${{ matrix.go-version }}, "beta") && !contains(${{ matrix.go-version }}, "rc")'
        go-version: ${{ matrix.go-version }}
    - name: build
      run: sudo -E PATH="$PATH" make EXTRA_FLAGS="${{ matrix.race }}" all
    - name: install bats
      uses: mig4/setup-bats@v1
      with:
        bats-version: 1.3.0
    - name: unit test
      if: matrix.rootless != 'rootless'
      run: sudo -E PATH="$PATH" -- make TESTFLAGS="${{ matrix.race }}" localunittest
    - name: add rootless user
      if: matrix.rootless == 'rootless'
      run: |
        sudo useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
        # Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
        ssh-keygen -t ecdsa -N "" -f $HOME/rootless.key
        sudo mkdir -m 0700 -p /home/rootless/.ssh
        sudo cp $HOME/rootless.key /home/rootless/.ssh/id_ecdsa
        sudo cp $HOME/rootless.key.pub /home/rootless/.ssh/authorized_keys
        sudo chown -R rootless.rootless /home/rootless
    - name: integration test (fs driver)
      run: sudo -E PATH="$PATH" script -e -c 'make local${{ matrix.rootless }}integration'
    - name: integration test (systemd driver)
      # can't use systemd driver with cgroupv1
      if: matrix.rootless != 'rootless'
      run: sudo -E PATH="$PATH" script -e -c 'make RUNC_USE_SYSTEMD=yes local${{ matrix.rootless }}integration'
  # We need to continue support for 32-bit ARM.
  # However, we do not have 32-bit ARM CI, so we use i386 for testing 32bit stuff.
  # We are not interested in providing official support for i386.
  cross-i386:
    runs-on: ubuntu-20.04
    steps:
    - name: checkout
      uses: actions/checkout@v2
    - name: install deps
      run: |
        sudo dpkg --add-architecture i386
        # add criu repo
        sudo add-apt-repository -y ppa:criu/ppa
        # apt-add-repository runs apt update so we don't have to.
        # Due to a bug in apt, we have to update it first
        # (see https://bugs.launchpad.net/ubuntu-cdimage/+bug/1871268)
        sudo apt -q install apt
        sudo apt -q install libseccomp-dev libseccomp-dev:i386 gcc-multilib criu
    - name: install go
      uses: actions/setup-go@v2
      with:
        go-version: 1.x # Latest stable
    - name: unit test
      # cgo is disabled by default when cross-compiling
      run: sudo -E PATH="$PATH" -- make GOARCH=386 CGO_ENABLED=1 localunittest
--- a/.github/workflows/validate.yml
+++ b/.github/workflows/validate.yml
@ -0,0 +1,198 @@
 name: validate
 on:
  push:
    tags:
      - v*
    branches:
      - master
      - release-*
  pull_request:
 jobs:
  lint:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - name: install deps
        run: |
          sudo apt -q update
          sudo apt -q install libseccomp-dev
      - uses: golangci/golangci-lint-action@v2
        with:
          # must be specified without patch version
          version: v1.42
  lint-extra:
    # Extra linters, only checking new code from pull requests.
    if: github.event_name == 'pull_request'
    runs-on: ubuntu-20.04
    permissions:
      contents: read
    steps:
      - uses: actions/checkout@v2
      - name: install deps
        run: |
          sudo apt -q update
          sudo apt -q install libseccomp-dev
      - uses: golangci/golangci-lint-action@v2
        with:
          only-new-issues: true
          args: --config .golangci-extra.yml
          # must be specified without patch version
          version: v1.43
  compile-buildtags:
    runs-on: ubuntu-20.04
    env:
      # Don't ignore C warnings. Note that the output of "go env CGO_CFLAGS" by default is "-g -O2", so we keep them.
      CGO_CFLAGS: -g -O2 -Werror
    steps:
      - uses: actions/checkout@v2
      - name: install go
        uses: actions/setup-go@v2
        with:
          go-version: 1.x # Latest stable
      - name: compile with no build tags
        run: make BUILDTAGS=""
  codespell:
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v2
    - name: install deps
      # Version of codespell bundled with Ubuntu is way old, so use pip.
      run: pip install codespell
    - name: run codespell
      run: codespell
  shfmt:
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v2
    - name: vars
      run: |
        echo "VERSION=3.3.1" >> $GITHUB_ENV
        echo "$(go env GOPATH)/bin" >> $GITHUB_PATH
    - name: cache go mod and $GOCACHE
      uses: actions/cache@v2
      with:
        path: |
          ~/go/pkg/mod
          ~/.cache/go-build
        key: ${{ runner.os }}-shfmt-${{ env.VERSION }}
        restore-keys: ${{ runner.os }}-shfmt-
    - name: install shfmt
      run: |
        command -v shfmt || \
          (cd ~ && GO111MODULE=on time go get mvdan.cc/sh/v3/cmd/shfmt@v$VERSION)
    - name: shfmt
      run: make shfmt
  shellcheck:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v2
      - name: vars
        run: |
          echo 'VERSION=v0.7.2' >> $GITHUB_ENV
          echo 'BASEURL=https://github.com/koalaman/shellcheck/releases/download' >> $GITHUB_ENV
          echo 'SHA256SUM=12ee2e0b90a3d1e9cae24ac9b2838be66b48573cb2c8e8f3c566b959df6f050c' >> $GITHUB_ENV
          echo ~/bin >> $GITHUB_PATH
      - name: install shellcheck
        run: |
          mkdir ~/bin
          curl -sSfL --retry 5 $BASEURL/$VERSION/shellcheck-$VERSION.linux.x86_64.tar.xz |
            tar xfJ - -C ~/bin --strip 1 shellcheck-$VERSION/shellcheck
          sha256sum ~/bin/shellcheck | grep -q $SHA256SUM
          # make sure to remove the old version
          sudo rm -f /usr/bin/shellcheck
      - uses: lumaxis/shellcheck-problem-matchers@v1
      - name: shellcheck
        run: |
          make shellcheck
  deps:
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v2
    - name: install go
      uses: actions/setup-go@v2
      with:
        go-version: 1.x # Latest stable
    - name: cache go mod and $GOCACHE
      uses: actions/cache@v2
      with:
        path: |
          ~/go/pkg/mod
          ~/.cache/go-build
        key: ${{ runner.os }}-go.sum-${{ hashFiles('**/go.sum') }}
        restore-keys: ${{ runner.os }}-go.sum-
    - name: verify deps
      run: make verify-dependencies
  commit:
    runs-on: ubuntu-20.04
    # Only check commits on pull requests.
    if: github.event_name == 'pull_request'
    steps:
      - name: get pr commits
        id: 'get-pr-commits'
        uses: tim-actions/get-pr-commits@v1.1.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: check subject line length
        uses: tim-actions/commit-message-checker-with-regex@v0.3.1
        with:
          commits: ${{ steps.get-pr-commits.outputs.commits }}
          pattern: '^.{0,72}(\n.*)*$'
          error: 'Subject too long (max 72)'
  cfmt:
    runs-on: ubuntu-20.04
    steps:
    - name: checkout
      uses: actions/checkout@v2
      with:
        fetch-depth: 0
    - name: install deps
      run: |
        sudo apt -qq update
        sudo apt -qq install indent
    - name: cfmt
      run: |
        make cfmt
        git diff --exit-code
  release:
    runs-on: ubuntu-20.04
    steps:
    - name: checkout
      uses: actions/checkout@v2
      with:
        fetch-depth: 0
      # We have to run this under Docker as Ubuntu (host) does not support all
      # the architectures we want to compile test against, and Dockerfile uses
      # Debian (which does).
      #
      # XXX: as currently this is the only job that is using Docker, we are
      # building and using the runcimage locally. In case more jobs running
      # under Docker will emerge, it will be good to have a separate make
      # runcimage job and share its result (the docker image) with whoever
      # needs it.
    - uses: satackey/action-docker-layer-caching@v0.0.11
      continue-on-error: true
    - name: build docker image
      run: make runcimage
    - name: make releaseall
      run: make releaseall
    - name: upload artifacts
      uses: actions/upload-artifact@v2
      with:
        name: release-${{ github.run_id }}
        path: release/*
--- a/.gitignore
+++ b/.gitignore
@ -2,5 +2,9 @@ vendor/pkg
 /runc
 /runc-*
 contrib/cmd/recvtty/recvtty
 contrib/cmd/sd-helper/sd-helper
 contrib/cmd/seccompagent/seccompagent
 man/man8
 release
 Vagrantfile
 .vagrant
--- a/.golangci-extra.yml
+++ b/.golangci-extra.yml
@ -0,0 +1,15 @@
 # This is golangci-lint config file which is used to check new code in
 # github PRs only (see lint-extra job in .github/workflows/validate.yml).
 #
 # For the default linter config, see .golangci.yml. This config should
 # only enable additional linters not enabled in the default config.
 run:
  build-tags:
    - seccomp
 linters:
  disable-all: true
  enable:
    - godot
    - revive
--- a/.golangci.yml
+++ b/.golangci.yml
@ -0,0 +1,12 @@
 # For documentation, see https://golangci-lint.run/usage/configuration/
 run:
  build-tags:
    - seccomp
 linters:
  enable:
    - gofumpt
    - errorlint
    - unconvert
    - unparam
--- a/.pullapprove.yml
+++ b/.pullapprove.yml
@ -1,10 +0,0 @@
 approve_by_comment: true
 approve_regex: ^LGTM
 reject_regex: ^Rejected
 reset_on_push: true
 author_approval: ignored
 reviewers:
  teams:
    - runc-maintainers
  name: default
  required: 2
--- a/.travis.yml
+++ b/.travis.yml
@ -1,54 +0,0 @@
 dist: bionic
 language: go
 go:
  - 1.11.x
  - 1.12.x
  - tip
 matrix:
  include:
    - go: 1.12.x
      env:
        - RUNC_USE_SYSTEMD=1
      script:
        - make BUILDTAGS="${BUILDTAGS}" all
        - sudo PATH="$PATH" make localintegration RUNC_USE_SYSTEMD=1
    - go: 1.12.x
      env:
        - VIRTUALBOX_VERSION=6.0
        - VAGRANT_VERSION=2.2.6
        - FEDORA_VERSION=31
      before_install:
        - cat /proc/cpuinfo
        - wget -q https://www.virtualbox.org/download/oracle_vbox_2016.asc -O- | sudo apt-key add - && sudo sh -c "echo deb https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib >> /etc/apt/sources.list" && sudo apt-get update && sudo apt-get install -yq build-essential gcc make linux-headers-$(uname -r) virtualbox-${VIRTUALBOX_VERSION} && sudo usermod -aG vboxusers $(whoami)
        - wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb && sudo dpkg -i vagrant_${VAGRANT_VERSION}_$(uname -m).deb
        - vagrant init bento/fedora-${FEDORA_VERSION} && vagrant up && mkdir -p ~/.ssh && vagrant ssh-config >> ~/.ssh/config
        - ssh default sudo dnf install -y podman
      script:
        - ssh default sudo podman build -t test /vagrant
        - ssh default sudo podman run --privileged --cgroupns=private test make localunittest
  allow_failures:
    - go: tip
 go_import_path: github.com/opencontainers/runc
 # `make ci` uses Docker.
 sudo: required
 services:
  - docker
 env:
  global:
    - BUILDTAGS="seccomp apparmor selinux ambient"
 before_install:
  - sudo apt-get -qq update
  - sudo apt-get install -y libseccomp-dev
  - go get -u golang.org/x/lint/golint
  - go get -u github.com/vbatts/git-validation
  - env | grep TRAVIS_
 script:
  - git-validation -run DCO,short-subject -v
  - make BUILDTAGS="${BUILDTAGS}"
  - make BUILDTAGS="${BUILDTAGS}" clean ci cross
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -0,0 +1,248 @@
 # Changelog
 This file documents all notable changes made to this project since runc 1.0.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [Unreleased]
 ## [1.1.0] - 2022-01-14
 > A plan depends as much upon execution as it does upon concept.
 ### Changed
 * libcontainer will now refuse to build without the nsenter package being
   correctly compiled (specifically this requires CGO to be enabled). This
   should avoid folks accidentally creating broken runc binaries (and
   incorrectly importing our internal libraries into their projects). (#3331)
 ## [1.1.0-rc.1] - 2021-12-14
 > He who controls the spice controls the universe.
 ### Deprecated
 * runc run/start now warns if a new container cgroup is non-empty or frozen;
   this warning will become an error in runc 1.2. (#3132, #3223)
 * runc can only be built with Go 1.16 or later from this release onwards.
   (#3100, #3245, #3325)
 ### Removed
 * `cgroup.GetHugePageSizes` has been removed entirely, and been replaced with
   `cgroup.HugePageSizes` which is more efficient. (#3234)
 * `intelrdt.GetIntelRdtPath` has been removed. Users who were using this
   function to get the intelrdt root should use the new `intelrdt.Root`
   instead. (#2920, #3239)
 ### Added
 * Add support for RDMA cgroup added in Linux 4.11. (#2883)
 * runc exec now produces exit code of 255 when the exec failed.
   This may help in distinguishing between runc exec failures
   (such as invalid options, non-running container or non-existent
   binary etc.) and failures of the command being executed. (#3073)
 * runc run: new `--keep` option to skip removal of exited containers' artefacts.
   This might be useful to check the state (e.g. of cgroup controllers) after
   the container has exited. (#2817, #2825)
 * seccomp: add support for `SCMP_ACT_KILL_PROCESS` and `SCMP_ACT_KILL_THREAD`
   (the latter is just an alias for `SCMP_ACT_KILL`). (#3204)
 * seccomp: add support for `SCMP_ACT_NOTIFY` (seccomp actions). This allows
   users to create sophisticated seccomp filters where syscalls can be
   efficiently emulated by privileged processes on the host. (#2682)
 * checkpoint/restore: add an option (`--lsm-mount-context`) to set
   a different LSM mount context on restore. (#3068)
 * runc releases are now cross-compiled for several architectures. Static
   builds for said architectures will be available for all future releases.
   (#3197)
 * intelrdt: support ClosID parameter. (#2920)
 * runc exec --cgroup: an option to specify a (non-top) in-container cgroup
   to use for the process being executed. (#3040, #3059)
 * cgroup v1 controllers now support hybrid hierarchy (i.e. when on a cgroup v1
   machine a cgroup2 filesystem is mounted to /sys/fs/cgroup/unified, runc
   run/exec now adds the container to the appropriate cgroup under it). (#2087,
   #3059)
 * sysctl: allow slashes in sysctl names, to better match `sysctl(8)`'s
   behaviour. (#3254, #3257)
 * mounts: add support for bind-mounts which are inaccessible after switching
   the user namespace. Note that this does not permit the container any
   additional access to the host filesystem, it simply allows containers to
   have bind-mounts configured for paths the user can access but have
   restrictive access control settings for other users. (#2576)
 * Add support for recursive mount attributes using `mount_setattr(2)`. These
   have the same names as the proposed `mount(8)` options -- just prepend `r`
   to the option name (such as `rro`). (#3272)
 * Add `runc features` subcommand to allow runc users to detect what features
   runc has been built with. This includes critical information such as
   supported mount flags, hook names, and so on. Note that the output of this
   command is subject to change and will not be considered stable until runc
   1.2 at the earliest. The runtime-spec specification for this feature is
   being developed in [opencontainers/runtime-spec#1130]. (#3296)
 [opencontainers/runtime-spec#1130]: https://github.com/opencontainers/runtime-spec/pull/1130
 ### Changed
 * system: improve performance of `/proc/$pid/stat` parsing. (#2696)
 * cgroup2: when `/sys/fs/cgroup` is configured as a read-write mount, change
   the ownership of certain cgroup control files (as per
   `/sys/kernel/cgroup/delegate`) to allow for proper deferral to the container
   process. (#3057)
 * docs: series of improvements to man pages to make them easier to read and
   use. (#3032)
 #### libcontainer API
 * internal api: remove internal error types and handling system, switch to Go
   wrapped errors. (#3033)
 * New configs.Cgroup structure fields (#3177):
   * Systemd (whether to use systemd cgroup manager); and
   * Rootless (whether to use rootless cgroups).
 * New cgroups/manager package aiming to simplify cgroup manager instantiation.
   (#3177)
 * All cgroup managers' instantiation methods now initialize cgroup paths and
   can return errors. This allows using any cgroup manager method (e.g.
   Exists, Destroy, Set, GetStats) right after instantiation, which was not
   possible before (as paths were initialized in Apply only). (#3178)
 ### Fixed
 * nsenter: do not try to close already-closed fds during container setup and
   bail on close(2) failures. (#3058)
 * runc checkpoint/restore: fixed for containers with an external bind mount
   whose destination is a symlink. (#3047)
 * cgroup: improve openat2 handling for cgroup directory handle hardening.
   (#3030)
 * `runc delete -f` now succeeds (rather than timing out) on a paused
   container. (#3134)
 * runc run/start/exec now refuses a frozen cgroup (paused container in case of
   exec). Users can disable this using `--ignore-paused`. (#3132, #3223)
 * config: do not permit null bytes in mount fields. (#3287)
 ## [1.0.3] - 2021-12-06
 > If you were waiting for the opportune moment, that was it.
 ### Security
 * A potential vulnerability was discovered in runc (related to an internal
   usage of netlink), however upon further investigation we discovered that
   while this bug was exploitable on the master branch of runc, no released
   version of runc could be exploited using this bug. The exploit required being
   able to create a netlink attribute with a length that would overflow a uint16
   but this was not possible in any released version of runc. For more
   information, see [GHSA-v95c-p5hm-xq8f][] and CVE-2021-43784.
 ### Fixed
 * Fixed inability to start a container with read-write bind mount of a
   read-only fuse host mount. (#3283, #3292)
 * Fixed inability to start when read-only /dev is set in spec. (#3276, #3277)
 * Fixed not removing sub-cgroups upon container delete, when rootless cgroup v2
   is used with older systemd. (#3226, #3297)
 * Fixed returning error from GetStats when hugetlb is unsupported (which causes
   excessive logging for Kubernetes). (#3233, #3295)
 * Improved an error message when dbus-user-session is not installed and
   rootless + cgroup2 + systemd are used (#3212)
 [GHSA-v95c-p5hm-xq8f]: https://github.com/opencontainers/runc/security/advisories/GHSA-v95c-p5hm-xq8f
 ## [1.0.2] - 2021-08-23
 > Given the right lever, you can move a planet.
 ### Changed
 * Made release builds reproducible from now on. (#3099, #3142)
 ### Fixed
 * Fixed a failure to set CPU quota period in some cases on cgroup v1. (#3090,
   #3115)
 * Fixed the inability to start a container with the "adding seccomp filter
   rule for syscall ..." error, caused by redundant seccomp rules (i.e. those
   that have an action equal to the default one). Such redundant rules are now
   skipped. (#3109, #3129)
 * Fixed a rare debug log race in runc init, which can result in occasional
   harmful "failed to decode ..." errors from runc run or exec. (#3120, #3130)
 * Fixed the check in cgroup v1 systemd manager if a container needs to be
   frozen before Set, and add a setting to skip such freeze unconditionally.
   The previous fix for that issue, done in runc 1.0.1, was not working.
   (#3166, #3167)
 ## [1.0.1] - 2021-07-16
 > If in doubt, Meriadoc, always follow your nose.
 ### Fixed
 * Fixed occasional runc exec/run failure ("interrupted system call") on an
   Azure volume. (#3045, #3074)
 * Fixed "unable to find groups ... token too long" error with /etc/group
   containing lines longer than 64K characters. (#3062, #3079)
 * cgroup/systemd/v1: fix leaving cgroup frozen after Set if a parent cgroup is
   frozen. This is a regression in 1.0.0, not affecting runc itself but some
   of libcontainer users (e.g. Kubernetes). (#3081, #3085)
 * cgroupv2: bpf: Ignore inaccessible existing programs in case of
   permission error when handling replacement of existing bpf cgroup
   programs. This fixes a regression in 1.0.0, where some SELinux
   policies would block runc from being able to run entirely. (#3055, #3087)
 * cgroup/systemd/v2: don't freeze cgroup on Set. (#3067, #3092)
 * cgroup/systemd/v1: avoid unnecessary freeze on Set. (#3082, #3093)
 ## [1.0.0] - 2021-06-22
 > A wizard is never late, nor is he early, he arrives precisely when he means
 > to.
 As runc follows Semantic Versioning, we will endeavour to not make any
 breaking changes without bumping the major version number of runc.
 However, it should be noted that Go API usage of runc's internal
 implementation (libcontainer) is *not* covered by this policy.
 ### Removed
 * Removed libcontainer/configs.Device* identifiers (deprecated since rc94,
   use libcontainer/devices). (#2999)
 * Removed libcontainer/system.RunningInUserNS function (deprecated since
   rc94, use libcontainer/userns). (#2999)
 ### Deprecated
 * The usage of relative paths for mountpoints will now produce a warning
   (such configurations are outside of the spec, and in future runc will
   produce an error when given such configurations). (#2917, #3004)
 ### Fixed
 * cgroupv2: devices: rework the filter generation to produce consistent
   results with cgroupv1, and always clobber any existing eBPF
   program(s) to fix `runc update` and avoid leaking eBPF programs
   (resulting in errors when managing containers).  (#2951)
 * cgroupv2: correctly convert "number of IOs" statistics in a
   cgroupv1-compatible way. (#2965, #2967, #2968, #2964)
 * cgroupv2: support larger than 32-bit IO statistics on 32-bit architectures.
 * cgroupv2: wait for freeze to finish before returning from the freezing
   code, optimize the method for checking whether a cgroup is frozen. (#2955)
 * cgroups/systemd: fixed "retry on dbus disconnect" logic introduced in rc94
 * cgroups/systemd: fixed returning "unit already exists" error from a systemd
   cgroup manager (regression in rc94) (#2997, #2996)
 ### Added
 * cgroupv2: support SkipDevices with systemd driver. (#2958, #3019)
 * cgroup1: blkio: support BFQ weights. (#3010)
 * cgroupv2: set per-device io weights if BFQ IO scheduler is available.
   (#3022)
 ### Changed
 * cgroup/systemd: return, not ignore, stop unit error from Destroy (#2946)
 * Fix all golangci-lint failures. (#2781, #2962)
 * Make `runc --version` output sane even when built with `go get` or
   otherwise outside of our build scripts. (#2962)
 * cgroups: set SkipDevices during runc update (so we don't modify
   cgroups at all during `runc update`). (#2994)
 <!-- minor releases -->
 [Unreleased]: https://github.com/opencontainers/runc/compare/v1.1.0...HEAD
 [1.1.0]: https://github.com/opencontainers/runc/compare/v1.1.0-rc.1...v1.1.0
 [1.0.0]: https://github.com/opencontainers/runc/releases/tag/v1.0.0
 <!-- 1.0.z patch releases -->
 [Unreleased 1.0.z]: https://github.com/opencontainers/runc/compare/v1.0.3...release-1.0
 [1.0.3]: https://github.com/opencontainers/runc/compare/v1.0.2...v1.0.3
 [1.0.2]: https://github.com/opencontainers/runc/compare/v1.0.1...v1.0.2
 [1.0.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.0.1
 <!-- 1.1.z patch releases -->
 [Unreleased 1.1.z]: https://github.com/opencontainers/runc/compare/v1.1.0...release-1.1
 [1.1.0-rc.1]: https://github.com/opencontainers/runc/compare/v1.0.0...v1.1.0-rc.1
--- a/96
+++ b/96
@ -1,34 +1,41 @@
-FROM golang:1.12-stretch
+ARG GO_VERSION=1.17
 ARG BATS_VERSION=v1.3.0
 ARG LIBSECCOMP_VERSION=2.5.3
-RUN dpkg --add-architecture armel \
+FROM golang:${GO_VERSION}-bullseye
 ARG DEBIAN_FRONTEND=noninteractive
 ARG CRIU_REPO=https://download.opensuse.org/repositories/devel:/tools:/criu/Debian_11
 RUN KEYFILE=/usr/share/keyrings/criu-repo-keyring.gpg; \
    wget -nv $CRIU_REPO/Release.key -O- | gpg --dearmor > "$KEYFILE" \
    && echo "deb [signed-by=$KEYFILE] $CRIU_REPO/ /" > /etc/apt/sources.list.d/criu.list \
    && dpkg --add-architecture armel \
    && dpkg --add-architecture armhf \
    && dpkg --add-architecture arm64 \
    && dpkg --add-architecture ppc64el \
-    && apt-get update && apt-get install -y \
+    && apt-get update \
-    build-essential \
+    && apt-get install -y --no-install-recommends \
-    curl \
+        build-essential \
-    sudo \
+        criu \
-    gawk \
+        crossbuild-essential-arm64 \
-    iptables \
+        crossbuild-essential-armel \
-    jq \
+        crossbuild-essential-armhf \
-    pkg-config \
+        crossbuild-essential-ppc64el \
-    libaio-dev \
+        crossbuild-essential-s390x \
-    libcap-dev \
+        curl \
-    libprotobuf-dev \
+        gawk \
-    libprotobuf-c0-dev \
+        gcc \
-    libnl-3-dev \
+        gperf \
-    libnet-dev \
+        iptables \
-    libseccomp2 \
+        jq \
-    libseccomp-dev \
+        kmod \
-    protobuf-c-compiler \
+        pkg-config \
-    protobuf-compiler \
+        python3-minimal \
-    python-minimal \
+        sshfs \
-    uidmap \
+        sudo \
-    kmod \
+        uidmap \
-    crossbuild-essential-armel crossbuild-essential-armhf crossbuild-essential-arm64 crossbuild-essential-ppc64el \
+    && apt-get clean \
-    libseccomp-dev:armel libseccomp-dev:armhf libseccomp-dev:arm64 libseccomp-dev:ppc64el \
+    && rm -rf /var/cache/apt /var/lib/apt/lists/* /etc/apt/sources.list.d/*.list
    --no-install-recommends \
    && apt-get clean
 # Add a dummy user for the rootless integration tests. While runC does
 # not require an entry in /etc/passwd to operate, one of the tests uses
@ -37,30 +44,21 @@ RUN dpkg --add-architecture armel \
 RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
 # install bats
 ARG BATS_VERSION
 RUN cd /tmp \
-    && git clone https://github.com/sstephenson/bats.git \
+    && git clone https://github.com/bats-core/bats-core.git \
-    && cd bats \
+    && cd bats-core \
-    && git reset --hard 03608115df2071fff4eaaff1605768c275e5f81f \
+    && git reset --hard "${BATS_VERSION}" \
    && ./install.sh /usr/local \
-    && rm -rf /tmp/bats
+    && rm -rf /tmp/bats-core
-# install criu
+# install libseccomp
-ENV CRIU_VERSION v3.12
+ARG LIBSECCOMP_VERSION
-RUN mkdir -p /usr/src/criu \
+COPY script/* /tmp/script/
-    && curl -sSL https://github.com/checkpoint-restore/criu/archive/${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
+RUN mkdir -p /opt/libseccomp \
-    && cd /usr/src/criu \
+    && /tmp/script/seccomp.sh "$LIBSECCOMP_VERSION" /opt/libseccomp arm64 armel armhf ppc64le s390x
-    && make install-criu \
+ENV LIBSECCOMP_VERSION=$LIBSECCOMP_VERSION
-    && rm -rf /usr/src/criu
+ENV LD_LIBRARY_PATH=/opt/libseccomp/lib
 ENV PKG_CONFIG_PATH=/opt/libseccomp/lib/pkgconfig
 # setup a playground for us to spawn containers in
 ENV ROOTFS /busybox
 RUN mkdir -p ${ROOTFS}
 COPY script/tmpmount /
 WORKDIR /go/src/github.com/opencontainers/runc
 ENTRYPOINT ["/tmpmount"]
 ADD . /go/src/github.com/opencontainers/runc
 RUN . tests/integration/multi-arch.bash \
    && curl -o- -sSL `get_busybox` | tar xfJC - ${ROOTFS}
--- a/EMERITUS.md
+++ b/EMERITUS.md
@ -0,0 +1,11 @@
 ## Emeritus ##
 We would like to acknowledge previous runc maintainers and their huge
 contributions to our collective success:
 * Alexander Morozov (@lk4d4)
 * Andrei Vagin (@avagin)
 * Rohit Jnagal (@rjnagal)
 * Victor Marmol (@vmarmol)
 We thank these members for their service to the OCI community.
--- a/7
+++ b/7
@ -1,5 +1,8 @@
-Michael Crosby <michael@docker.com> (@crosbymichael)
+Michael Crosby <michael@thepasture.io> (@crosbymichael)
 Mrunal Patel <mpatel@redhat.com> (@mrunalp)
 Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
 Qiang Huang <h.huangqiang@huawei.com> (@hqhq)
-Aleksa Sarai <asarai@suse.de> (@cyphar)
+Aleksa Sarai <cyphar@cyphar.com> (@cyphar)
 Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp> (@AkihiroSuda)
 Kir Kolyshkin <kolyshkin@gmail.com> (@kolyshkin)
 Sebastiaan van Stijn <github@gone.nl> (@thaJeztah)
--- a/175
+++ b/175
@ -1,133 +1,158 @@
 .PHONY: all shell dbuild man release \
 	    localtest localunittest localintegration \
 	    test unittest integration \
 	    cross localcross
 CONTAINER_ENGINE := docker
-GO := go
+GO ?= go
-SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
+PREFIX ?= /usr/local
 PREFIX := $(DESTDIR)/usr/local
 BINDIR := $(PREFIX)/sbin
 MANDIR := $(PREFIX)/share/man
 GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
 GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
 RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
 PROJECT := github.com/opencontainers/runc
 BUILDTAGS ?= seccomp
-COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
+COMMIT ?= $(shell git describe --dirty --long --always)
-COMMIT ?= $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")
+VERSION := $(shell cat ./VERSION)
-MAN_DIR := $(CURDIR)/man/man8
+ifeq ($(shell $(GO) env GOOS),linux)
-MAN_PAGES = $(shell ls $(MAN_DIR)/*.8)
+	ifeq (,$(filter $(shell $(GO) env GOARCH),mips mipsle mips64 mips64le ppc64))
-MAN_PAGES_BASE = $(notdir $(MAN_PAGES))
+		ifeq (,$(findstring -race,$(EXTRA_FLAGS)))
-MAN_INSTALL_PATH := ${PREFIX}/share/man/man8/
+			GO_BUILDMODE := "-buildmode=pie"
 		endif
 	endif
 endif
 GO_BUILD := $(GO) build -trimpath $(GO_BUILDMODE) $(EXTRA_FLAGS) -tags "$(BUILDTAGS)" \
 	-ldflags "-X main.gitCommit=$(COMMIT) -X main.version=$(VERSION) $(EXTRA_LDFLAGS)"
 GO_BUILD_STATIC := CGO_ENABLED=1 $(GO) build -trimpath $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" \
 	-ldflags "-extldflags -static -X main.gitCommit=$(COMMIT) -X main.version=$(VERSION) $(EXTRA_LDFLAGS)"
-RELEASE_DIR := $(CURDIR)/release
+GPG_KEYID ?= asarai@suse.de
 VERSION := ${shell cat ./VERSION}
 SHELL := $(shell command -v bash 2>/dev/null)
 .DEFAULT: runc
-runc: $(SOURCES)
+runc:
-	$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc .
+	$(GO_BUILD) -o runc .
-all: runc recvtty
+all: runc recvtty sd-helper seccompagent
-recvtty: contrib/cmd/recvtty/recvtty
+recvtty sd-helper seccompagent:
 	$(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@
-contrib/cmd/recvtty/recvtty: $(SOURCES)
+static:
-	$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
+	$(GO_BUILD_STATIC) -o runc .
-static: $(SOURCES)
+releaseall: RELEASE_ARGS := "-a arm64 -a armel -a armhf -a ppc64le -a s390x"
-	CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc .
+releaseall: release
 	CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
-release:
+release: runcimage
-	script/release.sh -r release/$(VERSION) -v $(VERSION)
+	$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
 		--rm -v $(CURDIR):/go/src/$(PROJECT) \
 		-e RELEASE_ARGS=$(RELEASE_ARGS) \
 		$(RUNC_IMAGE) make localrelease
 	script/release_sign.sh -S $(GPG_KEYID) -r release/$(VERSION) -v $(VERSION)
 localrelease:
 	script/release_build.sh -r release/$(VERSION) -v $(VERSION) $(RELEASE_ARGS)
 dbuild: runcimage
-	$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
+	$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
 		--privileged --rm \
 		-v $(CURDIR):/go/src/$(PROJECT) \
 		$(RUNC_IMAGE) make clean all
 lint:
-	$(GO) vet $(allpackages)
+	golangci-lint run ./...
 	$(GO) fmt $(allpackages)
 man:
 	man/md2man-all.sh
 runcimage:
-	$(CONTAINER_ENGINE) build ${CONTAINER_ENGINE_BUILD_FLAGS} -t $(RUNC_IMAGE) .
+	$(CONTAINER_ENGINE) build $(CONTAINER_ENGINE_BUILD_FLAGS) -t $(RUNC_IMAGE) .
-test:
+test: unittest integration rootlessintegration
 	make unittest integration rootlessintegration
-localtest:
+localtest: localunittest localintegration localrootlessintegration
 	make localunittest localintegration localrootlessintegration
 unittest: runcimage
-	$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest TESTFLAGS=${TESTFLAGS}
+	$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
 		-t --privileged --rm \
 		-v /lib/modules:/lib/modules:ro \
 		-v $(CURDIR):/go/src/$(PROJECT) \
 		$(RUNC_IMAGE) make localunittest TESTFLAGS=$(TESTFLAGS)
 localunittest: all
-	$(GO) test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages)
+	$(GO) test -timeout 3m -tags "$(BUILDTAGS)" $(TESTFLAGS) -v ./...
 integration: runcimage
-	$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration TESTPATH=${TESTPATH}
+	$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
 		-t --privileged --rm \
 		-v /lib/modules:/lib/modules:ro \
 		-v $(CURDIR):/go/src/$(PROJECT) \
 		$(RUNC_IMAGE) make localintegration TESTPATH=$(TESTPATH)
 localintegration: all
-	bats -t tests/integration${TESTPATH}
+	bats -t tests/integration$(TESTPATH)
 rootlessintegration: runcimage
-	$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration
+	$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
 		-t --privileged --rm \
 		-v $(CURDIR):/go/src/$(PROJECT) \
 		-e ROOTLESS_TESTPATH \
 		$(RUNC_IMAGE) make localrootlessintegration
 localrootlessintegration: all
 	tests/rootless.sh
 shell: runcimage
-	$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
+	$(CONTAINER_ENGINE) run $(CONTAINER_ENGINE_RUN_FLAGS) \
 		-ti --privileged --rm \
 		-v $(CURDIR):/go/src/$(PROJECT) \
 		$(RUNC_IMAGE) bash
 install:
-	install -D -m0755 runc $(BINDIR)/runc
+	install -D -m0755 runc $(DESTDIR)$(BINDIR)/runc
 install-bash:
-	install -D -m0644 contrib/completions/bash/runc $(PREFIX)/share/bash-completion/completions/runc
+	install -D -m0644 contrib/completions/bash/runc $(DESTDIR)$(PREFIX)/share/bash-completion/completions/runc
-install-man:
+install-man: man
-	install -d -m 755 $(MAN_INSTALL_PATH)
+	install -d -m 755 $(DESTDIR)$(MANDIR)/man8
-	install -m 644 $(MAN_PAGES) $(MAN_INSTALL_PATH)
+	install -D -m 644 man/man8/*.8 $(DESTDIR)$(MANDIR)/man8
 uninstall:
 	rm -f $(BINDIR)/runc
 uninstall-bash:
 	rm -f $(PREFIX)/share/bash-completion/completions/runc
 uninstall-man:
 	rm -f $(addprefix $(MAN_INSTALL_PATH),$(MAN_PAGES_BASE))
 clean:
 	rm -f runc runc-*
 	rm -f contrib/cmd/recvtty/recvtty
-	rm -rf $(RELEASE_DIR)
+	rm -f contrib/cmd/sd-helper/sd-helper
-	rm -rf $(MAN_DIR)
+	rm -f contrib/cmd/seccompagent/seccompagent
 	rm -rf release
 	rm -rf man/man8
-validate:
+cfmt: C_SRC=$(shell git ls-files '*.c' | grep -v '^vendor/')
-	script/validate-gofmt
+cfmt:
-	script/validate-c
+	indent -linux -l120 -il0 -ppi2 -cp1 -T size_t -T jmp_buf $(C_SRC)
 	$(GO) vet $(allpackages)
-ci: validate test release
+shellcheck:
 	shellcheck tests/integration/*.bats tests/integration/*.sh \
 		tests/integration/*.bash tests/*.sh \
 		script/release_*.sh script/seccomp.sh script/lib.sh
 	# TODO: add shellcheck for more sh files
-cross: runcimage
+shfmt:
-	$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -e BUILDTAGS="$(BUILDTAGS)" --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localcross
+	shfmt -ln bats -d -w tests/integration/*.bats
 	shfmt -ln bash -d -w man/*.sh script/* tests/*.sh tests/integration/*.bash
-localcross:
+vendor:
-	CGO_ENABLED=1 GOARCH=arm GOARM=6 CC=arm-linux-gnueabi-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armel .
+	$(GO) mod tidy
-	CGO_ENABLED=1 GOARCH=arm GOARM=7 CC=arm-linux-gnueabihf-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armhf .
+	$(GO) mod vendor
-	CGO_ENABLED=1 GOARCH=arm64 CC=aarch64-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-arm64 .
+	$(GO) mod verify
 	CGO_ENABLED=1 GOARCH=ppc64le CC=powerpc64le-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-ppc64le .
-# memoize allpackages, so that it's executed only once and only if used
+verify-dependencies: vendor
-_allpackages = $(shell $(GO) list ./... | grep -v vendor)
+	@test -z "$$(git status --porcelain -- go.mod go.sum vendor/)" \
-allpackages = $(if $(__allpackages),,$(eval __allpackages := $$(_allpackages)))$(__allpackages)
+		|| (echo -e "git status:\n $$(git status -- go.mod go.sum vendor/)\nerror: vendor/, go.mod and/or go.sum not up to date. Run \"make vendor\" to update"; exit 1) \
 		&& echo "all vendor files are up to date."
 .PHONY: runc all recvtty sd-helper seccompagent static releaseall release \
 	localrelease dbuild lint man runcimage \
 	test localtest unittest localunittest integration localintegration \
 	rootlessintegration localrootlessintegration shell install install-bash \
 	install-man clean cfmt shfmt shellcheck \
 	vendor verify-dependencies
--- a/README.md
+++ b/README.md
@ -1,39 +1,33 @@
 # runc
 [![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
 [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
 [![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
 [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
 [![gha/validate](https://github.com/opencontainers/runc/workflows/validate/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Avalidate)
 [![gha/ci](https://github.com/opencontainers/runc/workflows/ci/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Aci)
 ## Introduction
-`runc` is a CLI tool for spawning and running containers according to the OCI specification.
+`runc` is a CLI tool for spawning and running containers on Linux according to the OCI specification.
 ## Releases
 `runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository.
 We will try to make sure that `runc` and the OCI specification major versions stay in lockstep.
 This means that `runc` 1.0.0 should implement the 1.0 version of the specification.
 You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
 Currently, the following features are not considered to be production-ready:
 * Support for cgroup v2
 ## Security
-The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
+The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
 ### Security Audit
 A third party security audit was performed by Cure53, you can see the full report [here](https://github.com/opencontainers/runc/blob/master/docs/Security-Audit.pdf).
 ## Building
-`runc` currently supports the Linux platform with various architecture support.
+`runc` only supports Linux. It must be built with Go version 1.16 or higher.
 It must be built with Go version 1.6 or higher in order for some features to function properly.
 In order to enable seccomp support you will need to install `libseccomp` on your platform.
 > e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
 Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make.
 ```bash
 # create a 'github.com/opencontainers' in your GOPATH/src
 cd github.com/opencontainers
@ -58,21 +52,24 @@ sudo make install
 #### Build Tags
-`runc` supports optional build tags for compiling support of various features.
+`runc` supports optional build tags for compiling support of various features,
-To add build tags to the make option the `BUILDTAGS` variable must be set.
+with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
 To change build tags from the default, set the `BUILDTAGS` variable for make,
 e.g. to disable seccomp:
 ```bash
-make BUILDTAGS='seccomp apparmor'
+make BUILDTAGS=""
 ```
-| Build Tag | Feature                            | Dependency  |
+| Build Tag | Feature                            | Enabled by default | Dependency |
-|-----------|------------------------------------|-------------|
+|-----------|------------------------------------|--------------------|------------|
-| seccomp   | Syscall filtering                  | libseccomp  |
+| seccomp   | Syscall filtering                  | yes                | libseccomp |
 | selinux   | selinux process and mount labeling | <none>      |
 | apparmor  | apparmor profile support           | <none>      |
 | ambient   | ambient capability support         | kernel 4.3  |
 | nokmem    | disable kernel memory account      | <none>      |
 The following build tags were used earlier, but are now obsoleted:
 - **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored)
 - **apparmor** (since runc v1.0.0-rc93 the feature is always enabled)
 - **selinux**  (since runc v1.0.0-rc93 the feature is always enabled)
 ### Running the test suite
@ -97,20 +94,41 @@ You can run a specific integration test by setting the `TESTPATH` variable.
 # make test TESTPATH="/checkpoint.bats"
 ```
-You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables.
+You can run a specific rootless integration test by setting the `ROOTLESS_TESTPATH` variable.
 ```bash
-# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/"
+# make test ROOTLESS_TESTPATH="/checkpoint.bats"
 ```
 You can run a test using your container engine's flags by setting `CONTAINER_ENGINE_BUILD_FLAGS` and `CONTAINER_ENGINE_RUN_FLAGS` variables.
 ```bash
 # make test CONTAINER_ENGINE_BUILD_FLAGS="--build-arg http_proxy=http://yourproxy/" CONTAINER_ENGINE_RUN_FLAGS="-e http_proxy=http://yourproxy/"
 ```
 ### Dependencies Management
-`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
+`runc` uses [Go Modules](https://github.com/golang/go/wiki/Modules) for dependencies management.
-Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
+Please refer to [Go Modules](https://github.com/golang/go/wiki/Modules) for how to add or update
 new dependencies.
 ```
 # Update vendored dependencies
 make vendor
 # Verify all dependencies
 make verify-dependencies
 ```
 ## Using runc
 Please note that runc is a low level tool not designed with an end user
 in mind. It is mostly employed by other higher level container software.
 Therefore, unless there is some specific use case that prevents the use
 of tools like Docker or Podman, it is not recommended to use runc directly.
 If you still want to use runc, here's how.
 ### Creating an OCI Bundle
 In order to use runc you must have your container in the format of an OCI bundle.
@ -152,7 +170,9 @@ If you used the unmodified `runc spec` template this should give you a `sh` sess
 The second way to start a container is using the specs lifecycle operations.
 This gives you more power over how the container is created and managed while it is running.
-This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here.
+This will also launch the container in the background so you will have to edit
 the `config.json` to remove the `terminal` setting for the simple examples
 below (see more details about [runc terminal handling](docs/terminals.md)).
 Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`.
@ -275,6 +295,14 @@ PIDFile=/run/mycontainerid.pid
 WantedBy=multi-user.target
 ```
 ## More documentation
 * [cgroup v2](./docs/cgroup-v2.md)
 * [Checkpoint and restore](./docs/checkpoint-restore.md)
 * [systemd cgroup driver](./docs/systemd.md)
 * [Terminals and standard IO](./docs/terminals.md)
 * [Experimental features](./docs/experimental.md)
 ## License
 The code and docs are released under the [Apache 2.0 license](LICENSE).
--- a/SECURITY.md
+++ b/SECURITY.md
@ -1,3 +1,3 @@
 # Security
-The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
+The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
--- a/2
+++ b/2
@ -1 +1 @@
-1.0.0-rc10
+1.1.0
--- a/Vagrantfile.fedora
+++ b/Vagrantfile.fedora
@ -0,0 +1,52 @@
 # -*- mode: ruby -*-
 # vi: set ft=ruby :
 Vagrant.configure("2") do |config|
 # Fedora box is used for testing cgroup v2 support
  config.vm.box = "fedora/35-cloud-base"
  config.vm.provider :virtualbox do |v|
    v.memory = 2048
    v.cpus = 2
  end
  config.vm.provider :libvirt do |v|
    v.memory = 2048
    v.cpus = 2
  end
  config.vm.provision "shell", inline: <<-SHELL
    set -e -u -o pipefail
    # Work around dnf mirror failures by retrying a few times
    for i in $(seq 0 2); do
      sleep $i
      # "config exclude" dnf shell command is not working in Fedora 35
      # (see https://bugzilla.redhat.com/show_bug.cgi?id=2022571);
      # the workaround is to specify it as an option.
      cat << EOF | dnf -y --exclude=kernel,kernel-core shell && break
 config install_weak_deps false
 update
 install iptables gcc make golang-go glibc-static libseccomp-devel bats jq git-core criu fuse-sshfs
 ts run
 EOF
    done
    dnf clean all
    # Add a user for rootless tests
    useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
    # Allow root and rootless itself to execute `ssh rootless@localhost` in tests/rootless.sh
    ssh-keygen -t ecdsa -N "" -f /root/rootless.key
    mkdir -m 0700 -p /home/rootless/.ssh
    cp /root/rootless.key /home/rootless/.ssh/id_ecdsa
    cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
    chown -R rootless.rootless /home/rootless
    # Delegate cgroup v2 controllers to rootless user via --systemd-cgroup
    mkdir -p /etc/systemd/system/user@.service.d
    cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
 [Service]
 # default: Delegate=pids memory
 # NOTE: delegation of cpuset requires systemd >= 244 (Fedora >= 32, Ubuntu >= 20.04).
 Delegate=yes
 EOF
    systemctl daemon-reload
  SHELL
 end
--- a/checkpoint.go
+++ b/checkpoint.go
@ -1,19 +1,19 @@
 // +build linux
 package main
 import (
 	"errors"
 	"fmt"
 	"net"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	criu "github.com/checkpoint-restore/go-criu/v5/rpc"
 	"github.com/opencontainers/runc/libcontainer"
-	"github.com/opencontainers/runc/libcontainer/system"
+	"github.com/opencontainers/runc/libcontainer/userns"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli"
 	"golang.org/x/sys/unix"
 )
@ -34,7 +34,7 @@ checkpointed.`,
 		cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
 		cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
 		cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
-		cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
+		cli.IntFlag{Name: "status-fd", Value: -1, Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
 		cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
 		cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
 		cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
@ -47,7 +47,7 @@ checkpointed.`,
 			return err
 		}
 		// XXX: Currently this is untested with rootless containers.
-		if os.Geteuid() != 0 || system.RunningInUserNS() {
+		if os.Geteuid() != 0 || userns.RunningInUserNS() {
 			logrus.Warn("runc checkpoint is untested with rootless containers")
 		}
@ -60,10 +60,13 @@ checkpointed.`,
 			return err
 		}
 		if status == libcontainer.Created || status == libcontainer.Stopped {
-			fatalf("Container cannot be checkpointed in %s state", status.String())
+			fatal(fmt.Errorf("Container cannot be checkpointed in %s state", status.String()))
 		}
 		defer destroy(container)
 		options := criuOptions(context)
 		if !(options.LeaveRunning || options.PreDump) {
 			// destroy container unless we tell CRIU to keep it
 			defer destroy(container)
 		}
 		// these are the mandatory criu options for a container
 		setPageServer(context, options)
 		setManageCgroupsMode(context, options)
@ -74,28 +77,53 @@ checkpointed.`,
 	},
 }
-func getCheckpointImagePath(context *cli.Context) string {
+func prepareImagePaths(context *cli.Context) (string, string, error) {
 	imagePath := context.String("image-path")
 	if imagePath == "" {
-		imagePath = getDefaultImagePath(context)
+		imagePath = getDefaultImagePath()
 	}
-	return imagePath
+
 	if err := os.MkdirAll(imagePath, 0o600); err != nil {
 		return "", "", err
 	}
 	parentPath := context.String("parent-path")
 	if parentPath == "" {
 		return imagePath, parentPath, nil
 	}
 	if filepath.IsAbs(parentPath) {
 		return "", "", errors.New("--parent-path must be relative")
 	}
 	realParent := filepath.Join(imagePath, parentPath)
 	fi, err := os.Stat(realParent)
 	if err == nil && !fi.IsDir() {
 		err = &os.PathError{Path: realParent, Err: unix.ENOTDIR}
 	}
 	if err != nil {
 		return "", "", fmt.Errorf("invalid --parent-path: %w", err)
 	}
 	return imagePath, parentPath, nil
 }
 func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
 	// xxx following criu opts are optional
 	// The dump image can be sent to a criu page server
 	if psOpt := context.String("page-server"); psOpt != "" {
-		addressPort := strings.Split(psOpt, ":")
+		address, port, err := net.SplitHostPort(psOpt)
-		if len(addressPort) != 2 {
+
-			fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
+		if err != nil || address == "" || port == "" {
 			fatal(errors.New("Use --page-server ADDRESS:PORT to specify page server"))
 		}
-		portInt, err := strconv.Atoi(addressPort[1])
+		portInt, err := strconv.Atoi(port)
 		if err != nil {
-			fatal(fmt.Errorf("Invalid port number"))
+			fatal(errors.New("Invalid port number"))
 		}
 		options.PageServer = libcontainer.CriuPageServerInfo{
-			Address: addressPort[0],
+			Address: address,
 			Port:    int32(portInt),
 		}
 	}
@ -105,13 +133,13 @@ func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts)
 	if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
 		switch cgOpt {
 		case "soft":
-			options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
+			options.ManageCgroupsMode = criu.CriuCgMode_SOFT
 		case "full":
-			options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
+			options.ManageCgroupsMode = criu.CriuCgMode_FULL
 		case "strict":
-			options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
+			options.ManageCgroupsMode = criu.CriuCgMode_STRICT
 		default:
-			fatal(fmt.Errorf("Invalid manage cgroups mode"))
+			fatal(errors.New("Invalid manage cgroups mode"))
 		}
 	}
 }
--- a/contrib/cmd/recvtty/recvtty.go
+++ b/contrib/cmd/recvtty/recvtty.go
@ -17,12 +17,13 @@
 package main
 import (
 	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"net"
 	"os"
 	"strings"
 	"sync"
 	"github.com/containerd/console"
 	"github.com/opencontainers/runc/libcontainer/utils"
@ -65,7 +66,7 @@ func bail(err error) {
 	os.Exit(1)
 }
-func handleSingle(path string) error {
+func handleSingle(path string, noStdin bool) error {
 	// Open a socket.
 	ln, err := net.Listen("unix", path)
 	if err != nil {
@ -87,7 +88,7 @@ func handleSingle(path string) error {
 	// Get the fd of the connection.
 	unixconn, ok := conn.(*net.UnixConn)
 	if !ok {
-		return fmt.Errorf("failed to cast to unixconn")
+		return errors.New("failed to cast to unixconn")
 	}
 	socket, err := unixconn.File()
@ -105,23 +106,37 @@ func handleSingle(path string) error {
 	if err != nil {
 		return err
 	}
-	console.ClearONLCR(c.Fd())
+	if err := console.ClearONLCR(c.Fd()); err != nil {
 		return err
 	}
 	// Copy from our stdio to the master fd.
-	quitChan := make(chan struct{})
+	var (
 		wg            sync.WaitGroup
 		inErr, outErr error
 	)
 	wg.Add(1)
 	go func() {
-		io.Copy(os.Stdout, c)
+		_, outErr = io.Copy(os.Stdout, c)
-		quitChan <- struct{}{}
+		wg.Done()
 	}()
 	go func() {
 		io.Copy(c, os.Stdin)
 		quitChan <- struct{}{}
 	}()
 	if !noStdin {
 		wg.Add(1)
 		go func() {
 			_, inErr = io.Copy(c, os.Stdin)
 			wg.Done()
 		}()
 	}
 	// Only close the master fd once we've stopped copying.
-	<-quitChan
+	wg.Wait()
 	c.Close()
-	return nil
+
 	if outErr != nil {
 		return outErr
 	}
 	return inErr
 }
 func handleNull(path string) error {
@ -161,15 +176,7 @@ func handleNull(path string) error {
 				return
 			}
-			// Just do a dumb copy to /dev/null.
+			_, _ = io.Copy(io.Discard, master)
 			devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
 			if err != nil {
 				// TODO: Handle this nicely.
 				return
 			}
 			io.Copy(devnull, master)
 			devnull.Close()
 		}(conn)
 	}
 }
@ -185,7 +192,7 @@ func main() {
 		v = append(v, version)
 	}
 	if gitCommit != "" {
-		v = append(v, fmt.Sprintf("commit: %s", gitCommit))
+		v = append(v, "commit: "+gitCommit)
 	}
 	app.Version = strings.Join(v, "\n")
@ -201,26 +208,31 @@ func main() {
 			Value: "",
 			Usage: "Path to write daemon process ID to",
 		},
 		cli.BoolFlag{
 			Name:  "no-stdin",
 			Usage: "Disable stdin handling (no-op for null mode)",
 		},
 	}
 	app.Action = func(ctx *cli.Context) error {
 		args := ctx.Args()
 		if len(args) != 1 {
-			return fmt.Errorf("need to specify a single socket path")
+			return errors.New("need to specify a single socket path")
 		}
 		path := ctx.Args()[0]
 		pidPath := ctx.String("pid-file")
 		if pidPath != "" {
 			pid := fmt.Sprintf("%d\n", os.Getpid())
-			if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil {
+			if err := os.WriteFile(pidPath, []byte(pid), 0o644); err != nil {
 				return err
 			}
 		}
 		noStdin := ctx.Bool("no-stdin")
 		switch ctx.String("mode") {
 		case "single":
-			if err := handleSingle(path); err != nil {
+			if err := handleSingle(path, noStdin); err != nil {
 				return err
 			}
 		case "null":
--- a/contrib/cmd/sd-helper/helper.go
+++ b/contrib/cmd/sd-helper/helper.go
@ -0,0 +1,86 @@
 package main
 import (
 	"flag"
 	"fmt"
 	"os"
 	"github.com/sirupsen/logrus"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 // usage writes the sd-helper help text to standard output and then
 // terminates the process with exit status 1.
 func usage() {
 	const helpText = `Open Container Initiative contrib/cmd/sd-helper
 sd-helper is a tool that uses runc/libcontainer/cgroups/systemd package
 functionality to communicate to systemd in order to perform various operations.
 Currently this is limited to starting and stopping systemd transient slice
 units.
 Usage:
 	sd-helper [-debug] [-parent <pname>] {start|stop} <name>
 Example:
 	sd-helper -parent system.slice start system-pod123.slice
 `
 	fmt.Print(helpText)
 	os.Exit(1)
 }
 // Command-line flags; their values are valid only after flag.Parse is called.
 var (
 	// debug switches logrus to debug level when -debug is given.
 	debug  = flag.Bool("debug", false, "enable debug output")
 	// parent is the -parent flag value; it is passed to unitCommand as the
 	// parent of the unit being operated on.
 	parent = flag.String("parent", "", "parent unit name")
 )
 // main checks that systemd is running, parses the command line and runs the
 // requested unit command. Any failure terminates the process via logrus.Fatal
 // (or usage, when the positional argument count is wrong).
 func main() {
 	// Checked before flag parsing, exactly as the tool has always done.
 	if running := systemd.IsRunningSystemd(); !running {
 		logrus.Fatal("systemd is required")
 	}
 	flag.Parse()
 	if *debug {
 		logrus.SetLevel(logrus.DebugLevel)
 	}
 	// Exactly two positional arguments are required: the command and the unit name.
 	if flag.NArg() != 2 {
 		usage()
 	}
 	if err := unitCommand(flag.Arg(0), flag.Arg(1), *parent); err != nil {
 		logrus.Fatal(err)
 	}
 }
 // newManager builds a systemd cgroup manager for config, choosing the legacy
 // manager unless the host reports cgroup v2 unified mode.
 func newManager(config *configs.Cgroup) (cgroups.Manager, error) {
 	if !cgroups.IsCgroup2UnifiedMode() {
 		return systemd.NewLegacyManager(config, nil)
 	}
 	return systemd.NewUnifiedManager(config, "")
 }
 func unitCommand(cmd, name, parent string) error {
 	podConfig := &configs.Cgroup{
 		Name:      name,
 		Parent:    parent,
 		Resources: &configs.Resources{},
 	}
 	pm, err := newManager(podConfig)
 	if err != nil {
 		return err
 	}
 	switch cmd {
 	case "start":
 		return pm.Apply(-1)
 	case "stop":
 		return pm.Destroy()
 	}
 	return fmt.Errorf("unknown command: %s", cmd)
 }
--- a/contrib/cmd/seccompagent/README.md
+++ b/contrib/cmd/seccompagent/README.md
@ -0,0 +1,70 @@
 # Seccomp Agent
 ## Warning
 Please note this is an example agent, as such it is possible that specially
 crafted messages can produce bad behaviour. Please use it as an example only.
 Also, this agent is used for integration tests. Be aware that changing the
 behaviour can break the integration tests.
 ## Get started
 Compile runc and seccompagent:
 ```bash
 make all
 ```
 Run the seccomp agent in the background:
 ```bash
 sudo ./contrib/cmd/seccompagent/seccompagent &
 ```
 Prepare a container:
 ```bash
 mkdir container-seccomp-notify
 cd container-seccomp-notify
 mkdir rootfs
 docker export $(docker create busybox) | tar -C rootfs -xvf -
 ```
 Then, from within the container directory you prepared earlier
 (`container-seccomp-notify`), generate a config.json by running the script
 gen-seccomp-example-cfg.sh, which is located in the same directory as this README.md.
 Then start the container:
 ```bash
 runc run mycontainerid
 ```
 The container will output something like this:
 ```bash
 + cd /dev/shm
 + mkdir test-dir
 + touch test-file
 + chmod 777 test-file
 chmod: changing permissions of 'test-file': No medium found
 + stat /dev/shm/test-dir-foo
  File: /dev/shm/test-dir-foo
  Size: 40        	Blocks: 0          IO Block: 4096   directory
 Device: 3eh/62d	Inode: 2           Links: 2
 Access: (0755/drwxr-xr-x)  Uid: (    0/    root)   Gid: (    0/    root)
 Access: 2021-09-09 15:03:13.043716040 +0000
 Modify: 2021-09-09 15:03:13.043716040 +0000
 Change: 2021-09-09 15:03:13.043716040 +0000
 Birth: -
 + ls -l /dev/shm
 total 0
 drwxr-xr-x 2 root root 40 Sep  9 15:03 test-dir-foo
 -rw-r--r-- 1 root root  0 Sep  9 15:03 test-file
 + echo Note the agent added a suffix for the directory name and chmod fails
 Note the agent added a suffix for the directory name and chmod fails
 ```
 This shows a simple example that runs in /dev/shm just because it is a tmpfs in
 the example config.json.
 The agent makes all chmod calls fail with ENOMEDIUM, as the example output shows.
 For mkdir, the agent adds a "-foo" suffix: the container runs "mkdir test-dir"
 but the directory created is "test-dir-foo".
--- a/contrib/cmd/seccompagent/gen-seccomp-example-cfg.sh
+++ b/contrib/cmd/seccompagent/gen-seccomp-example-cfg.sh
@ -0,0 +1,35 @@
 #!/usr/bin/env bash
 # Detect if we are running inside bats (i.e. inside integration tests) or just
 # called by an end-user
 # bats-core v1.2.1 defines BATS_RUN_TMPDIR
 if [ -z "$BATS_RUN_TMPDIR" ]; then
 	# When not running in bats, we create the config.json
 	set -e
 	runc spec
 fi
 # We can't source $(dirname $0)/../../../tests/integration/helpers.bash as that
 # exits when not running inside bats. We can do hacks, but just to redefine
 # update_config() seems clearer. We don't even really need to keep them in sync.
 # update_config applies the jq filter given in $1 to ./config.json and writes
 # the result back to ./config.json.
 # The awk stage reads jq's output from stdin inside BEGIN (RS="" so that
 # getline is not line-by-line) and only afterwards prints it to the file named
 # by its first argument -- presumably to avoid truncating config.json before
 # jq has produced its output.
 function update_config() {
        jq "$1" "./config.json" | awk 'BEGIN{RS="";getline<"-";print>ARGV[1]}' "./config.json"
 }
 update_config '.linux.seccomp = {
                        "defaultAction": "SCMP_ACT_ALLOW",
                        "listenerPath": "/run/seccomp-agent.socket",
                        "listenerMetadata": "foo",
                        "architectures": [ "SCMP_ARCH_X86", "SCMP_ARCH_X32", "SCMP_ARCH_X86_64" ],
                        "syscalls": [
                                {
                                        "names": [ "chmod", "fchmod", "fchmodat", "mkdir" ],
                                        "action": "SCMP_ACT_NOTIFY"
                                }
 			]
 		}'
 update_config '.process.args = [
 				"sh",
 				"-c",
 				"set -x; cd /dev/shm; mkdir test-dir; touch test-file; chmod 777 test-file; stat /dev/shm/test-dir-foo && ls -l /dev/shm && echo \"Note the agent added a suffix for the directory name and chmod fails\" "
 				]'
--- a/contrib/cmd/seccompagent/seccompagent.go
+++ b/contrib/cmd/seccompagent/seccompagent.go
@ -0,0 +1,291 @@
 //go:build linux && seccomp
 // +build linux,seccomp
 package main
 import (
 	"bytes"
 	"encoding/json"
 	"errors"
 	"flag"
 	"fmt"
 	"net"
 	"os"
 	"path/filepath"
 	"strings"
 	securejoin "github.com/cyphar/filepath-securejoin"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	libseccomp "github.com/seccomp/libseccomp-golang"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
 var (
 	// socketFile is the path of the unix socket the agent listens on
 	// (set from the -socketfile flag in main).
 	socketFile string
 	// pidFile, when non-empty, is the path the agent writes its pid to
 	// (set from the -pid-file flag in main).
 	pidFile    string
 )
 // closeStateFds closes every file descriptor listed in recvFds.
 // It is used for cleanup on error paths, so close errors are deliberately
 // ignored.
 func closeStateFds(recvFds []int) {
 	for _, fd := range recvFds {
 		// Close the received descriptor itself. Ranging over the index only
 		// (and closing that) would close fds 0..len-1, i.e. the agent's own
 		// stdin/stdout/stderr, while leaking the descriptors we were sent.
 		_ = unix.Close(fd)
 	}
 }
 // parseStateFds picks the seccomp fd out of recvFds and closes every other
 // received fd. When an error is returned, none of the fds has been closed.
 // stateFds is expected to have the layout of specs.ContainerProcessState.Fds,
 // with recvFds being the fds received in the same SCM_RIGHTS message, so that
 // stateFds[i] names recvFds[i].
 func parseStateFds(stateFds []string, recvFds []int) (uintptr, error) {
 	// Locate the (single) position of the seccomp fd name in stateFds.
 	seccompIdx := -1
 	duplicate := false
 	for pos, fdName := range stateFds {
 		if fdName != specs.SeccompFdName {
 			continue
 		}
 		if seccompIdx == -1 {
 			seccompIdx = pos
 		} else {
 			// The seccomp fd name showed up more than once: malformed input.
 			duplicate = true
 		}
 	}
 	if duplicate || seccompIdx == -1 {
 		return 0, errors.New("seccomp fd not found or malformed containerProcessState.Fds")
 	}
 	if seccompIdx < 0 || seccompIdx >= len(recvFds) {
 		return 0, errors.New("seccomp fd index out of range")
 	}
 	// Keep the seccomp fd open for the caller; everything else is not needed.
 	for pos, other := range recvFds {
 		if pos != seccompIdx {
 			unix.Close(other)
 		}
 	}
 	return uintptr(recvFds[seccompIdx]), nil
 }
 // handleNewMessage receives one message from sockfd consisting of a JSON
 // encoded specs.ContainerProcessState plus a control message with file
 // descriptors. It returns the seccomp fd found via parseStateFds together
 // with the Metadata field of the decoded state.
 // If decoding the state or locating the seccomp fd fails, the received fds
 // are closed via closeStateFds before the error is returned.
 func handleNewMessage(sockfd int) (uintptr, string, error) {
 	const maxNameLen = 4096
 	stateBuf := make([]byte, maxNameLen)
 	// Out-of-band buffer sized for a control message with 4 bytes of payload
 	// (presumably a single 32-bit fd -- confirm if more fds may be sent).
 	oobSpace := unix.CmsgSpace(4)
 	oob := make([]byte, oobSpace)
 	n, oobn, _, _, err := unix.Recvmsg(sockfd, stateBuf, oob, 0)
 	if err != nil {
 		return 0, "", err
 	}
 	// A payload that fills the whole buffer, or a control message of any
 	// other size than expected, is rejected.
 	if n >= maxNameLen || oobn != oobSpace {
 		return 0, "", fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn)
 	}
 	// Truncate.
 	stateBuf = stateBuf[:n]
 	oob = oob[:oobn]
 	scms, err := unix.ParseSocketControlMessage(oob)
 	if err != nil {
 		return 0, "", err
 	}
 	if len(scms) != 1 {
 		return 0, "", fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms))
 	}
 	scm := scms[0]
 	fds, err := unix.ParseUnixRights(&scm)
 	if err != nil {
 		return 0, "", err
 	}
 	// From here on we own the received fds and must close them on failure.
 	containerProcessState := &specs.ContainerProcessState{}
 	err = json.Unmarshal(stateBuf, containerProcessState)
 	if err != nil {
 		closeStateFds(fds)
 		return 0, "", fmt.Errorf("cannot parse OCI state: %w", err)
 	}
 	// On success parseStateFds closes every fd except the seccomp one.
 	fd, err := parseStateFds(containerProcessState.Fds, fds)
 	if err != nil {
 		closeStateFds(fds)
 		return 0, "", err
 	}
 	return fd, containerProcessState.Metadata, nil
 }
 // readArgString reads a NUL-terminated string from the memory of process pid,
 // starting at address offset, by reading /proc/<pid>/mem. At most 4095 bytes
 // of string data are returned.
 func readArgString(pid uint32, offset int64) (string, error) {
 	memPath := fmt.Sprintf("/proc/%d/mem", pid)
 	memfd, err := unix.Open(memPath, unix.O_RDONLY, 0o777)
 	if err != nil {
 		return "", err
 	}
 	defer unix.Close(memfd)
 	buf := make([]byte, 4096) // PATH_MAX
 	if _, err = unix.Pread(memfd, buf, offset); err != nil {
 		return "", err
 	}
 	// Force a terminator into the last byte so that a NUL is always found.
 	buf[len(buf)-1] = 0
 	end := bytes.IndexByte(buf, 0)
 	return string(buf[:end]), nil
 }
 // runMkdirForContainer creates the directory "<fileName>-<metadata>" with the
 // given mode on behalf of process pid. Absolute file names are resolved
 // against /proc/<pid>/root/, all others against /proc/<pid>/cwd/.
 func runMkdirForContainer(pid uint32, fileName string, mode uint32, metadata string) error {
 	base := fmt.Sprintf("/proc/%d/cwd/", pid)
 	if strings.HasPrefix(fileName, "/") {
 		// Absolute path: resolve it inside the rootfs of the process.
 		base = fmt.Sprintf("/proc/%d/root/", pid)
 	}
 	// metadata has been validated by the caller, so appending it cannot move
 	// the resulting name to a location outside of base.
 	suffixed := fmt.Sprintf("%s-%s", fileName, metadata)
 	target, err := securejoin.SecureJoin(base, suffixed)
 	if err != nil {
 		return err
 	}
 	return unix.Mkdir(target, mode)
 }
 // notifHandler receives seccomp notifications from fd in an endless loop and
 // answers each of them:
 //   - mkdir is performed by the agent itself, with "-<metadata>" appended to
 //     the requested name (see runMkdirForContainer);
 //   - chmod, fchmod and fchmodat always fail with ENOMEDIUM;
 //   - any other syscall gets the default response, which carries
 //     NotifRespFlagContinue.
 //
 // NOTE(review): the loop has no exit path; every error is logged and followed
 // by `continue`, so the deferred close of fd never runs and a persistently
 // failing NotifReceive would spin. Confirm this is acceptable for an example
 // agent.
 func notifHandler(fd libseccomp.ScmpFd, metadata string) {
 	defer unix.Close(int(fd))
 	for {
 		req, err := libseccomp.NotifReceive(fd)
 		if err != nil {
 			logrus.Errorf("Error in NotifReceive(): %s", err)
 			continue
 		}
 		syscallName, err := req.Data.Syscall.GetName()
 		if err != nil {
 			logrus.Errorf("Error decoding syscall %v(): %s", req.Data.Syscall, err)
 			continue
 		}
 		logrus.Debugf("Received syscall %q, pid %v, arch %q, args %+v", syscallName, req.Pid, req.Data.Arch, req.Data.Args)
 		// Default response: no error, continue flag set. The cases below
 		// override it for the syscalls the agent emulates.
 		resp := &libseccomp.ScmpNotifResp{
 			ID:    req.ID,
 			Error: 0,
 			Val:   0,
 			Flags: libseccomp.NotifRespFlagContinue,
 		}
 		// TOCTOU check
 		if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
 			logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
 			continue
 		}
 		switch syscallName {
 		case "mkdir":
 			// Args[0] is the address of the path string in the caller's memory.
 			fileName, err := readArgString(req.Pid, int64(req.Data.Args[0]))
 			if err != nil {
 				logrus.Errorf("Cannot read argument: %s", err)
 				resp.Error = int32(unix.ENOSYS)
 				resp.Val = ^uint64(0) // -1
 				// Note: Flags still holds NotifRespFlagContinue on this path.
 				goto sendResponse
 			}
 			logrus.Debugf("mkdir: %q", fileName)
 			// TOCTOU check
 			// (repeated after reading the caller's memory; no response is
 			// sent if the notification is no longer valid)
 			if err := libseccomp.NotifIDValid(fd, req.ID); err != nil {
 				logrus.Errorf("TOCTOU check failed: req.ID is no longer valid: %s", err)
 				continue
 			}
 			// Args[1] is passed on as the mode of the new directory.
 			err = runMkdirForContainer(req.Pid, fileName, uint32(req.Data.Args[1]), metadata)
 			if err != nil {
 				resp.Error = int32(unix.ENOSYS)
 				resp.Val = ^uint64(0) // -1
 			}
 			// Clear the continue flag: the agent's result is the answer.
 			resp.Flags = 0
 		case "chmod", "fchmod", "fchmodat":
 			resp.Error = int32(unix.ENOMEDIUM)
 			resp.Val = ^uint64(0) // -1
 			resp.Flags = 0
 		}
 	sendResponse:
 		if err = libseccomp.NotifRespond(fd, resp); err != nil {
 			logrus.Errorf("Error in notification response: %s", err)
 			continue
 		}
 	}
 }
 // main parses the command line, removes any leftover socket file, optionally
 // writes a pid file, and then accepts connections on the agent socket forever.
 // Every connection is expected to deliver one seccomp fd, for which a
 // notifHandler goroutine is started.
 func main() {
 	flag.StringVar(&socketFile, "socketfile", "/run/seccomp-agent.socket", "Socket file")
 	flag.StringVar(&pidFile, "pid-file", "", "Pid file")
 	logrus.SetLevel(logrus.DebugLevel)
 	flag.Parse()
 	// Positional arguments are not accepted.
 	if flag.NArg() > 0 {
 		flag.PrintDefaults()
 		logrus.Fatal("Invalid command")
 	}
 	// Remove a leftover socket file; a missing file is not an error.
 	err := os.Remove(socketFile)
 	if err != nil && !errors.Is(err, os.ErrNotExist) {
 		logrus.Fatalf("Cannot cleanup socket file: %v", err)
 	}
 	if pidFile != "" {
 		pidBytes := []byte(fmt.Sprintf("%d", os.Getpid()))
 		if err := os.WriteFile(pidFile, pidBytes, 0o644); err != nil {
 			logrus.Fatalf("Cannot write pid file: %v", err)
 		}
 	}
 	logrus.Info("Waiting for seccomp file descriptors")
 	listener, err := net.Listen("unix", socketFile)
 	if err != nil {
 		logrus.Fatalf("Cannot listen: %s", err)
 	}
 	defer listener.Close()
 	for {
 		conn, err := listener.Accept()
 		if err != nil {
 			logrus.Errorf("Cannot accept connection: %s", err)
 			continue
 		}
 		// File() hands back a copy of the underlying file, so the connection
 		// itself is closed right away, whether or not File() succeeded.
 		sockFile, err := conn.(*net.UnixConn).File()
 		conn.Close()
 		if err != nil {
 			logrus.Errorf("Cannot get socket: %v", err)
 			continue
 		}
 		seccompFd, metadata, err := handleNewMessage(int(sockFile.Fd()))
 		sockFile.Close()
 		if err != nil {
 			logrus.Errorf("Error receiving seccomp file descriptor: %v", err)
 			continue
 		}
 		// Strings like "/../p" must not be accepted, as they would name a
 		// file in a different location than expected. Only something that is
 		// safe as a file name suffix is kept; otherwise a fixed string is used.
 		if metadata = filepath.Base(metadata); strings.Contains(metadata, "/") {
 			metadata = "agent-generated-suffix"
 		}
 		logrus.Infof("Received new seccomp fd: %v", seccompFd)
 		go notifHandler(libseccomp.ScmpFd(seccompFd), metadata)
 	}
 }
--- a/contrib/cmd/seccompagent/unsupported.go
+++ b/contrib/cmd/seccompagent/unsupported.go
@ -0,0 +1,10 @@
 //go:build !linux || !seccomp
 // +build !linux !seccomp
 package main
 import "fmt"
 // main is the fallback entry point for builds without linux and the seccomp
 // build tag; it only tells the user how to get a working binary.
 func main() {
 	const notice = "Not supported, to use this compile with build tag: seccomp."
 	fmt.Println(notice)
 }
--- a/contrib/completions/bash/runc
+++ b/contrib/completions/bash/runc
@ -113,6 +113,8 @@ __runc_complete_capabilities() {
 		AUDIT_WRITE
 		AUDIT_READ
 		BLOCK_SUSPEND
 		BPF
 		CHECKPOINT_RESTORE
 		CHOWN
 		DAC_OVERRIDE
 		DAC_READ_SEARCH
@ -130,6 +132,7 @@ __runc_complete_capabilities() {
 		NET_BIND_SERVICE
 		NET_BROADCAST
 		NET_RAW
 		PERFMON
 		SETFCAP
 		SETGID
 		SETPCAP
@ -170,6 +173,7 @@ _runc_exec() {
 	   --apparmor
 	   --cap, -c
 	   --preserve-fds
 	   --ignore-paused
 	"
 	local all_options="$options_with_args $boolean_options"
@ -221,6 +225,7 @@ _runc_runc() {
 		--help
 		--version -v
 		--debug
 		--systemd-cgroup
 	"
 	local options_with_args="
 		--log
@ -733,8 +738,6 @@ _runc_update() {
 	   --cpu-share
 	   --cpuset-cpus
 	   --cpuset-mems
 	   --kernel-memory
 	   --kernel-memory-tcp
 	   --memory
 	   --memory-reservation
 	   --memory-swap
@ -769,7 +772,6 @@ _runc() {
 		delete
 		events
 		exec
 		init
 		kill
 		list
 		pause
--- a/create.go
+++ b/create.go
@ -1,6 +1,7 @@
 package main
 import (
 	"fmt"
 	"os"
 	"github.com/urfave/cli"
@ -55,20 +56,12 @@ command(s) that get executed on start, edit the args parameter of the spec. See
 		if err := checkArgs(context, 1, exactArgs); err != nil {
 			return err
 		}
-		if err := revisePidFile(context); err != nil {
+		status, err := startContainer(context, CT_ACT_CREATE, nil)
-			return err
+		if err == nil {
 			// exit with the container's exit status so any external supervisor
 			// is notified of the exit with the correct exit status.
 			os.Exit(status)
 		}
-		spec, err := setupSpec(context)
+		return fmt.Errorf("runc create failed: %w", err)
 		if err != nil {
 			return err
 		}
 		status, err := startContainer(context, spec, CT_ACT_CREATE, nil)
 		if err != nil {
 			return err
 		}
 		// exit with the container's exit status so any external supervisor is
 		// notified of the exit with the correct exit status.
 		os.Exit(status)
 		return nil
 	},
 }
--- a/debian/changelog
+++ b/debian/changelog
@ -1,3 +1,9 @@
 runc (1.1.0-ok1) yangtze; urgency=medium
  * Merge new upstream version 1.1.0 
 -- Luoyaoming <luoyaoming@kylinos.cn>  Fri, 30 Dec 2022 11:11:29 +0800
 runc (1.0.0~rc10-ok2) yangtze; urgency=medium
  * Update version.
--- a/debian/patches/test--fix_TestGetAdditionalGroups.patch
+++ b/debian/patches/test--fix_TestGetAdditionalGroups.patch
@ -0,0 +1,39 @@
 From: Dmitry Smirnov <onlyjob@debian.org>
 Date: Thu, 28 Jul 2022 16:28:22 +0800
 Subject: fix FTBFS on i686
 src/github.com/opencontainers/runc/libcontainer/user/user_test.go:448:36: constant 2147483648 overflows int
 Last-Update: 2018-06-16
 Forwarded: https://github.com/opencontainers/runc/pull/1821
 Bug-Upstream: https://github.com/opencontainers/runc/issues/941
 ---
 libcontainer/user/user.go      | 2 +-
 libcontainer/user/user_test.go | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 diff --git a/libcontainer/user/user.go b/libcontainer/user/user.go
 index 7b912bb..38caded 100644
 --- a/libcontainer/user/user.go
 +++ b/libcontainer/user/user.go
@@ -473,7 +473,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
 				return nil, fmt.Errorf("Unable to find group %s", ag)
 			}
 			// Ensure gid is inside gid range.
 -			if gid < minId || gid > maxId {
 +			if gid < minId || gid >= maxId {
 				return nil, ErrRange
 			}
 			gidMap[gid] = struct{}{}
 diff --git a/libcontainer/user/user_test.go b/libcontainer/user/user_test.go
 index 24ee559..a4aabdc 100644
 --- a/libcontainer/user/user_test.go
 +++ b/libcontainer/user/user_test.go
@@ -445,7 +445,7 @@ this is just some garbage data
 	if utils.GetIntSize() > 4 {
 		tests = append(tests, foo{
 			// groups with too large id
 -			groups:   []string{strconv.Itoa(1 << 31)},
 +			groups:   []string{strconv.Itoa( 1<<31 -1 )},
 			expected: nil,
 			hasError: true,
 		})
--- a/debian/patches/test--skip-Hugetlb.patch
+++ b/debian/patches/test--skip-Hugetlb.patch
@ -0,0 +1,48 @@
 From: Dmitry Smirnov <onlyjob@debian.org>
 Date: Thu, 28 Jul 2022 16:28:22 +0800
 Subject: disabled unreliable tests due to random failures on [ppc64el,
 s390x].
 Last-Update: 2018-09-27
 Forwarded: not-needed
 Bug-Upstream: https://github.com/opencontainers/runc/issues/1822
 ---
 libcontainer/cgroups/fs/hugetlb_test.go | 4 ++++
 1 file changed, 4 insertions(+)
 diff --git a/libcontainer/cgroups/fs/hugetlb_test.go b/libcontainer/cgroups/fs/hugetlb_test.go
 index 9ddacfe..9b60650 100644
 --- a/libcontainer/cgroups/fs/hugetlb_test.go
 +++ b/libcontainer/cgroups/fs/hugetlb_test.go
@@ -89,6 +89,7 @@ func TestHugetlbStats(t *testing.T) {
 }
 func TestHugetlbStatsNoUsageFile(t *testing.T) {
 +t.Skip("Disabled unreliable test")
 	helper := NewCgroupTestUtil("hugetlb", t)
 	defer helper.cleanup()
 	helper.writeFileContents(map[string]string{
@@ -104,6 +105,7 @@ func TestHugetlbStatsNoUsageFile(t *testing.T) {
 }
 func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
 +t.Skip("Disabled unreliable test")
 	helper := NewCgroupTestUtil("hugetlb", t)
 	defer helper.cleanup()
 	for _, pageSize := range HugePageSizes {
@@ -121,6 +123,7 @@ func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
 }
 func TestHugetlbStatsBadUsageFile(t *testing.T) {
 +t.Skip("Disabled unreliable test")
 	helper := NewCgroupTestUtil("hugetlb", t)
 	defer helper.cleanup()
 	for _, pageSize := range HugePageSizes {
@@ -139,6 +142,7 @@ func TestHugetlbStatsBadUsageFile(t *testing.T) {
 }
 func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
 +t.Skip("Disabled unreliable test")
 	helper := NewCgroupTestUtil("hugetlb", t)
 	defer helper.cleanup()
 	helper.writeFileContents(map[string]string{
--- a/debian/patches/test--skip_TestFactoryNewTmpfs.patch
+++ b/debian/patches/test--skip_TestFactoryNewTmpfs.patch
@ -0,0 +1,22 @@
 From: Dmitry Smirnov <onlyjob@debian.org>
 Date: Thu, 28 Jul 2022 16:28:22 +0800
 Subject: disable test (requires root)
 Last-Update: 2018-06-15
 Forwarded: not-needed
 ---
 libcontainer/factory_linux_test.go | 1 +
 1 file changed, 1 insertion(+)
 diff --git a/libcontainer/factory_linux_test.go b/libcontainer/factory_linux_test.go
 index 8d0ca8a..1dc0180 100644
 --- a/libcontainer/factory_linux_test.go
 +++ b/libcontainer/factory_linux_test.go
@@ -78,6 +78,7 @@ func TestFactoryNewIntelRdt(t *testing.T) {
 }
 func TestFactoryNewTmpfs(t *testing.T) {
 +t.Skip("DM - skipping privileged test")
 	root, rerr := newTestRoot()
 	if rerr != nil {
 		t.Fatal(rerr)
--- a/delete.go
+++ b/delete.go
@ -1,12 +1,10 @@
 // +build !solaris
 package main
 import (
 	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"syscall"
 	"time"
 	"github.com/opencontainers/runc/libcontainer"
@ -19,12 +17,12 @@ func killContainer(container libcontainer.Container) error {
 	_ = container.Signal(unix.SIGKILL, false)
 	for i := 0; i < 100; i++ {
 		time.Sleep(100 * time.Millisecond)
-		if err := container.Signal(syscall.Signal(0), false); err != nil {
+		if err := container.Signal(unix.Signal(0), false); err != nil {
 			destroy(container)
 			return nil
 		}
 	}
-	return fmt.Errorf("container init still running")
+	return errors.New("container init still running")
 }
 var deleteCommand = cli.Command{
@ -55,7 +53,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
 		force := context.Bool("force")
 		container, err := getContainer(context)
 		if err != nil {
-			if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists {
+			if errors.Is(err, libcontainer.ErrNotExist) {
 				// if there was an aborted start or something of the sort then the container's directory could exist but
 				// libcontainer does not see it because the state.json file inside that directory was never created.
 				path := filepath.Join(context.GlobalString("root"), id)
@ -81,7 +79,7 @@ status of "ubuntu01" as "stopped" the following will delete resources held for
 			if force {
 				return killContainer(container)
 			}
-			return fmt.Errorf("cannot delete container %s that is not stopped: %s\n", id, s)
+			return fmt.Errorf("cannot delete container %s that is not stopped: %s", id, s)
 		}
 		return nil
--- a/docs/Security-Audit.pdf
+++ b/docs/Security-Audit.pdf
--- a/docs/cgroup-v2.md
+++ b/docs/cgroup-v2.md
@ -0,0 +1,62 @@
 # cgroup v2
 runc fully supports cgroup v2 (unified mode) since v1.0.0-rc93.
 To use cgroup v2, you might need to change the configuration of the host init system.
 Fedora (>= 31) uses cgroup v2 by default and no extra configuration is required.
 On other systemd-based distros, cgroup v2 can be enabled by adding `systemd.unified_cgroup_hierarchy=1` to the kernel cmdline.
 ## Am I using cgroup v2?
 Yes if `/sys/fs/cgroup/cgroup.controllers` is present.
 ## Host Requirements
 ### Kernel
 * Recommended version: 5.2 or later
 * Minimum version: 4.15
 Kernels older than 5.2 are not recommended because they lack the cgroup v2 freezer.
 Notably, kernel older than 4.15 MUST NOT be used (unless you are running containers with user namespaces), as it lacks support for controlling permissions of devices.
 ### Systemd
 On cgroup v2 hosts, it is highly recommended to run runc with the systemd cgroup driver (`runc --systemd-cgroup`), though not mandatory.
 The recommended systemd version is 244 or later. Older systemd does not support delegation of `cpuset` controller.
 Make sure you also have the `dbus-user-session` (Debian/Ubuntu) or `dbus-daemon` (CentOS/Fedora) package installed, and that `dbus` is running. On Debian-flavored distros, this can be accomplished like so:
 ```console
 $ sudo apt install -y dbus-user-session
 $ systemctl --user start dbus
 ```
 ## Rootless
 On cgroup v2 hosts, rootless runc can talk to systemd to get cgroup permissions to be delegated.
 ```console
 $ runc spec --rootless
 $ jq '.linux.cgroupsPath="user.slice:runc:foo"' config.json | sponge config.json
 $ runc --systemd-cgroup run foo
 ```
 The container processes are executed in a cgroup like `/user.slice/user-$(id -u).slice/user@$(id -u).service/user.slice/runc-foo.scope`.
 ### Configuring delegation
 Typically, only `memory` and `pids` controllers are delegated to non-root users by default.
 ```console
 $ cat /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers
 memory pids
 ```
 To allow delegation of other controllers, you need to change the systemd configuration as follows:
 ```console
 # mkdir -p /etc/systemd/system/user@.service.d
 # cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
 [Service]
 Delegate=cpu cpuset io memory pids
 EOF
 # systemctl daemon-reload
 ```
--- a/docs/experimental.md
+++ b/docs/experimental.md
@ -0,0 +1,11 @@
 # Experimental features
 The following features are experimental and subject to change:
 - The `runc features` command (since runc v1.1.0)
 The following features were experimental in the past:
 Feature                                  | Experimental release | Graduation release
 ---------------------------------------- | -------------------- | ------------------
 cgroup v2                                | v1.0.0-rc91          | v1.0.0-rc93
--- a/docs/systemd.md
+++ b/docs/systemd.md
@ -0,0 +1,130 @@
 ## systemd cgroup driver
 By default, runc creates cgroups and sets cgroup limits on its own (this mode
 is known as fs cgroup driver). When `--systemd-cgroup` global option is given
 (as in e.g. `runc --systemd-cgroup run ...`), runc switches to systemd cgroup
 driver. This document describes its features and peculiarities.
 ### systemd unit name and placement
 When creating a container, runc requests systemd (over dbus) to create
 a transient unit for the container, and place it into a specified slice.
 The name of the unit and the containing slice is derived from the container
 runtime spec in the following way:
 1. If `Linux.CgroupsPath` is set, it is expected to be in the form
   `[slice]:[prefix]:[name]`.
   Here `slice` is a systemd slice under which the container is placed.
   If empty, it defaults to `system.slice`, except when cgroup v2 is
   used and rootless container is created, in which case it defaults
   to `user.slice`.
   Note that `slice` can contain dashes to denote a sub-slice
   (e.g. `user-1000.slice` is a correct notation, meaning a subslice
   of `user.slice`), but it must not contain slashes (e.g.
   `user.slice/user-1000.slice` is invalid).
   A `slice` of `-` represents a root slice.
   Next, `prefix` and `name` are used to compose the unit name, which
   is `<prefix>-<name>.scope`, unless `name` has `.slice` suffix, in
   which case `prefix` is ignored and the `name` is used as is.
 2. If `Linux.CgroupsPath` is not set or empty, it works the same way as if it
   were set to `:runc:<container-id>`. See the description above to see
   what it transforms to.
 As described above, a unit being created can either be a scope or a slice.
 For a scope, runc specifies its parent slice via a _Slice=_ systemd property,
 and also sets _Delegate=true_. For a slice, runc specifies a weak dependency on
 the parent slice via a _Wants=_ property.
 ### Resource limits
 runc always enables accounting for all controllers, regardless of any limits
 being set. This means it unconditionally sets the following properties for the
 systemd unit being created:
 * _CPUAccounting=true_
 * _IOAccounting=true_ (_BlockIOAccounting_ for cgroup v1)
 * _MemoryAccounting=true_
 * _TasksAccounting=true_
 The resource limits of the systemd unit are set by runc by translating the
 runtime spec resources to systemd unit properties.
 Such translation is by no means complete, as there are some cgroup properties
 that can not be set via systemd.  Therefore, runc systemd cgroup driver is
 backed by fs driver (in other words, cgroup limits are first set via systemd
 unit properties, and then by writing to cgroupfs files).
 The set of runtime spec resources which is translated by runc to systemd unit
 properties depends on kernel cgroup version being used (v1 or v2), and on the
 systemd version being run. If an older systemd version (which does not support
 some resources) is used, runc does not set those resources.
 The following tables summarize which properties are translated.
 #### cgroup v1
 | runtime spec resource | systemd property name | min systemd version |
 |-----------------------|-----------------------|---------------------|
 | memory.limit          | MemoryLimit           |                     |
 | cpu.shares            | CPUShares             |                     |
 | blockIO.weight        | BlockIOWeight         |                     |
 | pids.limit            | TasksMax              |                     |
 | cpu.cpus              | AllowedCPUs           | v244                |
 | cpu.mems              | AllowedMemoryNodes    | v244                |
 #### cgroup v2
 | runtime spec resource   | systemd property name | min systemd version |
 |-------------------------|-----------------------|---------------------|
 | memory.limit            | MemoryMax             |                     |
 | memory.reservation      | MemoryLow             |                     |
 | memory.swap             | MemorySwapMax         |                     |
 | cpu.shares              | CPUWeight             |                     |
 | pids.limit              | TasksMax              |                     |
 | cpu.cpus                | AllowedCPUs           | v244                |
 | cpu.mems                | AllowedMemoryNodes    | v244                |
 | unified.cpu.max         | CPUQuota, CPUQuotaPeriodSec | v242          |
 | unified.cpu.weight      | CPUWeight             |                     |
 | unified.cpuset.cpus     | AllowedCPUs           | v244                |
 | unified.cpuset.mems     | AllowedMemoryNodes    | v244                |
 | unified.memory.high     | MemoryHigh            |                     |
 | unified.memory.low      | MemoryLow             |                     |
 | unified.memory.min      | MemoryMin             |                     |
 | unified.memory.max      | MemoryMax             |                     |
 | unified.memory.swap.max | MemorySwapMax         |                     |
 | unified.pids.max        | TasksMax              |                     |
 For documentation on systemd unit resource properties, see
 `systemd.resource-control(5)` man page.
 ### Auxiliary properties
 Auxiliary properties of a systemd unit (as shown by `systemctl show
 <unit-name>` after the container is created) can be set (or overwritten) by
 adding annotations to the container runtime spec (`config.json`).
 For example:
 ```json
        "annotations": {
                "org.systemd.property.TimeoutStopUSec": "uint64 123456789",
                "org.systemd.property.CollectMode":"'inactive-or-failed'"
        },
 ```
 The above will set the following properties:
 * `TimeoutStopSec` to 2 minutes and 3 seconds;
 * `CollectMode` to "inactive-or-failed".
 The values must be in the gvariant format (for details, see
 [gvariant documentation](https://developer.gnome.org/glib/stable/gvariant-text.html)).
 To find out which type systemd expects for a particular parameter, please
 consult systemd sources.
--- a/docs/terminals.md
+++ b/docs/terminals.md
@ -113,6 +113,33 @@ interact with pseudo-terminal `stdio`][tty_ioctl(4)].
 > means that it is not really possible to uniquely distinguish between `stdout`
 > and `stderr` from the caller's perspective.
 #### Issues
 If you see an error like
 ```
 open /dev/tty: no such device or address
 ```
 from runc, it means it can't open a terminal (because there isn't one). This
 can happen when stdin (and possibly also stdout and stderr) are redirected,
 or in some environments that lack a tty (such as GitHub Actions runners).
 The solution to this is to *not* use a terminal for the container, i.e. have
 `terminal: false` in `config.json`. If the container really needs a terminal
 (some programs require one), you can provide one, using one of the following
 methods.
 One way is to use `ssh` with the `-tt` flag. The second `t` forces a terminal
 allocation even if there's no local one -- and so it is required when stdin is
 not a terminal (some `ssh` implementations only look for a terminal on stdin).
 Another way is to run runc under the `script` utility, like this
 ```console
 $ script -e -c 'runc run <container>'
 ```
 [tty_ioctl(4)]: https://linux.die.net/man/4/tty_ioctl
 ### <a name="pass-through"> Pass-Through ###
@ -124,7 +151,7 @@ passing of file descriptors -- [details below](#runc-modes)). As an example
 (assuming that `terminal: false` is set in `config.json`):
 ```
-% echo input | runc run some_container > /tmp/log.out 2>& /tmp/log.err
+% echo input | runc run some_container > /tmp/log.out 2> /tmp/log.err
 ```
 Here the container's various `stdio` file descriptors will be substituted with
@ -228,6 +255,19 @@ Unfortunately using detached mode is a bit more complicated and requires more
 care than the foreground mode -- mainly because it is now up to the caller to
 handle the `stdio` of the container.
 Another complication is that the parent process is responsible for acting as
 the subreaper for the container. In short, you need to call
 `prctl(PR_SET_CHILD_SUBREAPER, 1, ...)` in the parent process and correctly
 handle the implications of being a subreaper. Failing to do so may result in
 zombie processes being accumulated on your host.
 These tasks are usually performed by a dedicated (and minimal) monitor process
 per-container. For the sake of comparison, other runtimes such as LXC do not
 have an equivalent detached mode and instead integrate this monitor process
 into the container runtime itself -- this has several tradeoffs, and runc has
 opted to support delegating the monitoring responsibility to the parent process
 through this detached mode.
 #### Detached Pass-Through ####
 In detached mode, pass-through actually does what it says on the tin -- the
--- a/events.go
+++ b/events.go
@ -1,9 +1,8 @@
 // +build linux
 package main
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"sync"
@ -40,7 +39,7 @@ information is displayed once every 5 seconds.`,
 		}
 		duration := context.Duration("interval")
 		if duration <= 0 {
-			return fmt.Errorf("duration interval must be greater than 0")
+			return errors.New("duration interval must be greater than 0")
 		}
 		status, err := container.Status()
 		if err != nil {
@ -125,10 +124,14 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
 	s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode
 	s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage
 	s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage
 	s.CPU.Usage.PercpuKernel = cg.CpuStats.CpuUsage.PercpuUsageInKernelmode
 	s.CPU.Usage.PercpuUser = cg.CpuStats.CpuUsage.PercpuUsageInUsermode
 	s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
 	s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
 	s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
 	s.CPUSet = types.CPUSet(cg.CPUSetStats)
 	s.Memory.Cache = cg.MemoryStats.Cache
 	s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage)
 	s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage)
@ -151,16 +154,22 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
 	}
 	if is := ls.IntelRdtStats; is != nil {
-		if intelrdt.IsCatEnabled() {
+		if intelrdt.IsCATEnabled() {
 			s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
 			s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
 			s.IntelRdt.L3CacheSchema = is.L3CacheSchema
 		}
-		if intelrdt.IsMbaEnabled() {
+		if intelrdt.IsMBAEnabled() {
 			s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
 			s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
 			s.IntelRdt.MemBwSchema = is.MemBwSchema
 		}
 		if intelrdt.IsMBMEnabled() {
 			s.IntelRdt.MBMStats = is.MBMStats
 		}
 		if intelrdt.IsCMTEnabled() {
 			s.IntelRdt.CMTStats = is.CMTStats
 		}
 	}
 	s.NetworkInterfaces = ls.Interfaces
@ -187,29 +196,17 @@ func convertMemoryEntry(c cgroups.MemoryData) types.MemoryEntry {
 func convertBlkioEntry(c []cgroups.BlkioStatEntry) []types.BlkioEntry {
 	var out []types.BlkioEntry
 	for _, e := range c {
-		out = append(out, types.BlkioEntry{
+		out = append(out, types.BlkioEntry(e))
 			Major: e.Major,
 			Minor: e.Minor,
 			Op:    e.Op,
 			Value: e.Value,
 		})
 	}
 	return out
 }
 func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *types.L3CacheInfo {
-	return &types.L3CacheInfo{
+	ci := types.L3CacheInfo(*i)
-		CbmMask:    i.CbmMask,
+	return &ci
 		MinCbmBits: i.MinCbmBits,
 		NumClosids: i.NumClosids,
 	}
 }
 func convertMemBwInfo(i *intelrdt.MemBwInfo) *types.MemBwInfo {
-	return &types.MemBwInfo{
+	mi := types.MemBwInfo(*i)
-		BandwidthGran: i.BandwidthGran,
+	return &mi
 		DelayLinear:   i.DelayLinear,
 		MinBandwidth:  i.MinBandwidth,
 		NumClosids:    i.NumClosids,
 	}
 }
--- a/exec.go
+++ b/exec.go
@ -1,9 +1,8 @@
 // +build linux
 package main
 import (
 	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
 	"strconv"
@ -84,15 +83,18 @@ following will output a list of processes running in the container:
 			Value: &cli.StringSlice{},
 			Usage: "add a capability to the bounding set for the process",
 		},
 		cli.BoolFlag{
 			Name:   "no-subreaper",
 			Usage:  "disable the use of the subreaper used to reap reparented processes",
 			Hidden: true,
 		},
 		cli.IntFlag{
 			Name:  "preserve-fds",
 			Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
 		},
 		cli.StringSliceFlag{
 			Name:  "cgroup",
 			Usage: "run the process in an (existing) sub-cgroup(s). Format is [<controller>:]<cgroup>.",
 		},
 		cli.BoolFlag{
 			Name:  "ignore-paused",
 			Usage: "allow exec in a paused container",
 		},
 	},
 	Action: func(context *cli.Context) error {
 		if err := checkArgs(context, 1, minArgs); err != nil {
@ -105,11 +107,38 @@ following will output a list of processes running in the container:
 		if err == nil {
 			os.Exit(status)
 		}
-		return fmt.Errorf("exec failed: %v", err)
+		fatalWithCode(fmt.Errorf("exec failed: %w", err), 255)
 		return nil // to satisfy the linter
 	},
 	SkipArgReorder: true,
 }
 // getSubCgroupPaths parses the values of the --cgroup flag into a map from
 // controller name to sub-cgroup path.
 //
 // Each argument has the form [<controller>[,<controller>...]:]<cgroup>. An
 // argument without a "<controller>:" prefix is accepted only when it is the
 // sole argument; its path is stored under the empty key "". Empty input
 // yields a nil map and a nil error.
 //
 // An error is returned when an argument contains more than one colon, when
 // a prefix-less argument is combined with other arguments, when a controller
 // name is empty, or when the same controller is specified more than once.
 func getSubCgroupPaths(args []string) (map[string]string, error) {
 	if len(args) == 0 {
 		return nil, nil
 	}
 	paths := make(map[string]string, len(args))
 	for _, c := range args {
 		// Split into controller:path; a third field means a stray extra colon.
 		cs := strings.SplitN(c, ":", 3)
 		switch len(cs) {
 		case 1: // no "<controller>:" prefix
 			if len(args) != 1 {
 				return nil, fmt.Errorf("invalid --cgroup argument: %s (missing <controller>: prefix)", c)
 			}
 			paths[""] = c
 		case 2:
 			// There may be a few comma-separated controllers.
 			for _, ctrl := range strings.Split(cs[0], ",") {
 				if ctrl == "" {
 					// Reject instead of silently filing the path under
 					// the "" key used for prefix-less arguments.
 					return nil, fmt.Errorf("invalid --cgroup argument: %s (empty <controller> prefix)", c)
 				}
 				if _, dup := paths[ctrl]; dup {
 					// Reject instead of letting the last value win.
 					return nil, fmt.Errorf("invalid --cgroup argument(s): controller %s specified multiple times", ctrl)
 				}
 				paths[ctrl] = cs[1]
 			}
 		default:
 			return nil, fmt.Errorf("invalid --cgroup argument: %s", c)
 		}
 	}
 	return paths, nil
 }
 func execProcess(context *cli.Context) (int, error) {
 	container, err := getContainer(context)
 	if err != nil {
@ -120,13 +149,15 @@ func execProcess(context *cli.Context) (int, error) {
 		return -1, err
 	}
 	if status == libcontainer.Stopped {
-		return -1, fmt.Errorf("cannot exec a container that has stopped")
+		return -1, errors.New("cannot exec in a stopped container")
 	}
 	if status == libcontainer.Paused && !context.Bool("ignore-paused") {
 		return -1, errors.New("cannot exec in a paused container (use --ignore-paused to override)")
 	}
 	path := context.String("process")
 	if path == "" && len(context.Args()) == 1 {
-		return -1, fmt.Errorf("process args cannot be empty")
+		return -1, errors.New("process args cannot be empty")
 	}
 	detach := context.Bool("detach")
 	state, err := container.State()
 	if err != nil {
 		return -1, err
@ -137,9 +168,9 @@ func execProcess(context *cli.Context) (int, error) {
 		return -1, err
 	}
-	logLevel := "info"
+	cgPaths, err := getSubCgroupPaths(context.StringSlice("cgroup"))
-	if context.GlobalBool("debug") {
+	if err != nil {
-		logLevel = "debug"
+		return -1, err
 	}
 	r := &runner{
@ -147,12 +178,12 @@ func execProcess(context *cli.Context) (int, error) {
 		shouldDestroy:   false,
 		container:       container,
 		consoleSocket:   context.String("console-socket"),
-		detach:          detach,
+		detach:          context.Bool("detach"),
 		pidFile:         context.String("pid-file"),
 		action:          CT_ACT_RUN,
 		init:            false,
 		preserveFDs:     context.Int("preserve-fds"),
-		logLevel:        logLevel,
+		subCgroupPaths:  cgPaths,
 	}
 	return r.run(p)
 }
@ -203,6 +234,7 @@ func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
 	p.Env = append(p.Env, context.StringSlice("env")...)
 	// set the tty
 	p.Terminal = false
 	if context.IsSet("tty") {
 		p.Terminal = context.Bool("tty")
 	}
@ -215,13 +247,13 @@ func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
 		if len(u) > 1 {
 			gid, err := strconv.Atoi(u[1])
 			if err != nil {
-				return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err)
+				return nil, fmt.Errorf("parsing %s as int for gid failed: %w", u[1], err)
 			}
 			p.User.GID = uint32(gid)
 		}
 		uid, err := strconv.Atoi(u[0])
 		if err != nil {
-			return nil, fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err)
+			return nil, fmt.Errorf("parsing %s as int for uid failed: %w", u[0], err)
 		}
 		p.User.UID = uint32(uid)
 	}
--- a/features.go
+++ b/features.go
@ -0,0 +1,75 @@
 package main
 import (
 	"encoding/json"
 	"fmt"
 	"github.com/opencontainers/runc/libcontainer/capabilities"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/seccomp"
 	"github.com/opencontainers/runc/libcontainer/specconv"
 	"github.com/opencontainers/runc/types/features"
 	"github.com/opencontainers/runtime-spec/specs-go"
 	"github.com/urfave/cli"
 )
 // featuresCommand implements the "features" subcommand: it takes no
 // arguments and writes a features.Features description of this build to the
 // application writer as indented JSON.
 var featuresCommand = cli.Command{
 	Name:      "features",
 	Usage:     "show the enabled features",
 	ArgsUsage: "",
 	Description: `Show the enabled features.
   The result is parsable as a JSON.
   See https://pkg.go.dev/github.com/opencontainers/runc/types/features for the type definition.
   The types are experimental and subject to change.
 `,
 	Action: func(context *cli.Context) error {
 		if err := checkArgs(context, 0, exactArgs); err != nil {
 			return err
 		}
 		// One addressable "true" shared by every *bool field below.
 		enabled := true
 		// Populate the report field by field, in declaration order.
 		var report features.Features
 		report.OCIVersionMin = "1.0.0"
 		report.OCIVersionMax = specs.Version
 		report.Annotations = map[string]string{
 			features.AnnotationRuncVersion:           version,
 			features.AnnotationRuncCommit:            gitCommit,
 			features.AnnotationRuncCheckpointEnabled: "true",
 		}
 		report.Hooks = configs.KnownHookNames()
 		report.MountOptions = specconv.KnownMountOptions()
 		report.Linux = &features.Linux{
 			Namespaces:   specconv.KnownNamespaces(),
 			Capabilities: capabilities.KnownCapabilities(),
 			Cgroup: &features.Cgroup{
 				V1:          &enabled,
 				V2:          &enabled,
 				Systemd:     &enabled,
 				SystemdUser: &enabled,
 			},
 			Apparmor: &features.Apparmor{Enabled: &enabled},
 			Selinux:  &features.Selinux{Enabled: &enabled},
 		}
 		// Seccomp details (and the libseccomp version annotation) are only
 		// reported when seccomp.Enabled is set.
 		if seccomp.Enabled {
 			report.Linux.Seccomp = &features.Seccomp{
 				Enabled:   &enabled,
 				Actions:   seccomp.KnownActions(),
 				Operators: seccomp.KnownOperators(),
 				Archs:     seccomp.KnownArchs(),
 			}
 			libMajor, libMinor, libPatch := seccomp.Version()
 			libVersion := fmt.Sprintf("%d.%d.%d", libMajor, libMinor, libPatch)
 			report.Annotations[features.AnnotationLibseccompVersion] = libVersion
 		}
 		encoder := json.NewEncoder(context.App.Writer)
 		encoder.SetIndent("", "    ")
 		return encoder.Encode(report)
 	},
 }
--- a/go.mod
+++ b/go.mod
@ -0,0 +1,26 @@
 module github.com/opencontainers/runc
 go 1.16
 require (
 	github.com/checkpoint-restore/go-criu/v5 v5.3.0
 	github.com/cilium/ebpf v0.7.0
 	github.com/containerd/console v1.0.3
 	github.com/coreos/go-systemd/v22 v22.3.2
 	github.com/cyphar/filepath-securejoin v0.2.3
 	github.com/docker/go-units v0.4.0
 	github.com/godbus/dbus/v5 v5.0.6
 	github.com/moby/sys/mountinfo v0.5.0
 	github.com/mrunalp/fileutils v0.5.0
 	github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
 	github.com/opencontainers/selinux v1.10.0
 	github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921
 	github.com/sirupsen/logrus v1.8.1
 	github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
 	// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
 	github.com/urfave/cli v1.22.1
 	github.com/vishvananda/netlink v1.1.0
 	golang.org/x/net v0.0.0-20201224014010-6772e930b67b
 	golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c
 	google.golang.org/protobuf v1.27.1
 )
--- a/go.sum
+++ b/go.sum
@ -0,0 +1,80 @@
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/checkpoint-restore/go-criu/v5 v5.3.0 h1:wpFFOoomK3389ue2lAb0Boag6XPht5QYpipxmSNL4d8=
 github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E=
 github.com/cilium/ebpf v0.7.0 h1:1k/q3ATgxSXRdrmPfH8d7YK0GfqVsEKZAX9dQZvs56k=
 github.com/cilium/ebpf v0.7.0/go.mod h1:/oI2+1shJiTGAMgl6/RgJr36Eo1jzrRcAWbcXO2usCA=
 github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
 github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
 github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI=
 github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d h1:U+s90UTSYgptZMwQh2aRr3LuazLJIa+Pg3Kc1ylSYVY=
 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI=
 github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
 github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
 github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY=
 github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/godbus/dbus/v5 v5.0.6 h1:mkgN1ofwASrYnJ5W6U/BxG15eXXXjirgZc7CLqkcaro=
 github.com/godbus/dbus/v5 v5.0.6/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
 github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/moby/sys/mountinfo v0.5.0 h1:2Ks8/r6lopsxWi9m58nlwjaeSzUX9iiL1vj5qB/9ObI=
 github.com/moby/sys/mountinfo v0.5.0/go.mod h1:3bMD3Rg+zkqx8MRYPi7Pyb0Ie97QEBmdxbhnCLlSvSU=
 github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4=
 github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 h1:3snG66yBm59tKhhSPQrQ/0bCrv1LQbKt40LnUPiUxdc=
 github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
 github.com/opencontainers/selinux v1.10.0 h1:rAiKF8hTcgLI3w0DHm6i0ylVVcOrlgR1kK99DRLDhyU=
 github.com/opencontainers/selinux v1.10.0/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921 h1:58EBmR2dMNL2n/FnbQewK3D14nXr0V9CObDSvMJLq+Y=
 github.com/seccomp/libseccomp-golang v0.9.2-0.20210429002308-3879420cc921/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg=
 github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
 github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
 github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
 github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
 github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
 github.com/urfave/cli v1.22.1 h1:+mkCCcOFKPnCmVYVcURKps1Xe+3zP90gSYGNfRkjoIY=
 github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
 github.com/vishvananda/netlink v1.1.0 h1:1iyaYNBLmP6L0220aDnYQpo1QEV4t4hJ+xEEhhJH8j0=
 github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
 github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7ZovXvuNyL3XQl8UFofeikI1NW1Gypu7k=
 github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
 golang.org/x/net v0.0.0-20201224014010-6772e930b67b h1:iFwSg7t5GZmB/Q5TjiEAsdoLDrdJRC1RiF2WhuV29Qw=
 golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210906170528-6f6e22806c34/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c h1:DHcbWVXeY+0Y8HHKR+rbLwnoh2F4tNCY7rTiHJ30RmA=
 golang.org/x/sys v0.0.0-20211116061358-0a5406a5449c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.27.1 h1:SnqbnDw1V7RiZcXPx5MEeqPv2s79L9i7BJUlG/+RurQ=
 google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
--- a/init.go
+++ b/init.go
@ -1,44 +1,37 @@
 package main
 import (
 	"fmt"
 	"os"
 	"runtime"
 	"strconv"
 	"github.com/opencontainers/runc/libcontainer"
 	"github.com/opencontainers/runc/libcontainer/logs"
 	_ "github.com/opencontainers/runc/libcontainer/nsenter"
 	"github.com/sirupsen/logrus"
 	"github.com/urfave/cli"
 )
 func init() {
 	if len(os.Args) > 1 && os.Args[1] == "init" {
 		// This is the golang entry point for runc init, executed
 		// before main() but after libcontainer/nsenter's nsexec().
 		runtime.GOMAXPROCS(1)
 		runtime.LockOSThread()
-		level := os.Getenv("_LIBCONTAINER_LOGLEVEL")
+		level, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGLEVEL"))
 		logLevel, err := logrus.ParseLevel(level)
 		if err != nil {
-			panic(fmt.Sprintf("libcontainer: failed to parse log level: %q: %v", level, err))
+			panic(err)
 		}
-		err = logs.ConfigureLogging(logs.Config{
+		logPipeFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_LOGPIPE"))
 			LogPipeFd: os.Getenv("_LIBCONTAINER_LOGPIPE"),
 			LogFormat: "json",
 			LogLevel:  logLevel,
 		})
 		if err != nil {
-			panic(fmt.Sprintf("libcontainer: failed to configure logging: %v", err))
+			panic(err)
 		}
 		logrus.SetLevel(logrus.Level(level))
 		logrus.SetOutput(os.NewFile(uintptr(logPipeFd), "logpipe"))
 		logrus.SetFormatter(new(logrus.JSONFormatter))
 		logrus.Debug("child process in init()")
 	}
 }
 var initCommand = cli.Command{
 	Name:  "init",
 	Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
 	Action: func(context *cli.Context) error {
 		factory, _ := libcontainer.New("")
 		if err := factory.StartInitialization(); err != nil {
 			// as the error is sent back to the parent there is no need to log
@ -46,5 +39,5 @@ var initCommand = cli.Command{
 			os.Exit(1)
 		}
 		panic("libcontainer: container init failed to exec")
-	},
+	}
 }
--- a/kill.go
+++ b/kill.go
@ -1,14 +1,12 @@
 // +build linux
 package main
 import (
 	"fmt"
 	"strconv"
 	"strings"
 	"syscall"
 	"github.com/urfave/cli"
 	"golang.org/x/sys/unix"
 )
 var killCommand = cli.Command{
@ -22,7 +20,7 @@ Where "<container-id>" is the name for the instance of the container and
 EXAMPLE:
 For example, if the container id is "ubuntu01" the following will send a "KILL"
 signal to the init process of the "ubuntu01" container:
-	 
+
       # runc kill ubuntu01 KILL`,
 	Flags: []cli.Flag{
 		cli.BoolFlag{
@ -55,13 +53,17 @@ signal to the init process of the "ubuntu01" container:
 	},
 }
-func parseSignal(rawSignal string) (syscall.Signal, error) {
+func parseSignal(rawSignal string) (unix.Signal, error) {
 	s, err := strconv.Atoi(rawSignal)
 	if err == nil {
-		return syscall.Signal(s), nil
+		return unix.Signal(s), nil
 	}
-	signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")]
+	sig := strings.ToUpper(rawSignal)
-	if !ok {
+	if !strings.HasPrefix(sig, "SIG") {
 		sig = "SIG" + sig
 	}
 	signal := unix.SignalNum(sig)
 	if signal == 0 {
 		return -1, fmt.Errorf("unknown signal %q", rawSignal)
 	}
 	return signal, nil
--- a/libcontainer/README.md
+++ b/libcontainer/README.md
@ -57,90 +57,94 @@ struct describing how the container is to be created. A sample would look simila
 ```go
 defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
 var devices []*configs.DeviceRule
 for _, device := range specconv.AllowedDevices {
 	devices = append(devices, &device.Rule)
 }
 config := &configs.Config{
 	Rootfs: "/your/path/to/rootfs",
 	Capabilities: &configs.Capabilities{
-                Bounding: []string{
+		Bounding: []string{
-                        "CAP_CHOWN",
+			"CAP_CHOWN",
-                        "CAP_DAC_OVERRIDE",
+			"CAP_DAC_OVERRIDE",
-                        "CAP_FSETID",
+			"CAP_FSETID",
-                        "CAP_FOWNER",
+			"CAP_FOWNER",
-                        "CAP_MKNOD",
+			"CAP_MKNOD",
-                        "CAP_NET_RAW",
+			"CAP_NET_RAW",
-                        "CAP_SETGID",
+			"CAP_SETGID",
-                        "CAP_SETUID",
+			"CAP_SETUID",
-                        "CAP_SETFCAP",
+			"CAP_SETFCAP",
-                        "CAP_SETPCAP",
+			"CAP_SETPCAP",
-                        "CAP_NET_BIND_SERVICE",
+			"CAP_NET_BIND_SERVICE",
-                        "CAP_SYS_CHROOT",
+			"CAP_SYS_CHROOT",
-                        "CAP_KILL",
+			"CAP_KILL",
-                        "CAP_AUDIT_WRITE",
+			"CAP_AUDIT_WRITE",
-                },
+		},
-                Effective: []string{
+		Effective: []string{
-                        "CAP_CHOWN",
+			"CAP_CHOWN",
-                        "CAP_DAC_OVERRIDE",
+			"CAP_DAC_OVERRIDE",
-                        "CAP_FSETID",
+			"CAP_FSETID",
-                        "CAP_FOWNER",
+			"CAP_FOWNER",
-                        "CAP_MKNOD",
+			"CAP_MKNOD",
-                        "CAP_NET_RAW",
+			"CAP_NET_RAW",
-                        "CAP_SETGID",
+			"CAP_SETGID",
-                        "CAP_SETUID",
+			"CAP_SETUID",
-                        "CAP_SETFCAP",
+			"CAP_SETFCAP",
-                        "CAP_SETPCAP",
+			"CAP_SETPCAP",
-                        "CAP_NET_BIND_SERVICE",
+			"CAP_NET_BIND_SERVICE",
-                        "CAP_SYS_CHROOT",
+			"CAP_SYS_CHROOT",
-                        "CAP_KILL",
+			"CAP_KILL",
-                        "CAP_AUDIT_WRITE",
+			"CAP_AUDIT_WRITE",
-                },
+		},
-                Inheritable: []string{
+		Inheritable: []string{
-                        "CAP_CHOWN",
+			"CAP_CHOWN",
-                        "CAP_DAC_OVERRIDE",
+			"CAP_DAC_OVERRIDE",
-                        "CAP_FSETID",
+			"CAP_FSETID",
-                        "CAP_FOWNER",
+			"CAP_FOWNER",
-                        "CAP_MKNOD",
+			"CAP_MKNOD",
-                        "CAP_NET_RAW",
+			"CAP_NET_RAW",
-                        "CAP_SETGID",
+			"CAP_SETGID",
-                        "CAP_SETUID",
+			"CAP_SETUID",
-                        "CAP_SETFCAP",
+			"CAP_SETFCAP",
-                        "CAP_SETPCAP",
+			"CAP_SETPCAP",
-                        "CAP_NET_BIND_SERVICE",
+			"CAP_NET_BIND_SERVICE",
-                        "CAP_SYS_CHROOT",
+			"CAP_SYS_CHROOT",
-                        "CAP_KILL",
+			"CAP_KILL",
-                        "CAP_AUDIT_WRITE",
+			"CAP_AUDIT_WRITE",
-                },
+		},
-                Permitted: []string{
+		Permitted: []string{
-                        "CAP_CHOWN",
+			"CAP_CHOWN",
-                        "CAP_DAC_OVERRIDE",
+			"CAP_DAC_OVERRIDE",
-                        "CAP_FSETID",
+			"CAP_FSETID",
-                        "CAP_FOWNER",
+			"CAP_FOWNER",
-                        "CAP_MKNOD",
+			"CAP_MKNOD",
-                        "CAP_NET_RAW",
+			"CAP_NET_RAW",
-                        "CAP_SETGID",
+			"CAP_SETGID",
-                        "CAP_SETUID",
+			"CAP_SETUID",
-                        "CAP_SETFCAP",
+			"CAP_SETFCAP",
-                        "CAP_SETPCAP",
+			"CAP_SETPCAP",
-                        "CAP_NET_BIND_SERVICE",
+			"CAP_NET_BIND_SERVICE",
-                        "CAP_SYS_CHROOT",
+			"CAP_SYS_CHROOT",
-                        "CAP_KILL",
+			"CAP_KILL",
-                        "CAP_AUDIT_WRITE",
+			"CAP_AUDIT_WRITE",
-                },
+		},
-                Ambient: []string{
+		Ambient: []string{
-                        "CAP_CHOWN",
+			"CAP_CHOWN",
-                        "CAP_DAC_OVERRIDE",
+			"CAP_DAC_OVERRIDE",
-                        "CAP_FSETID",
+			"CAP_FSETID",
-                        "CAP_FOWNER",
+			"CAP_FOWNER",
-                        "CAP_MKNOD",
+			"CAP_MKNOD",
-                        "CAP_NET_RAW",
+			"CAP_NET_RAW",
-                        "CAP_SETGID",
+			"CAP_SETGID",
-                        "CAP_SETUID",
+			"CAP_SETUID",
-                        "CAP_SETFCAP",
+			"CAP_SETFCAP",
-                        "CAP_SETPCAP",
+			"CAP_SETPCAP",
-                        "CAP_NET_BIND_SERVICE",
+			"CAP_NET_BIND_SERVICE",
-                        "CAP_SYS_CHROOT",
+			"CAP_SYS_CHROOT",
-                        "CAP_KILL",
+			"CAP_KILL",
-                        "CAP_AUDIT_WRITE",
+			"CAP_AUDIT_WRITE",
-                },
+		},
-        },
+	},
 	Namespaces: configs.Namespaces([]configs.Namespace{
 		{Type: configs.NEWNS},
 		{Type: configs.NEWUTS},
@ -155,8 +159,7 @@ config := &configs.Config{
 		Parent: "system",
 		Resources: &configs.Resources{
 			MemorySwappiness: nil,
-			AllowAllDevices:  nil,
+			Devices:          devices,
 			AllowedDevices:   configs.DefaultAllowedDevices,
 		},
 	},
 	MaskPaths: []string{
@ -166,7 +169,7 @@ config := &configs.Config{
 	ReadonlyPaths: []string{
 		"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
 	},
-	Devices:  configs.DefaultAutoCreatedDevices,
+	Devices:  specconv.AllowedDevices,
 	Hostname: "testing",
 	Mounts: []*configs.Mount{
 		{
@ -314,7 +317,7 @@ state, err := container.State()
 #### Checkpoint & Restore
 libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
-This let's you save the state of a process running inside a container to disk, and then restore
+This lets you save the state of a process running inside a container to disk, and then restore
 that state into a new process, on the same machine or on another machine.
 `criu` version 1.5.2 or higher is required to use checkpoint and restore.
--- a/libcontainer/apparmor/apparmor.go
+++ b/libcontainer/apparmor/apparmor.go
@ -1,60 +1,16 @@
 // +build apparmor,linux
 package apparmor
-import (
+import "errors"
 	"fmt"
 	"io/ioutil"
 	"os"
-	"github.com/opencontainers/runc/libcontainer/utils"
+var (
 	// IsEnabled returns true if apparmor is enabled for the host.
 	IsEnabled = isEnabled
 	// ApplyProfile will apply the profile with the specified name to the process after
 	// the next exec. It is only supported on Linux and produces an ErrApparmorNotEnabled
 	// on other platforms.
 	ApplyProfile = applyProfile
 	// ErrApparmorNotEnabled indicates that AppArmor is not enabled or not supported.
 	ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
 )
 // IsEnabled returns true if apparmor is enabled for the host.
 func IsEnabled() bool {
 	if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
 		if _, err = os.Stat("/sbin/apparmor_parser"); err == nil {
 			buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
 			return err == nil && len(buf) > 1 && buf[0] == 'Y'
 		}
 	}
 	return false
 }
 func setProcAttr(attr, value string) error {
 	// Under AppArmor you can only change your own attr, so use /proc/self/
 	// instead of /proc/<tid>/ like libapparmor does
 	path := fmt.Sprintf("/proc/self/attr/%s", attr)
 	f, err := os.OpenFile(path, os.O_WRONLY, 0)
 	if err != nil {
 		return err
 	}
 	defer f.Close()
 	if err := utils.EnsureProcHandle(f); err != nil {
 		return err
 	}
 	_, err = fmt.Fprintf(f, "%s", value)
 	return err
 }
 // changeOnExec reimplements aa_change_onexec from libapparmor in Go
 func changeOnExec(name string) error {
 	value := "exec " + name
 	if err := setProcAttr("exec", value); err != nil {
 		return fmt.Errorf("apparmor failed to apply profile: %s", err)
 	}
 	return nil
 }
 // ApplyProfile will apply the profile with the specified name to the process after
 // the next exec.
 func ApplyProfile(name string) error {
 	if name == "" {
 		return nil
 	}
 	return changeOnExec(name)
 }
--- a/libcontainer/apparmor/apparmor_disabled.go
+++ b/libcontainer/apparmor/apparmor_disabled.go
@ -1,20 +0,0 @@
 // +build !apparmor !linux
 package apparmor
 import (
 	"errors"
 )
 var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
 func IsEnabled() bool {
 	return false
 }
 func ApplyProfile(name string) error {
 	if name != "" {
 		return ErrApparmorNotEnabled
 	}
 	return nil
 }
--- a/libcontainer/apparmor/apparmor_linux.go
+++ b/libcontainer/apparmor/apparmor_linux.go
@ -0,0 +1,68 @@
 package apparmor
 import (
 	"errors"
 	"fmt"
 	"os"
 	"sync"
 	"github.com/opencontainers/runc/libcontainer/utils"
 )
 var (
 	// appArmorEnabled holds the result of the probe run by isEnabled.
 	appArmorEnabled bool
 	// checkAppArmor makes isEnabled run its probe at most once.
 	checkAppArmor   sync.Once
 )
 // isEnabled returns true if apparmor is enabled for the host.
 func isEnabled() bool {
 	checkAppArmor.Do(func() {
 		if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil {
 			buf, err := os.ReadFile("/sys/module/apparmor/parameters/enabled")
 			appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y'
 		}
 	})
 	return appArmorEnabled
 }
 // setProcAttr writes value to the AppArmor proc attribute attr of the calling
 // process. It targets /proc/self/attr/apparmor/<attr> and falls back to
 // /proc/self/attr/<attr> when the former does not exist. The opened handle is
 // checked with utils.EnsureProcHandle before writing.
 func setProcAttr(attr, value string) error {
 	const (
 		newStyleDir = "/proc/self/attr/apparmor/"
 		oldStyleDir = "/proc/self/attr/"
 	)
 	// Under AppArmor you can only change your own attr, so use /proc/self/
 	// instead of /proc/<tid>/ like libapparmor does
 	target := newStyleDir + attr
 	_, statErr := os.Stat(target)
 	if errors.Is(statErr, os.ErrNotExist) {
 		// fall back to the old convention
 		target = oldStyleDir + attr
 	}
 	attrFile, err := os.OpenFile(target, os.O_WRONLY, 0)
 	if err != nil {
 		return err
 	}
 	defer attrFile.Close()
 	if err = utils.EnsureProcHandle(attrFile); err != nil {
 		return err
 	}
 	_, err = attrFile.WriteString(value)
 	return err
 }
 // changeOnExec is a Go reimplementation of libapparmor's aa_change_onexec:
 // it writes "exec <name>" to the "exec" proc attribute and wraps any failure.
 func changeOnExec(name string) error {
 	err := setProcAttr("exec", "exec "+name)
 	if err == nil {
 		return nil
 	}
 	return fmt.Errorf("apparmor failed to apply profile: %w", err)
 }
 // applyProfile arranges for the named profile to be applied to the process
 // after its next exec. An empty name is a no-op. It is only supported on
 // Linux and produces an error on other platforms.
 func applyProfile(name string) error {
 	if name != "" {
 		return changeOnExec(name)
 	}
 	return nil
 }
--- a/libcontainer/apparmor/apparmor_unsupported.go
+++ b/libcontainer/apparmor/apparmor_unsupported.go
@ -0,0 +1,15 @@
 //go:build !linux
 // +build !linux
 package apparmor
 // isEnabled always returns false in this non-Linux build.
 func isEnabled() bool {
 	return false
 }
 // applyProfile accepts only the empty profile name in this non-Linux build;
 // any other name yields ErrApparmorNotEnabled.
 func applyProfile(name string) error {
 	if name == "" {
 		return nil
 	}
 	return ErrApparmorNotEnabled
 }
--- a/libcontainer/capabilities/capabilities.go
+++ b/libcontainer/capabilities/capabilities.go
@ -0,0 +1,123 @@
 //go:build linux
 // +build linux
 package capabilities
 import (
 	"sort"
 	"strings"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/sirupsen/logrus"
 	"github.com/syndtr/gocapability/capability"
 )
 // allCapabilityTypes is the combined set mask that ApplyCaps clears and applies.
 const allCapabilityTypes = capability.CAPS | capability.BOUNDING | capability.AMBIENT
 var (
 	// capabilityMap maps "CAP_"-prefixed upper-case capability names to their
 	// values; it is filled in by init.
 	capabilityMap map[string]capability.Cap
 	// capTypes lists the capability sets that ApplyCaps populates, in order.
 	capTypes      = []capability.CapType{
 		capability.BOUNDING,
 		capability.PERMITTED,
 		capability.INHERITABLE,
 		capability.EFFECTIVE,
 		capability.AMBIENT,
 	}
 )
 // init fills capabilityMap with every capability from capability.List() whose
 // value does not exceed capability.CAP_LAST_CAP, keyed by "CAP_" plus the
 // upper-cased capability name.
 func init() {
 	capabilityMap = make(map[string]capability.Cap, capability.CAP_LAST_CAP+1)
 	known := capability.List()
 	for i := range known {
 		if known[i] <= capability.CAP_LAST_CAP {
 			name := "CAP_" + strings.ToUpper(known[i].String())
 			capabilityMap[name] = known[i]
 		}
 	}
 }
 // KnownCapabilities returns the "CAP_"-prefixed, upper-case names of every
 // capability reported by capability.List(), in that order.
 // Used by `runc features`.
 func KnownCapabilities() []string {
 	all := capability.List()
 	names := make([]string, 0, len(all))
 	for _, c := range all {
 		names = append(names, "CAP_"+strings.ToUpper(c.String()))
 	}
 	return names
 }
 // New builds a Caps from the given Capabilities config. Names that are not
 // present in capabilityMap (unknown, or unavailable in the current
 // environment) are dropped and reported in a single warning. An error is
 // returned only if the capability handle cannot be created or loaded.
 func New(capConfig *configs.Capabilities) (*Caps, error) {
 	unknown := make(map[string]struct{})
 	result := &Caps{
 		caps: map[capability.CapType][]capability.Cap{
 			capability.BOUNDING:    capSlice(capConfig.Bounding, unknown),
 			capability.EFFECTIVE:   capSlice(capConfig.Effective, unknown),
 			capability.INHERITABLE: capSlice(capConfig.Inheritable, unknown),
 			capability.PERMITTED:   capSlice(capConfig.Permitted, unknown),
 			capability.AMBIENT:     capSlice(capConfig.Ambient, unknown),
 		},
 	}
 	handle, err := capability.NewPid2(0)
 	if err != nil {
 		return nil, err
 	}
 	result.pid = handle
 	if err = result.pid.Load(); err != nil {
 		return nil, err
 	}
 	// Warn once, and only after the handle has loaded successfully.
 	if len(unknown) > 0 {
 		logrus.Warn("ignoring unknown or unavailable capabilities: ", mapKeys(unknown))
 	}
 	return result, nil
 }
 // capSlice resolves the capability names in caps through capabilityMap and
 // returns the matching values in input order. Names with no entry in the map
 // are left out of the result and recorded as keys of unknownCaps.
 func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap {
 	var resolved []capability.Cap
 	for _, name := range caps {
 		value, known := capabilityMap[name]
 		if !known {
 			unknownCaps[name] = struct{}{}
 			continue
 		}
 		resolved = append(resolved, value)
 	}
 	return resolved
 }
 // mapKeys collects the keys of input and returns them sorted in increasing
 // order. An empty or nil input yields a nil slice.
 func mapKeys(input map[string]struct{}) []string {
 	var names []string
 	for name := range input {
 		names = append(names, name)
 	}
 	sort.Sort(sort.StringSlice(names))
 	return names
 }
 // Caps holds the capabilities for a container.
 type Caps struct {
 	// pid is the capability handle obtained from capability.NewPid2(0) in New.
 	pid  capability.Capabilities
 	// caps holds the resolved capabilities for each capability set.
 	caps map[capability.CapType][]capability.Cap
 }
 // ApplyBoundingSet replaces the capability bounding set with the bounding
 // capabilities stored in c: the set is cleared, repopulated, then applied.
 func (c *Caps) ApplyBoundingSet() error {
 	const which = capability.BOUNDING
 	c.pid.Clear(which)
 	c.pid.Set(which, c.caps[which]...)
 	return c.pid.Apply(which)
 }
 // ApplyCaps sets all the capabilities for the current process in the config:
 // every set covered by allCapabilityTypes is cleared, each set listed in
 // capTypes is repopulated from c, and the result is applied.
 func (c *Caps) ApplyCaps() error {
 	c.pid.Clear(allCapabilityTypes)
 	for i := range capTypes {
 		kind := capTypes[i]
 		c.pid.Set(kind, c.caps[kind]...)
 	}
 	return c.pid.Apply(allCapabilityTypes)
 }
--- a/libcontainer/capabilities/capabilities_linux_test.go
+++ b/libcontainer/capabilities/capabilities_linux_test.go
@ -0,0 +1,71 @@
 package capabilities
 import (
 	"io"
 	"os"
 	"testing"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/sirupsen/logrus"
 	"github.com/sirupsen/logrus/hooks/test"
 	"github.com/syndtr/gocapability/capability"
 )
 // TestNew checks that New keeps the one known capability in every capability
 // set and reports the unknown names through a single logrus warning.
 func TestNew(t *testing.T) {
 	cs := []string{"CAP_CHOWN", "CAP_UNKNOWN", "CAP_UNKNOWN2"}
 	conf := configs.Capabilities{
 		Bounding:    cs,
 		Effective:   cs,
 		Inheritable: cs,
 		Permitted:   cs,
 		Ambient:     cs,
 	}
 	hook := test.NewGlobal()
 	defer hook.Reset()
 	// Silence the warning on stderr while New runs; the hook still records it.
 	logrus.SetOutput(io.Discard)
 	caps, err := New(&conf)
 	logrus.SetOutput(os.Stderr)
 	if err != nil {
 		// New returns a nil *Caps on error, so stop here instead of
 		// dereferencing it in the checks below.
 		t.Fatal(err)
 	}
 	e := hook.AllEntries()
 	if len(e) != 1 {
 		t.Errorf("expected 1 warning, got %d", len(e))
 	}
 	expectedLogs := logrus.Entry{
 		Level:   logrus.WarnLevel,
 		Message: "ignoring unknown or unavailable capabilities: [CAP_UNKNOWN CAP_UNKNOWN2]",
 	}
 	l := hook.LastEntry()
 	if l == nil {
 		t.Fatal("expected a warning, but got none")
 	}
 	if l.Level != expectedLogs.Level {
 		t.Errorf("expected %q, got %q", expectedLogs.Level, l.Level)
 	}
 	if l.Message != expectedLogs.Message {
 		t.Errorf("expected %q, got %q", expectedLogs.Message, l.Message)
 	}
 	if len(caps.caps) != len(capTypes) {
 		t.Errorf("expected %d capability types, got %d: %v", len(capTypes), len(caps.caps), caps.caps)
 	}
 	for _, cType := range capTypes {
 		if i := len(caps.caps[cType]); i != 1 {
 			t.Errorf("expected 1 capability for %s, got %d: %v", cType, i, caps.caps[cType])
 			continue
 		}
 		if caps.caps[cType][0] != capability.CAP_CHOWN {
 			t.Errorf("expected CAP_CHOWN, got %s", caps.caps[cType][0])
 			continue
 		}
 	}
 	hook.Reset()
 }
--- a/libcontainer/capabilities/capabilities_unsupported.go
+++ b/libcontainer/capabilities/capabilities_unsupported.go
@ -0,0 +1,4 @@
 //go:build !linux
 // +build !linux
 package capabilities
--- a/libcontainer/capabilities_linux.go
+++ b/libcontainer/capabilities_linux.go
@ -1,117 +0,0 @@
 // +build linux
 package libcontainer
 import (
 	"fmt"
 	"strings"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/syndtr/gocapability/capability"
 )
 // allCapabilityTypes is the combined set mask that ApplyCaps clears and applies.
 const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
 // capabilityMap maps "CAP_"-prefixed upper-case capability names to their
 // values; it is filled in by init.
 var capabilityMap map[string]capability.Cap
 // init fills capabilityMap with every capability from capability.List() up to
 // the last supported one, keyed by "CAP_" plus the upper-cased name.
 func init() {
 	capabilityMap = make(map[string]capability.Cap)
 	highest := capability.CAP_LAST_CAP
 	// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
 	if highest == capability.Cap(63) {
 		highest = capability.CAP_BLOCK_SUSPEND
 	}
 	for _, c := range capability.List() {
 		if c <= highest {
 			capabilityMap["CAP_"+strings.ToUpper(c.String())] = c
 		}
 	}
 }
 // newContainerCapList resolves the capability names of each set in capConfig
 // (bounding, effective, inheritable, permitted, ambient, in that order)
 // through capabilityMap and pairs them with a loaded capability handle.
 // The first name missing from capabilityMap aborts with an
 // "unknown capability" error.
 func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
 	// resolve converts one list of names; it returns an empty, non-nil slice
 	// when names is empty.
 	resolve := func(names []string) ([]capability.Cap, error) {
 		out := []capability.Cap{}
 		for _, name := range names {
 			v, ok := capabilityMap[name]
 			if !ok {
 				return nil, fmt.Errorf("unknown capability %q", name)
 			}
 			out = append(out, v)
 		}
 		return out, nil
 	}
 	var (
 		list containerCapabilities
 		err  error
 	)
 	if list.bounding, err = resolve(capConfig.Bounding); err != nil {
 		return nil, err
 	}
 	if list.effective, err = resolve(capConfig.Effective); err != nil {
 		return nil, err
 	}
 	if list.inheritable, err = resolve(capConfig.Inheritable); err != nil {
 		return nil, err
 	}
 	if list.permitted, err = resolve(capConfig.Permitted); err != nil {
 		return nil, err
 	}
 	if list.ambient, err = resolve(capConfig.Ambient); err != nil {
 		return nil, err
 	}
 	// The handle is created only after every name has resolved.
 	if list.pid, err = capability.NewPid2(0); err != nil {
 		return nil, err
 	}
 	if err = list.pid.Load(); err != nil {
 		return nil, err
 	}
 	return &list, nil
 }
 // containerCapabilities pairs a capability handle with the resolved
 // capabilities of each set, as built by newContainerCapList.
 type containerCapabilities struct {
 	// pid is the capability handle obtained from capability.NewPid2(0).
 	pid         capability.Capabilities
 	bounding    []capability.Cap
 	effective   []capability.Cap
 	inheritable []capability.Cap
 	permitted   []capability.Cap
 	ambient     []capability.Cap
 }
 // ApplyBoundingSet replaces the capability bounding set with c.bounding: the
 // set is cleared, repopulated, then applied.
 func (c *containerCapabilities) ApplyBoundingSet() error {
 	const which = capability.BOUNDS
 	c.pid.Clear(which)
 	c.pid.Set(which, c.bounding...)
 	return c.pid.Apply(which)
 }
 // ApplyCaps sets all the capabilities for the current process in the config:
 // every set covered by allCapabilityTypes is cleared, each set is repopulated
 // from c, and the result is applied.
 func (c *containerCapabilities) ApplyCaps() error {
 	c.pid.Clear(allCapabilityTypes)
 	for _, set := range []struct {
 		which capability.CapType
 		caps  []capability.Cap
 	}{
 		{capability.BOUNDS, c.bounding},
 		{capability.PERMITTED, c.permitted},
 		{capability.INHERITABLE, c.inheritable},
 		{capability.EFFECTIVE, c.effective},
 		{capability.AMBIENT, c.ambient},
 	} {
 		c.pid.Set(set.which, set.caps...)
 	}
 	return c.pid.Apply(allCapabilityTypes)
 }
--- a/libcontainer/cgroups/cgroups.go
+++ b/libcontainer/cgroups/cgroups.go
@ -1,74 +1,59 @@
 // +build linux
 package cgroups
 import (
 	"fmt"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 type Manager interface {
-	// Applies cgroup configuration to the process with the specified pid
+	// Apply creates a cgroup, if not yet created, and adds a process
 	// with the specified pid into that cgroup.  A special value of -1
 	// can be used to merely create a cgroup.
 	Apply(pid int) error
-	// Returns the PIDs inside the cgroup set
+	// GetPids returns the PIDs of all processes inside the cgroup.
 	GetPids() ([]int, error)
-	// Returns the PIDs inside the cgroup set & all sub-cgroups
+	// GetAllPids returns the PIDs of all processes inside the cgroup
 	// and all its sub-cgroups.
 	GetAllPids() ([]int, error)
-	// Returns statistics for the cgroup set
+	// GetStats returns cgroups statistics.
 	GetStats() (*Stats, error)
-	// Toggles the freezer cgroup according with specified state
+	// Freeze sets the freezer cgroup to the specified state.
 	Freeze(state configs.FreezerState) error
-	// Destroys the cgroup set
+	// Destroy removes cgroup.
 	Destroy() error
-	// The option func SystemdCgroups() and Cgroupfs() require following attributes:
+	// Path returns a cgroup path to the specified controller/subsystem.
-	// 	Paths   map[string]string
+	// For cgroupv2, the argument is unused and can be empty.
-	// 	Cgroups *configs.Cgroup
+	Path(string) string
 	// Paths maps cgroup subsystem to path at which it is mounted.
 	// Cgroups specifies specific cgroup settings for the various subsystems
-	// Returns cgroup paths to save in a state file and to be able to
+	// Set sets cgroup resources parameters/limits. If the argument is nil,
-	// restore the object later.
+	// the resources specified during Manager creation (or the previous call
 	// to Set) are used.
 	Set(r *configs.Resources) error
 	// GetPaths returns cgroup path(s) to save in a state file in order to
 	// restore later.
 	//
 	// For cgroup v1, a key is cgroup subsystem name, and the value is the
 	// path to the cgroup for this subsystem.
 	//
 	// For cgroup v2 unified hierarchy, a key is "", and the value is the
 	// unified path.
 	GetPaths() map[string]string
-	// GetUnifiedPath returns the unified path when running in unified mode.
+	// GetCgroups returns the cgroup data as configured.
 	// The value corresponds to the all values of GetPaths() map.
 	//
 	// GetUnifiedPath returns error when running in hybrid mode as well as
 	// in legacy mode.
 	GetUnifiedPath() (string, error)
 	// Sets the cgroup as configured.
 	Set(container *configs.Config) error
 	// Gets the cgroup as configured.
 	GetCgroups() (*configs.Cgroup, error)
 }
-type NotFoundError struct {
+	// GetFreezerState retrieves the current FreezerState of the cgroup.
-	Subsystem string
+	GetFreezerState() (configs.FreezerState, error)
 }
-func (e *NotFoundError) Error() string {
+	// Exists returns whether the cgroup path exists or not.
-	return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
+	Exists() bool
 }
-func NewNotFoundError(sub string) error {
+	// OOMKillCount reports OOM kill count for the cgroup.
-	return &NotFoundError{
+	OOMKillCount() (uint64, error)
 		Subsystem: sub,
 	}
 }
 func IsNotFound(err error) bool {
 	if err == nil {
 		return false
 	}
 	_, ok := err.(*NotFoundError)
 	return ok
 }
--- a/libcontainer/cgroups/cgroups_test.go
+++ b/libcontainer/cgroups/cgroups_test.go
@ -1,5 +1,3 @@
 // +build linux
 package cgroups
 import (
--- a/libcontainer/cgroups/cgroups_unsupported.go
+++ b/libcontainer/cgroups/cgroups_unsupported.go
@ -1,3 +0,0 @@
 // +build !linux
 package cgroups
--- a/libcontainer/cgroups/devices/devices_emulator.go
+++ b/libcontainer/cgroups/devices/devices_emulator.go
@ -0,0 +1,386 @@
 // SPDX-License-Identifier: Apache-2.0
 /*
 * Copyright (C) 2020 Aleksa Sarai <cyphar@cyphar.com>
 * Copyright (C) 2020 SUSE LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package devices
 import (
 	"bufio"
 	"fmt"
 	"io"
 	"sort"
 	"strconv"
 	"strings"
 	"github.com/opencontainers/runc/libcontainer/devices"
 )
 // deviceMeta is a Rule without the Allow or Permissions fields, and no
 // wildcard-type support. It's effectively the "match" portion of a metadata
 // rule, for the purposes of our emulation.
 type deviceMeta struct {
 	// node is the device type of the rule.
 	node  devices.Type
 	// major and minor are the device numbers; either may be devices.Wildcard.
 	major int64
 	minor int64
 }
 // deviceRule is effectively the tuple (deviceMeta, Permissions).
 type deviceRule struct {
 	// meta identifies which device(s) the rule matches.
 	meta  deviceMeta
 	// perms is the set of access permissions attached to the match.
 	perms devices.Permissions
 }
 // deviceRules is a mapping of device metadata rules to the associated
 // permissions in the ruleset. Use orderedEntries to iterate over it in a
 // deterministic order.
 type deviceRules map[deviceMeta]devices.Permissions
 // orderedEntries returns the contents of r as a slice of deviceRule sorted by
 // (major, minor, type). An empty ruleset yields a nil slice.
 func (r deviceRules) orderedEntries() []deviceRule {
 	var entries []deviceRule
 	for m, p := range r {
 		entries = append(entries, deviceRule{meta: m, perms: p})
 	}
 	sort.Slice(entries, func(i, j int) bool {
 		left, right := entries[i].meta, entries[j].meta
 		if left.major != right.major {
 			return left.major < right.major
 		}
 		if left.minor != right.minor {
 			return left.minor < right.minor
 		}
 		return left.node < right.node
 	})
 	return entries
 }
 // Emulator models the state of a devices cgroup: a default policy plus a set
 // of exception rules.
 type Emulator struct {
 	// defaultAllow is true when devices are allowed by default (black-list
 	// mode), in which case rules holds the deny exceptions; otherwise rules
 	// holds the allow exceptions.
 	defaultAllow bool
 	rules        deviceRules
 }
 // IsBlacklist reports whether the emulated cgroup allows devices by default,
 // i.e. whether its rules are deny exceptions.
 func (e *Emulator) IsBlacklist() bool {
 	return e.defaultAllow
 }
 // IsAllowAll reports whether the emulated cgroup allows devices by default
 // and has no exception rules at all.
 func (e *Emulator) IsAllowAll() bool {
 	if !e.IsBlacklist() {
 		return false
 	}
 	return len(e.rules) == 0
 }
 // parseLine parses one "devices.list" line of the form
 // "node major:minor perms" into a deviceRule. A line whose node is "a"
 // yields (nil, nil). Malformed lines, unknown node types, non-numeric
 // device numbers and invalid or empty permissions are reported as errors.
 func parseLine(line string) (*deviceRule, error) {
 	// Input: node major:minor perms.
 	isSeparator := func(r rune) bool {
 		return r == ' ' || r == ':'
 	}
 	fields := strings.FieldsFunc(line, isSeparator)
 	if len(fields) != 4 {
 		return nil, fmt.Errorf("malformed devices.list rule %s", line)
 	}
 	nodeField, majorField, minorField, permsField := fields[0], fields[1], fields[2], fields[3]
 	parsed := new(deviceRule)
 	// Parse the node type.
 	switch nodeField {
 	case "b":
 		parsed.meta.node = devices.BlockDevice
 	case "c":
 		parsed.meta.node = devices.CharDevice
 	case "a":
 		// Super-special case -- "a" always means every device with every
 		// access mode. In fact, for devices.list this actually indicates that
 		// the cgroup is in black-list mode.
 		// TODO: Double-check that the entire file is "a *:* rwm".
 		return nil, nil
 	default:
 		return nil, fmt.Errorf("unknown device type %q", nodeField)
 	}
 	// Parse the major number ("*" stands for the wildcard).
 	parsed.meta.major = devices.Wildcard
 	if majorField != "*" {
 		num, err := strconv.ParseUint(majorField, 10, 32)
 		if err != nil {
 			return nil, fmt.Errorf("invalid major number: %w", err)
 		}
 		parsed.meta.major = int64(num)
 	}
 	// Parse the minor number ("*" stands for the wildcard).
 	parsed.meta.minor = devices.Wildcard
 	if minorField != "*" {
 		num, err := strconv.ParseUint(minorField, 10, 32)
 		if err != nil {
 			return nil, fmt.Errorf("invalid minor number: %w", err)
 		}
 		parsed.meta.minor = int64(num)
 	}
 	// Parse the access permissions.
 	parsed.perms = devices.Permissions(permsField)
 	if !parsed.perms.IsValid() || parsed.perms.IsEmpty() {
 		return nil, fmt.Errorf("parse access mode: contained unknown modes or is empty: %q", permsField)
 	}
 	return parsed, nil
 }
 // addRule records rule in the ruleset, merging its permissions with any
 // permissions already stored for the same match. It always returns nil.
 func (e *Emulator) addRule(rule deviceRule) error { //nolint:unparam
 	if e.rules == nil {
 		e.rules = make(map[deviceMeta]devices.Permissions)
 	}
 	// Merge with any pre-existing permissions.
 	existing := e.rules[rule.meta]
 	e.rules[rule.meta] = rule.perms.Union(existing)
 	return nil
 }
 // rmRule removes rule's permissions from the exactly-matching entry of the
 // ruleset, deleting the entry once no permissions remain. It fails when a
 // different wildcard entry covering the same device grants any of the
 // permissions being removed.
 func (e *Emulator) rmRule(rule deviceRule) error {
 	// Refuse to remove permissions that are also granted by a
 	// partially-matching wildcard rule. cgroupv1 would silently(!) ignore
 	// such a request; this deliberately diverges from cgroupv1 so that users
 	// are not led into a false sense of security.
 	//
 	// "Splitting" the wildcard rule instead is not an option: there are 2^32
 	// possible major and minor numbers, which would exhaust kernel memory
 	// quickly, and it would be really slow (the kernel side is implemented
 	// as a linked-list of exceptions).
 	target := rule.meta
 	wildcards := [...]deviceMeta{
 		{node: target.node, major: devices.Wildcard, minor: target.minor},
 		{node: target.node, major: target.major, minor: devices.Wildcard},
 		{node: target.node, major: devices.Wildcard, minor: devices.Wildcard},
 	}
 	for _, wild := range wildcards {
 		// This wildcard rule is equivalent to the requested rule, so skip it.
 		if wild == target {
 			continue
 		}
 		// Only give an error if the set of permissions overlap.
 		wildPerms := e.rules[wild]
 		if overlap := wildPerms.Intersection(rule.perms); overlap.IsEmpty() {
 			continue
 		}
 		return fmt.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", target, rule.perms, wild, wildPerms)
 	}
 	// Subtract all of the permissions listed from the full match rule. If the
 	// rule didn't exist, all of this is a no-op.
 	// TODO: The actual cgroup code doesn't care if an exception didn't exist
 	//       during removal, so not erroring out here is /accurate/ but quite
 	//       worrying. Maybe we should do additional validation, but again we
 	//       have to worry about backwards-compatibility.
 	remaining := e.rules[target].Difference(rule.perms)
 	if remaining.IsEmpty() {
 		delete(e.rules, target)
 		return nil
 	}
 	e.rules[target] = remaining
 	return nil
 }
 // allow applies an "allow" rule to the emulator. A nil rule or a
 // wildcard-device rule resets the emulator to an empty black-list
 // (allow by default). Otherwise the rule removes a deny exception in
 // black-list mode, or adds an allow exception in white-list mode.
 func (e *Emulator) allow(rule *deviceRule) error {
 	// This cgroup is configured as a black-list. Reset the entire emulator,
 	// and put it into black-list mode.
 	if rule == nil || rule.meta.node == devices.WildcardDevice {
 		*e = Emulator{defaultAllow: true, rules: nil}
 		return nil
 	}
 	if e.defaultAllow {
 		return wrapErr(e.rmRule(*rule), "unable to remove 'deny' exception")
 	}
 	return wrapErr(e.addRule(*rule), "unable to add 'allow' exception")
 }
 // deny applies a "deny" rule to the emulator. A nil rule or a
 // wildcard-device rule resets the emulator to an empty white-list
 // (deny by default). Otherwise the rule adds a deny exception in
 // black-list mode, or removes an allow exception in white-list mode.
 func (e *Emulator) deny(rule *deviceRule) error {
 	// This cgroup is configured as a white-list. Reset the entire emulator,
 	// and put it into white-list mode.
 	if rule == nil || rule.meta.node == devices.WildcardDevice {
 		*e = Emulator{defaultAllow: false, rules: nil}
 		return nil
 	}
 	if e.defaultAllow {
 		return wrapErr(e.addRule(*rule), "unable to add 'deny' exception")
 	}
 	return wrapErr(e.rmRule(*rule), "unable to remove 'allow' exception")
 }
 // Apply feeds a single devices.Rule into the emulator. Rules whose type
 // cannot be expressed in the devices cgroup are rejected. A wildcard-device
 // rule is passed on as a nil rule, which resets the emulator to the
 // corresponding default policy; any other rule is dispatched to allow or
 // deny according to rule.Allow.
 func (e *Emulator) Apply(rule devices.Rule) error {
 	if !rule.Type.CanCgroup() {
 		return fmt.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type)
 	}
 	var inner *deviceRule
 	if rule.Type != devices.WildcardDevice {
 		inner = &deviceRule{
 			meta: deviceMeta{
 				node:  rule.Type,
 				major: rule.Major,
 				minor: rule.Minor,
 			},
 			perms: rule.Permissions,
 		}
 	}
 	if !rule.Allow {
 		return e.deny(inner)
 	}
 	return e.allow(inner)
 }
 // EmulatorFromList reads a "devices.list"-like source line by line and
 // returns an Emulator representing that devices cgroup state. Black-list
 // devices cgroups cannot be fully reconstructed, due to limitations in the
 // devices cgroup API; such cgroups are always treated as "allow all".
 func EmulatorFromList(list io.Reader) (*Emulator, error) {
 	// Normally cgroups are in black-list mode by default, but the way we
 	// figure out the current mode is whether or not devices.list has an
 	// allow-all rule. So we start as a white-list (defaultAllow is false in
 	// the zero value), and the existence of an "a *:* rwm" entry will tell
 	// us otherwise.
 	emu := new(Emulator)
 	// Parse the "devices.list".
 	scanner := bufio.NewScanner(list)
 	for scanner.Scan() {
 		text := scanner.Text()
 		parsed, err := parseLine(text)
 		if err != nil {
 			return nil, fmt.Errorf("error parsing line %q: %w", text, err)
 		}
 		// "devices.list" is an allow list. Note that this means that in
 		// black-list mode, we have no idea what rules are in play. As a
 		// result, we need to be very careful in Transition().
 		if err = emu.allow(parsed); err != nil {
 			return nil, fmt.Errorf("error adding devices.list rule: %w", err)
 		}
 	}
 	if err := scanner.Err(); err != nil {
 		return nil, fmt.Errorf("error reading devices.list lines: %w", err)
 	}
 	return emu, nil
 }
 // Transition computes the minimally-disruptive list of rules that must be
 // applied to a devices cgroup in state source to reach state target. Rules
 // that already exist are not re-applied, and disruptive rules (allow-all or
 // deny-all) are only emitted when necessary.
 //
 // This function is the sole reason for all of Emulator -- to allow us
 // to figure out how to update a containers' cgroups without causing spurious
 // device errors (if possible).
 func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) {
 	var out []*devices.Rule
 	previous := source.rules
 	// A "disruptive" rule (allow-all or deny-all) is needed when the default
 	// policies differ. It is also needed whenever the source is a black-list,
 	// because "devices.list" cannot tell us which deny rules are in place.
 	if source.IsBlacklist() || source.defaultAllow != target.defaultAllow {
 		out = append(out, &devices.Rule{
 			Type:        'a',
 			Major:       -1,
 			Minor:       -1,
 			Permissions: devices.Permissions("rwm"),
 			Allow:       target.defaultAllow,
 		})
 		// After a disruptive rule the old rules are no longer relevant.
 		previous = nil
 	}
 	// NOTE: Rules are traversed in sorted order so the same set of rules is
 	//       always written (this is to aid testing).
 	// Inverse rules for old permissions missing from the target, including
 	// partial inverses for specific permissions. Nothing happens here after a
 	// disruptive rule, since previous is then empty.
 	for _, old := range previous.orderedEntries() {
 		dropped := old.perms.Difference(target.rules[old.meta])
 		if dropped.IsEmpty() {
 			continue
 		}
 		out = append(out, &devices.Rule{
 			Type:        old.meta.node,
 			Major:       old.meta.major,
 			Minor:       old.meta.minor,
 			Permissions: dropped,
 			Allow:       target.defaultAllow,
 		})
 	}
 	// Rules for permissions the target has and the old set lacks. Permissions
 	// present in both sets are filtered out, though this isn't strictly
 	// necessary.
 	for _, wanted := range target.rules.orderedEntries() {
 		gained := wanted.perms.Difference(previous[wanted.meta])
 		if gained.IsEmpty() {
 			continue
 		}
 		out = append(out, &devices.Rule{
 			Type:        wanted.meta.node,
 			Major:       wanted.meta.major,
 			Minor:       wanted.meta.minor,
 			Permissions: gained,
 			Allow:       !target.defaultAllow,
 		})
 	}
 	return out, nil
 }
 // Rules returns the minimum set of rules necessary to convert a *deny-all*
 // cgroup into the emulated filter state (note that this is not the same as a
 // default cgroupv1 cgroup -- which is allow-all). It is a thin wrapper around
 // Transition() with an empty deny-all emulator as the source.
 func (e *Emulator) Rules() ([]*devices.Rule, error) {
 	return (&Emulator{defaultAllow: false}).Transition(e)
 }
 // wrapErr returns nil when err is nil; otherwise it returns an error whose
 // message is "<text>: <err>" and which wraps err (so errors.Is / errors.As
 // still see the original). text is passed as an argument rather than spliced
 // into the format string, so a '%' in text cannot corrupt the message.
 func wrapErr(err error, text string) error {
 	if err == nil {
 		return nil
 	}
 	return fmt.Errorf("%s: %w", text, err)
 }
--- a/libcontainer/cgroups/devices/devices_emulator_test.go
+++ b/libcontainer/cgroups/devices/devices_emulator_test.go
--- a/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
+++ b/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go
@ -1,4 +1,4 @@
-// Package devicefilter containes eBPF device filter program
+// Package devicefilter contains eBPF device filter program
 //
 // The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
 //
@ -7,12 +7,14 @@
 package devicefilter
 import (
 	"errors"
 	"fmt"
 	"math"
 	"strconv"
 	"github.com/cilium/ebpf/asm"
-	"github.com/opencontainers/runc/libcontainer/configs"
+	devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices"
-	"github.com/pkg/errors"
+	"github.com/opencontainers/runc/libcontainer/devices"
 	"golang.org/x/sys/unix"
 )
@ -22,22 +24,54 @@ const (
 )
 // DeviceFilter returns eBPF device filter program and its license string
-func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
+func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) {
-	p := &program{}
+	// Generate the minimum ruleset for the device rules we are given. While we
-	p.init()
+	// don't care about minimum transitions in cgroupv2, using the emulator
-	for i := len(devices) - 1; i >= 0; i-- {
+	// gives us a guarantee that the behaviour of devices filtering is the same
-		if err := p.appendDevice(devices[i]); err != nil {
+	// as cgroupv1, including security hardenings to avoid misconfiguration
 	// (such as punching holes in wildcard rules).
 	emu := new(devicesemulator.Emulator)
 	for _, rule := range rules {
 		if err := emu.Apply(*rule); err != nil {
 			return nil, "", err
 		}
 	}
-	insts, err := p.finalize()
+	cleanRules, err := emu.Rules()
-	return insts, license, err
+	if err != nil {
 		return nil, "", err
 	}
 	p := &program{
 		defaultAllow: emu.IsBlacklist(),
 	}
 	p.init()
 	for idx, rule := range cleanRules {
 		if rule.Type == devices.WildcardDevice {
 			// We can safely skip over wildcard entries because there should
 			// only be one (at most) at the very start to instruct cgroupv1 to
 			// go into allow-list mode. However we do double-check this here.
 			if idx != 0 || rule.Allow != emu.IsBlacklist() {
 				return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString())
 			}
 			continue
 		}
 		if rule.Allow == p.defaultAllow {
 			// There should be no rules which have an action equal to the
 			// default action, the emulator removes those.
 			return nil, "", fmt.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString())
 		}
 		if err := p.appendRule(rule); err != nil {
 			return nil, "", err
 		}
 	}
 	return p.finalize(), license, nil
 }
 type program struct {
-	insts       asm.Instructions
+	insts        asm.Instructions
-	hasWildCard bool
+	defaultAllow bool
-	blockID     int
+	blockID      int
 }
 func (p *program) init() {
@ -49,7 +83,8 @@ func (p *program) init() {
 	*/
 	// R2 <- type (lower 16 bit of u32 access_type at R1[0])
 	p.insts = append(p.insts,
-		asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
+		asm.LoadMem(asm.R2, asm.R1, 0, asm.Word),
 		asm.And.Imm32(asm.R2, 0xFFFF))
 	// R3 <- access (upper 16 bit of u32 access_type at R1[0])
 	p.insts = append(p.insts,
@ -66,39 +101,35 @@ func (p *program) init() {
 		asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
 }
-// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
+// appendRule converts an OCI rule to the relevant eBPF block and adds it
-func (p *program) appendDevice(dev *configs.Device) error {
+// to the in-progress filter program. In order to operate properly, it must be
 // called with a "clean" rule list (generated by devices.Emulator.Rules() --
 // with any "a" rules removed).
 func (p *program) appendRule(rule *devices.Rule) error {
 	if p.blockID < 0 {
 		return errors.New("the program is finalized")
 	}
 	if p.hasWildCard {
 		// All entries after wildcard entry are ignored
 		return nil
 	}
-	bpfType := int32(-1)
+	var bpfType int32
-	hasType := true
+	switch rule.Type {
-	switch dev.Type {
+	case devices.CharDevice:
 	case 'c':
 		bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
-	case 'b':
+	case devices.BlockDevice:
 		bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
 	case 'a':
 		hasType = false
 	default:
-		// if not specified in OCI json, typ is set to DeviceTypeAll
+		// We do not permit 'a', nor any other types we don't know about.
-		return errors.Errorf("invalid DeviceType %q", string(dev.Type))
+		return fmt.Errorf("invalid type %q", string(rule.Type))
 	}
-	if dev.Major > math.MaxUint32 {
+	if rule.Major > math.MaxUint32 {
-		return errors.Errorf("invalid major %d", dev.Major)
+		return fmt.Errorf("invalid major %d", rule.Major)
 	}
-	if dev.Minor > math.MaxUint32 {
+	if rule.Minor > math.MaxUint32 {
-		return errors.Errorf("invalid minor %d", dev.Major)
+		return fmt.Errorf("invalid minor %d", rule.Major)
 	}
-	hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
+	hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1
-	hasMinor := dev.Minor >= 0
+	hasMinor := rule.Minor >= 0
 	bpfAccess := int32(0)
-	for _, r := range dev.Permissions {
+	for _, r := range rule.Permissions {
 		switch r {
 		case 'r':
 			bpfAccess |= unix.BPF_DEVCG_ACC_READ
@ -107,68 +138,65 @@ func (p *program) appendDevice(dev *configs.Device) error {
 		case 'm':
 			bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
 		default:
-			return errors.Errorf("unknown device access %v", r)
+			return fmt.Errorf("unknown device access %v", r)
 		}
 	}
 	// If the access is rwm, skip the check.
 	hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
-	blockSym := fmt.Sprintf("block-%d", p.blockID)
+	var (
-	nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
+		blockSym         = "block-" + strconv.Itoa(p.blockID)
-	prevBlockLastIdx := len(p.insts) - 1
+		nextBlockSym     = "block-" + strconv.Itoa(p.blockID+1)
-	if hasType {
+		prevBlockLastIdx = len(p.insts) - 1
-		p.insts = append(p.insts,
+	)
-			// if (R2 != bpfType) goto next
+	p.insts = append(p.insts,
-			asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
+		// if (R2 != bpfType) goto next
-		)
+		asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
-	}
+	)
 	if hasAccess {
 		p.insts = append(p.insts,
-			// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
+			// if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next
 			asm.Mov.Reg32(asm.R1, asm.R3),
 			asm.And.Imm32(asm.R1, bpfAccess),
-			asm.JEq.Imm(asm.R1, 0, nextBlockSym),
+			asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym),
 		)
 	}
 	if hasMajor {
 		p.insts = append(p.insts,
 			// if (R4 != major) goto next
-			asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
+			asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym),
 		)
 	}
 	if hasMinor {
 		p.insts = append(p.insts,
 			// if (R5 != minor) goto next
-			asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
+			asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym),
 		)
 	}
-	if !hasType && !hasAccess && !hasMajor && !hasMinor {
+	p.insts = append(p.insts, acceptBlock(rule.Allow)...)
 		p.hasWildCard = true
 	}
 	p.insts = append(p.insts, acceptBlock(dev.Allow)...)
 	// set blockSym to the first instruction we added in this iteration
 	p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
 	p.blockID++
 	return nil
 }
-func (p *program) finalize() (asm.Instructions, error) {
+func (p *program) finalize() asm.Instructions {
-	if p.hasWildCard {
+	var v int32
-		// acceptBlock with asm.Return() is already inserted
+	if p.defaultAllow {
-		return p.insts, nil
+		v = 1
 	}
-	blockSym := fmt.Sprintf("block-%d", p.blockID)
+	blockSym := "block-" + strconv.Itoa(p.blockID)
 	p.insts = append(p.insts,
-		// R0 <- 0
+		// R0 <- v
-		asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
+		asm.Mov.Imm32(asm.R0, v).Sym(blockSym),
 		asm.Return(),
 	)
 	p.blockID = -1
-	return p.insts, nil
+	return p.insts
 }
 func acceptBlock(accept bool) asm.Instructions {
-	v := int32(0)
+	var v int32
 	if accept {
 		v = 1
 	}
--- a/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go
+++ b/libcontainer/cgroups/ebpf/devicefilter/devicefilter_test.go
@ -4,7 +4,7 @@ import (
 	"strings"
 	"testing"
-	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/specconv"
 )
@ -20,13 +20,12 @@ func hash(s, comm string) string {
 	return strings.Join(res, "\n")
 }
-func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr string) {
+func testDeviceFilter(t testing.TB, devices []*devices.Rule, expectedStr string) {
 	insts, _, err := DeviceFilter(devices)
 	if err != nil {
 		t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices)
 	}
 	s := insts.String()
 	t.Logf("%s: devices: %+v\n%s", t.Name(), devices, s)
 	if expectedStr != "" {
 		hashed := hash(s, "//")
 		expectedHashed := hash(expectedStr, "//")
@ -39,15 +38,16 @@ func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr strin
 func TestDeviceFilter_Nil(t *testing.T) {
 	expected := `
 // load parameters into registers
-        0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
+        0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
-        1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
+        1: And32Imm dst: r2 imm: 65535
-        2: RSh32Imm dst: r3 imm: 16
+        2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
-        3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
+        3: RSh32Imm dst: r3 imm: 16
-        4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
+        4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
        5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
 block-0:
 // return 0 (reject)
-        5: Mov32Imm dst: r0 imm: 0
+        6: Mov32Imm dst: r0 imm: 0
-        6: Exit
+        7: Exit
 	`
 	testDeviceFilter(t, nil, expected)
 }
@ -55,97 +55,96 @@ block-0:
 func TestDeviceFilter_BuiltInAllowList(t *testing.T) {
 	expected := `
 // load parameters into registers
-         0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
+        0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
-         1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
+        1: And32Imm dst: r2 imm: 65535
-         2: RSh32Imm dst: r3 imm: 16
+        2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
-         3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
+        3: RSh32Imm dst: r3 imm: 16
-         4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
+        4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
        5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
 block-0:
 // tuntap (c, 10, 200, rwm, allow)
         5: JNEImm dst: r2 off: -1 imm: 2 <block-1>
         6: JNEImm dst: r4 off: -1 imm: 10 <block-1>
         7: JNEImm dst: r5 off: -1 imm: 200 <block-1>
         8: Mov32Imm dst: r0 imm: 1
         9: Exit
 block-1:
        10: JNEImm dst: r2 off: -1 imm: 2 <block-2>
        11: JNEImm dst: r4 off: -1 imm: 5 <block-2>
        12: JNEImm dst: r5 off: -1 imm: 2 <block-2>
        13: Mov32Imm dst: r0 imm: 1
        14: Exit
 block-2:
 // /dev/pts (c, 136, wildcard, rwm, true)
        15: JNEImm dst: r2 off: -1 imm: 2 <block-3>
        16: JNEImm dst: r4 off: -1 imm: 136 <block-3>
        17: Mov32Imm dst: r0 imm: 1
        18: Exit
 block-3:
        19: JNEImm dst: r2 off: -1 imm: 2 <block-4>
        20: JNEImm dst: r4 off: -1 imm: 5 <block-4>
        21: JNEImm dst: r5 off: -1 imm: 1 <block-4>
        22: Mov32Imm dst: r0 imm: 1
        23: Exit
 block-4:
        24: JNEImm dst: r2 off: -1 imm: 2 <block-5>
        25: JNEImm dst: r4 off: -1 imm: 1 <block-5>
        26: JNEImm dst: r5 off: -1 imm: 9 <block-5>
        27: Mov32Imm dst: r0 imm: 1
        28: Exit
 block-5:
        29: JNEImm dst: r2 off: -1 imm: 2 <block-6>
        30: JNEImm dst: r4 off: -1 imm: 1 <block-6>
        31: JNEImm dst: r5 off: -1 imm: 5 <block-6>
        32: Mov32Imm dst: r0 imm: 1
        33: Exit
 block-6:
        34: JNEImm dst: r2 off: -1 imm: 2 <block-7>
        35: JNEImm dst: r4 off: -1 imm: 5 <block-7>
        36: JNEImm dst: r5 off: -1 imm: 0 <block-7>
        37: Mov32Imm dst: r0 imm: 1
        38: Exit
 block-7:
        39: JNEImm dst: r2 off: -1 imm: 2 <block-8>
        40: JNEImm dst: r4 off: -1 imm: 1 <block-8>
        41: JNEImm dst: r5 off: -1 imm: 7 <block-8>
        42: Mov32Imm dst: r0 imm: 1
        43: Exit
 block-8:
        44: JNEImm dst: r2 off: -1 imm: 2 <block-9>
        45: JNEImm dst: r4 off: -1 imm: 1 <block-9>
        46: JNEImm dst: r5 off: -1 imm: 8 <block-9>
        47: Mov32Imm dst: r0 imm: 1
        48: Exit
 block-9:
        49: JNEImm dst: r2 off: -1 imm: 2 <block-10>
        50: JNEImm dst: r4 off: -1 imm: 1 <block-10>
        51: JNEImm dst: r5 off: -1 imm: 3 <block-10>
        52: Mov32Imm dst: r0 imm: 1
        53: Exit
 block-10:
 // (b, wildcard, wildcard, m, true)
-        54: JNEImm dst: r2 off: -1 imm: 1 <block-11>
+        6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
-        55: Mov32Reg dst: r1 src: r3
+        7: Mov32Reg dst: r1 src: r3
-        56: And32Imm dst: r1 imm: 1
+        8: And32Imm dst: r1 imm: 1
-        57: JEqImm dst: r1 off: -1 imm: 0 <block-11>
+        9: JNEReg dst: r1 off: -1 src: r3 <block-1>
-        58: Mov32Imm dst: r0 imm: 1
+        10: Mov32Imm dst: r0 imm: 1
-        59: Exit
+        11: Exit
-block-11:
+block-1:
 // (c, wildcard, wildcard, m, true)
-        60: JNEImm dst: r2 off: -1 imm: 2 <block-12>
+        12: JNEImm dst: r2 off: -1 imm: 2 <block-2>
-        61: Mov32Reg dst: r1 src: r3
+        13: Mov32Reg dst: r1 src: r3
-        62: And32Imm dst: r1 imm: 1
+        14: And32Imm dst: r1 imm: 1
-        63: JEqImm dst: r1 off: -1 imm: 0 <block-12>
+        15: JNEReg dst: r1 off: -1 src: r3 <block-2>
-        64: Mov32Imm dst: r0 imm: 1
+        16: Mov32Imm dst: r0 imm: 1
-        65: Exit
+        17: Exit
-block-12:
+block-2:
-        66: Mov32Imm dst: r0 imm: 0
+        18: JNEImm dst: r2 off: -1 imm: 2 <block-3>
-        67: Exit
+        19: JNEImm dst: r4 off: -1 imm: 1 <block-3>
        20: JNEImm dst: r5 off: -1 imm: 3 <block-3>
        21: Mov32Imm dst: r0 imm: 1
        22: Exit
 block-3:
        23: JNEImm dst: r2 off: -1 imm: 2 <block-4>
        24: JNEImm dst: r4 off: -1 imm: 1 <block-4>
        25: JNEImm dst: r5 off: -1 imm: 5 <block-4>
        26: Mov32Imm dst: r0 imm: 1
        27: Exit
 block-4:
        28: JNEImm dst: r2 off: -1 imm: 2 <block-5>
        29: JNEImm dst: r4 off: -1 imm: 1 <block-5>
        30: JNEImm dst: r5 off: -1 imm: 7 <block-5>
        31: Mov32Imm dst: r0 imm: 1
        32: Exit
 block-5:
        33: JNEImm dst: r2 off: -1 imm: 2 <block-6>
        34: JNEImm dst: r4 off: -1 imm: 1 <block-6>
        35: JNEImm dst: r5 off: -1 imm: 8 <block-6>
        36: Mov32Imm dst: r0 imm: 1
        37: Exit
 block-6:
        38: JNEImm dst: r2 off: -1 imm: 2 <block-7>
        39: JNEImm dst: r4 off: -1 imm: 1 <block-7>
        40: JNEImm dst: r5 off: -1 imm: 9 <block-7>
        41: Mov32Imm dst: r0 imm: 1
        42: Exit
 block-7:
        43: JNEImm dst: r2 off: -1 imm: 2 <block-8>
        44: JNEImm dst: r4 off: -1 imm: 5 <block-8>
        45: JNEImm dst: r5 off: -1 imm: 0 <block-8>
        46: Mov32Imm dst: r0 imm: 1
        47: Exit
 block-8:
        48: JNEImm dst: r2 off: -1 imm: 2 <block-9>
        49: JNEImm dst: r4 off: -1 imm: 5 <block-9>
        50: JNEImm dst: r5 off: -1 imm: 2 <block-9>
        51: Mov32Imm dst: r0 imm: 1
        52: Exit
 block-9:
 // tuntap (c, 10, 200, rwm, allow)
        53: JNEImm dst: r2 off: -1 imm: 2 <block-10>
        54: JNEImm dst: r4 off: -1 imm: 10 <block-10>
        55: JNEImm dst: r5 off: -1 imm: 200 <block-10>
        56: Mov32Imm dst: r0 imm: 1
        57: Exit
 block-10:
 // /dev/pts (c, 136, wildcard, rwm, true)
        58: JNEImm dst: r2 off: -1 imm: 2 <block-11>
        59: JNEImm dst: r4 off: -1 imm: 136 <block-11>
        60: Mov32Imm dst: r0 imm: 1
        61: Exit
 block-11:
        62: Mov32Imm dst: r0 imm: 0
        63: Exit
 `
-	testDeviceFilter(t, specconv.AllowedDevices, expected)
+	var devices []*devices.Rule
 	for _, device := range specconv.AllowedDevices {
 		devices = append(devices, &device.Rule)
 	}
 	testDeviceFilter(t, devices, expected)
 }
 func TestDeviceFilter_Privileged(t *testing.T) {
-	devices := []*configs.Device{
+	devices := []*devices.Rule{
 		{
 			Type:        'a',
 			Major:       -1,
@ -157,21 +156,22 @@ func TestDeviceFilter_Privileged(t *testing.T) {
 	expected :=
 		`
 // load parameters into registers
-        0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
+        0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
-        1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
+        1: And32Imm dst: r2 imm: 65535
-        2: RSh32Imm dst: r3 imm: 16
+        2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
-        3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
+        3: RSh32Imm dst: r3 imm: 16
-        4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
+        4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
        5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
 block-0:
 // return 1 (accept)
-        5: Mov32Imm dst: r0 imm: 1
+        6: Mov32Imm dst: r0 imm: 1
-        6: Exit
+        7: Exit
 	`
 	testDeviceFilter(t, devices, expected)
 }
 func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
-	devices := []*configs.Device{
+	devices := []*devices.Rule{
 		{
 			Type:        'a',
 			Major:       -1,
@ -189,28 +189,29 @@ func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
 	}
 	expected := `
 // load parameters into registers
-         0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
+         0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
-         1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
+         1: And32Imm dst: r2 imm: 65535
-         2: RSh32Imm dst: r3 imm: 16
+         2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
-         3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
+         3: RSh32Imm dst: r3 imm: 16
-         4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
+         4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
         5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
 block-0:
 // return 0 (reject) if type==b && major == 8 && minor == 0
-         5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
+         6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
-         6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
+         7: JNEImm dst: r4 off: -1 imm: 8 <block-1>
-         7: JNEImm dst: r5 off: -1 imm: 0 <block-1>
+         8: JNEImm dst: r5 off: -1 imm: 0 <block-1>
-         8: Mov32Imm dst: r0 imm: 0
+         9: Mov32Imm dst: r0 imm: 0
-         9: Exit
+        10: Exit
 block-1:
 // return 1 (accept)
-        10: Mov32Imm dst: r0 imm: 1
+        11: Mov32Imm dst: r0 imm: 1
-        11: Exit
+        12: Exit
 `
 	testDeviceFilter(t, devices, expected)
 }
 func TestDeviceFilter_Weird(t *testing.T) {
-	devices := []*configs.Device{
+	devices := []*devices.Rule{
 		{
 			Type:        'b',
 			Major:       8,
@ -237,22 +238,23 @@ func TestDeviceFilter_Weird(t *testing.T) {
 	// This conforms to runc v1.0.0-rc.9 (cgroup1) behavior.
 	expected := `
 // load parameters into registers
-         0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
+         0: LdXMemW dst: r2 src: r1 off: 0 imm: 0
-         1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
+         1: And32Imm dst: r2 imm: 65535
-         2: RSh32Imm dst: r3 imm: 16
+         2: LdXMemW dst: r3 src: r1 off: 0 imm: 0
-         3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
+         3: RSh32Imm dst: r3 imm: 16
-         4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
+         4: LdXMemW dst: r4 src: r1 off: 4 imm: 0
         5: LdXMemW dst: r5 src: r1 off: 8 imm: 0
 block-0:
 // return 0 (reject) if type==b && major == 8 && minor == 2
-         5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
+         6: JNEImm dst: r2 off: -1 imm: 1 <block-1>
-         6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
+         7: JNEImm dst: r4 off: -1 imm: 8 <block-1>
-         7: JNEImm dst: r5 off: -1 imm: 2 <block-1>
+         8: JNEImm dst: r5 off: -1 imm: 2 <block-1>
-         8: Mov32Imm dst: r0 imm: 0
+         9: Mov32Imm dst: r0 imm: 0
-         9: Exit
+        10: Exit
 block-1:
 // return 1 (accept)
-        10: Mov32Imm dst: r0 imm: 1
+        11: Mov32Imm dst: r0 imm: 1
-        11: Exit
+        12: Exit
 `
 	testDeviceFilter(t, devices, expected)
 }
--- a/libcontainer/cgroups/ebpf/ebpf.go
+++ b/libcontainer/cgroups/ebpf/ebpf.go
@ -1,45 +0,0 @@
 package ebpf
 import (
 	"github.com/cilium/ebpf"
 	"github.com/cilium/ebpf/asm"
 	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
 )
 // LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
 //
 // Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
 //
 // https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
 //
 // insts and license describe the program to load; dirFD is the target handed
 // to the attach and detach calls. The returned function detaches the program
 // again. It is never nil: when loading or attaching fails, a no-op function
 // is returned together with the error.
 func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
 	nilCloser := func() error {
 		return nil
 	}
 	// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
 	// This limit is not inherited into the container.
 	memlockLimit := &unix.Rlimit{
 		Cur: unix.RLIM_INFINITY,
 		Max: unix.RLIM_INFINITY,
 	}
 	// Best effort: a failure to raise the limit is deliberately ignored.
 	_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
 	spec := &ebpf.ProgramSpec{
 		Type:         ebpf.CGroupDevice,
 		Instructions: insts,
 		License:      license,
 	}
 	prog, err := ebpf.NewProgram(spec)
 	if err != nil {
 		return nilCloser, err
 	}
 	if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
 		// The program was loaded but could not be attached, and the caller
 		// only gets a no-op closer: release our handle instead of leaking it.
 		_ = prog.Close()
 		return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
 	}
 	closer := func() error {
 		if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
 			return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
 		}
 		return nil
 	}
 	return closer, nil
 }
--- a/libcontainer/cgroups/ebpf/ebpf_linux.go
+++ b/libcontainer/cgroups/ebpf/ebpf_linux.go
@ -0,0 +1,253 @@
 package ebpf
 import (
 	"errors"
 	"fmt"
 	"os"
 	"runtime"
 	"sync"
 	"unsafe"
 	"github.com/cilium/ebpf"
 	"github.com/cilium/ebpf/asm"
 	"github.com/cilium/ebpf/link"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
 // nilCloser is a closer that does nothing and always succeeds.
 func nilCloser() error { return nil }
 // findAttachedCgroupDeviceFilters queries the BPF_CGROUP_DEVICE programs
 // attached to the cgroup directory referred to by dirFd and returns a handle
 // for each of them.
 //
 // Programs that cannot be opened because of a permission error are skipped
 // (see the comment in the body). On any other failure no handles are
 // returned and the ones opened so far are closed. The query is retried (up
 // to 10 times) with a resized id buffer when the kernel reports ENOSPC.
 func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
 	// Argument block handed to the bpf(2) syscall for BPF_PROG_QUERY.
 	type bpfAttrQuery struct {
 		TargetFd    uint32
 		AttachType  uint32
 		QueryType   uint32
 		AttachFlags uint32
 		ProgIds     uint64 // __aligned_u64
 		ProgCnt     uint32
 	}
 	// Currently you can only have 64 eBPF programs attached to a cgroup.
 	size := 64
 	retries := 0
 	for retries < 10 {
 		progIds := make([]uint32, size)
 		query := bpfAttrQuery{
 			TargetFd:   uint32(dirFd),
 			AttachType: uint32(unix.BPF_CGROUP_DEVICE),
 			ProgIds:    uint64(uintptr(unsafe.Pointer(&progIds[0]))),
 			ProgCnt:    uint32(len(progIds)),
 		}
 		// Fetch the list of program ids.
 		_, _, errno := unix.Syscall(unix.SYS_BPF,
 			uintptr(unix.BPF_PROG_QUERY),
 			uintptr(unsafe.Pointer(&query)),
 			unsafe.Sizeof(query))
 		// ProgCnt is read back after the call and used both to trim the
 		// result and to size the buffer of the next attempt.
 		size = int(query.ProgCnt)
 		runtime.KeepAlive(query)
 		if errno != 0 {
 			// On ENOSPC we get the correct number of programs.
 			if errno == unix.ENOSPC {
 				retries++
 				continue
 			}
 			return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
 		}
 		// Convert the ids to program handles.
 		progIds = progIds[:size]
 		programs := make([]*ebpf.Program, 0, len(progIds))
 		for _, progId := range progIds {
 			program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
 			if err != nil {
 				// We skip over programs that give us -EACCES or -EPERM. This
 				// is necessary because there may be BPF programs that have
 				// been attached (such as with --systemd-cgroup) which have an
 				// LSM label that blocks us from interacting with the program.
 				//
 				// Because additional BPF_CGROUP_DEVICE programs only can add
 				// restrictions, there's no real issue with just ignoring these
 				// programs (and stops runc from breaking on distributions with
 				// very strict SELinux policies).
 				if errors.Is(err, os.ErrPermission) {
 					logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err)
 					continue
 				}
 				// The caller receives no handles on failure, so close the
 				// ones opened so far instead of leaking them.
 				for _, opened := range programs {
 					_ = opened.Close()
 				}
 				return nil, fmt.Errorf("cannot fetch program from id: %w", err)
 			}
 			programs = append(programs, program)
 		}
 		runtime.KeepAlive(progIds)
 		return programs, nil
 	}
 	return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
 }
 var (
 	// haveBpfProgReplaceBool caches the result of the BPF_F_REPLACE probe
 	// performed by haveBpfProgReplace; it stays false unless the probe
 	// concludes that the flag is supported.
 	haveBpfProgReplaceBool bool
 	// haveBpfProgReplaceOnce makes sure the probe is executed only once.
 	haveBpfProgReplaceOnce sync.Once
 )
 // Loosely based on the BPF_F_REPLACE support check in
 //   <https://github.com/cilium/ebpf/blob/v0.6.0/link/syscalls.go>.
 //
 // TODO: move this logic to cilium/ebpf
 func haveBpfProgReplace() bool {
 	haveBpfProgReplaceOnce.Do(func() {
 		prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{
 			Type:    ebpf.CGroupDevice,
 			License: "MIT",
 			Instructions: asm.Instructions{
 				asm.Mov.Imm(asm.R0, 0),
 				asm.Return(),
 			},
 		})
 		if err != nil {
 			logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err)
 			return
 		}
 		defer prog.Close()
 		devnull, err := os.Open("/dev/null")
 		if err != nil {
 			logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err)
 			return
 		}
 		defer devnull.Close()
 		// We know that we have BPF_PROG_ATTACH since we can load
 		// BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL
 		// we know that the feature isn't present.
 		err = link.RawAttachProgram(link.RawAttachProgramOptions{
 			// We rely on this fd being checked after attachFlags.
 			Target: int(devnull.Fd()),
 			// Attempt to "replace" bad fds with this program.
 			Program: prog,
 			Attach:  ebpf.AttachCGroupDevice,
 			Flags:   unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE,
 		})
 		if errors.Is(err, unix.EINVAL) {
 			// not supported
 			return
 		}
 		// attach_flags test succeeded.
 		if !errors.Is(err, unix.EBADF) {
 			logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err)
 		}
 		haveBpfProgReplaceBool = true
 	})
 	return haveBpfProgReplaceBool
 }
 // LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
 //
 // Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
 //
 // https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
 //
 // Device filters that are already attached to the cgroup are superseded:
 // when exactly one is found and BPF_F_REPLACE is available it is replaced
 // in a single attach call, otherwise the new program is attached first and
 // the old ones are detached afterwards.
 //
 // The returned function detaches the new program. It is never nil: when the
 // program could not be loaded or attached a no-op function is returned
 // together with the error. If detaching an old filter fails, the error is
 // returned together with the real closer, as the new program is attached
 // at that point.
 func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
 	// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
 	// This limit is not inherited into the container.
 	memlockLimit := &unix.Rlimit{
 		Cur: unix.RLIM_INFINITY,
 		Max: unix.RLIM_INFINITY,
 	}
 	// Best effort: a failure to raise the limit is deliberately ignored.
 	_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
 	// Get the list of existing programs.
 	oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
 	if err != nil {
 		return nilCloser, err
 	}
 	// The handles to the old programs are only used within this call, so
 	// release them on every return path instead of leaking them. The
 	// explicit detach/replace below is what removes the old filters.
 	defer func() {
 		for _, oldProg := range oldProgs {
 			_ = oldProg.Close()
 		}
 	}()
 	useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1
 	// Generate new program.
 	spec := &ebpf.ProgramSpec{
 		Type:         ebpf.CGroupDevice,
 		Instructions: insts,
 		License:      license,
 	}
 	prog, err := ebpf.NewProgram(spec)
 	if err != nil {
 		return nilCloser, err
 	}
 	// If there is only one old program, we can just replace it directly.
 	var (
 		replaceProg *ebpf.Program
 		attachFlags uint32 = unix.BPF_F_ALLOW_MULTI
 	)
 	if useReplaceProg {
 		replaceProg = oldProgs[0]
 		attachFlags |= unix.BPF_F_REPLACE
 	}
 	err = link.RawAttachProgram(link.RawAttachProgramOptions{
 		Target:  dirFd,
 		Program: prog,
 		Replace: replaceProg,
 		Attach:  ebpf.AttachCGroupDevice,
 		Flags:   attachFlags,
 	})
 	if err != nil {
 		// The new program was loaded but never attached, and the caller
 		// only gets a no-op closer: release our handle instead of leaking it.
 		_ = prog.Close()
 		return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
 	}
 	closer := func() error {
 		// Keep the error local to the closure rather than writing to the
 		// enclosing function's err variable.
 		if err := link.RawDetachProgram(link.RawDetachProgramOptions{
 			Target:  dirFd,
 			Program: prog,
 			Attach:  ebpf.AttachCGroupDevice,
 		}); err != nil {
 			return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
 		}
 		// TODO: Should we attach the old filters back in this case? Otherwise
 		//       we fail-open on a security feature, which is a bit scary.
 		return nil
 	}
 	if !useReplaceProg {
 		logLevel := logrus.DebugLevel
 		// If there was more than one old program, give a warning (since this
 		// really shouldn't happen with runc-managed cgroups) and then detach
 		// all the old programs.
 		if len(oldProgs) > 1 {
 			// NOTE: Ideally this should be a warning but it turns out that
 			//       systemd-managed cgroups trigger this warning (apparently
 			//       systemd doesn't delete old non-systemd programs when
 			//       setting properties).
 			logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
 			logLevel = logrus.InfoLevel
 		}
 		for idx, oldProg := range oldProgs {
 			// Output some extra debug info.
 			if info, err := oldProg.Info(); err == nil {
 				fields := logrus.Fields{
 					"type": info.Type.String(),
 					"tag":  info.Tag,
 					"name": info.Name,
 				}
 				if id, ok := info.ID(); ok {
 					fields["id"] = id
 				}
 				if runCount, ok := info.RunCount(); ok {
 					fields["run_count"] = runCount
 				}
 				// Named rt so that the runtime package is not shadowed.
 				if rt, ok := info.Runtime(); ok {
 					fields["runtime"] = rt.String()
 				}
 				logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx)
 			}
 			if err := link.RawDetachProgram(link.RawDetachProgramOptions{
 				Target:  dirFd,
 				Program: oldProg,
 				Attach:  ebpf.AttachCGroupDevice,
 			}); err != nil {
 				return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
 			}
 		}
 	}
 	return closer, nil
 }
--- a/libcontainer/cgroups/file.go
+++ b/libcontainer/cgroups/file.go
@ -0,0 +1,190 @@
 package cgroups
 import (
 	"bytes"
 	"errors"
 	"fmt"
 	"os"
 	"path"
 	"strconv"
 	"strings"
 	"sync"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
 // OpenFile opens the cgroup file named file, located in directory dir, using
 // the given open flags. It is meant for cgroup files only, and fails for
 // files that are not cgroup files.
 //
 // dir and file are joined to form the absolute path of the file to open;
 // an empty dir is rejected with an error.
 func OpenFile(dir, file string, flags int) (*os.File, error) {
 	if len(dir) == 0 {
 		err := fmt.Errorf("no directory specified for %s", file)
 		return nil, err
 	}
 	f, err := openFile(dir, file, flags)
 	return f, err
 }
 // ReadFile returns the whole contents of the cgroup file named file in
 // directory dir. It is meant for cgroup files only.
 //
 // If reading fails part-way, the data read so far is returned along with
 // the error.
 func ReadFile(dir, file string) (string, error) {
 	f, openErr := OpenFile(dir, file, unix.O_RDONLY)
 	if openErr != nil {
 		return "", openErr
 	}
 	defer f.Close()
 	contents := new(bytes.Buffer)
 	_, readErr := contents.ReadFrom(f)
 	return contents.String(), readErr
 }
 // WriteFile writes data to the cgroup file named file in directory dir.
 // It is meant for cgroup files only. A write error is wrapped in a message
 // that includes the data.
 func WriteFile(dir, file, data string) error {
 	f, openErr := OpenFile(dir, file, unix.O_WRONLY)
 	if openErr != nil {
 		return openErr
 	}
 	defer f.Close()
 	writeErr := retryingWriteFile(f, data)
 	if writeErr == nil {
 		return nil
 	}
 	// Having data in the error message helps in debugging.
 	return fmt.Errorf("failed to write %q: %w", data, writeErr)
 }
 // retryingWriteFile writes data to fd, repeating the write for as long as
 // it fails with EINTR (each interruption is logged). The result of the
 // first write that is not interrupted is returned.
 func retryingWriteFile(fd *os.File, data string) error {
 	payload := []byte(data)
 	for {
 		_, writeErr := fd.Write(payload)
 		if !errors.Is(writeErr, unix.EINTR) {
 			return writeErr
 		}
 		logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
 	}
 }
 const (
 	// cgroupfsDir is the directory opened by prepareOpenat2; paths below it
 	// are opened relative to that descriptor.
 	cgroupfsDir    = "/sys/fs/cgroup"
 	// cgroupfsPrefix is cgroupfsDir with a trailing slash; openFile strips it
 	// to obtain a path relative to cgroupfsDir.
 	cgroupfsPrefix = cgroupfsDir + "/"
 )
 var (
 	// TestMode is set to true by unit tests that need "fake" cgroupfs.
 	TestMode bool
 	// cgroupFd is the descriptor of cgroupfsDir, set by prepareOpenat2 on
 	// success; it remains -1 otherwise.
 	cgroupFd     int = -1
 	// prepOnce guards the one-time setup done by prepareOpenat2.
 	prepOnce     sync.Once
 	// prepErr is the outcome of that setup, returned by every
 	// prepareOpenat2 call.
 	prepErr      error
 	// resolveFlags holds the openat2 RESOLVE_* flags chosen by
 	// prepareOpenat2 and used by openFile.
 	resolveFlags uint64
 )
 // prepareOpenat2 performs the one-time setup needed to open cgroup files
 // with openat2(2): it opens cgroupfsDir, stores the descriptor in cgroupFd
 // and picks resolveFlags according to the filesystem type found there.
 //
 // The setup runs only once; every call returns the error (if any) of that
 // single attempt. A non-nil result tells the caller to use the fallback way
 // of opening files.
 func prepareOpenat2() error {
 	prepOnce.Do(func() {
 		fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
 			Flags: unix.O_DIRECTORY | unix.O_PATH,
 		})
 		if err != nil {
 			prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
 			if err != unix.ENOSYS { //nolint:errorlint // unix errors are bare
 				logrus.Warnf("falling back to securejoin: %s", prepErr)
 			} else {
 				logrus.Debug("openat2 not available, falling back to securejoin")
 			}
 			return
 		}
 		var st unix.Statfs_t
 		if err = unix.Fstatfs(fd, &st); err != nil {
 			prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
 			logrus.Warnf("falling back to securejoin: %s", prepErr)
 			// cgroupFd is not set on this path, so nothing would ever use
 			// or close the descriptor; release it here instead of leaking it.
 			_ = unix.Close(fd)
 			return
 		}
 		cgroupFd = fd
 		resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
 		if st.Type == unix.CGROUP2_SUPER_MAGIC {
 			// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
 			resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS
 		}
 	})
 	return prepErr
 }
 // openFile opens dir/file with the given flags. It uses openat2(2) relative
 // to cgroupFd when that is set up and the path lies below cgroupfsDir, and
 // hands over to openFallback otherwise.
 func openFile(dir, file string, flags int) (*os.File, error) {
 	var mode os.FileMode
 	if TestMode && flags&os.O_WRONLY != 0 {
 		// "emulate" cgroup fs for unit tests
 		flags |= os.O_TRUNC | os.O_CREATE
 		mode = 0o600
 	}
 	fullPath := path.Join(dir, file)
 	// Fall back when openat2 is not usable, or when the path is not below
 	// cgroupfsDir (non-standard path, old system?).
 	if prepareOpenat2() != nil || !strings.HasPrefix(fullPath, cgroupfsPrefix) {
 		return openFallback(fullPath, flags, mode)
 	}
 	relPath := fullPath[len(cgroupfsPrefix):]
 	how := unix.OpenHow{
 		Resolve: resolveFlags,
 		Flags:   uint64(flags) | unix.O_CLOEXEC,
 		Mode:    uint64(mode),
 	}
 	fd, err := unix.Openat2(cgroupFd, relPath, &how)
 	if err == nil {
 		return os.NewFile(uintptr(fd), fullPath), nil
 	}
 	err = &os.PathError{Op: "openat2", Path: fullPath, Err: err}
 	// Check if cgroupFd is still opened to cgroupfsDir
 	// (happens when this package is incorrectly used
 	// across the chroot/pivot_root/mntns boundary, or
 	// when /sys/fs/cgroup is remounted).
 	//
 	// TODO: if such usage will ever be common, amend this
 	// to reopen cgroupFd and retry openat2.
 	fdStr := strconv.Itoa(cgroupFd)
 	if fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr); fdDest != cgroupfsDir {
 		// Wrap the error so it is clear that cgroupFd
 		// is opened to an unexpected/wrong directory.
 		err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w",
 			fdStr, fdDest, cgroupfsDir, err)
 	}
 	return nil, err
 }
 var errNotCgroupfs = errors.New("not a cgroup file")
 // Can be changed by unit tests.
 var openFallback = openAndCheck
 // openAndCheck is the way of opening a file used when openat2(2) is not
 // available. After opening it verifies that the file lives on a cgroup
 // (v1 or v2) filesystem, and fails otherwise. The filesystem check is
 // skipped in TestMode.
 func openAndCheck(path string, flags int, mode os.FileMode) (*os.File, error) {
 	f, openErr := os.OpenFile(path, flags, mode)
 	if openErr != nil {
 		return nil, openErr
 	}
 	if TestMode {
 		return f, nil
 	}
 	// Check this is a cgroupfs file.
 	var st unix.Statfs_t
 	statErr := unix.Fstatfs(int(f.Fd()), &st)
 	if statErr == nil && (st.Type == unix.CGROUP_SUPER_MAGIC || st.Type == unix.CGROUP2_SUPER_MAGIC) {
 		return f, nil
 	}
 	// Either failure means the file must not be handed out.
 	_ = f.Close()
 	if statErr != nil {
 		return nil, &os.PathError{Op: "statfs", Path: path, Err: statErr}
 	}
 	return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs}
 }
--- a/libcontainer/cgroups/file_test.go
+++ b/libcontainer/cgroups/file_test.go
@ -0,0 +1,73 @@
 package cgroups
 import (
 	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strconv"
 	"testing"
 	"time"
 )
 // TestWriteCgroupFileHandlesInterrupt writes a memory limit to a scratch
 // cgroup many times in a row and fails if any of the writes returns an
 // error. It is skipped when the cgroup v1 memory controller files it needs
 // are not present.
 func TestWriteCgroupFileHandlesInterrupt(t *testing.T) {
 	const (
 		mountPoint = "/sys/fs/cgroup/memory"
 		limitFile  = "memory.limit_in_bytes"
 	)
 	_, statErr := os.Stat(mountPoint)
 	if statErr != nil {
 		// most probably cgroupv2
 		t.Skip(statErr)
 	}
 	scratchDir := filepath.Join(mountPoint, fmt.Sprintf("test-eint-%d", time.Now().Nanosecond()))
 	if mkErr := os.MkdirAll(scratchDir, 0o755); mkErr != nil {
 		t.Fatal(mkErr)
 	}
 	defer os.RemoveAll(scratchDir)
 	_, statErr = os.Stat(filepath.Join(scratchDir, limitFile))
 	if statErr != nil {
 		// either cgroupv2, or memory controller is not available
 		t.Skip(statErr)
 	}
 	const base = 1024 * 1024
 	for attempt := 0; attempt < 100000; attempt++ {
 		limit := base + attempt
 		writeErr := WriteFile(scratchDir, limitFile, strconv.Itoa(limit))
 		if writeErr != nil {
 			t.Fatalf("Failed to write %d on attempt %d: %+v", limit, attempt, writeErr)
 		}
 	}
 }
 // TestOpenat2 checks that OpenFile can open cgroup.controllers through the
 // openat2 code path for a number of dir/file spellings of the same kind of
 // path. The fallback opener is replaced by one that always fails, so a case
 // that does not go through openat2 shows up as an error.
 func TestOpenat2(t *testing.T) {
 	if !IsCgroup2UnifiedMode() {
 		// The reason is many test cases below test opening files from
 		// the top-level directory, where cgroup v1 has no files.
 		t.Skip("test requires cgroup v2")
 	}
 	// Make sure we test openat2, not its fallback.
 	openFallback = func(_ string, _ int, _ os.FileMode) (*os.File, error) {
 		return nil, errors.New("fallback")
 	}
 	defer func() { openFallback = openAndCheck }()
 	for _, tc := range []struct{ dir, file string }{
 		{"/sys/fs/cgroup", "cgroup.controllers"},
 		{"/sys/fs/cgroup", "/cgroup.controllers"},
 		{"/sys/fs/cgroup/", "cgroup.controllers"},
 		{"/sys/fs/cgroup/", "/cgroup.controllers"},
 		{"/sys/fs/cgroup/user.slice", "cgroup.controllers"},
 		{"/sys/fs/cgroup/user.slice/", "/cgroup.controllers"},
 		{"/", "/sys/fs/cgroup/cgroup.controllers"},
 		{"/", "sys/fs/cgroup/cgroup.controllers"},
 		{"/sys/fs/cgroup/cgroup.controllers", ""},
 	} {
 		fd, err := OpenFile(tc.dir, tc.file, os.O_RDONLY)
 		if err != nil {
 			t.Errorf("case %+v: %v", tc, err)
 			// Nothing was opened, so there is nothing to close.
 			continue
 		}
 		fd.Close()
 	}
 }
--- a/libcontainer/cgroups/fs/apply_raw.go
+++ b/libcontainer/cgroups/fs/apply_raw.go
@ -1,411 +0,0 @@
 // +build linux
 package fs
 import (
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"sync"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
 	"github.com/pkg/errors"
 	"golang.org/x/sys/unix"
 )
 var (
 	// subsystemsLegacy is the ordered set of subsystem handlers that
 	// Manager.getSubsystems returns; Apply and Set walk it in this order.
 	subsystemsLegacy = subsystemSet{
 		&CpusetGroup{},
 		&DevicesGroup{},
 		&MemoryGroup{},
 		&CpuGroup{},
 		&CpuacctGroup{},
 		&PidsGroup{},
 		&BlkioGroup{},
 		&HugetlbGroup{},
 		&NetClsGroup{},
 		&NetPrioGroup{},
 		&PerfEventGroup{},
 		&FreezerGroup{},
 		&NameGroup{GroupName: "name=systemd", Join: true},
 	}
 	// HugePageSizes holds the first result of cgroups.GetHugePageSize,
 	// evaluated once at package initialization. The error returned by that
 	// call is discarded here.
 	HugePageSizes, _ = cgroups.GetHugePageSize()
 )
 // errSubsystemDoesNotExist is the sentinel returned by subsystemSet.Get when
 // no subsystem with the requested name is in the set.
 var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
 // subsystemSet is an ordered collection of subsystem handlers.
 type subsystemSet []subsystem
 // Get returns the first subsystem in s whose Name() equals name, or
 // errSubsystemDoesNotExist when there is none.
 func (s subsystemSet) Get(name string) (subsystem, error) {
 	for i := range s {
 		if candidate := s[i]; candidate.Name() == name {
 			return candidate, nil
 		}
 	}
 	return nil, errSubsystemDoesNotExist
 }
 // subsystem is the set of operations the Manager requires from each cgroup
 // subsystem handler stored in a subsystemSet.
 type subsystem interface {
 	// Name returns the name of the subsystem.
 	Name() string
 	// GetStats fills 'stats' with the statistics of the cgroup under 'path'.
 	GetStats(path string, stats *cgroups.Stats) error
 	// Remove removes the cgroup represented by 'cgroupData'.
 	Remove(*cgroupData) error
 	// Apply creates and joins the cgroup represented by 'cgroupData'.
 	Apply(*cgroupData) error
 	// Set applies the settings from 'cgroup' to the cgroup under 'path'.
 	Set(path string, cgroup *configs.Cgroup) error
 }
 // Manager applies, queries and removes the cgroups of one container across
 // the legacy subsystems.
 type Manager struct {
 	// mu is held by Apply, Destroy, GetPaths and GetStats while they read or
 	// replace Paths.
 	mu       sync.Mutex
 	// Cgroups is the cgroup configuration; Apply and Destroy are no-ops when
 	// it is nil.
 	Cgroups  *configs.Cgroup
 	Rootless bool // ignore permission-related errors
 	// Paths maps a subsystem name to the cgroup path recorded for it by Apply.
 	Paths    map[string]string
 }
 var (
 	// cgroupRootLock serializes access to cgroupRoot.
 	cgroupRootLock sync.Mutex
 	// cgroupRoot caches the absolute path to the root of the cgroup
 	// hierarchies; it stays empty until getCgroupRoot has found it.
 	cgroupRoot string
 )
 // getCgroupRoot returns the cached cgroup root, discovering it on first use
 // via cgroups.FindCgroupMountpointDir and verifying that it can be stat'ed.
 // Nothing is cached when discovery or the stat fails, so a later call tries
 // again.
 func getCgroupRoot() (string, error) {
 	cgroupRootLock.Lock()
 	defer cgroupRootLock.Unlock()
 	if cgroupRoot == "" {
 		found, err := cgroups.FindCgroupMountpointDir()
 		if err != nil {
 			return "", err
 		}
 		if _, statErr := os.Stat(found); statErr != nil {
 			return "", statErr
 		}
 		cgroupRoot = found
 	}
 	return cgroupRoot, nil
 }
 // cgroupData bundles what is needed to compute and join a container's
 // per-subsystem cgroup paths; it is built by getCgroupData.
 type cgroupData struct {
 	// root is the cgroup root as returned by getCgroupRoot.
 	root      string
 	// innerPath is the cleaned cgroup path: either the configured Path or
 	// Parent joined with Name.
 	innerPath string
 	// config is the cgroup configuration this data was derived from.
 	config    *configs.Cgroup
 	// pid is the process that join writes into the cgroup.
 	pid       int
 }
 // isIgnorableError reports whether err may be disregarded as a permission
 // problem (in the loose sense of the word). It never ignores anything unless
 // rootless is true. Besides whatever os.IsPermission accepts, an EROFS, EPERM
 // or EACCES wrapped in an *os.PathError, *os.LinkError or *os.SyscallError
 // also counts, since for an unprivileged user those are basically permission
 // errors too.
 func isIgnorableError(rootless bool, err error) bool {
 	// We do not ignore errors if we are root.
 	if !rootless {
 		return false
 	}
 	cause := errors.Cause(err)
 	// Is it an ordinary EPERM?
 	if os.IsPermission(cause) {
 		return true
 	}
 	// Try to handle other errnos: unwrap the os error types that carry one.
 	var errno error
 	switch wrapped := cause.(type) {
 	case *os.PathError:
 		errno = wrapped.Err
 	case *os.LinkError:
 		errno = wrapped.Err
 	case *os.SyscallError:
 		errno = wrapped.Err
 	}
 	switch errno {
 	case unix.EROFS, unix.EPERM, unix.EACCES:
 		return true
 	}
 	return false
 }
 // getSubsystems returns the set of subsystem handlers this manager works
 // with, which is always the package-level subsystemsLegacy.
 func (m *Manager) getSubsystems() subsystemSet {
 	var set subsystemSet = subsystemsLegacy
 	return set
 }
 // Apply places pid into the container's cgroups and records the resulting
 // per-subsystem paths in m.Paths. It does nothing when m.Cgroups is nil.
 //
 // When the configuration carries explicit Paths, those are recorded (for
 // subsystems whose path can be resolved) and the pid is entered into them.
 // Otherwise every subsystem from getSubsystems is applied in turn.
 func (m *Manager) Apply(pid int) (err error) {
 	if m.Cgroups == nil {
 		return nil
 	}
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	var c = m.Cgroups
 	d, err := getCgroupData(m.Cgroups, pid)
 	if err != nil {
 		return err
 	}
 	// Start from an empty map; any paths from a previous Apply are dropped.
 	m.Paths = make(map[string]string)
 	if c.Paths != nil {
 		for name, path := range c.Paths {
 			// d.path is called only for its error: subsystems reported as
 			// not found are skipped, and the configured path is stored
 			// rather than the computed one.
 			_, err := d.path(name)
 			if err != nil {
 				if cgroups.IsNotFound(err) {
 					continue
 				}
 				return err
 			}
 			m.Paths[name] = path
 		}
 		return cgroups.EnterPid(m.Paths, pid)
 	}
 	for _, sys := range m.getSubsystems() {
 		// TODO: Apply should, ideally, be reentrant or be broken up into a separate
 		// create and join phase so that the cgroup hierarchy for a container can be
 		// created then join consists of writing the process pids to cgroup.procs
 		p, err := d.path(sys.Name())
 		if err != nil {
 			// The non-presence of the devices subsystem is
 			// considered fatal for security reasons.
 			if cgroups.IsNotFound(err) && sys.Name() != "devices" {
 				continue
 			}
 			return err
 		}
 		// The path is recorded before sys.Apply runs and removed again below
 		// if the failure turns out to be ignorable.
 		m.Paths[sys.Name()] = p
 		if err := sys.Apply(d); err != nil {
 			// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
 			// been set, we don't bail on error in case of permission problems.
 			// Cases where limits have been set (and we couldn't create our own
 			// cgroup) are handled by Set.
 			if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" {
 				delete(m.Paths, sys.Name())
 				continue
 			}
 			return err
 		}
 	}
 	return nil
 }
 // Destroy removes the cgroup paths recorded in m.Paths and, on success,
 // resets m.Paths to an empty map. It is a no-op when there is no cgroup
 // configuration or when the configuration carries explicit Paths.
 func (m *Manager) Destroy() error {
 	if cfg := m.Cgroups; cfg == nil || cfg.Paths != nil {
 		return nil
 	}
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	err := cgroups.RemovePaths(m.Paths)
 	if err == nil {
 		m.Paths = make(map[string]string)
 	}
 	return err
 }
 // GetPaths returns the subsystem-name-to-path map recorded by Apply, reading
 // the field under m.mu. The map itself is returned, not a copy.
 func (m *Manager) GetPaths() map[string]string {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	return m.Paths
 }
 // GetUnifiedPath always fails for this manager: it returns an empty path and
 // an error stating that a unified path exists only in unified mode.
 func (m *Manager) GetUnifiedPath() (string, error) {
 	err := errors.New("unified path is only supported when running in unified mode")
 	return "", err
 }
 // GetStats collects statistics from every recorded cgroup path. Entries whose
 // subsystem is unknown or whose path no longer exists are skipped; the first
 // subsystem that fails to report aborts the collection with its error.
 func (m *Manager) GetStats() (*cgroups.Stats, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	result := cgroups.NewStats()
 	for name, dir := range m.Paths {
 		handler, lookupErr := m.getSubsystems().Get(name)
 		if lookupErr == errSubsystemDoesNotExist {
 			continue
 		}
 		if !cgroups.PathExists(dir) {
 			continue
 		}
 		if statErr := handler.GetStats(dir, result); statErr != nil {
 			return nil, statErr
 		}
 	}
 	return result, nil
 }
 // Set applies the resource settings in container.Cgroups to every subsystem
 // returned by getSubsystems, using the paths recorded by Apply, and finally
 // verifies the cpu shares via CheckCpushares when a cpu path is recorded.
 // It does nothing when container.Cgroups is nil or when the manager was
 // configured with explicit Paths.
 func (m *Manager) Set(container *configs.Config) error {
 	if container.Cgroups == nil {
 		return nil
 	}
 	// If Paths are set, then we are just joining cgroups paths
 	// and there is no need to set any values.
 	if m.Cgroups != nil && m.Cgroups.Paths != nil {
 		return nil
 	}
 	paths := m.GetPaths()
 	for _, sys := range m.getSubsystems() {
 		// path is "" for a subsystem that has no recorded path.
 		path := paths[sys.Name()]
 		if err := sys.Set(path, container.Cgroups); err != nil {
 			// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
 			if m.Rootless && sys.Name() == "devices" {
 				continue
 			}
 			// However, errors from other subsystems are not ignored.
 			// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
 			if path == "" {
 				// We never created a path for this cgroup, so we cannot set
 				// limits for it (though we have already tried at this point).
 				return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
 			}
 			return err
 		}
 	}
 	// NOTE(review): m.Paths is read here without holding m.mu, unlike the
 	// GetPaths call above — confirm this is intended.
 	if m.Paths["cpu"] != "" {
 		if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 // Freeze toggles the container's freezer cgroup depending on the state
 // provided. The requested state is stored in m.Cgroups.Resources.Freezer
 // and pushed to the freezer subsystem; if the subsystem rejects it, the
 // previously recorded state is restored and the error is returned.
 // An error is also returned when the manager has no cgroup configuration.
 func (m *Manager) Freeze(state configs.FreezerState) error {
 	if m.Cgroups == nil {
 		return errors.New("cannot toggle freezer: cgroups not configured for container")
 	}
 	// Resolve the subsystem before touching m.Cgroups, so that a failed
 	// lookup cannot leave the recorded freezer state changed.
 	freezer, err := m.getSubsystems().Get("freezer")
 	if err != nil {
 		return err
 	}
 	dir := m.GetPaths()["freezer"]
 	prevState := m.Cgroups.Resources.Freezer
 	// The subsystem reads the desired state from the configuration itself,
 	// so it has to be recorded before calling Set.
 	m.Cgroups.Resources.Freezer = state
 	if err := freezer.Set(dir, m.Cgroups); err != nil {
 		// Roll back so the configuration keeps the last recorded state.
 		m.Cgroups.Resources.Freezer = prevState
 		return err
 	}
 	return nil
 }
 // GetPids returns what cgroups.GetPids reports for the path recorded for the
 // "devices" subsystem.
 func (m *Manager) GetPids() ([]int, error) {
 	devicesPath := m.GetPaths()["devices"]
 	return cgroups.GetPids(devicesPath)
 }
 // GetAllPids returns what cgroups.GetAllPids reports for the path recorded
 // for the "devices" subsystem.
 func (m *Manager) GetAllPids() ([]int, error) {
 	devicesPath := m.GetPaths()["devices"]
 	return cgroups.GetAllPids(devicesPath)
 }
 // getCgroupData builds the cgroupData for configuration c and process pid.
 // The inner path is the cleaned c.Path when that is non-empty, otherwise the
 // cleaned c.Parent joined with the cleaned c.Name. Setting Path together with
 // Name or Parent is rejected. It fails if the cgroup root cannot be found.
 func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
 	cgRoot, err := getCgroupRoot()
 	if err != nil {
 		return nil, err
 	}
 	usesNameOrParent := c.Name != "" || c.Parent != ""
 	if usesNameOrParent && c.Path != "" {
 		return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
 	}
 	// XXX: Do not remove this code. Path safety is important! -- cyphar
 	cleanPath := libcontainerUtils.CleanPath(c.Path)
 	cleanParent := libcontainerUtils.CleanPath(c.Parent)
 	cleanName := libcontainerUtils.CleanPath(c.Name)
 	data := &cgroupData{
 		root:      cgRoot,
 		innerPath: cleanPath,
 		config:    c,
 		pid:       pid,
 	}
 	if data.innerPath == "" {
 		data.innerPath = filepath.Join(cleanParent, cleanName)
 	}
 	return data, nil
 }
 // path computes the cgroup directory for the given subsystem. It fails when
 // the subsystem's mountpoint cannot be found. An absolute innerPath is placed
 // under the subsystem's directory in the cgroup root; a relative one is
 // placed under the calling process's own cgroup for that subsystem.
 func (raw *cgroupData) path(subsystem string) (string, error) {
 	mountpoint, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
 	// If we didn't mount the subsystem, there is no point we make the path.
 	if err != nil {
 		return "", err
 	}
 	if !filepath.IsAbs(raw.innerPath) {
 		// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
 		// process could in container and shared pid namespace with host, and
 		// /proc/1/cgroup could point to whole other world of cgroups.
 		ownPath, ownErr := cgroups.GetOwnCgroupPath(subsystem)
 		if ownErr != nil {
 			return "", ownErr
 		}
 		return filepath.Join(ownPath, raw.innerPath), nil
 	}
 	// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
 	// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
 	hierarchy := filepath.Base(mountpoint)
 	return filepath.Join(raw.root, hierarchy, raw.innerPath), nil
 }
 // join creates the cgroup directory for subsystem (including parents) and
 // writes raw.pid into it via cgroups.WriteCgroupProc. It returns the
 // directory on success and an empty string with the error otherwise.
 func (raw *cgroupData) join(subsystem string) (string, error) {
 	dir, err := raw.path(subsystem)
 	if err != nil {
 		return "", err
 	}
 	if err = os.MkdirAll(dir, 0755); err != nil {
 		return "", err
 	}
 	if err = cgroups.WriteCgroupProc(dir, raw.pid); err != nil {
 		return "", err
 	}
 	return dir, nil
 }
 // removePath removes p and everything below it. Its two parameters match the
 // results of a (path, error) lookup so the call can be chained directly: a
 // non-nil err is returned unchanged without touching the filesystem, and an
 // empty p is a no-op.
 func removePath(p string, err error) error {
 	switch {
 	case err != nil:
 		return err
 	case p == "":
 		return nil
 	default:
 		return os.RemoveAll(p)
 	}
 }
 // CheckCpushares compares the requested cpu shares c with the value read
 // back from the cpu.shares file in path. A request of 0 is not checked at
 // all. When the file holds a smaller value than c the error names it as the
 // maximum allowed; when it holds a larger value the error names it as the
 // minimum allowed. Errors from opening or parsing the file are returned as
 // is, except io.EOF from parsing, which is tolerated.
 func CheckCpushares(path string, c uint64) error {
 	if c == 0 {
 		return nil
 	}
 	sharesFile, err := os.Open(filepath.Join(path, "cpu.shares"))
 	if err != nil {
 		return err
 	}
 	defer sharesFile.Close()
 	var effective uint64
 	if _, scanErr := fmt.Fscanf(sharesFile, "%d", &effective); scanErr != nil && scanErr != io.EOF {
 		return scanErr
 	}
 	switch {
 	case c > effective:
 		return fmt.Errorf("The maximum allowed cpu-shares is %d", effective)
 	case c < effective:
 		return fmt.Errorf("The minimum allowed cpu-shares is %d", effective)
 	}
 	return nil
 }
 // GetCgroups returns the manager's cgroup configuration (possibly nil).
 // The returned error is always nil.
 func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
 	cfg := m.Cgroups
 	return cfg, nil
 }
--- a/libcontainer/cgroups/fs/apply_raw_test.go
+++ b/libcontainer/cgroups/fs/apply_raw_test.go
@ -1,297 +0,0 @@
 // +build linux
 package fs
 import (
 	"path/filepath"
 	"strings"
 	"testing"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 // TestInvalidCgroupPath checks that a relative cgroup Path made of ".."
 // components cannot resolve to a location outside the cgroup mountpoint.
 // Setup failures abort the test, because the later checks need their results.
 func TestInvalidCgroupPath(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Path: "../../../../../../../../../../some/path",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
 // TestInvalidAbsoluteCgroupPath checks that an absolute cgroup Path made of
 // ".." components cannot resolve to a location outside the cgroup mountpoint.
 // Setup failures abort the test, because the later checks need their results.
 func TestInvalidAbsoluteCgroupPath(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Path: "/../../../../../../../../../../some/path",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
 // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
 //
 // TestInvalidCgroupParent checks that a relative cgroup Parent made of ".."
 // components cannot resolve to a location outside the cgroup mountpoint.
 // Setup failures abort the test, because the later checks need their results.
 func TestInvalidCgroupParent(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Parent: "../../../../../../../../../../some/path",
 		Name:   "name",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
 // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
 //
 // TestInvalidAbsoluteCgroupParent checks that an absolute cgroup Parent made
 // of ".." components cannot resolve to a location outside the cgroup
 // mountpoint. Setup failures abort the test, because the later checks need
 // their results.
 func TestInvalidAbsoluteCgroupParent(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Parent: "/../../../../../../../../../../some/path",
 		Name:   "name",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
 // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
 //
 // TestInvalidCgroupName checks that a relative cgroup Name made of ".."
 // components cannot resolve to a location outside the cgroup mountpoint.
 // Setup failures abort the test, because the later checks need their results.
 func TestInvalidCgroupName(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Parent: "parent",
 		Name:   "../../../../../../../../../../some/path",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
 // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
 //
 // TestInvalidAbsoluteCgroupName checks that an absolute cgroup Name made of
 // ".." components cannot resolve to a location outside the cgroup mountpoint.
 // Setup failures abort the test, because the later checks need their results.
 func TestInvalidAbsoluteCgroupName(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Parent: "parent",
 		Name:   "/../../../../../../../../../../some/path",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
 // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
 //
 // TestInvalidCgroupNameAndParent checks that relative ".." components in both
 // Parent and Name cannot resolve to a location outside the cgroup mountpoint.
 // Setup failures abort the test, because the later checks need their results.
 func TestInvalidCgroupNameAndParent(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Parent: "../../../../../../../../../../some/path",
 		Name:   "../../../../../../../../../../some/path",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
 // XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
 //
 // TestInvalidAbsoluteCgroupNameAndParent checks that absolute ".." components
 // in both Parent and Name cannot resolve to a location outside the cgroup
 // mountpoint. Setup failures abort the test, because the later checks need
 // their results.
 func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := getCgroupRoot()
 	if err != nil {
 		// root is needed for the prefix check below.
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	config := &configs.Cgroup{
 		Parent: "/../../../../../../../../../../some/path",
 		Name:   "/../../../../../../../../../../some/path",
 	}
 	data, err := getCgroupData(config, 0)
 	if err != nil {
 		// data is nil on error; dereferencing it below would panic.
 		t.Fatalf("couldn't get cgroup data: %v", err)
 	}
 	// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
 	if strings.HasPrefix(data.innerPath, "..") {
 		t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 	}
 	// Double-check, using an actual cgroup.
 	deviceRoot := filepath.Join(root, "devices")
 	devicePath, err := data.path("devices")
 	if err != nil {
 		t.Fatalf("couldn't get cgroup path: %v", err)
 	}
 	if !strings.HasPrefix(devicePath, deviceRoot) {
 		t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 	}
 }
--- a/libcontainer/cgroups/fs/blkio.go
+++ b/libcontainer/cgroups/fs/blkio.go
@ -1,72 +1,71 @@
 // +build linux
 package fs
 import (
 	"bufio"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 type BlkioGroup struct {
 	weightFilename       string
 	weightDeviceFilename string
 }
 func (s *BlkioGroup) Name() string {
 	return "blkio"
 }
-func (s *BlkioGroup) Apply(d *cgroupData) error {
+func (s *BlkioGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	_, err := d.join("blkio")
+	return apply(path, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *BlkioGroup) Set(path string, r *configs.Resources) error {
-	if cgroup.Resources.BlkioWeight != 0 {
+	s.detectWeightFilenames(path)
-		if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
+	if r.BlkioWeight != 0 {
 		if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil {
 			return err
 		}
 	}
-	if cgroup.Resources.BlkioLeafWeight != 0 {
+	if r.BlkioLeafWeight != 0 {
-		if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
+		if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil {
 			return err
 		}
 	}
-	for _, wd := range cgroup.Resources.BlkioWeightDevice {
+	for _, wd := range r.BlkioWeightDevice {
-		if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
+		if wd.Weight != 0 {
-			return err
+			if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil {
 				return err
 			}
 		}
-		if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
+		if wd.LeafWeight != 0 {
 			if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
 				return err
 			}
 		}
 	}
 	for _, td := range r.BlkioThrottleReadBpsDevice {
 		if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
 			return err
 		}
 	}
-	for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
+	for _, td := range r.BlkioThrottleWriteBpsDevice {
-		if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
+		if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
 			return err
 		}
 	}
-	for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
+	for _, td := range r.BlkioThrottleReadIOPSDevice {
-		if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
+		if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
 			return err
 		}
 	}
-	for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
+	for _, td := range r.BlkioThrottleWriteIOPSDevice {
-		if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
+		if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
 			return err
 		}
 	}
 	for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
 		if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
 			return err
 		}
 	}
@ -74,10 +73,6 @@ func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
 	return nil
 }
 func (s *BlkioGroup) Remove(d *cgroupData) error {
 	return removePath(d.path("blkio"))
 }
 /*
 examples:
@ -113,9 +108,9 @@ func splitBlkioStatLine(r rune) bool {
 	return r == ' ' || r == ':'
 }
-func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
+func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) {
 	var blkioStats []cgroups.BlkioStatEntry
-	f, err := os.Open(path)
+	f, err := cgroups.OpenFile(dir, file, os.O_RDONLY)
 	if err != nil {
 		if os.IsNotExist(err) {
 			return blkioStats, nil
@ -133,19 +128,19 @@ func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
 				// skip total line
 				continue
 			} else {
-				return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
+				return nil, malformedLine(dir, file, sc.Text())
 			}
 		}
 		v, err := strconv.ParseUint(fields[0], 10, 64)
 		if err != nil {
-			return nil, err
+			return nil, &parseError{Path: dir, File: file, Err: err}
 		}
 		major := v
 		v, err = strconv.ParseUint(fields[1], 10, 64)
 		if err != nil {
-			return nil, err
+			return nil, &parseError{Path: dir, File: file, Err: err}
 		}
 		minor := v
@ -157,82 +152,160 @@ func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
 		}
 		v, err = strconv.ParseUint(fields[valueField], 10, 64)
 		if err != nil {
-			return nil, err
+			return nil, &parseError{Path: dir, File: file, Err: err}
 		}
 		blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
 	}
 	if err := sc.Err(); err != nil {
 		return nil, &parseError{Path: dir, File: file, Err: err}
 	}
 	return blkioStats, nil
 }
 func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
-	// Try to read CFQ stats available on all CFQ enabled kernels first
+	type blkioStatInfo struct {
-	if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil {
+		filename            string
-		return getCFQStats(path, stats)
+		blkioStatEntriesPtr *[]cgroups.BlkioStatEntry
 	}
 	bfqDebugStats := []blkioStatInfo{
 		{
 			filename:            "blkio.bfq.sectors_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
 		},
 		{
 			filename:            "blkio.bfq.io_service_time_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
 		},
 		{
 			filename:            "blkio.bfq.io_wait_time_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
 		},
 		{
 			filename:            "blkio.bfq.io_merged_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
 		},
 		{
 			filename:            "blkio.bfq.io_queued_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
 		},
 		{
 			filename:            "blkio.bfq.time_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
 		},
 		{
 			filename:            "blkio.bfq.io_serviced_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
 		},
 		{
 			filename:            "blkio.bfq.io_service_bytes_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
 		},
 	}
 	bfqStats := []blkioStatInfo{
 		{
 			filename:            "blkio.bfq.io_serviced_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
 		},
 		{
 			filename:            "blkio.bfq.io_service_bytes_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
 		},
 	}
 	cfqStats := []blkioStatInfo{
 		{
 			filename:            "blkio.sectors_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive,
 		},
 		{
 			filename:            "blkio.io_service_time_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive,
 		},
 		{
 			filename:            "blkio.io_wait_time_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive,
 		},
 		{
 			filename:            "blkio.io_merged_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive,
 		},
 		{
 			filename:            "blkio.io_queued_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive,
 		},
 		{
 			filename:            "blkio.time_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive,
 		},
 		{
 			filename:            "blkio.io_serviced_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
 		},
 		{
 			filename:            "blkio.io_service_bytes_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
 		},
 	}
 	throttleRecursiveStats := []blkioStatInfo{
 		{
 			filename:            "blkio.throttle.io_serviced_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
 		},
 		{
 			filename:            "blkio.throttle.io_service_bytes_recursive",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
 		},
 	}
 	baseStats := []blkioStatInfo{
 		{
 			filename:            "blkio.throttle.io_serviced",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive,
 		},
 		{
 			filename:            "blkio.throttle.io_service_bytes",
 			blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive,
 		},
 	}
 	orderedStats := [][]blkioStatInfo{
 		bfqDebugStats,
 		bfqStats,
 		cfqStats,
 		throttleRecursiveStats,
 		baseStats,
 	}
 	return getStats(path, stats) // Use generic stats as fallback
 }
 func getCFQStats(path string, stats *cgroups.Stats) error {
 	var blkioStats []cgroups.BlkioStatEntry
 	var err error
-	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
+	for _, statGroup := range orderedStats {
-		return err
+		for i, statInfo := range statGroup {
 			if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil {
 				// if error occurs on first file, move to next group
 				if i == 0 {
 					break
 				}
 				return err
 			}
 			*statInfo.blkioStatEntriesPtr = blkioStats
 			// finish if all stats are gathered
 			if i == len(statGroup)-1 {
 				return nil
 			}
 		}
 	}
 	stats.BlkioStats.SectorsRecursive = blkioStats
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
 		return err
 	}
 	stats.BlkioStats.IoServiceBytesRecursive = blkioStats
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
 		return err
 	}
 	stats.BlkioStats.IoServicedRecursive = blkioStats
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
 		return err
 	}
 	stats.BlkioStats.IoQueuedRecursive = blkioStats
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil {
 		return err
 	}
 	stats.BlkioStats.IoServiceTimeRecursive = blkioStats
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil {
 		return err
 	}
 	stats.BlkioStats.IoWaitTimeRecursive = blkioStats
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil {
 		return err
 	}
 	stats.BlkioStats.IoMergedRecursive = blkioStats
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil {
 		return err
 	}
 	stats.BlkioStats.IoTimeRecursive = blkioStats
 	return nil
 }
-func getStats(path string, stats *cgroups.Stats) error {
+func (s *BlkioGroup) detectWeightFilenames(path string) {
-	var blkioStats []cgroups.BlkioStatEntry
+	if s.weightFilename != "" {
-	var err error
+		// Already detected.
-
+		return
 	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil {
 		return err
 	}
-	stats.BlkioStats.IoServiceBytesRecursive = blkioStats
+	if cgroups.PathExists(filepath.Join(path, "blkio.weight")) {
-
+		s.weightFilename = "blkio.weight"
-	if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil {
+		s.weightDeviceFilename = "blkio.weight_device"
-		return err
+	} else {
 		s.weightFilename = "blkio.bfq.weight"
 		s.weightDeviceFilename = "blkio.bfq.weight_device"
 	}
 	stats.BlkioStats.IoServicedRecursive = blkioStats
 	return nil
 }
--- a/libcontainer/cgroups/fs/blkio_test.go
+++ b/libcontainer/cgroups/fs/blkio_test.go
--- a/libcontainer/cgroups/fs/cpu.go
+++ b/libcontainer/cgroups/fs/cpu.go
@ -1,94 +1,105 @@
 // +build linux
 package fs
 import (
 	"bufio"
 	"errors"
 	"fmt"
 	"os"
 	"path/filepath"
 	"strconv"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"golang.org/x/sys/unix"
 )
-type CpuGroup struct {
+type CpuGroup struct{}
 }
 // Name reports the name of the subsystem handled by CpuGroup: "cpu".
 func (s *CpuGroup) Name() string {
 	const subsystem = "cpu"
 	return subsystem
 }
-func (s *CpuGroup) Apply(d *cgroupData) error {
+func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error {
-	// We always want to join the cpu group, to allow fair cpu scheduling
+	if err := os.MkdirAll(path, 0o755); err != nil {
 	// on a container basis
 	path, err := d.path("cpu")
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return s.ApplyDir(path, d.config, d.pid)
 }
 func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
 	// This might happen if we have no cpu cgroup mounted.
 	// Just do nothing and don't fail.
 	if path == "" {
 		return nil
 	}
 	if err := os.MkdirAll(path, 0755); err != nil {
 		return err
 	}
 	// We should set the real-Time group scheduling settings before moving
 	// in the process because if the process is already in SCHED_RR mode
 	// and no RT bandwidth is set, adding it will fail.
-	if err := s.SetRtSched(path, cgroup); err != nil {
+	if err := s.SetRtSched(path, r); err != nil {
 		return err
 	}
-	// because we are not using d.join we need to place the pid into the procs file
+	// Since we are not using apply(), we need to place the pid
-	// unlike the other subsystems
+	// into the procs file.
 	return cgroups.WriteCgroupProc(path, pid)
 }
 // SetRtSched writes the real-time scheduling settings into the cgroup
 // directory path: the period goes to cpu.rt_period_us and the runtime to
 // cpu.rt_runtime_us, both formatted as base-10 integers. A setting whose
 // value is zero is skipped, leaving the corresponding file untouched.
 // The first write error is returned as is.
-func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
+func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error {
-	if cgroup.Resources.CpuRtPeriod != 0 {
+	if r.CpuRtPeriod != 0 {
-		if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
+		if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil {
 			return err
 		}
 	}
-	if cgroup.Resources.CpuRtRuntime != 0 {
+	if r.CpuRtRuntime != 0 {
-		if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
+		if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil {
 			return err
 		}
 	}
 	return nil
 }
-func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *CpuGroup) Set(path string, r *configs.Resources) error {
-	if cgroup.Resources.CpuShares != 0 {
+	if r.CpuShares != 0 {
-		if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
+		shares := r.CpuShares
 		if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil {
 			return err
 		}
-	}
+		// read it back
-	if cgroup.Resources.CpuPeriod != 0 {
+		sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
-		if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
+		if err != nil {
 			return err
 		}
-	}
+		// ... and check
-	if cgroup.Resources.CpuQuota != 0 {
+		if shares > sharesRead {
-		if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
+			return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead)
-			return err
+		} else if shares < sharesRead {
 			return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead)
 		}
 	}
 	return s.SetRtSched(path, cgroup)
 }
-func (s *CpuGroup) Remove(d *cgroupData) error {
+	var period string
-	return removePath(d.path("cpu"))
+	if r.CpuPeriod != 0 {
 		period = strconv.FormatUint(r.CpuPeriod, 10)
 		if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
 			// Sometimes when the period to be set is smaller
 			// than the current one, it is rejected by the kernel
 			// (EINVAL) as old_quota/new_period exceeds the parent
 			// cgroup quota limit. If this happens and the quota is
 			// going to be set, ignore the error for now and retry
 			// after setting the quota.
 			if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 {
 				return err
 			}
 		} else {
 			period = ""
 		}
 	}
 	if r.CpuQuota != 0 {
 		if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil {
 			return err
 		}
 		if period != "" {
 			if err := cgroups.WriteFile(path, "cpu.cfs_period_us", period); err != nil {
 				return err
 			}
 		}
 	}
 	return s.SetRtSched(path, r)
 }
 func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
-	f, err := os.Open(filepath.Join(path, "cpu.stat"))
+	const file = "cpu.stat"
 	f, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	if err != nil {
 		if os.IsNotExist(err) {
 			return nil
@ -99,9 +110,9 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
 	sc := bufio.NewScanner(f)
 	for sc.Scan() {
-		t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
+		t, v, err := fscommon.ParseKeyValue(sc.Text())
 		if err != nil {
-			return err
+			return &parseError{Path: path, File: file, Err: err}
 		}
 		switch t {
 		case "nr_periods":
--- a/libcontainer/cgroups/fs/cpu_test.go
+++ b/libcontainer/cgroups/fs/cpu_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -9,40 +7,40 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 func TestCpuSetShares(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
+	path := tempDir(t, "cpu")
 	defer helper.cleanup()
 	const (
 		sharesBefore = 1024
 		sharesAfter  = 512
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"cpu.shares": strconv.Itoa(sharesBefore),
 	})
-	helper.CgroupData.config.Resources.CpuShares = sharesAfter
+	r := &configs.Resources{
 		CpuShares: sharesAfter,
 	}
 	cpu := &CpuGroup{}
-	if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := cpu.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.shares")
+	value, err := fscommon.GetCgroupParamUint(path, "cpu.shares")
 	if err != nil {
-		t.Fatalf("Failed to parse cpu.shares - %s", err)
+		t.Fatal(err)
 	}
 	if value != sharesAfter {
 		t.Fatal("Got the wrong value, set cpu.shares failed.")
 	}
 }
 func TestCpuSetBandWidth(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
+	path := tempDir(t, "cpu")
 	defer helper.cleanup()
 	const (
 		quotaBefore     = 8000
@ -55,47 +53,51 @@ func TestCpuSetBandWidth(t *testing.T) {
 		rtPeriodAfter   = 7000
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"cpu.cfs_quota_us":  strconv.Itoa(quotaBefore),
 		"cpu.cfs_period_us": strconv.Itoa(periodBefore),
 		"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
 		"cpu.rt_period_us":  strconv.Itoa(rtPeriodBefore),
 	})
-	helper.CgroupData.config.Resources.CpuQuota = quotaAfter
+	r := &configs.Resources{
-	helper.CgroupData.config.Resources.CpuPeriod = periodAfter
+		CpuQuota:     quotaAfter,
-	helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
+		CpuPeriod:    periodAfter,
-	helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
+		CpuRtRuntime: rtRuntimeAfter,
 		CpuRtPeriod:  rtPeriodAfter,
 	}
 	cpu := &CpuGroup{}
-	if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := cpu.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	quota, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
+	quota, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_quota_us")
 	if err != nil {
-		t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
+		t.Fatal(err)
 	}
 	if quota != quotaAfter {
 		t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
 	}
-	period, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
+	period, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_period_us")
 	if err != nil {
-		t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
+		t.Fatal(err)
 	}
 	if period != periodAfter {
 		t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
 	}
-	rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
+
 	rtRuntime, err := fscommon.GetCgroupParamUint(path, "cpu.rt_runtime_us")
 	if err != nil {
-		t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
+		t.Fatal(err)
 	}
 	if rtRuntime != rtRuntimeAfter {
 		t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
 	}
-	rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
+
 	rtPeriod, err := fscommon.GetCgroupParamUint(path, "cpu.rt_period_us")
 	if err != nil {
-		t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
+		t.Fatal(err)
 	}
 	if rtPeriod != rtPeriodAfter {
 		t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
@ -103,8 +105,7 @@ func TestCpuSetBandWidth(t *testing.T) {
 }
 func TestCpuStats(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
+	path := tempDir(t, "cpu")
 	defer helper.cleanup()
 	const (
 		nrPeriods     = 2000
@ -112,15 +113,15 @@ func TestCpuStats(t *testing.T) {
 		throttledTime = uint64(18446744073709551615)
 	)
-	cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
+	cpuStatContent := fmt.Sprintf("nr_periods %d\nnr_throttled %d\nthrottled_time %d\n",
 		nrPeriods, nrThrottled, throttledTime)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"cpu.stat": cpuStatContent,
 	})
 	cpu := &CpuGroup{}
 	actualStats := *cgroups.NewStats()
-	err := cpu.GetStats(helper.CgroupPath, &actualStats)
+	err := cpu.GetStats(path, &actualStats)
 	if err != nil {
 		t.Fatal(err)
 	}
@ -128,44 +129,43 @@ func TestCpuStats(t *testing.T) {
 	expectedStats := cgroups.ThrottlingData{
 		Periods:          nrPeriods,
 		ThrottledPeriods: nrThrottled,
-		ThrottledTime:    throttledTime}
+		ThrottledTime:    throttledTime,
 	}
 	expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
 }
 func TestNoCpuStatFile(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
+	path := tempDir(t, "cpu")
 	defer helper.cleanup()
 	cpu := &CpuGroup{}
 	actualStats := *cgroups.NewStats()
-	err := cpu.GetStats(helper.CgroupPath, &actualStats)
+	err := cpu.GetStats(path, &actualStats)
 	if err != nil {
 		t.Fatal("Expected not to fail, but did")
 	}
 }
 func TestInvalidCpuStat(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
+	path := tempDir(t, "cpu")
-	defer helper.cleanup()
+
 	cpuStatContent := `nr_periods 2000
 	nr_throttled 200
 	throttled_time fortytwo`
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"cpu.stat": cpuStatContent,
 	})
 	cpu := &CpuGroup{}
 	actualStats := *cgroups.NewStats()
-	err := cpu.GetStats(helper.CgroupPath, &actualStats)
+	err := cpu.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failed stat parsing.")
 	}
 }
 func TestCpuSetRtSchedAtApply(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
+	path := tempDir(t, "cpu")
 	defer helper.cleanup()
 	const (
 		rtRuntimeBefore = 0
@ -174,35 +174,40 @@ func TestCpuSetRtSchedAtApply(t *testing.T) {
 		rtPeriodAfter   = 7000
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
 		"cpu.rt_period_us":  strconv.Itoa(rtPeriodBefore),
 	})
-	helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
+	r := &configs.Resources{
-	helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
+		CpuRtRuntime: rtRuntimeAfter,
 		CpuRtPeriod:  rtPeriodAfter,
 	}
 	cpu := &CpuGroup{}
-	if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != nil {
+
 	if err := cpu.Apply(path, r, 1234); err != nil {
 		t.Fatal(err)
 	}
-	rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
+	rtRuntime, err := fscommon.GetCgroupParamUint(path, "cpu.rt_runtime_us")
 	if err != nil {
-		t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
+		t.Fatal(err)
 	}
 	if rtRuntime != rtRuntimeAfter {
 		t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
 	}
-	rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
+
 	rtPeriod, err := fscommon.GetCgroupParamUint(path, "cpu.rt_period_us")
 	if err != nil {
-		t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
+		t.Fatal(err)
 	}
 	if rtPeriod != rtPeriodAfter {
 		t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
 	}
-	pid, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cgroup.procs")
+
 	pid, err := fscommon.GetCgroupParamUint(path, "cgroup.procs")
 	if err != nil {
-		t.Fatalf("Failed to parse cgroup.procs - %s", err)
+		t.Fatal(err)
 	}
 	if pid != 1234 {
 		t.Fatal("Got the wrong value, set cgroup.procs failed.")
--- a/libcontainer/cgroups/fs/cpuacct.go
+++ b/libcontainer/cgroups/fs/cpuacct.go
@ -1,52 +1,51 @@
 // +build linux
 package fs
 import (
-	"fmt"
+	"bufio"
-	"io/ioutil"
+	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/system"
 )
 const (
-	cgroupCpuacctStat   = "cpuacct.stat"
+	cgroupCpuacctStat     = "cpuacct.stat"
 	cgroupCpuacctUsageAll = "cpuacct.usage_all"
 	nanosecondsInSecond = 1000000000
 	userModeColumn              = 1
 	kernelModeColumn            = 2
 	cuacctUsageAllColumnsNumber = 3
 	// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
 	// on Linux it's a constant which is safe to be hard coded,
 	// so we can avoid using cgo here. For details, see:
 	// https://github.com/containerd/cgroups/pull/12
 	clockTicks uint64 = 100
 )
-var clockTicks = uint64(system.GetClockTicks())
+type CpuacctGroup struct{}
 type CpuacctGroup struct {
 }
 // Name reports the name of the subsystem handled by CpuacctGroup: "cpuacct".
 func (s *CpuacctGroup) Name() string {
 	const subsystem = "cpuacct"
 	return subsystem
 }
-func (s *CpuacctGroup) Apply(d *cgroupData) error {
+func (s *CpuacctGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	// we just want to join this group even though we don't set anything
+	return apply(path, pid)
 	if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error {
 	return nil
 }
 // Remove resolves the "cpuacct" path of d and hands the result, together
 // with any lookup error, to removePath.
 func (s *CpuacctGroup) Remove(d *cgroupData) error {
 	dir, err := d.path("cpuacct")
 	return removePath(dir, err)
 }
 func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
 	if !cgroups.PathExists(path) {
 		return nil
 	}
 	userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
 	if err != nil {
 		return err
@ -62,8 +61,15 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
 		return err
 	}
 	percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path)
 	if err != nil {
 		return err
 	}
 	stats.CpuStats.CpuUsage.TotalUsage = totalUsage
 	stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
 	stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode
 	stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode
 	stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
 	stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
 	return nil
@ -71,52 +77,90 @@ func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
 // Returns user and kernel usage breakdown in nanoseconds.
 func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
-	userModeUsage := uint64(0)
+	var userModeUsage, kernelModeUsage uint64
 	kernelModeUsage := uint64(0)
 	const (
 		userField   = "user"
 		systemField = "system"
 		file        = cgroupCpuacctStat
 	)
 	// Expected format:
 	// user <usage in ticks>
 	// system <usage in ticks>
-	data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat))
+	data, err := cgroups.ReadFile(path, file)
 	if err != nil {
 		return 0, 0, err
 	}
-	fields := strings.Fields(string(data))
+	// TODO: use strings.SplitN instead.
-	if len(fields) < 4 {
+	fields := strings.Fields(data)
-		return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
+	if len(fields) < 4 || fields[0] != userField || fields[2] != systemField {
-	}
+		return 0, 0, malformedLine(path, file, data)
 	if fields[0] != userField {
 		return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
 	}
 	if fields[2] != systemField {
 		return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
 	}
 	if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
-		return 0, 0, err
+		return 0, 0, &parseError{Path: path, File: file, Err: err}
 	}
 	if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
-		return 0, 0, err
+		return 0, 0, &parseError{Path: path, File: file, Err: err}
 	}
 	return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
 }
 // getPercpuUsage reads cpuacct.usage_percpu from the cgroup directory path
 // and parses every whitespace-separated field as a base-10 unsigned
 // integer, returning the values in file order.
 //
 // The returned slice is never nil. On a read or parse error the values
 // collected so far are returned together with the error.
 func getPercpuUsage(path string) ([]uint64, error) {
 	const file = "cpuacct.usage_percpu"
 	percpuUsage := []uint64{}
-	data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
+	data, err := cgroups.ReadFile(path, file)
 	if err != nil {
 		return percpuUsage, err
 	}
-	for _, value := range strings.Fields(string(data)) {
+	// TODO: use strings.SplitN instead.
 	for _, value := range strings.Fields(data) {
 		// The inner value shadows the loop variable with the parsed number.
 		value, err := strconv.ParseUint(value, 10, 64)
 		if err != nil {
-			return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
+			return percpuUsage, &parseError{Path: path, File: file, Err: err}
 		}
 		percpuUsage = append(percpuUsage, value)
 	}
 	return percpuUsage, nil
 }
 // getPercpuUsageInModes returns the per-CPU kernel-mode and user-mode usage
 // values, in that order, parsed from the cpuacct.usage_all file under path.
 //
 // If the file does not exist, two empty non-nil slices and a nil error are
 // returned. The first line is treated as a header and skipped; any later
 // line that does not split into exactly cuacctUsageAllColumnsNumber
 // space-separated columns is ignored. A column that is not a base-10
 // unsigned integer, or a scanner failure, is reported as a *parseError.
 func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) {
 	const file = cgroupCpuacctUsageAll
 	kernelUsage := []uint64{}
 	userUsage := []uint64{}
 	fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	switch {
 	case os.IsNotExist(err):
 		return kernelUsage, userUsage, nil
 	case err != nil:
 		return nil, nil, err
 	}
 	defer fd.Close()
 	sc := bufio.NewScanner(fd)
 	sc.Scan() // discard the header line
 	for sc.Scan() {
 		cols := strings.SplitN(sc.Text(), " ", cuacctUsageAllColumnsNumber+1)
 		if len(cols) != cuacctUsageAllColumnsNumber {
 			continue
 		}
 		kernel, err := strconv.ParseUint(cols[kernelModeColumn], 10, 64)
 		if err != nil {
 			return nil, nil, &parseError{Path: path, File: file, Err: err}
 		}
 		user, err := strconv.ParseUint(cols[userModeColumn], 10, 64)
 		if err != nil {
 			return nil, nil, &parseError{Path: path, File: file, Err: err}
 		}
 		kernelUsage = append(kernelUsage, kernel)
 		userUsage = append(userUsage, user)
 	}
 	if err := sc.Err(); err != nil {
 		return nil, nil, &parseError{Path: path, File: file, Err: err}
 	}
 	return kernelUsage, userUsage, nil
 }
--- a/libcontainer/cgroups/fs/cpuacct_test.go
+++ b/libcontainer/cgroups/fs/cpuacct_test.go
@ -0,0 +1,97 @@
 package fs
 import (
 	"reflect"
 	"testing"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 )
 // Fixture contents for the cpuacct.* files that the tests in this file
 // write into a temporary directory before calling GetStats.
 const (
 	cpuAcctUsageContents       = "12262454190222160"
 	cpuAcctUsagePerCPUContents = "1564936537989058 1583937096487821 1604195415465681 1596445226820187 1481069084155629 1478735613864327 1477610593414743 1476362015778086"
 	cpuAcctStatContents        = "user 452278264\nsystem 291429664"
 	// cpuAcctUsageAll starts with a header line, followed by one line per CPU.
 	cpuAcctUsageAll            = `cpu user system
 	0 962250696038415 637727786389114
 	1 981956408513304 638197595421064
 	2 1002658817529022 638956774598358
 	3 994937703492523 637985531181620
 	4 874843781648690 638837766495476
 	5 872544369885276 638763309884944
 	6 870104915696359 640081778921247
 	7 870202363887496 638716766259495
 	`
 )
 // TestCpuacctStats verifies that CpuacctGroup.GetStats fills in every
 // cgroups.CpuUsage field when cpuacct.usage, cpuacct.usage_percpu,
 // cpuacct.stat and cpuacct.usage_all are all present.
 func TestCpuacctStats(t *testing.T) {
 	dir := tempDir(t, "cpuacct")
 	fixtures := map[string]string{
 		"cpuacct.usage":        cpuAcctUsageContents,
 		"cpuacct.usage_percpu": cpuAcctUsagePerCPUContents,
 		"cpuacct.stat":         cpuAcctStatContents,
 		"cpuacct.usage_all":    cpuAcctUsageAll,
 	}
 	writeFileContents(t, dir, fixtures)
 	var group CpuacctGroup
 	got := *cgroups.NewStats()
 	if err := group.GetStats(dir, &got); err != nil {
 		t.Fatal(err)
 	}
 	want := cgroups.CpuUsage{
 		TotalUsage: uint64(12262454190222160),
 		PercpuUsage: []uint64{
 			1564936537989058, 1583937096487821, 1604195415465681, 1596445226820187,
 			1481069084155629, 1478735613864327, 1477610593414743, 1476362015778086,
 		},
 		PercpuUsageInKernelmode: []uint64{
 			637727786389114, 638197595421064, 638956774598358, 637985531181620,
 			638837766495476, 638763309884944, 640081778921247, 638716766259495,
 		},
 		PercpuUsageInUsermode: []uint64{
 			962250696038415, 981956408513304, 1002658817529022, 994937703492523,
 			874843781648690, 872544369885276, 870104915696359, 870202363887496,
 		},
 		// cpuacct.stat values are converted with the same formula the
 		// expectation was originally written with.
 		UsageInKernelmode: (uint64(291429664) * nanosecondsInSecond) / clockTicks,
 		UsageInUsermode:   (uint64(452278264) * nanosecondsInSecond) / clockTicks,
 	}
 	if usage := got.CpuStats.CpuUsage; !reflect.DeepEqual(want, usage) {
 		t.Errorf("Expected CPU usage %#v but found %#v\n", want, usage)
 	}
 }
 // TestCpuacctStatsWithoutUsageAll verifies that CpuacctGroup.GetStats still
 // succeeds when cpuacct.usage_all is absent, in which case both per-CPU
 // per-mode slices are expected to be empty but non-nil.
 func TestCpuacctStatsWithoutUsageAll(t *testing.T) {
 	dir := tempDir(t, "cpuacct")
 	fixtures := map[string]string{
 		"cpuacct.usage":        cpuAcctUsageContents,
 		"cpuacct.usage_percpu": cpuAcctUsagePerCPUContents,
 		"cpuacct.stat":         cpuAcctStatContents,
 	}
 	writeFileContents(t, dir, fixtures)
 	var group CpuacctGroup
 	got := *cgroups.NewStats()
 	if err := group.GetStats(dir, &got); err != nil {
 		t.Fatal(err)
 	}
 	want := cgroups.CpuUsage{
 		TotalUsage: uint64(12262454190222160),
 		PercpuUsage: []uint64{
 			1564936537989058, 1583937096487821, 1604195415465681, 1596445226820187,
 			1481069084155629, 1478735613864327, 1477610593414743, 1476362015778086,
 		},
 		PercpuUsageInKernelmode: []uint64{},
 		PercpuUsageInUsermode:   []uint64{},
 		UsageInKernelmode:       (uint64(291429664) * nanosecondsInSecond) / clockTicks,
 		UsageInUsermode:         (uint64(452278264) * nanosecondsInSecond) / clockTicks,
 	}
 	if usage := got.CpuStats.CpuUsage; !reflect.DeepEqual(want, usage) {
 		t.Errorf("Expected CPU usage %#v but found %#v\n", want, usage)
 	}
 }
--- a/libcontainer/cgroups/fs/cpuset.go
+++ b/libcontainer/cgroups/fs/cpuset.go
@ -1,75 +1,159 @@
 // +build linux
 package fs
 import (
-	"bytes"
+	"errors"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"golang.org/x/sys/unix"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
 )
-type CpusetGroup struct {
+type CpusetGroup struct{}
 }
 // Name reports the name of the subsystem handled by CpusetGroup: "cpuset".
 func (s *CpusetGroup) Name() string {
 	const subsystem = "cpuset"
 	return subsystem
 }
-func (s *CpusetGroup) Apply(d *cgroupData) error {
+func (s *CpusetGroup) Apply(path string, r *configs.Resources, pid int) error {
-	dir, err := d.path("cpuset")
+	return s.ApplyDir(path, r, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return s.ApplyDir(dir, d.config, d.pid)
 }
-func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *CpusetGroup) Set(path string, r *configs.Resources) error {
-	if cgroup.Resources.CpusetCpus != "" {
+	if r.CpusetCpus != "" {
-		if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
+		if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil {
 			return err
 		}
 	}
-	if cgroup.Resources.CpusetMems != "" {
+	if r.CpusetMems != "" {
-		if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
+		if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil {
 			return err
 		}
 	}
 	return nil
 }
-func (s *CpusetGroup) Remove(d *cgroupData) error {
 // getCpusetStat reads the named file under path and expands its
 // comma-separated list of single values and "min-max" ranges into
 // individual uint16 entries, in the order they appear.
 //
 // Empty content, an element containing more than one dash, a value that
 // does not parse as a 16-bit unsigned integer, or a range with min > max
 // yields a *parseError. A read error is returned unchanged. In every error
 // case the entries extracted before the failure are still returned.
+func getCpusetStat(path string, file string) ([]uint16, error) {
-	return removePath(d.path("cpuset"))
+	var extracted []uint16
 	fileContent, err := fscommon.GetCgroupParamString(path, file)
 	if err != nil {
 		return extracted, err
 	}
 	if len(fileContent) == 0 {
 		return extracted, &parseError{Path: path, File: file, Err: errors.New("empty file")}
 	}
 	for _, s := range strings.Split(fileContent, ",") {
 		// Split into at most three parts so that a second dash is detectable.
 		sp := strings.SplitN(s, "-", 3)
 		switch len(sp) {
 		case 3:
 			return extracted, &parseError{Path: path, File: file, Err: errors.New("extra dash")}
 		case 2:
 			// "min-max": an inclusive range.
 			min, err := strconv.ParseUint(sp[0], 10, 16)
 			if err != nil {
 				return extracted, &parseError{Path: path, File: file, Err: err}
 			}
 			max, err := strconv.ParseUint(sp[1], 10, 16)
 			if err != nil {
 				return extracted, &parseError{Path: path, File: file, Err: err}
 			}
 			if min > max {
 				return extracted, &parseError{Path: path, File: file, Err: errors.New("invalid values, min > max")}
 			}
 			for i := min; i <= max; i++ {
 				extracted = append(extracted, uint16(i))
 			}
 		case 1:
 			// A single value with no dash.
 			value, err := strconv.ParseUint(s, 10, 16)
 			if err != nil {
 				return extracted, &parseError{Path: path, File: file, Err: err}
 			}
 			extracted = append(extracted, uint16(value))
 		}
 	}
 	return extracted, nil
 }
 // GetStats populates stats.CPUSetStats from the cpuset.* files under path.
 //
 // The files are read in a fixed order. A file that does not exist
 // (os.ErrNotExist) is tolerated and reading continues with the next one;
 // any other error stops the collection and is returned unchanged. Each
 // field is assigned whatever its reader returned, even on error.
 func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
 	// One step per file: read it, store the result, report the read error.
 	steps := []func() error{
 		func() (err error) {
 			stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.Mems, err = getCpusetStat(path, "cpuset.mems")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance")
 			return err
 		},
 		func() (err error) {
 			stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level")
 			return err
 		},
 	}
 	for _, step := range steps {
 		if err := step(); err != nil && !errors.Is(err, os.ErrNotExist) {
 			return err
 		}
 	}
 	return nil
 }
-func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
+func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error {
 	// This might happen if we have no cpuset cgroup mounted.
 	// Just do nothing and don't fail.
 	if dir == "" {
 		return nil
 	}
 	mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
 	if err != nil {
 		return err
 	}
 	root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
 	// 'ensureParent' start with parent because we don't want to
 	// explicitly inherit from parent, it could conflict with
 	// 'cpuset.cpu_exclusive'.
-	if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
+	if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil {
 		return err
 	}
-	if err := os.MkdirAll(dir, 0755); err != nil {
+	if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) {
 		return err
 	}
 	// We didn't inherit cpuset configs from parent, but we have
@ -79,82 +163,83 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
 	// specified configs, otherwise, inherit from parent. This makes
 	// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
 	// keep backward compatibility.
-	if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
+	if err := s.ensureCpusAndMems(dir, r); err != nil {
 		return err
 	}
-
+	// Since we are not using apply(), we need to place the pid
-	// because we are not using d.join we need to place the pid into the procs file
+	// into the procs file.
 	// unlike the other subsystems
 	return cgroups.WriteCgroupProc(dir, pid)
 }
 // getCpusetSubsystemSettings returns the raw contents of cpuset.cpus and
 // cpuset.mems found in the directory parent. If reading either file fails,
 // the error is returned along with whatever had been read up to that point.
-func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
+func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) {
-	if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
+	if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil {
 		return
 	}
-	if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
+	if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil {
 		return
 	}
 	return cpus, mems, nil
 }
-// ensureParent makes sure that the parent directory of current is created
+// cpusetEnsureParent makes sure that the parent directories of current
-// and populated with the proper cpus and mems files copied from
+// are created and populated with the proper cpus and mems files copied
-// it's parent.
+// from their respective parent. It does that recursively, starting from
-func (s *CpusetGroup) ensureParent(current, root string) error {
+// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point).
 func cpusetEnsureParent(current string) error {
 	var st unix.Statfs_t
 	parent := filepath.Dir(current)
-	if libcontainerUtils.CleanPath(parent) == root {
+	err := unix.Statfs(parent, &st)
 	if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC {
 		return nil
 	}
-	// Avoid infinite recursion.
+	// Treat non-existing directory as cgroupfs as it will be created,
-	if parent == current {
+	// and the root cpuset directory obviously exists.
-		return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
+	if err != nil && err != unix.ENOENT { //nolint:errorlint // unix errors are bare
 		return &os.PathError{Op: "statfs", Path: parent, Err: err}
 	}
-	if err := s.ensureParent(parent, root); err != nil {
+
 	if err := cpusetEnsureParent(parent); err != nil {
 		return err
 	}
-	if err := os.MkdirAll(current, 0755); err != nil {
+	if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) {
 		return err
 	}
-	return s.copyIfNeeded(current, parent)
+	return cpusetCopyIfNeeded(current, parent)
 }
-// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
+// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
 // directory to the current directory if the file's contents are 0
-func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
+func cpusetCopyIfNeeded(current, parent string) error {
-	var (
+	currentCpus, currentMems, err := getCpusetSubsystemSettings(current)
-		err                      error
+	if err != nil {
 		currentCpus, currentMems []byte
 		parentCpus, parentMems   []byte
 	)
 	if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
 		return err
 	}
-	if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
+	parentCpus, parentMems, err := getCpusetSubsystemSettings(parent)
 	if err != nil {
 		return err
 	}
-	if s.isEmpty(currentCpus) {
+	if isEmptyCpuset(currentCpus) {
-		if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
+		if err := cgroups.WriteFile(current, "cpuset.cpus", parentCpus); err != nil {
 			return err
 		}
 	}
-	if s.isEmpty(currentMems) {
+	if isEmptyCpuset(currentMems) {
-		if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
+		if err := cgroups.WriteFile(current, "cpuset.mems", parentMems); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 // isEmptyCpuset reports whether str, the content of a cpuset file, holds no
 // value: it is either the empty string or a single newline. The removed
 // isEmpty variant instead trimmed every leading and trailing newline.
-func (s *CpusetGroup) isEmpty(b []byte) bool {
+func isEmptyCpuset(str string) bool {
-	return len(bytes.Trim(b, "\n")) == 0
+	return str == "" || str == "\n"
 }
 // ensureCpusAndMems first applies the configured cpuset values to path via
 // Set, then lets the copy-if-needed helper fill in cpuset.cpus and
 // cpuset.mems from the parent directory of path where they are still empty.
-func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
+func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error {
-	if err := s.Set(path, cgroup); err != nil {
+	if err := s.Set(path, r); err != nil {
 		return err
 	}
-	return s.copyIfNeeded(path, filepath.Dir(path))
+	return cpusetCopyIfNeeded(path, filepath.Dir(path))
 }
--- a/libcontainer/cgroups/fs/cpuset_test.go
+++ b/libcontainer/cgroups/fs/cpuset_test.go
@ -1,67 +1,242 @@
 // +build linux
 package fs
 import (
 	"reflect"
 	"testing"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
-func TestCpusetSetCpus(t *testing.T) {
+const (
-	helper := NewCgroupTestUtil("cpuset", t)
+	cpus                  = "0-2,7,12-14\n"
-	defer helper.cleanup()
+	cpuExclusive          = "1\n"
 	mems                  = "1-4,6,9\n"
 	memHardwall           = "0\n"
 	memExclusive          = "0\n"
 	memoryMigrate         = "1\n"
 	memorySpreadPage      = "0\n"
 	memorySpeadSlab       = "1\n"
 	memoryPressure        = "34377\n"
 	schedLoadBalance      = "1\n"
 	schedRelaxDomainLevel = "-1\n"
 )
 // cpusetTestFiles maps each cpuset.* file name to the fixture contents
 // (the constants declared above) that the stats tests write into a
 // temporary cgroup directory via writeFileContents.
 var cpusetTestFiles = map[string]string{
 	"cpuset.cpus":                     cpus,
 	"cpuset.cpu_exclusive":            cpuExclusive,
 	"cpuset.mems":                     mems,
 	"cpuset.mem_hardwall":             memHardwall,
 	"cpuset.mem_exclusive":            memExclusive,
 	"cpuset.memory_migrate":           memoryMigrate,
 	"cpuset.memory_spread_page":       memorySpreadPage,
 	"cpuset.memory_spread_slab":       memorySpeadSlab,
 	"cpuset.memory_pressure":          memoryPressure,
 	"cpuset.sched_load_balance":       schedLoadBalance,
 	"cpuset.sched_relax_domain_level": schedRelaxDomainLevel,
 }
 func TestCPUSetSetCpus(t *testing.T) {
 	path := tempDir(t, "cpuset")
 	const (
 		cpusBefore = "0"
 		cpusAfter  = "1-3"
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"cpuset.cpus": cpusBefore,
 	})
-	helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
+	r := &configs.Resources{
 		CpusetCpus: cpusAfter,
 	}
 	cpuset := &CpusetGroup{}
-	if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := cpuset.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.cpus")
+	value, err := fscommon.GetCgroupParamString(path, "cpuset.cpus")
 	if err != nil {
-		t.Fatalf("Failed to parse cpuset.cpus - %s", err)
+		t.Fatal(err)
 	}
 	if value != cpusAfter {
 		t.Fatal("Got the wrong value, set cpuset.cpus failed.")
 	}
 }
-func TestCpusetSetMems(t *testing.T) {
+func TestCPUSetSetMems(t *testing.T) {
-	helper := NewCgroupTestUtil("cpuset", t)
+	path := tempDir(t, "cpuset")
 	defer helper.cleanup()
 	const (
 		memsBefore = "0"
 		memsAfter  = "1"
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"cpuset.mems": memsBefore,
 	})
-	helper.CgroupData.config.Resources.CpusetMems = memsAfter
+	r := &configs.Resources{
 		CpusetMems: memsAfter,
 	}
 	cpuset := &CpusetGroup{}
-	if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := cpuset.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.mems")
+	value, err := fscommon.GetCgroupParamString(path, "cpuset.mems")
 	if err != nil {
-		t.Fatalf("Failed to parse cpuset.mems - %s", err)
+		t.Fatal(err)
 	}
 	if value != memsAfter {
 		t.Fatal("Got the wrong value, set cpuset.mems failed.")
 	}
 }
 // TestCPUSetStatsCorrect writes the full set of cpuset fixture files into a
 // temporary directory and verifies that GetStats parses every field into
 // the expected CPUSetStats value.
 func TestCPUSetStatsCorrect(t *testing.T) {
 	dir := tempDir(t, "cpuset")
 	writeFileContents(t, dir, cpusetTestFiles)
 	stats := cgroups.NewStats()
 	group := &CpusetGroup{}
 	if err := group.GetStats(dir, stats); err != nil {
 		t.Fatal(err)
 	}
 	// Ranges such as "0-2,7,12-14" are expected to be expanded into the
 	// individual IDs.
 	want := cgroups.CPUSetStats{
 		CPUs:                  []uint16{0, 1, 2, 7, 12, 13, 14},
 		CPUExclusive:          1,
 		Mems:                  []uint16{1, 2, 3, 4, 6, 9},
 		MemoryMigrate:         1,
 		MemHardwall:           0,
 		MemExclusive:          0,
 		MemorySpreadPage:      0,
 		MemorySpreadSlab:      1,
 		MemoryPressure:        34377,
 		SchedLoadBalance:      1,
 		SchedRelaxDomainLevel: -1,
 	}
 	if got := stats.CPUSetStats; !reflect.DeepEqual(want, got) {
 		t.Fatalf("Expected Cpuset stats usage %#v but found %#v",
 			want, got)
 	}
 }
 // TestCPUSetStatsMissingFiles checks how GetStats reacts to a single
 // abnormal file. Cases with removeFile set delete the file from the fixture
 // set and expect GetStats to succeed; the remaining cases overwrite the file
 // with the given contents and expect GetStats to return an error.
 func TestCPUSetStatsMissingFiles(t *testing.T) {
 	cases := []struct {
 		desc               string
 		filename, contents string
 		removeFile         bool
 	}{
 		{desc: "empty cpus file", filename: "cpuset.cpus"},
 		{desc: "empty mems file", filename: "cpuset.mems"},
 		{desc: "corrupted cpus file", filename: "cpuset.cpus", contents: "0-3,*4^2"},
 		{desc: "corrupted mems file", filename: "cpuset.mems", contents: "0,1,2-5,8-7"},
 		{desc: "missing cpu_exclusive file", filename: "cpuset.cpu_exclusive", removeFile: true},
 		{desc: "missing memory_migrate file", filename: "cpuset.memory_migrate", removeFile: true},
 		{desc: "missing mem_hardwall file", filename: "cpuset.mem_hardwall", removeFile: true},
 		{desc: "missing mem_exclusive file", filename: "cpuset.mem_exclusive", removeFile: true},
 		{desc: "missing memory_spread_page file", filename: "cpuset.memory_spread_page", removeFile: true},
 		{desc: "missing memory_spread_slab file", filename: "cpuset.memory_spread_slab", removeFile: true},
 		{desc: "missing memory_pressure file", filename: "cpuset.memory_pressure", removeFile: true},
 		{desc: "missing sched_load_balance file", filename: "cpuset.sched_load_balance", removeFile: true},
 		{desc: "missing sched_relax_domain_level file", filename: "cpuset.sched_relax_domain_level", removeFile: true},
 	}
 	for _, tc := range cases {
 		t.Run(tc.desc, func(t *testing.T) {
 			dir := tempDir(t, "cpuset")
 			// Work on a private copy so the shared fixture map is never mutated.
 			files := map[string]string{}
 			for name, data := range cpusetTestFiles {
 				files[name] = data
 			}
 			if tc.removeFile {
 				delete(files, tc.filename)
 			} else {
 				files[tc.filename] = tc.contents
 			}
 			writeFileContents(t, dir, files)
 			group := &CpusetGroup{}
 			stats := cgroups.NewStats()
 			err := group.GetStats(dir, stats)
 			switch {
 			case tc.removeFile && err != nil:
 				t.Errorf("failed unexpectedly: %q", err)
 			case !tc.removeFile && err == nil:
 				t.Error("failed to return expected error")
 			}
 		})
 	}
 }
--- a/libcontainer/cgroups/fs/devices.go
+++ b/libcontainer/cgroups/fs/devices.go
@ -1,81 +1,109 @@
 // +build linux
 package fs
 import (
 	"bytes"
 	"errors"
 	"reflect"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
-	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
+	cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices"
 	"github.com/opencontainers/runc/libcontainer/configs"
-	"github.com/opencontainers/runc/libcontainer/system"
+	"github.com/opencontainers/runc/libcontainer/devices"
 	"github.com/opencontainers/runc/libcontainer/userns"
 )
 // DevicesGroup implements the handler for the "devices" cgroup subsystem.
 type DevicesGroup struct {
 	// TestingSkipFinalCheck, when true, makes Set skip the final safety
 	// check that re-reads devices.list; it exists for unit tests, which
 	// cannot mock devices.list correctly.
 	TestingSkipFinalCheck bool
 }
 // Name returns the subsystem name, "devices".
 func (s *DevicesGroup) Name() string {
 	return "devices"
 }
-func (s *DevicesGroup) Apply(d *cgroupData) error {
+func (s *DevicesGroup) Apply(path string, r *configs.Resources, pid int) error {
-	_, err := d.join("devices")
+	if r.SkipDevices {
 		return nil
 	}
 	if path == "" {
 		// Return error here, since devices cgroup
 		// is a hard requirement for container's security.
 		return errSubsystemDoesNotExist
 	}
 	return apply(path, pid)
 }
 // loadEmulator reads devices.list from the cgroup directory at path and
 // builds an emulator from its contents. A read failure is returned as-is.
 func loadEmulator(path string) (*cgroupdevices.Emulator, error) {
 	contents, readErr := cgroups.ReadFile(path, "devices.list")
 	if readErr != nil {
 		return nil, readErr
 	}
 	buf := bytes.NewBufferString(contents)
 	return cgroupdevices.EmulatorFromList(buf)
 }
 // buildEmulator applies rules, in order, to a fresh emulator and returns
 // it. The first rule that fails to apply aborts the build with its error.
 func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) {
 	// This defaults to a white-list -- which is what we want!
 	target := &cgroupdevices.Emulator{}
 	for i := range rules {
 		if err := target.Apply(*rules[i]); err != nil {
 			return nil, err
 		}
 	}
 	return target, nil
 }
 func (s *DevicesGroup) Set(path string, r *configs.Resources) error {
 	if userns.RunningInUserNS() || r.SkipDevices {
 		return nil
 	}
 	// Generate two emulators, one for the current state of the cgroup and one
 	// for the requested state by the user.
 	current, err := loadEmulator(path)
 	if err != nil {
 		// Return the error even if it is a `not found` error: the devices
 		// cgroup is a hard requirement for the container's security.
 		return err
 	}
-	return nil
+	target, err := buildEmulator(r.Devices)
-}
+	if err != nil {
-
+		return err
 func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
 	if system.RunningInUserNS() {
 		return nil
 	}
-	devices := cgroup.Resources.Devices
+	// Compute the minimal set of transition rules needed to achieve the
-	if len(devices) > 0 {
+	// requested state.
-		for _, dev := range devices {
+	transitionRules, err := current.Transition(target)
-			file := "devices.deny"
+	if err != nil {
-			if dev.Allow {
+		return err
 				file = "devices.allow"
 			}
 			if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
 				return err
 			}
 		}
 		return nil
 	}
-	if cgroup.Resources.AllowAllDevices != nil {
+	for _, rule := range transitionRules {
-		if *cgroup.Resources.AllowAllDevices == false {
+		file := "devices.deny"
-			if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
+		if rule.Allow {
-				return err
+			file = "devices.allow"
 			}
 			for _, dev := range cgroup.Resources.AllowedDevices {
 				if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
 					return err
 				}
 			}
 			return nil
 		}
-
+		if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil {
 		if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
 			return err
 		}
 	}
-	for _, dev := range cgroup.Resources.DeniedDevices {
+	// Final safety check -- ensure that the resulting state is what was
-		if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
+	// requested. This is only really correct for white-lists, but for
 	// black-lists we can at least check that the cgroup is in the right mode.
 	//
 	// This safety-check is skipped for the unit tests because we cannot
 	// currently mock devices.list correctly.
 	if !s.TestingSkipFinalCheck {
 		currentAfter, err := loadEmulator(path)
 		if err != nil {
 			return err
 		}
 		if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) {
 			return errors.New("resulting devices cgroup doesn't precisely match target")
 		} else if target.IsBlacklist() != currentAfter.IsBlacklist() {
 			return errors.New("resulting devices cgroup doesn't match target mode")
 		}
 	}
 	return nil
 }
 func (s *DevicesGroup) Remove(d *cgroupData) error {
 	return removePath(d.path("devices"))
 }
 // GetStats is a no-op for the devices subsystem: it leaves stats untouched
 // and always returns nil.
 func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
 	return nil
 }
--- a/libcontainer/cgroups/fs/devices_test.go
+++ b/libcontainer/cgroups/fs/devices_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -7,93 +5,48 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
-)
+	"github.com/opencontainers/runc/libcontainer/devices"
 var (
 	allowedDevices = []*configs.Device{
 		{
 			Path:        "/dev/zero",
 			Type:        'c',
 			Major:       1,
 			Minor:       5,
 			Permissions: "rwm",
 			FileMode:    0666,
 		},
 	}
 	allowedList   = "c 1:5 rwm"
 	deniedDevices = []*configs.Device{
 		{
 			Path:        "/dev/null",
 			Type:        'c',
 			Major:       1,
 			Minor:       3,
 			Permissions: "rwm",
 			FileMode:    0666,
 		},
 	}
 	deniedList = "c 1:3 rwm"
 )
 func TestDevicesSetAllow(t *testing.T) {
-	helper := NewCgroupTestUtil("devices", t)
+	path := tempDir(t, "devices")
 	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
-		"devices.deny": "a",
+		"devices.allow": "",
-	})
+		"devices.deny":  "",
-	allowAllDevices := false
+		"devices.list":  "a *:* rwm",
 	helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
 	helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
 	devices := &DevicesGroup{}
 	if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
 		t.Fatal(err)
 	}
 	value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
 	if err != nil {
 		t.Fatalf("Failed to parse devices.allow - %s", err)
 	}
 	if value != allowedList {
 		t.Fatal("Got the wrong value, set devices.allow failed.")
 	}
 	// When AllowAllDevices is nil, devices.allow file should not be modified.
 	helper.CgroupData.config.Resources.AllowAllDevices = nil
 	if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
 		t.Fatal(err)
 	}
 	value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
 	if err != nil {
 		t.Fatalf("Failed to parse devices.allow - %s", err)
 	}
 	if value != allowedList {
 		t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
 	}
 }
 func TestDevicesSetDeny(t *testing.T) {
 	helper := NewCgroupTestUtil("devices", t)
 	defer helper.cleanup()
 	helper.writeFileContents(map[string]string{
 		"devices.allow": "a",
 	})
-	allowAllDevices := true
+	r := &configs.Resources{
-	helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
+		Devices: []*devices.Rule{
-	helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
+			{
-	devices := &DevicesGroup{}
+				Type:        devices.CharDevice,
-	if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+				Major:       1,
 				Minor:       5,
 				Permissions: devices.Permissions("rwm"),
 				Allow:       true,
 			},
 		},
 	}
 	d := &DevicesGroup{TestingSkipFinalCheck: true}
 	if err := d.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny")
+	// The default deny rule must be written.
 	value, err := fscommon.GetCgroupParamString(path, "devices.deny")
 	if err != nil {
-		t.Fatalf("Failed to parse devices.deny - %s", err)
+		t.Fatal(err)
 	}
 	if value[0] != 'a' {
 		t.Errorf("Got the wrong value (%q), set devices.deny failed.", value)
 	}
-	if value != deniedList {
+	// Permitted rule must be written.
-		t.Fatal("Got the wrong value, set devices.deny failed.")
+	if value, err := fscommon.GetCgroupParamString(path, "devices.allow"); err != nil {
 		t.Fatal(err)
 	} else if value != "c 1:5 rwm" {
 		t.Errorf("Got the wrong value (%q), set devices.allow failed.", value)
 	}
 }
--- a/libcontainer/cgroups/fs/error.go
+++ b/libcontainer/cgroups/fs/error.go
@ -0,0 +1,15 @@
 package fs
 import (
 	"fmt"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 )
 // parseError is a package-local alias for fscommon.ParseError.
 type parseError = fscommon.ParseError
 // malformedLine builds the error reported by cgroupfs file parsers when a
 // line that should follow a particular format turns out to be garbage.
 // The result is a *parseError carrying path, file and the offending line.
 func malformedLine(path, file, line string) error {
 	cause := fmt.Errorf("malformed line: %s", line)
 	return &parseError{
 		Path: path,
 		File: file,
 		Err:  cause,
 	}
 }
--- a/libcontainer/cgroups/fs/freezer.go
+++ b/libcontainer/cgroups/fs/freezer.go
@ -1,67 +1,158 @@
 // +build linux
 package fs
 import (
 	"errors"
 	"fmt"
 	"os"
 	"strings"
 	"time"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/sys/unix"
 )
-type FreezerGroup struct {
+type FreezerGroup struct{}
 }
 func (s *FreezerGroup) Name() string {
 	return "freezer"
 }
-func (s *FreezerGroup) Apply(d *cgroupData) error {
+func (s *FreezerGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	_, err := d.join("freezer")
+	return apply(path, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) {
-	switch cgroup.Resources.Freezer {
+	switch r.Freezer {
-	case configs.Frozen, configs.Thawed:
+	case configs.Frozen:
-		for {
+		defer func() {
-			// In case this loop does not exit because it doesn't get the expected
+			if Err != nil {
-			// state, let's write again this state, hoping it's going to be properly
+				// Freezing failed, and it is bad and dangerous
-			// set this time. Otherwise, this loop could run infinitely, waiting for
+				// to leave the cgroup in FROZEN or FREEZING
-			// a state change that would never happen.
+				// state, so (try to) thaw it back.
-			if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
+				_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
 			}
 		}()
 		// As per older kernel docs (freezer-subsystem.txt before
 		// kernel commit ef9fe980c6fcc1821), if FREEZING is seen,
 		// userspace should either retry or thaw. While current
 		// kernel cgroup v1 docs no longer mention a need to retry,
 		// even a recent kernel (v5.4, Ubuntu 20.04) can't reliably
 		// freeze a cgroup v1 while new processes keep appearing in it
 		// (either via fork/clone or by writing new PIDs to
 		// cgroup.procs).
 		//
 		// The numbers below are empirically chosen to have a decent
 		// chance to succeed in various scenarios ("runc pause/unpause
 		// with parallel runc exec" and "bare freeze/unfreeze on a very
 		// slow system"), tested on RHEL7 and Ubuntu 20.04 kernels.
 		//
 		// Adding any amount of sleep in between retries did not
 		// increase the chances of successful freeze in "pause/unpause
 		// with parallel exec" reproducer. OTOH, adding an occasional
 		// sleep helped for the case where the system is extremely slow
 		// (CentOS 7 VM on GHA CI).
 		//
 		// Alas, this is still a game of chance, since the real fix
 		// belongs in the kernel (cgroup v2 does not have this bug).
 		for i := 0; i < 1000; i++ {
 			if i%50 == 49 {
 				// Occasional thaw and sleep improves
 				// the chances to succeed in freezing
 				// in case new processes keep appearing
 				// in the cgroup.
 				_ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
 				time.Sleep(10 * time.Millisecond)
 			}
 			if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil {
 				return err
 			}
-			state, err := fscommon.ReadFile(path, "freezer.state")
+			if i%25 == 24 {
 				// Occasional short sleep before reading
 				// the state back also improves the chances to
 				// succeed in freezing in case of a very slow
 				// system.
 				time.Sleep(10 * time.Microsecond)
 			}
 			state, err := cgroups.ReadFile(path, "freezer.state")
 			if err != nil {
 				return err
 			}
-			if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
+			state = strings.TrimSpace(state)
-				break
+			switch state {
 			case "FREEZING":
 				continue
 			case string(configs.Frozen):
 				if i > 1 {
 					logrus.Debugf("frozen after %d retries", i)
 				}
 				return nil
 			default:
 				// should never happen
 				return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state))
 			}
 			time.Sleep(1 * time.Millisecond)
 		}
 		// Despite our best efforts, it got stuck in FREEZING.
 		return errors.New("unable to freeze")
 	case configs.Thawed:
 		return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed))
 	case configs.Undefined:
 		return nil
 	default:
-		return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
+		return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer))
 	}
 	return nil
 }
 func (s *FreezerGroup) Remove(d *cgroupData) error {
 	return removePath(d.path("freezer"))
 }
 func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
 	return nil
 }
 // GetState reports the freezer state of the cgroup directory at path.
 //
 // It reads freezer.state and, while the cgroup reports FREEZING, polls
 // again after a 1ms sleep until a stable state is seen. When freezer.state
 // reads FROZEN, freezer.self_freezing is consulted to tell whether the
 // cgroup itself is frozen ("1") or only an ancestor is ("0", reported as
 // Thawed).
 //
 // A missing file or ENODEV is not an error: a missing freezer.state yields
 // (Undefined, nil) and a missing freezer.self_freezing yields (Frozen, nil).
 // Any other read failure or an unrecognized value is returned as an error.
 func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) {
 	for {
 		state, err := cgroups.ReadFile(path, "freezer.state")
 		if err != nil {
 			// If the kernel is too old, then we just treat the freezer as
 			// being in an "undefined" state. errors.Is (rather than
 			// os.IsNotExist) also matches wrapped errors and keeps this
 			// check consistent with the self_freezing one below.
 			if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
 				err = nil
 			}
 			return configs.Undefined, err
 		}
 		switch strings.TrimSpace(state) {
 		case "THAWED":
 			return configs.Thawed, nil
 		case "FROZEN":
 			// Find out whether the cgroup is frozen directly,
 			// or indirectly via an ancestor.
 			self, err := cgroups.ReadFile(path, "freezer.self_freezing")
 			if err != nil {
 				// If the kernel is too old, then we just treat
 				// it as being frozen.
 				if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) {
 					err = nil
 				}
 				return configs.Frozen, err
 			}
 			switch self {
 			case "0\n":
 				return configs.Thawed, nil
 			case "1\n":
 				return configs.Frozen, nil
 			default:
 				return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self)
 			}
 		case "FREEZING":
 			// Make sure we get a stable freezer state, so retry if the cgroup
 			// is still undergoing freezing. This should be a temporary delay.
 			time.Sleep(1 * time.Millisecond)
 			continue
 		default:
 			return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state)
 		}
 	}
 }
--- a/libcontainer/cgroups/fs/freezer_test.go
+++ b/libcontainer/cgroups/fs/freezer_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -10,22 +8,23 @@ import (
 )
 func TestFreezerSetState(t *testing.T) {
-	helper := NewCgroupTestUtil("freezer", t)
+	path := tempDir(t, "freezer")
 	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"freezer.state": string(configs.Frozen),
 	})
-	helper.CgroupData.config.Resources.Freezer = configs.Thawed
+	r := &configs.Resources{
 		Freezer: configs.Thawed,
 	}
 	freezer := &FreezerGroup{}
-	if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := freezer.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "freezer.state")
+	value, err := fscommon.GetCgroupParamString(path, "freezer.state")
 	if err != nil {
-		t.Fatalf("Failed to parse freezer.state - %s", err)
+		t.Fatal(err)
 	}
 	if value != string(configs.Thawed) {
 		t.Fatal("Got the wrong value, set freezer.state failed.")
@ -33,16 +32,15 @@ func TestFreezerSetState(t *testing.T) {
 }
 func TestFreezerSetInvalidState(t *testing.T) {
-	helper := NewCgroupTestUtil("freezer", t)
+	path := tempDir(t, "freezer")
 	defer helper.cleanup()
-	const (
+	const invalidArg configs.FreezerState = "Invalid"
 		invalidArg configs.FreezerState = "Invalid"
 	)
-	helper.CgroupData.config.Resources.Freezer = invalidArg
+	r := &configs.Resources{
 		Freezer: invalidArg,
 	}
 	freezer := &FreezerGroup{}
-	if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
+	if err := freezer.Set(path, r); err == nil {
 		t.Fatal("Failed to return invalid argument error")
 	}
 }
--- a/libcontainer/cgroups/fs/fs.go
+++ b/libcontainer/cgroups/fs/fs.go
@ -0,0 +1,264 @@
 package fs
 import (
 	"errors"
 	"fmt"
 	"os"
 	"sync"
 	"golang.org/x/sys/unix"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 // subsystems lists the cgroup subsystem handlers that the manager's Apply,
 // Set and GetStats methods iterate over, in this order. init may append
 // one more entry when running in cgroup v2 hybrid mode.
 var subsystems = []subsystem{
 	&CpusetGroup{},
 	&DevicesGroup{},
 	&MemoryGroup{},
 	&CpuGroup{},
 	&CpuacctGroup{},
 	&PidsGroup{},
 	&BlkioGroup{},
 	&HugetlbGroup{},
 	&NetClsGroup{},
 	&NetPrioGroup{},
 	&PerfEventGroup{},
 	&FreezerGroup{},
 	&RdmaGroup{},
 	&NameGroup{GroupName: "name=systemd", Join: true},
 }
 // errSubsystemDoesNotExist is a sentinel error for a subsystem that has no
 // cgroup path to operate on.
 var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
 // init extends subsystems for cgroups-hybrid mode.
 func init() {
 	if !cgroups.IsCgroup2HybridMode() {
 		return
 	}
 	// In hybrid mode, add a "" controller indicating that the
 	// container should also join the cgroups v2 hierarchy.
 	hybrid := &NameGroup{GroupName: "", Join: true}
 	subsystems = append(subsystems, hybrid)
 }
 // subsystem is the per-controller interface implemented by every entry of
 // the subsystems list; the manager calls these methods with the cgroup
 // directory path it holds for the subsystem's name.
 type subsystem interface {
 	// Name returns the name of the subsystem.
 	Name() string
 	// GetStats fills in the stats for the subsystem.
 	GetStats(path string, stats *cgroups.Stats) error
 	// Apply creates and joins a cgroup, adding pid into it. Some
 	// subsystems use resources to pre-configure the cgroup parents
 	// before creating or joining it.
 	Apply(path string, r *configs.Resources, pid int) error
 	// Set sets the cgroup resources.
 	Set(path string, r *configs.Resources) error
 }
 // manager is the cgroup manager value returned by NewManager.
 type manager struct {
 	// mu is held by Apply, Destroy, Path, GetStats, Set and GetPaths
 	// while they access paths.
 	mu sync.Mutex
 	// cgroups is the configuration the manager was created with.
 	cgroups *configs.Cgroup
 	// paths maps a subsystem name to its cgroup directory path. Apply
 	// deletes entries for subsystems whose errors it chose to ignore.
 	paths map[string]string
 }
 // NewManager returns a cgroup manager for cg.
 //
 // cg.Resources must be non-nil and must not carry Unified settings;
 // otherwise an error is returned. When paths is nil the per-subsystem
 // paths are computed with initPaths; a non-nil paths map is used as given.
 func NewManager(cg *configs.Cgroup, paths map[string]string) (cgroups.Manager, error) {
 	switch {
 	case cg.Resources == nil:
 		// Some v1 controllers (cpu, cpuset, and devices) expect
 		// cgroups.Resources to not be nil in Apply.
 		return nil, errors.New("cgroup v1 manager needs configs.Resources to be set during manager creation")
 	case cg.Resources.Unified != nil:
 		return nil, cgroups.ErrV1NoUnified
 	}
 	if paths == nil {
 		computed, err := initPaths(cg)
 		if err != nil {
 			return nil, err
 		}
 		paths = computed
 	}
 	m := &manager{
 		cgroups: cg,
 		paths:   paths,
 	}
 	return m, nil
 }
 // isIgnorableError reports whether err is a permission error in the loose
 // sense of the word: the normal EPERM, plus EROFS and EACCES, which for an
 // unprivileged user amount to the same thing. It always returns false when
 // rootless is false.
 func isIgnorableError(rootless bool, err error) bool {
 	// Errors are never ignored when running as root.
 	if !rootless {
 		return false
 	}
 	// An ordinary EPERM.
 	if errors.Is(err, os.ErrPermission) {
 		return true
 	}
 	// Some specific syscall errors.
 	var errno unix.Errno
 	if !errors.As(err, &errno) {
 		return false
 	}
 	switch errno {
 	case unix.EROFS, unix.EPERM, unix.EACCES:
 		return true
 	default:
 		return false
 	}
 }
 // Apply calls Apply on every subsystem that has an entry in m.paths,
 // passing it that path, the configured resources and pid. It stops at the
 // first error that is not ignored and returns it.
 func (m *manager) Apply(pid int) (err error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	cg := m.cgroups
 	for _, sys := range subsystems {
 		name := sys.Name()
 		dir, found := m.paths[name]
 		if !found {
 			continue
 		}
 		applyErr := sys.Apply(dir, cg.Resources, pid)
 		if applyErr == nil {
 			continue
 		}
 		// In the case of rootless (including euid=0 in userns), where an
 		// explicit cgroup path hasn't been set, we don't bail on error in
 		// case of permission problems here, but do delete the path from
 		// the m.paths map, since it is either non-existent and could not
 		// be created, or the pid could not be added to it.
 		//
 		// Cases where limits for the subsystem have been set are handled
 		// later by Set, which fails with a friendly error (see
 		// if path == "" in Set).
 		if isIgnorableError(cg.Rootless, applyErr) && cg.Path == "" {
 			delete(m.paths, name)
 			continue
 		}
 		return applyErr
 	}
 	return nil
 }
 // Destroy passes the manager's paths to cgroups.RemovePaths while holding
 // the manager lock, and returns its result.
 func (m *manager) Destroy() error {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	return cgroups.RemovePaths(m.paths)
 }
 // Path returns the cgroup path recorded for subsys, or "" when the
 // manager has no entry for it.
 func (m *manager) Path(subsys string) string {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	return m.paths[subsys]
 }
 // GetStats collects statistics from every subsystem that has a non-empty
 // path into a fresh Stats value. The first subsystem error aborts the
 // collection and is returned with a nil Stats.
 func (m *manager) GetStats() (*cgroups.Stats, error) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	collected := cgroups.NewStats()
 	for _, sys := range subsystems {
 		if dir := m.paths[sys.Name()]; dir != "" {
 			if err := sys.GetStats(dir, collected); err != nil {
 				return nil, err
 			}
 		}
 	}
 	return collected, nil
 }
 // Set applies r to every subsystem. A nil r is a no-op, and Unified
 // resources are rejected with cgroups.ErrV1NoUnified. The first subsystem
 // error that is not ignored ends the loop and is reported.
 func (m *manager) Set(r *configs.Resources) error {
 	switch {
 	case r == nil:
 		return nil
 	case r.Unified != nil:
 		return cgroups.ErrV1NoUnified
 	}
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	for _, sys := range subsystems {
 		name := sys.Name()
 		dir := m.paths[name]
 		setErr := sys.Set(dir, r)
 		if setErr == nil {
 			continue
 		}
 		// When rootless is true, errors from the device subsystem
 		// are ignored, as it is really not expected to work.
 		if m.cgroups.Rootless && name == "devices" {
 			continue
 		}
 		// However, errors from other subsystems are not ignored.
 		// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
 		if dir == "" {
 			// We never created a path for this cgroup, so we cannot set
 			// limits for it (though we have already tried at this point).
 			return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", name)
 		}
 		return setErr
 	}
 	return nil
 }
 // Freeze switches the container's freezer cgroup to the given state. The
 // new state is recorded in the manager's resources before it is applied,
 // and the previous value is restored if applying it fails. An error is
 // returned when the manager has no freezer path.
 func (m *manager) Freeze(state configs.FreezerState) error {
 	dir := m.Path("freezer")
 	if dir == "" {
 		return errors.New("cannot toggle freezer: cgroups not configured for container")
 	}
 	res := m.cgroups.Resources
 	saved := res.Freezer
 	res.Freezer = state
 	group := &FreezerGroup{}
 	err := group.Set(dir, res)
 	if err != nil {
 		// Roll back the recorded state so it matches what was applied.
 		res.Freezer = saved
 		return err
 	}
 	return nil
 }
 // GetPids returns the result of cgroups.GetPids for the manager's
 // "devices" cgroup path.
 func (m *manager) GetPids() ([]int, error) {
 	return cgroups.GetPids(m.Path("devices"))
 }
 // GetAllPids returns the result of cgroups.GetAllPids for the manager's
 // "devices" cgroup path.
 func (m *manager) GetAllPids() ([]int, error) {
 	return cgroups.GetAllPids(m.Path("devices"))
 }
 // GetPaths returns the manager's subsystem-to-path map. The map itself is
 // returned, not a copy, so the lock only covers reading the field.
 func (m *manager) GetPaths() map[string]string {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	return m.paths
 }
 // GetCgroups returns the cgroup configuration held by the manager; the
 // error is always nil.
 func (m *manager) GetCgroups() (*configs.Cgroup, error) {
 	return m.cgroups, nil
 }
 // GetFreezerState reports the freezer state of the manager's "freezer"
 // cgroup. A container without a freezer cgroup is reported as
 // configs.Undefined with a nil error.
 func (m *manager) GetFreezerState() (configs.FreezerState, error) {
 	if dir := m.Path("freezer"); dir != "" {
 		group := &FreezerGroup{}
 		return group.GetState(dir)
 	}
 	// No freezer cgroup, so the state is undefined.
 	return configs.Undefined, nil
 }
 // Exists reports whether the manager's "devices" cgroup path exists, as
 // determined by cgroups.PathExists.
 func (m *manager) Exists() bool {
 	return cgroups.PathExists(m.Path("devices"))
 }
 // OOMKillCount returns the value of the "oom_kill" key in the
 // memory.oom_control file of the cgroup directory at path.
 func OOMKillCount(path string) (uint64, error) {
 	return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
 }
 // OOMKillCount returns the oom_kill counter read from the manager's
 // "memory" cgroup path. For a rootless manager a not-exist error is
 // dropped and the count is returned with a nil error; every other error
 // is passed through.
 func (m *manager) OOMKillCount() (uint64, error) {
 	c, err := OOMKillCount(m.Path("memory"))
 	// Ignore ENOENT when rootless as it couldn't create cgroup.
 	// errors.Is is used rather than os.IsNotExist so that the check also
 	// matches a not-exist error that has been wrapped.
 	if err != nil && m.cgroups.Rootless && errors.Is(err, os.ErrNotExist) {
 		err = nil
 	}
 	return c, err
 }
--- a/libcontainer/cgroups/fs/fs_test.go
+++ b/libcontainer/cgroups/fs/fs_test.go
@ -0,0 +1,50 @@
 package fs
 import (
 	"testing"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 // BenchmarkGetStats measures manager.GetStats. It is skipped in cgroup v2
 // unified mode. A manager is created and applied with pid -1 before the
 // timed loop and destroyed afterwards.
 func BenchmarkGetStats(b *testing.B) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		b.Skip("cgroup v2 is not supported")
 	}
 	// Unset TestMode as we work with real cgroupfs here,
 	// and we want OpenFile to perform the fstype check.
 	cgroups.TestMode = false
 	defer func() { cgroups.TestMode = true }()
 	m, err := NewManager(&configs.Cgroup{
 		Path:      "/some/kind/of/a/path/here",
 		Resources: &configs.Resources{},
 	}, nil)
 	if err != nil {
 		b.Fatal(err)
 	}
 	if err = m.Apply(-1); err != nil {
 		b.Fatal(err)
 	}
 	defer func() { _ = m.Destroy() }()
 	var last *cgroups.Stats
 	b.ResetTimer()
 	for n := 0; n < b.N; n++ {
 		if last, err = m.GetStats(); err != nil {
 			b.Fatal(err)
 		}
 	}
 	// Sanity check on the last result: total CPU usage is expected to be 0.
 	if last.CpuStats.CpuUsage.TotalUsage != 0 {
 		b.Fatalf("stats: %+v", last)
 	}
 }
--- a/libcontainer/cgroups/fs/fs_unsupported.go
+++ b/libcontainer/cgroups/fs/fs_unsupported.go
@ -1,3 +0,0 @@
 // +build !linux
 package fs
--- a/libcontainer/cgroups/fs/hugetlb.go
+++ b/libcontainer/cgroups/fs/hugetlb.go
@ -1,35 +1,26 @@
 // +build linux
 package fs
 import (
 	"fmt"
 	"strconv"
 	"strings"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
-type HugetlbGroup struct {
+type HugetlbGroup struct{}
 }
 // Name returns the fixed identifier "hugetlb" for this group.
 func (s *HugetlbGroup) Name() string {
 	return "hugetlb"
 }
-func (s *HugetlbGroup) Apply(d *cgroupData) error {
+func (s *HugetlbGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	_, err := d.join("hugetlb")
+	return apply(path, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *HugetlbGroup) Set(path string, r *configs.Resources) error {
-	for _, hugetlb := range cgroup.Resources.HugetlbLimit {
+	for _, hugetlb := range r.HugetlbLimit {
-		if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
+		if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
 			return err
 		}
 	}
@ -37,31 +28,30 @@ func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
 	return nil
 }
 // Remove resolves the "hugetlb" path from d and hands the result, together
 // with any lookup error, to removePath.
 func (s *HugetlbGroup) Remove(d *cgroupData) error {
 	hugetlbPath, err := d.path("hugetlb")
 	return removePath(hugetlbPath, err)
 }
 func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
 	if !cgroups.PathExists(path) {
 		return nil
 	}
 	hugetlbStats := cgroups.HugetlbStats{}
-	for _, pageSize := range HugePageSizes {
+	for _, pageSize := range cgroups.HugePageSizes() {
-		usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
+		usage := "hugetlb." + pageSize + ".usage_in_bytes"
 		value, err := fscommon.GetCgroupParamUint(path, usage)
 		if err != nil {
-			return fmt.Errorf("failed to parse %s - %v", usage, err)
+			return err
 		}
 		hugetlbStats.Usage = value
-		maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
+		maxUsage := "hugetlb." + pageSize + ".max_usage_in_bytes"
 		value, err = fscommon.GetCgroupParamUint(path, maxUsage)
 		if err != nil {
-			return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
+			return err
 		}
 		hugetlbStats.MaxUsage = value
-		failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
+		failcnt := "hugetlb." + pageSize + ".failcnt"
 		value, err = fscommon.GetCgroupParamUint(path, failcnt)
 		if err != nil {
-			return fmt.Errorf("failed to parse %s - %v", failcnt, err)
+			return err
 		}
 		hugetlbStats.Failcnt = value
--- a/libcontainer/cgroups/fs/hugetlb_test.go
+++ b/libcontainer/cgroups/fs/hugetlb_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -18,7 +16,7 @@ const (
 	hugetlbFailcnt          = "100\n"
 )
-var (
+const (
 	usage    = "hugetlb.%s.usage_in_bytes"
 	limit    = "hugetlb.%s.limit_in_bytes"
 	maxUsage = "hugetlb.%s.max_usage_in_bytes"
@ -26,38 +24,38 @@ var (
 )
 func TestHugetlbSetHugetlb(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
+	path := tempDir(t, "hugetlb")
 	defer helper.cleanup()
 	const (
 		hugetlbBefore = 256
 		hugetlbAfter  = 512
 	)
-	for _, pageSize := range HugePageSizes {
+	for _, pageSize := range cgroups.HugePageSizes() {
-		helper.writeFileContents(map[string]string{
+		writeFileContents(t, path, map[string]string{
 			fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
 		})
 	}
-	for _, pageSize := range HugePageSizes {
+	r := &configs.Resources{}
-		helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
+	for _, pageSize := range cgroups.HugePageSizes() {
 		r.HugetlbLimit = []*configs.HugepageLimit{
 			{
 				Pagesize: pageSize,
 				Limit:    hugetlbAfter,
 			},
 		}
 		hugetlb := &HugetlbGroup{}
-		if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+		if err := hugetlb.Set(path, r); err != nil {
 			t.Fatal(err)
 		}
 	}
-	for _, pageSize := range HugePageSizes {
+	for _, pageSize := range cgroups.HugePageSizes() {
 		limit := fmt.Sprintf(limit, pageSize)
-		value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, limit)
+		value, err := fscommon.GetCgroupParamUint(path, limit)
 		if err != nil {
-			t.Fatalf("Failed to parse %s - %s", limit, err)
+			t.Fatal(err)
 		}
 		if value != hugetlbAfter {
 			t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
@ -66,10 +64,9 @@ func TestHugetlbSetHugetlb(t *testing.T) {
 }
 func TestHugetlbStats(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
+	path := tempDir(t, "hugetlb")
-	defer helper.cleanup()
+	for _, pageSize := range cgroups.HugePageSizes() {
-	for _, pageSize := range HugePageSizes {
+		writeFileContents(t, path, map[string]string{
 		helper.writeFileContents(map[string]string{
 			fmt.Sprintf(usage, pageSize):    hugetlbUsageContents,
 			fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
 			fmt.Sprintf(failcnt, pageSize):  hugetlbFailcnt,
@ -78,56 +75,50 @@ func TestHugetlbStats(t *testing.T) {
 	hugetlb := &HugetlbGroup{}
 	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
+	err := hugetlb.GetStats(path, &actualStats)
 	if err != nil {
 		t.Fatal(err)
 	}
 	expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
-	for _, pageSize := range HugePageSizes {
+	for _, pageSize := range cgroups.HugePageSizes() {
 		expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
 	}
 }
 func TestHugetlbStatsNoUsageFile(t *testing.T) {
-t.Skip("Disabled unreliable test")
+	path := tempDir(t, "hugetlb")
-	helper := NewCgroupTestUtil("hugetlb", t)
+	writeFileContents(t, path, map[string]string{
 	defer helper.cleanup()
 	helper.writeFileContents(map[string]string{
 		maxUsage: hugetlbMaxUsageContents,
 	})
 	hugetlb := &HugetlbGroup{}
 	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
+	err := hugetlb.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
-t.Skip("Disabled unreliable test")
+	path := tempDir(t, "hugetlb")
-	helper := NewCgroupTestUtil("hugetlb", t)
+	for _, pageSize := range cgroups.HugePageSizes() {
-	defer helper.cleanup()
+		writeFileContents(t, path, map[string]string{
 	for _, pageSize := range HugePageSizes {
 		helper.writeFileContents(map[string]string{
 			fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
 		})
 	}
 	hugetlb := &HugetlbGroup{}
 	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
+	err := hugetlb.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestHugetlbStatsBadUsageFile(t *testing.T) {
-t.Skip("Disabled unreliable test")
+	path := tempDir(t, "hugetlb")
-	helper := NewCgroupTestUtil("hugetlb", t)
+	for _, pageSize := range cgroups.HugePageSizes() {
-	defer helper.cleanup()
+		writeFileContents(t, path, map[string]string{
 	for _, pageSize := range HugePageSizes {
 		helper.writeFileContents(map[string]string{
 			fmt.Sprintf(usage, pageSize): "bad",
 			maxUsage:                     hugetlbMaxUsageContents,
 		})
@ -135,24 +126,22 @@ t.Skip("Disabled unreliable test")
 	hugetlb := &HugetlbGroup{}
 	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
+	err := hugetlb.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
-t.Skip("Disabled unreliable test")
+	path := tempDir(t, "hugetlb")
-	helper := NewCgroupTestUtil("hugetlb", t)
+	writeFileContents(t, path, map[string]string{
 	defer helper.cleanup()
 	helper.writeFileContents(map[string]string{
 		usage:    hugetlbUsageContents,
 		maxUsage: "bad",
 	})
 	hugetlb := &HugetlbGroup{}
 	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
+	err := hugetlb.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
--- a/libcontainer/cgroups/fs/kmem.go
+++ b/libcontainer/cgroups/fs/kmem.go
@ -1,62 +0,0 @@
 // +build linux,!nokmem
 package fs
 import (
 	"errors"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
 	"syscall" // for Errno type only
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"golang.org/x/sys/unix"
 )
 const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
 func EnableKernelMemoryAccounting(path string) error {
 	// Ensure that kernel memory is available in this kernel build. If it
 	// isn't, we just ignore it because EnableKernelMemoryAccounting is
 	// automatically called for all memory limits.
 	if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
 		return nil
 	}
 	// We have to limit the kernel memory here as it won't be accounted at all
 	// until a limit is set on the cgroup and limit cannot be set once the
 	// cgroup has children, or if there are already tasks in the cgroup.
 	for _, i := range []int64{1, -1} {
 		if err := setKernelMemory(path, i); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 // setKernelMemory writes kernelMemoryLimit to memory.kmem.limit_in_bytes in
 // the cgroup directory path.
 //
 // It returns an error when path is empty, when the limit file does not
 // exist, or when the write fails. A write that fails with EBUSY is reported
 // with a dedicated message explaining the likely cause.
 func setKernelMemory(path string, kernelMemoryLimit int64) error {
 	if path == "" {
 		return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
 	}
 	limitFile := filepath.Join(path, cgroupKernelMemoryLimit)
 	if !cgroups.PathExists(limitFile) {
 		// The caller explicitly asked for a kmem limit, so lack of kernel
 		// support has to be reported rather than ignored.
 		return errors.New("kernel memory accounting not supported by this kernel")
 	}
 	value := strconv.FormatInt(kernelMemoryLimit, 10)
 	err := ioutil.WriteFile(limitFile, []byte(value), 0700)
 	if err == nil {
 		return nil
 	}
 	// The kernel answers EBUSY to writes on memory.kmem.limit_in_bytes when
 	// the cgroup has children or tasks have already been attached to it.
 	if pathErr, ok := err.(*os.PathError); ok {
 		if errNo, ok := pathErr.Err.(syscall.Errno); ok && errNo == unix.EBUSY {
 			return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
 		}
 	}
 	return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
 }
--- a/libcontainer/cgroups/fs/kmem_disabled.go
+++ b/libcontainer/cgroups/fs/kmem_disabled.go
@ -1,15 +0,0 @@
 // +build linux,nokmem
 package fs
 import (
 	"errors"
 )
 // EnableKernelMemoryAccounting is the stub used when this file's build
 // constraint (linux,nokmem) is selected: it does nothing and always
 // returns nil.
 func EnableKernelMemoryAccounting(path string) error {
 	return nil
 }
 // setKernelMemory is the stub used when this file's build constraint
 // (linux,nokmem) is selected: it ignores its arguments and always returns
 // an error stating that kernel memory accounting is disabled in this build.
 func setKernelMemory(path string, kernelMemoryLimit int64) error {
 	return errors.New("kernel memory accounting disabled in this runc build")
 }
--- a/libcontainer/cgroups/fs/memory.go
+++ b/libcontainer/cgroups/fs/memory.go
@ -1,15 +1,17 @@
 // +build linux
 package fs
 import (
 	"bufio"
 	"errors"
 	"fmt"
 	"math"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"golang.org/x/sys/unix"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
@ -18,65 +20,66 @@ import (
 const (
 	cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
 	cgroupMemoryLimit     = "memory.limit_in_bytes"
 	cgroupMemoryUsage     = "memory.usage_in_bytes"
 	cgroupMemoryMaxUsage  = "memory.max_usage_in_bytes"
 )
-type MemoryGroup struct {
+type MemoryGroup struct{}
 }
 // Name returns the fixed identifier "memory" for this group.
 func (s *MemoryGroup) Name() string {
 	return "memory"
 }
-func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
+func (s *MemoryGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	path, err := d.path("memory")
+	return apply(path, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	} else if path == "" {
 		return nil
 	}
 	if memoryAssigned(d.config) {
 		if _, err := os.Stat(path); os.IsNotExist(err) {
 			if err := os.MkdirAll(path, 0755); err != nil {
 				return err
 			}
 			// Only enable kernel memory accounting when this cgroup
 			// is created by libcontainer, otherwise we might get an
 			// error when people use `cgroupsPath` to join an existing
 			// cgroup whose kernel memory is not initialized.
 			if err := EnableKernelMemoryAccounting(path); err != nil {
 				return err
 			}
 		}
 	}
 	defer func() {
 		if err != nil {
 			os.RemoveAll(path)
 		}
 	}()
 	// We need to join memory cgroup after set memory limits, because
 	// kmem.limit_in_bytes can only be set when the cgroup is empty.
 	_, err = d.join("memory")
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
+func setMemory(path string, val int64) error {
-	// If the memory update is set to -1 we should also
+	if val == 0 {
-	// set swap to -1, it means unlimited memory.
+		return nil
-	if cgroup.Resources.Memory == -1 {
+	}
 	err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
 	if !errors.Is(err, unix.EBUSY) {
 		return err
 	}
 	// EBUSY means the kernel can't set new limit as it's too low
 	// (lower than the current usage). Return more specific error.
 	usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
 	if err != nil {
 		return err
 	}
 	max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
 	if err != nil {
 		return err
 	}
 	return fmt.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
 }
 // setSwap writes val to memory.memsw.limit_in_bytes in the cgroup directory
 // path. A val of 0 means "not set": nothing is written and nil is returned.
 func setSwap(path string, val int64) error {
 	if val != 0 {
 		limit := strconv.FormatInt(val, 10)
 		return cgroups.WriteFile(path, cgroupMemorySwapLimit, limit)
 	}
 	return nil
 }
 func setMemoryAndSwap(path string, r *configs.Resources) error {
 	// If the memory update is set to -1 and the swap is not explicitly
 	// set, we should also set swap to -1, it means unlimited memory.
 	if r.Memory == -1 && r.MemorySwap == 0 {
 		// Only set swap if it's enabled in kernel
 		if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
-			cgroup.Resources.MemorySwap = -1
+			r.MemorySwap = -1
 		}
 	}
 	// When memory and swap memory are both set, we need to handle the cases
 	// for updating container.
-	if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
+	if r.Memory != 0 && r.MemorySwap != 0 {
-		memoryUsage, err := getMemoryData(path, "")
+		curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
 		if err != nil {
 			return err
 		}
@ -84,84 +87,61 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
 		// When update memory limit, we should adapt the write sequence
 		// for memory and swap memory, so it won't fail because the new
 		// value and the old value don't fit kernel's validation.
-		if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
+		if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
-			if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+			if err := setSwap(path, r.MemorySwap); err != nil {
 				return err
 			}
-			if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+			if err := setMemory(path, r.Memory); err != nil {
 				return err
 			}
 		} else {
 			if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
 			if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
 		}
 	} else {
 		if cgroup.Resources.Memory != 0 {
 			if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
 				return err
 			}
 		}
 		if cgroup.Resources.MemorySwap != 0 {
 			if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
 				return err
 			}
 			return nil
 		}
 	}
-	return nil
+	if err := setMemory(path, r.Memory); err != nil {
-}
+		return err
-
+	}
-func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
+	if err := setSwap(path, r.MemorySwap); err != nil {
 	if err := setMemoryAndSwap(path, cgroup); err != nil {
 		return err
 	}
-	if cgroup.Resources.KernelMemory != 0 {
+	return nil
-		if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
+}
 func (s *MemoryGroup) Set(path string, r *configs.Resources) error {
 	if err := setMemoryAndSwap(path, r); err != nil {
 		return err
 	}
 	// ignore KernelMemory and KernelMemoryTCP
 	if r.MemoryReservation != 0 {
 		if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil {
 			return err
 		}
 	}
-	if cgroup.Resources.MemoryReservation != 0 {
+	if r.OomKillDisable {
-		if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
+		if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil {
 			return err
 		}
 	}
-
+	if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 {
 	if cgroup.Resources.KernelMemoryTCP != 0 {
 		if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
 			return err
 		}
 	}
 	if cgroup.Resources.OomKillDisable {
 		if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
 			return err
 		}
 	}
 	if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
 		return nil
-	} else if *cgroup.Resources.MemorySwappiness <= 100 {
+	} else if *r.MemorySwappiness <= 100 {
-		if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
+		if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil {
 			return err
 		}
 	} else {
-		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
+		return fmt.Errorf("invalid memory swappiness value: %d (valid range is 0-100)", *r.MemorySwappiness)
 	}
 	return nil
 }
 // Remove resolves the "memory" path from d and hands the result, together
 // with any lookup error, to removePath.
 func (s *MemoryGroup) Remove(d *cgroupData) error {
 	memoryPath, err := d.path("memory")
 	return removePath(memoryPath, err)
 }
 func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
-	// Set stats from memory.stat.
+	const file = "memory.stat"
-	statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
+	statsFile, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	if err != nil {
 		if os.IsNotExist(err) {
 			return nil
@ -172,9 +152,9 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
 	sc := bufio.NewScanner(statsFile)
 	for sc.Scan() {
-		t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
+		t, v, err := fscommon.ParseKeyValue(sc.Text())
 		if err != nil {
-			return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
+			return &parseError{Path: path, File: file, Err: err}
 		}
 		stats.MemoryStats.Stats[t] = v
 	}
@ -201,25 +181,21 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
 	}
 	stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
-	useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
+	value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy")
 	value, err := fscommon.GetCgroupParamUint(path, useHierarchy)
 	if err != nil {
 		return err
 	}
 	if value == 1 {
 		stats.MemoryStats.UseHierarchy = true
 	}
 	return nil
 }
-func memoryAssigned(cgroup *configs.Cgroup) bool {
+	pagesByNUMA, err := getPageUsageByNUMA(path)
-	return cgroup.Resources.Memory != 0 ||
+	if err != nil {
-		cgroup.Resources.MemoryReservation != 0 ||
+		return err
-		cgroup.Resources.MemorySwap > 0 ||
+	}
-		cgroup.Resources.KernelMemory > 0 ||
+	stats.MemoryStats.PageUsageByNUMA = pagesByNUMA
-		cgroup.Resources.KernelMemoryTCP > 0 ||
+
-		cgroup.Resources.OomKillDisable ||
+	return nil
 		(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
 }
 func getMemoryData(path, name string) (cgroups.MemoryData, error) {
@ -227,45 +203,146 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
 	moduleName := "memory"
 	if name != "" {
-		moduleName = strings.Join([]string{"memory", name}, ".")
+		moduleName = "memory." + name
 	}
-	usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
+	var (
-	maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
+		usage    = moduleName + ".usage_in_bytes"
-	failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
+		maxUsage = moduleName + ".max_usage_in_bytes"
-	limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
+		failcnt  = moduleName + ".failcnt"
 		limit    = moduleName + ".limit_in_bytes"
 	)
 	value, err := fscommon.GetCgroupParamUint(path, usage)
 	if err != nil {
-		if moduleName != "memory" && os.IsNotExist(err) {
+		if name != "" && os.IsNotExist(err) {
 			// Ignore ENOENT as swap and kmem controllers
 			// are optional in the kernel.
 			return cgroups.MemoryData{}, nil
 		}
-		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
+		return cgroups.MemoryData{}, err
 	}
 	memoryData.Usage = value
 	value, err = fscommon.GetCgroupParamUint(path, maxUsage)
 	if err != nil {
-		if moduleName != "memory" && os.IsNotExist(err) {
+		return cgroups.MemoryData{}, err
 			return cgroups.MemoryData{}, nil
 		}
 		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
 	}
 	memoryData.MaxUsage = value
 	value, err = fscommon.GetCgroupParamUint(path, failcnt)
 	if err != nil {
-		if moduleName != "memory" && os.IsNotExist(err) {
+		return cgroups.MemoryData{}, err
 			return cgroups.MemoryData{}, nil
 		}
 		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
 	}
 	memoryData.Failcnt = value
 	value, err = fscommon.GetCgroupParamUint(path, limit)
 	if err != nil {
-		if moduleName != "memory" && os.IsNotExist(err) {
+		return cgroups.MemoryData{}, err
 			return cgroups.MemoryData{}, nil
 		}
 		return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
 	}
 	memoryData.Limit = value
 	return memoryData, nil
 }
 // getPageUsageByNUMA parses the memory.numa_stat file found under path and
 // returns, for every recognized counter, its total together with the
 // per-node breakdown.
 //
 // A missing file is not an error: the zero-valued result and a nil error are
 // returned. Lines whose first column is not in key=value form, and lines
 // whose counter name is not known to getNUMAField, are skipped. Any other
 // deviation from the expected format yields an error; in that case the
 // returned stats may be partially populated and should not be used.
 func getPageUsageByNUMA(path string) (cgroups.PageUsageByNUMA, error) {
 	const (
 		// One column for the counter itself plus one column per node ID
 		// that fits the uint8 map key (N0 through N255, i.e. 256 nodes).
 		// With a smaller limit SplitN would fuse the trailing node columns
 		// of such a line into one and its value would fail to parse.
 		maxColumns = 1 + (math.MaxUint8 + 1)
 		file       = "memory.numa_stat"
 	)
 	stats := cgroups.PageUsageByNUMA{}
 	fd, err := cgroups.OpenFile(path, file, os.O_RDONLY)
 	if os.IsNotExist(err) {
 		return stats, nil
 	} else if err != nil {
 		return stats, err
 	}
 	defer fd.Close()
 	// File format is documented in linux/Documentation/cgroup-v1/memory.txt
 	// and it looks like this:
 	//
 	// total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
 	// file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
 	// anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
 	// unevictable=<total unevictable pages> N0=<node 0 pages> N1=<node 1 pages> ...
 	// hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
 	scanner := bufio.NewScanner(fd)
 	for scanner.Scan() {
 		// field points at the counter being filled for the current line; it
 		// is set while handling column 0 and used by the node columns.
 		var field *cgroups.PageStats
 		line := scanner.Text()
 		columns := strings.SplitN(line, " ", maxColumns)
 		for i, column := range columns {
 			byNode := strings.SplitN(column, "=", 2)
 			// Some custom kernels have non-standard fields, like
 			//   numa_locality 0 0 0 0 0 0 0 0 0 0
 			//   numa_exectime 0
 			if len(byNode) < 2 {
 				if i == 0 {
 					// Ignore/skip those lines entirely.
 					break
 				}
 				// The first column was already validated,
 				// so be strict about the rest.
 				return stats, malformedLine(path, file, line)
 			}
 			key, val := byNode[0], byNode[1]
 			if i == 0 { // First column: key is name, val is total.
 				field = getNUMAField(&stats, key)
 				if field == nil { // unknown field (new kernel?)
 					break
 				}
 				field.Total, err = strconv.ParseUint(val, 0, 64)
 				if err != nil {
 					return stats, &parseError{Path: path, File: file, Err: err}
 				}
 				field.Nodes = map[uint8]uint64{}
 			} else { // Subsequent columns: key is N<id>, val is usage.
 				if len(key) < 2 || key[0] != 'N' {
 					// This is definitely an error.
 					return stats, malformedLine(path, file, line)
 				}
 				// Bit size 8 keeps the node ID within the uint8 map key.
 				n, err := strconv.ParseUint(key[1:], 10, 8)
 				if err != nil {
 					return stats, &parseError{Path: path, File: file, Err: err}
 				}
 				usage, err := strconv.ParseUint(val, 10, 64)
 				if err != nil {
 					return stats, &parseError{Path: path, File: file, Err: err}
 				}
 				field.Nodes[uint8(n)] = usage
 			}
 		}
 	}
 	if err := scanner.Err(); err != nil {
 		return cgroups.PageUsageByNUMA{}, &parseError{Path: path, File: file, Err: err}
 	}
 	return stats, nil
 }
 // getNUMAField maps a counter name taken from the first column of a
 // memory.numa_stat line to the matching PageStats field inside stats.
 // It returns nil for names it does not know.
 func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats {
 	var field *cgroups.PageStats
 	switch name {
 	case "total":
 		field = &stats.Total
 	case "file":
 		field = &stats.File
 	case "anon":
 		field = &stats.Anon
 	case "unevictable":
 		field = &stats.Unevictable
 	case "hierarchical_total":
 		field = &stats.Hierarchical.Total
 	case "hierarchical_file":
 		field = &stats.Hierarchical.File
 	case "hierarchical_anon":
 		field = &stats.Hierarchical.Anon
 	case "hierarchical_unevictable":
 		field = &stats.Hierarchical.Unevictable
 	}
 	return field
 }
--- a/libcontainer/cgroups/fs/memory_test.go
+++ b/libcontainer/cgroups/fs/memory_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -8,6 +6,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 const (
@ -18,11 +17,29 @@ rss 1024`
 	memoryFailcnt              = "100\n"
 	memoryLimitContents        = "8192\n"
 	memoryUseHierarchyContents = "1\n"
 	memoryNUMAStatContents     = `total=44611 N0=32631 N1=7501 N2=1982 N3=2497
 file=44428 N0=32614 N1=7335 N2=1982 N3=2497
 anon=183 N0=17 N1=166 N2=0 N3=0
 unevictable=0 N0=0 N1=0 N2=0 N3=0
 hierarchical_total=768133 N0=509113 N1=138887 N2=20464 N3=99669
 hierarchical_file=722017 N0=496516 N1=119997 N2=20181 N3=85323
 hierarchical_anon=46096 N0=12597 N1=18890 N2=283 N3=14326
 hierarchical_unevictable=20 N0=0 N1=0 N2=0 N3=20
 `
 	memoryNUMAStatNoHierarchyContents = `total=44611 N0=32631 N1=7501 N2=1982 N3=2497
 file=44428 N0=32614 N1=7335 N2=1982 N3=2497
 anon=183 N0=17 N1=166 N2=0 N3=0
 unevictable=0 N0=0 N1=0 N2=0 N3=0
 `
 	// Some custom kernels have extra fields that should be ignored.
 	memoryNUMAStatExtraContents = `numa_locality 0 0 0 0 0 0 0 0 0 0
 numa_exectime 0
 whatever=100 N0=0
 `
 )
 func TestMemorySetMemory(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
 	defer helper.cleanup()
 	const (
 		memoryBefore      = 314572800 // 300M
@ -31,29 +48,31 @@ func TestMemorySetMemory(t *testing.T) {
 		reservationAfter  = 314572800 // 300M
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"memory.limit_in_bytes":      strconv.Itoa(memoryBefore),
 		"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
 	})
-	helper.CgroupData.config.Resources.Memory = memoryAfter
+	r := &configs.Resources{
-	helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
+		Memory:            memoryAfter,
 		MemoryReservation: reservationAfter,
 	}
 	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := memory.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
+	value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
+		t.Fatal(err)
 	}
 	if value != memoryAfter {
 		t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
 	}
-	value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
+	value, err = fscommon.GetCgroupParamUint(path, "memory.soft_limit_in_bytes")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
+		t.Fatal(err)
 	}
 	if value != reservationAfter {
 		t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
@ -61,27 +80,28 @@ func TestMemorySetMemory(t *testing.T) {
 }
 func TestMemorySetMemoryswap(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
 	defer helper.cleanup()
 	const (
 		memoryswapBefore = 314572800 // 300M
 		memoryswapAfter  = 524288000 // 500M
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
 	})
-	helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
+	r := &configs.Resources{
 		MemorySwap: memoryswapAfter,
 	}
 	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := memory.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
+	value, err := fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
+		t.Fatal(err)
 	}
 	if value != memoryswapAfter {
 		t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
@ -89,8 +109,7 @@ func TestMemorySetMemoryswap(t *testing.T) {
 }
 func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
 	defer helper.cleanup()
 	const (
 		memoryBefore     = 314572800 // 300M
@ -99,7 +118,7 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
 		memoryswapAfter  = 838860800 // 800M
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"memory.limit_in_bytes":       strconv.Itoa(memoryBefore),
 		"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
 		// Set will call getMemoryData when memory and swap memory are
@ -109,23 +128,26 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
 		"memory.failcnt":            "0",
 	})
-	helper.CgroupData.config.Resources.Memory = memoryAfter
+	r := &configs.Resources{
-	helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
+		Memory:     memoryAfter,
 		MemorySwap: memoryswapAfter,
 	}
 	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := memory.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
+	value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
+		t.Fatal(err)
 	}
 	if value != memoryAfter {
 		t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
 	}
-	value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
+
 	value, err = fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
+		t.Fatal(err)
 	}
 	if value != memoryswapAfter {
 		t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
@ -133,8 +155,7 @@ func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
 }
 func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
 	defer helper.cleanup()
 	const (
 		memoryBefore     = 629145600 // 600M
@ -143,115 +164,58 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
 		memoryswapAfter  = 524288000 // 500M
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"memory.limit_in_bytes":       strconv.Itoa(memoryBefore),
 		"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
 		// Set will call getMemoryData when memory and swap memory are
 		// both set, fake these fields so we don't get error.
 		"memory.usage_in_bytes":     "0",
 		"memory.max_usage_in_bytes": "0",
 		"memory.failcnt":            "0",
 	})
-	helper.CgroupData.config.Resources.Memory = memoryAfter
+	r := &configs.Resources{
-	helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
+		Memory:     memoryAfter,
 		MemorySwap: memoryswapAfter,
 	}
 	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := memory.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
+	value, err := fscommon.GetCgroupParamUint(path, "memory.limit_in_bytes")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
+		t.Fatal(err)
 	}
 	if value != memoryAfter {
-		t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
+		t.Fatalf("Got the wrong value (%d != %d), set memory.limit_in_bytes failed", value, memoryAfter)
 	}
-	value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
+
 	value, err = fscommon.GetCgroupParamUint(path, "memory.memsw.limit_in_bytes")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
+		t.Fatal(err)
 	}
 	if value != memoryswapAfter {
-		t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
+		t.Fatalf("Got the wrong value (%d != %d), set memory.memsw.limit_in_bytes failed", value, memoryswapAfter)
 	}
 }
 // TestMemorySetKernelMemory checks that MemoryGroup.Set writes the configured
 // Resources.KernelMemory value into memory.kmem.limit_in_bytes.
 func TestMemorySetKernelMemory(t *testing.T) {
 	// NOTE(review): presumably NewCgroupTestUtil prepares a scratch "memory"
 	// cgroup directory that cleanup() removes — confirm against the helper.
 	helper := NewCgroupTestUtil("memory", t)
 	defer helper.cleanup()
 	const (
 		kernelMemoryBefore = 314572800 // 300M
 		kernelMemoryAfter  = 524288000 // 500M
 	)
 	// Seed the limit file with the "before" value so a successful Set is
 	// observable as a change.
 	helper.writeFileContents(map[string]string{
 		"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
 	})
 	helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
 	memory := &MemoryGroup{}
 	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
 		t.Fatal(err)
 	}
 	// Read the file back and make sure it now holds the "after" value.
 	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
 	if err != nil {
 		t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
 	}
 	if value != kernelMemoryAfter {
 		t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
 	}
 }
 // TestMemorySetKernelMemoryTCP checks that MemoryGroup.Set writes the
 // configured Resources.KernelMemoryTCP value into
 // memory.kmem.tcp.limit_in_bytes.
 func TestMemorySetKernelMemoryTCP(t *testing.T) {
 	// NOTE(review): presumably NewCgroupTestUtil prepares a scratch "memory"
 	// cgroup directory that cleanup() removes — confirm against the helper.
 	helper := NewCgroupTestUtil("memory", t)
 	defer helper.cleanup()
 	const (
 		kernelMemoryTCPBefore = 314572800 // 300M
 		kernelMemoryTCPAfter  = 524288000 // 500M
 	)
 	// Seed the limit file with the "before" value so a successful Set is
 	// observable as a change.
 	helper.writeFileContents(map[string]string{
 		"memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore),
 	})
 	helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter
 	memory := &MemoryGroup{}
 	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
 		t.Fatal(err)
 	}
 	// Read the file back and make sure it now holds the "after" value.
 	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes")
 	if err != nil {
 		t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err)
 	}
 	if value != kernelMemoryTCPAfter {
 		t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.")
 	}
 }
 func TestMemorySetMemorySwappinessDefault(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
 	defer helper.cleanup()
-	swappinessBefore := 60 //default is 60
+	swappinessBefore := 60 // default is 60
 	swappinessAfter := uint64(0)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"memory.swappiness": strconv.Itoa(swappinessBefore),
 	})
-	helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter
+	r := &configs.Resources{
 		MemorySwappiness: &swappinessAfter,
 	}
 	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := memory.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.swappiness")
+	value, err := fscommon.GetCgroupParamUint(path, "memory.swappiness")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.swappiness - %s", err)
+		t.Fatal(err)
 	}
 	if value != swappinessAfter {
 		t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter)
@ -259,9 +223,8 @@ func TestMemorySetMemorySwappinessDefault(t *testing.T) {
 }
 func TestMemoryStats(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":                     memoryStatContents,
 		"memory.usage_in_bytes":           memoryUsageContents,
 		"memory.limit_in_bytes":           memoryLimitContents,
@ -276,22 +239,43 @@ func TestMemoryStats(t *testing.T) {
 		"memory.kmem.failcnt":             memoryFailcnt,
 		"memory.kmem.limit_in_bytes":      memoryLimitContents,
 		"memory.use_hierarchy":            memoryUseHierarchyContents,
 		"memory.numa_stat":                memoryNUMAStatContents + memoryNUMAStatExtraContents,
 	})
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err != nil {
 		t.Fatal(err)
 	}
-	expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}, UseHierarchy: true}
+	expectedStats := cgroups.MemoryStats{
 		Cache:        512,
 		Usage:        cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
 		SwapUsage:    cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
 		KernelUsage:  cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
 		Stats:        map[string]uint64{"cache": 512, "rss": 1024},
 		UseHierarchy: true,
 		PageUsageByNUMA: cgroups.PageUsageByNUMA{
 			PageUsageByNUMAInner: cgroups.PageUsageByNUMAInner{
 				Total:       cgroups.PageStats{Total: 44611, Nodes: map[uint8]uint64{0: 32631, 1: 7501, 2: 1982, 3: 2497}},
 				File:        cgroups.PageStats{Total: 44428, Nodes: map[uint8]uint64{0: 32614, 1: 7335, 2: 1982, 3: 2497}},
 				Anon:        cgroups.PageStats{Total: 183, Nodes: map[uint8]uint64{0: 17, 1: 166, 2: 0, 3: 0}},
 				Unevictable: cgroups.PageStats{Total: 0, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 0}},
 			},
 			Hierarchical: cgroups.PageUsageByNUMAInner{
 				Total:       cgroups.PageStats{Total: 768133, Nodes: map[uint8]uint64{0: 509113, 1: 138887, 2: 20464, 3: 99669}},
 				File:        cgroups.PageStats{Total: 722017, Nodes: map[uint8]uint64{0: 496516, 1: 119997, 2: 20181, 3: 85323}},
 				Anon:        cgroups.PageStats{Total: 46096, Nodes: map[uint8]uint64{0: 12597, 1: 18890, 2: 283, 3: 14326}},
 				Unevictable: cgroups.PageStats{Total: 20, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 20}},
 			},
 		},
 	}
 	expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
 }
 func TestMemoryStatsNoStatFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.usage_in_bytes":     memoryUsageContents,
 		"memory.max_usage_in_bytes": memoryMaxUsageContents,
 		"memory.limit_in_bytes":     memoryLimitContents,
@ -299,16 +283,15 @@ func TestMemoryStatsNoStatFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err != nil {
 		t.Fatal(err)
 	}
 }
 func TestMemoryStatsNoUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":               memoryStatContents,
 		"memory.max_usage_in_bytes": memoryMaxUsageContents,
 		"memory.limit_in_bytes":     memoryLimitContents,
@ -316,16 +299,15 @@ func TestMemoryStatsNoUsageFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":           memoryStatContents,
 		"memory.usage_in_bytes": memoryUsageContents,
 		"memory.limit_in_bytes": memoryLimitContents,
@ -333,16 +315,15 @@ func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":               memoryStatContents,
 		"memory.usage_in_bytes":     memoryUsageContents,
 		"memory.max_usage_in_bytes": memoryMaxUsageContents,
@ -350,16 +331,15 @@ func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestMemoryStatsBadStatFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":               "rss rss",
 		"memory.usage_in_bytes":     memoryUsageContents,
 		"memory.max_usage_in_bytes": memoryMaxUsageContents,
@ -368,16 +348,15 @@ func TestMemoryStatsBadStatFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestMemoryStatsBadUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":               memoryStatContents,
 		"memory.usage_in_bytes":     "bad",
 		"memory.max_usage_in_bytes": memoryMaxUsageContents,
@ -386,16 +365,15 @@ func TestMemoryStatsBadUsageFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":               memoryStatContents,
 		"memory.usage_in_bytes":     memoryUsageContents,
 		"memory.max_usage_in_bytes": "bad",
@ -404,16 +382,15 @@ func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
-	defer helper.cleanup()
+	writeFileContents(t, path, map[string]string{
 	helper.writeFileContents(map[string]string{
 		"memory.stat":               memoryStatContents,
 		"memory.usage_in_bytes":     memoryUsageContents,
 		"memory.max_usage_in_bytes": memoryMaxUsageContents,
@ -422,35 +399,108 @@ func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
 	memory := &MemoryGroup{}
 	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
+	err := memory.GetStats(path, &actualStats)
 	if err == nil {
 		t.Fatal("Expected failure")
 	}
 }
 func TestMemorySetOomControl(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
+	path := tempDir(t, "memory")
 	defer helper.cleanup()
 	const (
 		oomKillDisable = 1 // disable oom killer, default is 0
 	)
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"memory.oom_control": strconv.Itoa(oomKillDisable),
 	})
 	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	r := &configs.Resources{}
 	if err := memory.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.oom_control")
+	value, err := fscommon.GetCgroupParamUint(path, "memory.oom_control")
 	if err != nil {
-		t.Fatalf("Failed to parse memory.oom_control - %s", err)
+		t.Fatal(err)
 	}
 	if value != oomKillDisable {
 		t.Fatalf("Got the wrong value, set memory.oom_control failed.")
 	}
 }
 // TestNoHierarchicalNumaStat feeds getPageUsageByNUMA a memory.numa_stat file
 // built from the no-hierarchy fixture and expects the per-cgroup counters to
 // be populated while the Hierarchical section stays at its zero value.
 func TestNoHierarchicalNumaStat(t *testing.T) {
 	dir := tempDir(t, "memory")
 	files := map[string]string{
 		"memory.numa_stat": memoryNUMAStatNoHierarchyContents + memoryNUMAStatExtraContents,
 	}
 	writeFileContents(t, dir, files)
 	got, err := getPageUsageByNUMA(dir)
 	if err != nil {
 		t.Fatal(err)
 	}
 	// Only the non-hierarchical counters are expected to be filled in.
 	inner := cgroups.PageUsageByNUMAInner{
 		Total:       cgroups.PageStats{Total: 44611, Nodes: map[uint8]uint64{0: 32631, 1: 7501, 2: 1982, 3: 2497}},
 		File:        cgroups.PageStats{Total: 44428, Nodes: map[uint8]uint64{0: 32614, 1: 7335, 2: 1982, 3: 2497}},
 		Anon:        cgroups.PageStats{Total: 183, Nodes: map[uint8]uint64{0: 17, 1: 166, 2: 0, 3: 0}},
 		Unevictable: cgroups.PageStats{Total: 0, Nodes: map[uint8]uint64{0: 0, 1: 0, 2: 0, 3: 0}},
 	}
 	want := cgroups.PageUsageByNUMA{
 		PageUsageByNUMAInner: inner,
 		Hierarchical:         cgroups.PageUsageByNUMAInner{},
 	}
 	expectPageUsageByNUMAEquals(t, want, got)
 }
 // TestBadNumaStat writes a series of malformed memory.numa_stat files and
 // expects getPageUsageByNUMA to report an error for each of them.
 func TestBadNumaStat(t *testing.T) {
 	type badCase struct {
 		desc, contents string
 	}
 	cases := []badCase{
 		{
 			desc: "Nx where x is not a number",
 			contents: `total=44611 N0=44611,
 file=44428 Nx=0
 `,
 		},
 		{
 			desc:     "Nx where x > 255",
 			contents: `total=44611 N333=444`,
 		},
 		{
 			desc:     "Nx argument missing",
 			contents: `total=44611 N0=123 N1=`,
 		},
 		{
 			desc:     "Nx argument is not a number",
 			contents: `total=44611 N0=123 N1=a`,
 		},
 		{
 			desc:     "Missing = after Nx",
 			contents: `total=44611 N0=123 N1`,
 		},
 		{
 			desc: "No Nx at non-first position",
 			contents: `total=44611 N0=32631
 file=44428 N0=32614
 anon=183 N0=12 badone
 `,
 		},
 	}
 	// All cases share one directory; each iteration overwrites the file.
 	dir := tempDir(t, "memory")
 	for _, tc := range cases {
 		writeFileContents(t, dir, map[string]string{
 			"memory.numa_stat": tc.contents,
 		})
 		if _, err := getPageUsageByNUMA(dir); err == nil {
 			t.Errorf("case %q: expected error, got nil", tc.desc)
 		}
 	}
 }
 // TestWithoutNumaStat calls getPageUsageByNUMA on a directory that has no
 // memory.numa_stat file and expects a zero-valued result without an error.
 func TestWithoutNumaStat(t *testing.T) {
 	dir := tempDir(t, "memory")
 	got, err := getPageUsageByNUMA(dir)
 	if err != nil {
 		t.Fatal(err)
 	}
 	var want cgroups.PageUsageByNUMA
 	expectPageUsageByNUMAEquals(t, want, got)
 }
--- a/libcontainer/cgroups/fs/name.go
+++ b/libcontainer/cgroups/fs/name.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -16,22 +14,15 @@ func (s *NameGroup) Name() string {
 	return s.GroupName
 }
-func (s *NameGroup) Apply(d *cgroupData) error {
+func (s *NameGroup) Apply(path string, _ *configs.Resources, pid int) error {
 	if s.Join {
-		// ignore errors if the named cgroup does not exist
+		// Ignore errors if the named cgroup does not exist.
-		d.join(s.GroupName)
+		_ = apply(path, pid)
 	}
 	return nil
 }
-func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *NameGroup) Set(_ string, _ *configs.Resources) error {
 	return nil
 }
 func (s *NameGroup) Remove(d *cgroupData) error {
 	if s.Join {
 		removePath(d.path(s.GroupName))
 	}
 	return nil
 }
--- a/libcontainer/cgroups/fs/net_cls.go
+++ b/libcontainer/cgroups/fs/net_cls.go
@ -1,33 +1,25 @@
 // +build linux
 package fs
 import (
 	"strconv"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
-type NetClsGroup struct {
+type NetClsGroup struct{}
 }
 func (s *NetClsGroup) Name() string {
 	return "net_cls"
 }
-func (s *NetClsGroup) Apply(d *cgroupData) error {
+func (s *NetClsGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	_, err := d.join("net_cls")
+	return apply(path, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *NetClsGroup) Set(path string, r *configs.Resources) error {
-	if cgroup.Resources.NetClsClassid != 0 {
+	if r.NetClsClassid != 0 {
-		if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
+		if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil {
 			return err
 		}
 	}
@ -35,10 +27,6 @@ func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
 	return nil
 }
 func (s *NetClsGroup) Remove(d *cgroupData) error {
 	return removePath(d.path("net_cls"))
 }
 func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
 	return nil
 }
--- a/libcontainer/cgroups/fs/net_cls_test.go
+++ b/libcontainer/cgroups/fs/net_cls_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -7,6 +5,7 @@ import (
 	"testing"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 const (
@ -15,25 +14,26 @@ const (
 )
 func TestNetClsSetClassid(t *testing.T) {
-	helper := NewCgroupTestUtil("net_cls", t)
+	path := tempDir(t, "net_cls")
 	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"net_cls.classid": strconv.FormatUint(classidBefore, 10),
 	})
-	helper.CgroupData.config.Resources.NetClsClassid = classidAfter
+	r := &configs.Resources{
 		NetClsClassid: classidAfter,
 	}
 	netcls := &NetClsGroup{}
-	if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := netcls.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
 	// As we are in mock environment, we can't get correct value of classid from
 	// net_cls.classid.
 	// So we just check that the classid was successfully written to the file.
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "net_cls.classid")
+	value, err := fscommon.GetCgroupParamUint(path, "net_cls.classid")
 	if err != nil {
-		t.Fatalf("Failed to parse net_cls.classid - %s", err)
+		t.Fatal(err)
 	}
 	if value != classidAfter {
 		t.Fatal("Got the wrong value, set net_cls.classid failed.")
--- a/libcontainer/cgroups/fs/net_prio.go
+++ b/libcontainer/cgroups/fs/net_prio.go
@ -1,31 +1,23 @@
 // +build linux
 package fs
 import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
-type NetPrioGroup struct {
+type NetPrioGroup struct{}
 }
 func (s *NetPrioGroup) Name() string {
 	return "net_prio"
 }
-func (s *NetPrioGroup) Apply(d *cgroupData) error {
+func (s *NetPrioGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	_, err := d.join("net_prio")
+	return apply(path, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *NetPrioGroup) Set(path string, r *configs.Resources) error {
-	for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
+	for _, prioMap := range r.NetPrioIfpriomap {
-		if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
+		if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
 			return err
 		}
 	}
@ -33,10 +25,6 @@ func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
 	return nil
 }
 func (s *NetPrioGroup) Remove(d *cgroupData) error {
 	return removePath(d.path("net_prio"))
 }
 func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
 	return nil
 }
--- a/libcontainer/cgroups/fs/net_prio_test.go
+++ b/libcontainer/cgroups/fs/net_prio_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -10,28 +8,27 @@ import (
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
-var (
+var prioMap = []*configs.IfPrioMap{
-	prioMap = []*configs.IfPrioMap{
+	{
-		{
+		Interface: "test",
-			Interface: "test",
+		Priority:  5,
-			Priority:  5,
+	},
-		},
+}
 	}
 )
 func TestNetPrioSetIfPrio(t *testing.T) {
-	helper := NewCgroupTestUtil("net_prio", t)
+	path := tempDir(t, "net_prio")
 	defer helper.cleanup()
-	helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
+	r := &configs.Resources{
 		NetPrioIfpriomap: prioMap,
 	}
 	netPrio := &NetPrioGroup{}
-	if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := netPrio.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
+	value, err := fscommon.GetCgroupParamString(path, "net_prio.ifpriomap")
 	if err != nil {
-		t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
+		t.Fatal(err)
 	}
 	if !strings.Contains(value, "test 5") {
 		t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
--- a/libcontainer/cgroups/fs/paths.go
+++ b/libcontainer/cgroups/fs/paths.go
@ -0,0 +1,186 @@
 package fs
 import (
 	"errors"
 	"os"
 	"path/filepath"
 	"sync"
 	"golang.org/x/sys/unix"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 	"github.com/opencontainers/runc/libcontainer/utils"
 )
 // The absolute path to the root of the cgroup hierarchies.
 var (
 	cgroupRootLock sync.Mutex
 	cgroupRoot     string
 )
 const defaultCgroupRoot = "/sys/fs/cgroup"
 // initPaths builds a map from subsystem name to the cgroup path computed for
 // cg, one entry per element of subsystems. A subsystem whose path lookup
 // fails with a not-found error is left out of the map, except for "devices",
 // which is only skipped when cg.SkipDevices is set. Any other lookup error is
 // returned as is.
 func initPaths(cg *configs.Cgroup) (map[string]string, error) {
 	root, err := rootPath()
 	if err != nil {
 		return nil, err
 	}
 	inner, err := innerPath(cg)
 	if err != nil {
 		return nil, err
 	}
 	result := make(map[string]string)
 	for _, subsys := range subsystems {
 		name := subsys.Name()
 		p, err := subsysPath(root, inner, name)
 		switch {
 		case err == nil:
 			result[name] = p
 		case cgroups.IsNotFound(err) && (cg.SkipDevices || name != "devices"):
 			// A missing subsystem is tolerated; the non-presence of
 			// the devices subsystem is considered fatal for security
 			// reasons unless the caller opted out via SkipDevices.
 		default:
 			return nil, err
 		}
 	}
 	return result, nil
 }
 // tryDefaultCgroupRoot checks whether defaultCgroupRoot can be used as the
 // cgroup root without parsing mountinfo. It returns defaultCgroupRoot when
 // every check below passes, and "" as soon as one of them fails.
 func tryDefaultCgroupRoot() string {
 	var st, pst unix.Stat_t
 	// (1) it should be a directory...
 	// Compare the whole file-type field: testing the S_IFDIR bit alone
 	// would also accept block devices and sockets, whose type codes
 	// include that bit.
 	err := unix.Lstat(defaultCgroupRoot, &st)
 	if err != nil || st.Mode&unix.S_IFMT != unix.S_IFDIR {
 		return ""
 	}
 	// (2) ... and a mount point ...
 	err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst)
 	if err != nil {
 		return ""
 	}
 	if st.Dev == pst.Dev {
 		// parent dir has the same dev -- not a mount point
 		return ""
 	}
 	// (3) ... of 'tmpfs' fs type.
 	var fst unix.Statfs_t
 	err = unix.Statfs(defaultCgroupRoot, &fst)
 	if err != nil || fst.Type != unix.TMPFS_MAGIC {
 		return ""
 	}
 	// (4) it should have at least 1 entry ...
 	dir, err := os.Open(defaultCgroupRoot)
 	if err != nil {
 		return ""
 	}
 	// Release the directory handle on every return path below; without
 	// this each call leaks a file descriptor.
 	defer dir.Close()
 	names, err := dir.Readdirnames(1)
 	if err != nil {
 		return ""
 	}
 	if len(names) < 1 {
 		return ""
 	}
 	// ... which is a cgroup mount point.
 	err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst)
 	if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC {
 		return ""
 	}
 	return defaultCgroupRoot
 }
 // rootPath returns the directory that holds the cgroup hierarchies. The
 // answer is cached in cgroupRoot, and cgroupRootLock is held for the whole
 // call so the cache is read and written under the lock.
 func rootPath() (string, error) {
 	cgroupRootLock.Lock()
 	defer cgroupRootLock.Unlock()
 	// An earlier call already resolved the root.
 	if cgroupRoot != "" {
 		return cgroupRoot, nil
 	}
 	// Fast path: the default location, when it passes the sanity checks.
 	if def := tryDefaultCgroupRoot(); def != "" {
 		cgroupRoot = def
 		return def, nil
 	}
 	// Slow path: parse mountinfo.
 	mounts, err := cgroups.GetCgroupMounts(false)
 	if err != nil {
 		return "", err
 	}
 	if len(mounts) == 0 {
 		return "", errors.New("no cgroup mount found in mountinfo")
 	}
 	// The first cgroup mount looks like "/sys/fs/cgroup/memory"; the root
 	// is its parent directory, which must exist.
 	parent := filepath.Dir(mounts[0].Mountpoint)
 	if _, statErr := os.Stat(parent); statErr != nil {
 		return "", statErr
 	}
 	cgroupRoot = parent
 	return parent, nil
 }
 // innerPath derives the cgroup path inside a hierarchy from c. It uses the
 // cleaned c.Path when that is non-empty, and otherwise joins the cleaned
 // c.Parent and c.Name. Setting Path together with Name or Parent is an error.
 func innerPath(c *configs.Cgroup) (string, error) {
 	if c.Path != "" && (c.Name != "" || c.Parent != "") {
 		return "", errors.New("cgroup: either Path or Name and Parent should be used")
 	}
 	// XXX: Do not remove CleanPath. Path safety is important! -- cyphar
 	if cleaned := utils.CleanPath(c.Path); cleaned != "" {
 		return cleaned, nil
 	}
 	parent := utils.CleanPath(c.Parent)
 	name := utils.CleanPath(c.Name)
 	return filepath.Join(parent, name), nil
 }
 // subsysPath returns the full path of the cgroup inner for the given
 // subsystem. An absolute inner is placed under the subsystem's mountpoint
 // directory in root; a relative inner is placed under the calling process's
 // own cgroup path for that subsystem.
 func subsysPath(root, inner, subsystem string) (string, error) {
 	if !filepath.IsAbs(inner) {
 		// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the
 		// creating process could be in a container sharing the pid
 		// namespace with the host, and /proc/1/cgroup could point to a
 		// whole other world of cgroups.
 		own, err := cgroups.GetOwnCgroupPath(subsystem)
 		if err != nil {
 			return "", err
 		}
 		return filepath.Join(own, inner), nil
 	}
 	// An absolute cgroup name/path is not looked up relative to the cgroup
 	// of the init process. If the subsystem is not mounted there is no
 	// point in building a path for it.
 	mountpoint, err := cgroups.FindCgroupMountpoint(root, subsystem)
 	if err != nil {
 		return "", err
 	}
 	// Sometimes subsystems can be mounted together as 'cpu,cpuacct', so
 	// take the directory name from the mountpoint, not from subsystem.
 	mountDir := filepath.Base(mountpoint)
 	return filepath.Join(root, mountDir, inner), nil
 }
 // apply creates the cgroup directory at path (including parents) and then
 // hands pid to cgroups.WriteCgroupProc for that path. An empty path is a
 // no-op.
 func apply(path string, pid int) error {
 	if len(path) == 0 {
 		return nil
 	}
 	err := os.MkdirAll(path, 0o755)
 	if err != nil {
 		return err
 	}
 	return cgroups.WriteCgroupProc(path, pid)
 }
--- a/libcontainer/cgroups/fs/paths_test.go
+++ b/libcontainer/cgroups/fs/paths_test.go
@ -0,0 +1,104 @@
 package fs
 import (
 	"path/filepath"
 	"strings"
 	"testing"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 // TestInvalidCgroupPath feeds innerPath and subsysPath cgroup configs whose
 // Path, Name or Parent try to climb out of the hierarchy with ".." segments,
 // and checks that the resulting paths stay inside the cgroup mountpoint.
 // The test is skipped in cgroup v2 unified mode.
 func TestInvalidCgroupPath(t *testing.T) {
 	if cgroups.IsCgroup2UnifiedMode() {
 		t.Skip("cgroup v2 is not supported")
 	}
 	root, err := rootPath()
 	if err != nil {
 		t.Fatalf("couldn't get cgroup root: %v", err)
 	}
 	// The same traversal string is used in every case, in relative and
 	// absolute form.
 	const (
 		relEscape = "../../../../../../../../../../some/path"
 		absEscape = "/" + relEscape
 	)
 	cases := []struct {
 		test               string
 		path, name, parent string
 	}{
 		{test: "invalid cgroup path", path: relEscape},
 		{test: "invalid absolute cgroup path", path: absEscape},
 		{test: "invalid cgroup parent", parent: relEscape, name: "name"},
 		{test: "invalid absolute cgroup parent", parent: absEscape, name: "name"},
 		{test: "invalid cgroup name", parent: "parent", name: relEscape},
 		{test: "invalid absolute cgroup name", parent: "parent", name: absEscape},
 		{test: "invalid cgroup name and parent", parent: relEscape, name: relEscape},
 		{test: "invalid absolute cgroup name and parent", parent: absEscape, name: absEscape},
 	}
 	// Every devices cgroup path must live under this directory.
 	deviceRoot := filepath.Join(root, "devices")
 	for _, c := range cases {
 		t.Run(c.test, func(t *testing.T) {
 			cg := &configs.Cgroup{Path: c.path, Name: c.name, Parent: c.parent}
 			inner, err := innerPath(cg)
 			if err != nil {
 				t.Fatalf("couldn't get cgroup data: %v", err)
 			}
 			// Make sure the final inner path doesn't go outside the cgroup mountpoint.
 			if strings.HasPrefix(inner, "..") {
 				t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
 			}
 			// Double-check, using an actual cgroup.
 			devicePath, err := subsysPath(root, inner, "devices")
 			if err != nil {
 				t.Fatalf("couldn't get cgroup path: %v", err)
 			}
 			if !strings.HasPrefix(devicePath, deviceRoot) {
 				t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
 			}
 		})
 	}
 }
 // TestTryDefaultCgroupRoot checks that tryDefaultCgroupRoot returns
 // defaultCgroupRoot on a cgroup v1 host and "" in cgroup v2 unified mode.
 func TestTryDefaultCgroupRoot(t *testing.T) {
 	got := tryDefaultCgroupRoot()
 	// In unified mode /sys/fs/cgroup is not a cgroup v1 root dir, so the
 	// function is expected to return "".
 	var want string
 	if !cgroups.IsCgroup2UnifiedMode() {
 		want = defaultCgroupRoot
 	}
 	if got != want {
 		t.Errorf("tryDefaultCgroupRoot: want %q, got %q", want, got)
 	}
 }
--- a/libcontainer/cgroups/fs/perf_event.go
+++ b/libcontainer/cgroups/fs/perf_event.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -7,29 +5,20 @@ import (
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
-type PerfEventGroup struct {
+type PerfEventGroup struct{}
 }
 func (s *PerfEventGroup) Name() string {
 	return "perf_event"
 }
-func (s *PerfEventGroup) Apply(d *cgroupData) error {
+func (s *PerfEventGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	// we just want to join this group even though we don't set anything
+	return apply(path, pid)
 	if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error {
 	return nil
 }
 func (s *PerfEventGroup) Remove(d *cgroupData) error {
 	return removePath(d.path("perf_event"))
 }
 func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
 	return nil
 }
--- a/libcontainer/cgroups/fs/pids.go
+++ b/libcontainer/cgroups/fs/pids.go
@ -1,10 +1,7 @@
 // +build linux
 package fs
 import (
-	"fmt"
+	"math"
 	"path/filepath"
 	"strconv"
 	"github.com/opencontainers/runc/libcontainer/cgroups"
@ -12,31 +9,26 @@ import (
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
-type PidsGroup struct {
+type PidsGroup struct{}
 }
 func (s *PidsGroup) Name() string {
 	return "pids"
 }
-func (s *PidsGroup) Apply(d *cgroupData) error {
+func (s *PidsGroup) Apply(path string, _ *configs.Resources, pid int) error {
-	_, err := d.join("pids")
+	return apply(path, pid)
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
 	return nil
 }
-func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
+func (s *PidsGroup) Set(path string, r *configs.Resources) error {
-	if cgroup.Resources.PidsLimit != 0 {
+	if r.PidsLimit != 0 {
 		// "max" is the fallback value.
 		limit := "max"
-		if cgroup.Resources.PidsLimit > 0 {
+		if r.PidsLimit > 0 {
-			limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
+			limit = strconv.FormatInt(r.PidsLimit, 10)
 		}
-		if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
+		if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
 			return err
 		}
 	}
@ -44,28 +36,24 @@ func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
 	return nil
 }
 func (s *PidsGroup) Remove(d *cgroupData) error {
 	return removePath(d.path("pids"))
 }
 func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
 	if !cgroups.PathExists(path) {
 		return nil
 	}
 	current, err := fscommon.GetCgroupParamUint(path, "pids.current")
 	if err != nil {
-		return fmt.Errorf("failed to parse pids.current - %s", err)
+		return err
 	}
-	maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
+	max, err := fscommon.GetCgroupParamUint(path, "pids.max")
 	if err != nil {
-		return fmt.Errorf("failed to parse pids.max - %s", err)
+		return err
 	}
-
+	// If no limit is set, read from pids.max returns "max", which is
-	// Default if pids.max == "max" is 0 -- which represents "no limit".
+	// converted to MaxUint64 by GetCgroupParamUint. Historically, we
-	var max uint64
+	// represent "no limit" for pids as 0, thus this conversion.
-	if maxString != "max" {
+	if max == math.MaxUint64 {
-		max, err = fscommon.ParseUint(maxString, 10, 64)
+		max = 0
-		if err != nil {
-			return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
-		}
 	}
 	stats.PidsStats.Current = current
--- a/libcontainer/cgroups/fs/pids_test.go
+++ b/libcontainer/cgroups/fs/pids_test.go
@ -1,5 +1,3 @@
 // +build linux
 package fs
 import (
@ -8,6 +6,7 @@ import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 const (
@ -16,65 +15,64 @@ const (
 )
 func TestPidsSetMax(t *testing.T) {
-	helper := NewCgroupTestUtil("pids", t)
+	path := tempDir(t, "pids")
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"pids.max": "max",
 	})
-	helper.CgroupData.config.Resources.PidsLimit = maxLimited
+	r := &configs.Resources{
+		PidsLimit: maxLimited,
+	}
 	pids := &PidsGroup{}
-	if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := pids.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "pids.max")
+	value, err := fscommon.GetCgroupParamUint(path, "pids.max")
 	if err != nil {
-		t.Fatalf("Failed to parse pids.max - %s", err)
+		t.Fatal(err)
 	}
 	if value != maxLimited {
 		t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
 	}
 }
 func TestPidsSetUnlimited(t *testing.T) {
-	helper := NewCgroupTestUtil("pids", t)
+	path := tempDir(t, "pids")
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"pids.max": strconv.Itoa(maxLimited),
 	})
-	helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
+	r := &configs.Resources{
+		PidsLimit: maxUnlimited,
+	}
 	pids := &PidsGroup{}
-	if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
+	if err := pids.Set(path, r); err != nil {
 		t.Fatal(err)
 	}
-	value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "pids.max")
+	value, err := fscommon.GetCgroupParamString(path, "pids.max")
 	if err != nil {
-		t.Fatalf("Failed to parse pids.max - %s", err)
+		t.Fatal(err)
 	}
 	if value != "max" {
 		t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
 	}
 }
 func TestPidsStats(t *testing.T) {
-	helper := NewCgroupTestUtil("pids", t)
+	path := tempDir(t, "pids")
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"pids.current": strconv.Itoa(1337),
 		"pids.max":     strconv.Itoa(maxLimited),
 	})
 	pids := &PidsGroup{}
 	stats := *cgroups.NewStats()
-	if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
+	if err := pids.GetStats(path, &stats); err != nil {
 		t.Fatal(err)
 	}
@ -88,17 +86,16 @@ func TestPidsStats(t *testing.T) {
 }
 func TestPidsStatsUnlimited(t *testing.T) {
-	helper := NewCgroupTestUtil("pids", t)
+	path := tempDir(t, "pids")
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
+	writeFileContents(t, path, map[string]string{
 		"pids.current": strconv.Itoa(4096),
 		"pids.max":     "max",
 	})
 	pids := &PidsGroup{}
 	stats := *cgroups.NewStats()
-	if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
+	if err := pids.GetStats(path, &stats); err != nil {
 		t.Fatal(err)
 	}
--- a/libcontainer/cgroups/fs/rdma.go
+++ b/libcontainer/cgroups/fs/rdma.go
@ -0,0 +1,25 @@
 package fs
 import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
 type RdmaGroup struct{}
 func (s *RdmaGroup) Name() string {
 	return "rdma"
 }
 func (s *RdmaGroup) Apply(path string, _ *configs.Resources, pid int) error {
 	return apply(path, pid)
 }
 func (s *RdmaGroup) Set(path string, r *configs.Resources) error {
 	return fscommon.RdmaSet(path, r)
 }
 func (s *RdmaGroup) GetStats(path string, stats *cgroups.Stats) error {
 	return fscommon.RdmaGetStats(path, stats)
 }
--- a/Show More
+++ b/Show More
`@ -1,3 +1,3 @@`
	`# Security`	`# Security`

	`The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).`	`The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).`