diff --git a/init/Android.bp b/init/Android.bp index 232d7e304..eaa7fd73c 100644 --- a/init/Android.bp +++ b/init/Android.bp @@ -127,6 +127,7 @@ cc_library_static { "selabel.cpp", "selinux.cpp", "service.cpp", + "service_utils.cpp", "sigchld_handler.cpp", "subcontext.cpp", "subcontext.proto", @@ -259,6 +260,7 @@ cc_binary { "rlimit_parser.cpp", "tokenizer.cpp", "service.cpp", + "service_utils.cpp", "subcontext.cpp", "subcontext.proto", "util.cpp", diff --git a/init/service.cpp b/init/service.cpp index 3e865a7bd..8fe5b309a 100644 --- a/init/service.cpp +++ b/init/service.cpp @@ -21,12 +21,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include @@ -36,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -65,7 +62,6 @@ using android::base::ParseInt; using android::base::Split; using android::base::StartsWith; using android::base::StringPrintf; -using android::base::unique_fd; using android::base::WriteStringToFile; namespace android { @@ -107,87 +103,6 @@ static Result ComputeContextFromExecutable(const std::string& servi return computed_context; } -Result Service::SetUpMountNamespace() const { - constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID; - - // Recursively remount / as slave like zygote does so unmounting and mounting /proc - // doesn't interfere with the parent namespace's /proc mount. This will also - // prevent any other mounts/unmounts initiated by the service from interfering - // with the parent namespace but will still allow mount events from the parent - // namespace to propagate to the child. - if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) { - return ErrnoError() << "Could not remount(/) recursively as slave"; - } - - // umount() then mount() /proc and/or /sys - // Note that it is not sufficient to mount with MS_REMOUNT. 
- if (namespace_flags_ & CLONE_NEWPID) { - if (umount("/proc") == -1) { - return ErrnoError() << "Could not umount(/proc)"; - } - if (mount("", "/proc", "proc", kSafeFlags, "") == -1) { - return ErrnoError() << "Could not mount(/proc)"; - } - } - bool remount_sys = std::any_of(namespaces_to_enter_.begin(), namespaces_to_enter_.end(), - [](const auto& entry) { return entry.first == CLONE_NEWNET; }); - if (remount_sys) { - if (umount2("/sys", MNT_DETACH) == -1) { - return ErrnoError() << "Could not umount(/sys)"; - } - if (mount("", "/sys", "sysfs", kSafeFlags, "") == -1) { - return ErrnoError() << "Could not mount(/sys)"; - } - } - return Success(); -} - -Result Service::SetUpPidNamespace() const { - if (prctl(PR_SET_NAME, name_.c_str()) == -1) { - return ErrnoError() << "Could not set name"; - } - - pid_t child_pid = fork(); - if (child_pid == -1) { - return ErrnoError() << "Could not fork init inside the PID namespace"; - } - - if (child_pid > 0) { - // So that we exit with the right status. - static int init_exitstatus = 0; - signal(SIGTERM, [](int) { _exit(init_exitstatus); }); - - pid_t waited_pid; - int status; - while ((waited_pid = wait(&status)) > 0) { - // This loop will end when there are no processes left inside the - // PID namespace or when the init process inside the PID namespace - // gets a signal. 
- if (waited_pid == child_pid) { - init_exitstatus = status; - } - } - if (!WIFEXITED(init_exitstatus)) { - _exit(EXIT_FAILURE); - } - _exit(WEXITSTATUS(init_exitstatus)); - } - return Success(); -} - -Result Service::EnterNamespaces() const { - for (const auto& [nstype, path] : namespaces_to_enter_) { - auto fd = unique_fd{open(path.c_str(), O_RDONLY | O_CLOEXEC)}; - if (fd == -1) { - return ErrnoError() << "Could not open namespace at " << path; - } - if (setns(fd, nstype) == -1) { - return ErrnoError() << "Could not setns() namespace at " << path; - } - } - return Success(); -} - static bool ExpandArgsAndExecv(const std::vector& args, bool sigstop) { std::vector expanded_args; std::vector c_strings; @@ -230,16 +145,16 @@ Service::Service(const std::string& name, unsigned flags, uid_t uid, gid_t gid, flags_(flags), pid_(0), crash_count_(0), - uid_(uid), - gid_(gid), - supp_gids_(supp_gids), - namespace_flags_(namespace_flags), + proc_attr_{.ioprio_class = IoSchedClass_NONE, + .ioprio_pri = 0, + .uid = uid, + .gid = gid, + .supp_gids = supp_gids, + .priority = 0}, + namespaces_{.flags = namespace_flags}, seclabel_(seclabel), onrestart_(false, subcontext_for_restart_commands, "", 0, "onrestart", {}), - ioprio_class_(IoSchedClass_NONE), - ioprio_pri_(0), - priority_(0), oom_score_adjust_(-1000), start_order_(0), args_(args) {} @@ -272,24 +187,18 @@ void Service::KillProcessGroup(int signal) { << ") process group..."; int r; if (signal == SIGTERM) { - r = killProcessGroupOnce(uid_, pid_, signal); + r = killProcessGroupOnce(proc_attr_.uid, pid_, signal); } else { - r = killProcessGroup(uid_, pid_, signal); + r = killProcessGroup(proc_attr_.uid, pid_, signal); } if (r == 0) process_cgroup_empty_ = true; } } -void Service::SetProcessAttributes() { - for (const auto& rlimit : rlimits_) { - if (setrlimit(rlimit.first, &rlimit.second) == -1) { - LOG(FATAL) << StringPrintf("setrlimit(%d, {rlim_cur=%ld, rlim_max=%ld}) failed", - rlimit.first, rlimit.second.rlim_cur, 
rlimit.second.rlim_max); - } - } +void Service::SetProcessAttributesAndCaps() { // Keep capabilites on uid change. - if (capabilities_ && uid_) { + if (capabilities_ && proc_attr_.uid) { // If Android is running in a container, some securebits might already // be locked, so don't change those. unsigned long securebits = prctl(PR_GET_SECUREBITS); @@ -302,37 +211,21 @@ void Service::SetProcessAttributes() { } } - // TODO: work out why this fails for `console` then upgrade to FATAL. - if (setpgid(0, getpid()) == -1) PLOG(ERROR) << "setpgid failed for " << name_; + if (auto result = SetProcessAttributes(proc_attr_); !result) { + LOG(FATAL) << "cannot set attribute for " << name_ << ": " << result.error(); + } - if (gid_) { - if (setgid(gid_) != 0) { - PLOG(FATAL) << "setgid failed for " << name_; - } - } - if (setgroups(supp_gids_.size(), &supp_gids_[0]) != 0) { - PLOG(FATAL) << "setgroups failed for " << name_; - } - if (uid_) { - if (setuid(uid_) != 0) { - PLOG(FATAL) << "setuid failed for " << name_; - } - } if (!seclabel_.empty()) { if (setexeccon(seclabel_.c_str()) < 0) { PLOG(FATAL) << "cannot setexeccon('" << seclabel_ << "') for " << name_; } } - if (priority_ != 0) { - if (setpriority(PRIO_PROCESS, 0, priority_) != 0) { - PLOG(FATAL) << "setpriority failed for " << name_; - } - } + if (capabilities_) { if (!SetCapsForExec(*capabilities_)) { LOG(FATAL) << "cannot set capabilities for " << name_; } - } else if (uid_) { + } else if (proc_attr_.uid) { // Inheritable caps can be non-zero when running in a container. if (!DropInheritableCaps()) { LOG(FATAL) << "cannot drop inheritable caps for " << name_; @@ -458,7 +351,7 @@ Result Service::ParseClass(std::vector&& args) { Result Service::ParseConsole(std::vector&& args) { flags_ |= SVC_CONSOLE; - console_ = args.size() > 1 ? "/dev/" + args[1] : ""; + proc_attr_.console = args.size() > 1 ? 
"/dev/" + args[1] : ""; return Success(); } @@ -477,13 +370,13 @@ Result Service::ParseEnterNamespace(std::vector&& args) { if (args[1] != "net") { return Error() << "Init only supports entering network namespaces"; } - if (!namespaces_to_enter_.empty()) { + if (!namespaces_.namespaces_to_enter.empty()) { return Error() << "Only one network namespace may be entered"; } // Network namespaces require that /sys is remounted, otherwise the old adapters will still be // present. Therefore, they also require mount namespaces. - namespace_flags_ |= CLONE_NEWNS; - namespaces_to_enter_.emplace_back(CLONE_NEWNET, std::move(args[2])); + namespaces_.flags |= CLONE_NEWNS; + namespaces_.namespaces_to_enter.emplace_back(CLONE_NEWNET, std::move(args[2])); return Success(); } @@ -492,22 +385,22 @@ Result Service::ParseGroup(std::vector&& args) { if (!gid) { return Error() << "Unable to decode GID for '" << args[1] << "': " << gid.error(); } - gid_ = *gid; + proc_attr_.gid = *gid; for (std::size_t n = 2; n < args.size(); n++) { gid = DecodeUid(args[n]); if (!gid) { return Error() << "Unable to decode GID for '" << args[n] << "': " << gid.error(); } - supp_gids_.emplace_back(*gid); + proc_attr_.supp_gids.emplace_back(*gid); } return Success(); } Result Service::ParsePriority(std::vector&& args) { - priority_ = 0; - if (!ParseInt(args[1], &priority_, - static_cast(ANDROID_PRIORITY_HIGHEST), // highest is negative + proc_attr_.priority = 0; + if (!ParseInt(args[1], &proc_attr_.priority, + static_cast(ANDROID_PRIORITY_HIGHEST), // highest is negative static_cast(ANDROID_PRIORITY_LOWEST))) { return Error() << StringPrintf("process priority value must be range %d - %d", ANDROID_PRIORITY_HIGHEST, ANDROID_PRIORITY_LOWEST); @@ -547,16 +440,16 @@ Result Service::ParseInterface(std::vector&& args) { } Result Service::ParseIoprio(std::vector&& args) { - if (!ParseInt(args[2], &ioprio_pri_, 0, 7)) { + if (!ParseInt(args[2], &proc_attr_.ioprio_pri, 0, 7)) { return Error() << "priority value must 
be range 0 - 7"; } if (args[1] == "rt") { - ioprio_class_ = IoSchedClass_RT; + proc_attr_.ioprio_class = IoSchedClass_RT; } else if (args[1] == "be") { - ioprio_class_ = IoSchedClass_BE; + proc_attr_.ioprio_class = IoSchedClass_BE; } else if (args[1] == "idle") { - ioprio_class_ = IoSchedClass_IDLE; + proc_attr_.ioprio_class = IoSchedClass_IDLE; } else { return Error() << "ioprio option usage: ioprio <0-7>"; } @@ -613,11 +506,11 @@ Result Service::ParseOnrestart(std::vector&& args) { Result Service::ParseNamespace(std::vector&& args) { for (size_t i = 1; i < args.size(); i++) { if (args[i] == "pid") { - namespace_flags_ |= CLONE_NEWPID; + namespaces_.flags |= CLONE_NEWPID; // PID namespaces require mount namespaces. - namespace_flags_ |= CLONE_NEWNS; + namespaces_.flags |= CLONE_NEWNS; } else if (args[i] == "mnt") { - namespace_flags_ |= CLONE_NEWNS; + namespaces_.flags |= CLONE_NEWNS; } else { return Error() << "namespace must be 'pid' or 'mnt'"; } @@ -674,7 +567,7 @@ Result Service::ParseProcessRlimit(std::vector&& args) { auto rlimit = ParseRlimit(args); if (!rlimit) return rlimit.error(); - rlimits_.emplace_back(*rlimit); + proc_attr_.rlimits.emplace_back(*rlimit); return Success(); } @@ -784,7 +677,7 @@ Result Service::ParseUser(std::vector&& args) { if (!uid) { return Error() << "Unable to find UID for '" << args[1] << "': " << uid.error(); } - uid_ = *uid; + proc_attr_.uid = *uid; return Success(); } @@ -885,8 +778,8 @@ Result Service::ExecStart() { flags_ |= SVC_EXEC; is_exec_service_running_ = true; - LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << uid_ << " gid " - << gid_ << "+" << supp_gids_.size() << " context " + LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << proc_attr_.uid + << " gid " << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context " << (!seclabel_.empty() ? 
seclabel_ : "default") << ") started; waiting..."; return Success(); @@ -920,16 +813,16 @@ Result Service::Start() { bool needs_console = (flags_ & SVC_CONSOLE); if (needs_console) { - if (console_.empty()) { - console_ = default_console; + if (proc_attr_.console.empty()) { + proc_attr_.console = default_console; } // Make sure that open call succeeds to ensure a console driver is // properly registered for the device node - int console_fd = open(console_.c_str(), O_RDWR | O_CLOEXEC); + int console_fd = open(proc_attr_.console.c_str(), O_RDWR | O_CLOEXEC); if (console_fd < 0) { flags_ |= SVC_DISABLED; - return ErrnoError() << "Couldn't open console '" << console_ << "'"; + return ErrnoError() << "Couldn't open console '" << proc_attr_.console << "'"; } close(console_fd); } @@ -964,8 +857,8 @@ Result Service::Start() { LOG(INFO) << "starting service '" << name_ << "'..."; pid_t pid = -1; - if (namespace_flags_) { - pid = clone(nullptr, nullptr, namespace_flags_ | SIGCHLD, nullptr); + if (namespaces_.flags) { + pid = clone(nullptr, nullptr, namespaces_.flags | SIGCHLD, nullptr); } else { pid = fork(); } @@ -973,33 +866,9 @@ Result Service::Start() { if (pid == 0) { umask(077); - if (auto result = EnterNamespaces(); !result) { - LOG(FATAL) << "Service '" << name_ << "' could not enter namespaces: " << result.error(); - } - -#if defined(__ANDROID__) - if (pre_apexd_) { - if (!SwitchToBootstrapMountNamespaceIfNeeded()) { - LOG(FATAL) << "Service '" << name_ << "' could not enter " - << "into the bootstrap mount namespace"; - } - } -#endif - - if (namespace_flags_ & CLONE_NEWNS) { - if (auto result = SetUpMountNamespace(); !result) { - LOG(FATAL) << "Service '" << name_ - << "' could not set up mount namespace: " << result.error(); - } - } - - if (namespace_flags_ & CLONE_NEWPID) { - // This will fork again to run an init process inside the PID - // namespace. 
- if (auto result = SetUpPidNamespace(); !result) { - LOG(FATAL) << "Service '" << name_ - << "' could not set up PID namespace: " << result.error(); - } + if (auto result = EnterNamespaces(namespaces_, name_, pre_apexd_); !result) { + LOG(FATAL) << "Service '" << name_ + << "' failed to set up namespaces: " << result.error(); } for (const auto& [key, value] : environment_vars_) { @@ -1009,58 +878,13 @@ Result Service::Start() { std::for_each(descriptors_.begin(), descriptors_.end(), std::bind(&DescriptorInfo::CreateAndPublish, std::placeholders::_1, scon)); - // See if there were "writepid" instructions to write to files under cpuset path. - std::string cpuset_path; - if (CgroupGetControllerPath("cpuset", &cpuset_path)) { - auto cpuset_predicate = [&cpuset_path](const std::string& path) { - return StartsWith(path, cpuset_path + "/"); - }; - auto iter = - std::find_if(writepid_files_.begin(), writepid_files_.end(), cpuset_predicate); - if (iter == writepid_files_.end()) { - // There were no "writepid" instructions for cpusets, check if the system default - // cpuset is specified to be used for the process. - std::string default_cpuset = GetProperty("ro.cpuset.default", ""); - if (!default_cpuset.empty()) { - // Make sure the cpuset name starts and ends with '/'. - // A single '/' means the 'root' cpuset. 
- if (default_cpuset.front() != '/') { - default_cpuset.insert(0, 1, '/'); - } - if (default_cpuset.back() != '/') { - default_cpuset.push_back('/'); - } - writepid_files_.push_back( - StringPrintf("%s%stasks", cpuset_path.c_str(), default_cpuset.c_str())); - } - } - } else { - LOG(ERROR) << "cpuset cgroup controller is not mounted!"; - } - std::string pid_str = std::to_string(getpid()); - for (const auto& file : writepid_files_) { - if (!WriteStringToFile(pid_str, file)) { - PLOG(ERROR) << "couldn't write " << pid_str << " to " << file; - } - } - - if (ioprio_class_ != IoSchedClass_NONE) { - if (android_set_ioprio(getpid(), ioprio_class_, ioprio_pri_)) { - PLOG(ERROR) << "failed to set pid " << getpid() - << " ioprio=" << ioprio_class_ << "," << ioprio_pri_; - } - } - - if (needs_console) { - setsid(); - OpenConsole(); - } else { - ZapStdio(); + if (auto result = WritePidToFiles(&writepid_files_); !result) { + LOG(ERROR) << "failed to write pid to files: " << result.error(); } // As requested, set our gid, supplemental gids, uid, context, and // priority. Aborts on failure. 
- SetProcessAttributes(); + SetProcessAttributesAndCaps(); if (!ExpandArgsAndExecv(args_, sigstop_)) { PLOG(ERROR) << "cannot execve('" << args_[0] << "')"; @@ -1090,19 +914,19 @@ Result Service::Start() { bool use_memcg = swappiness_ != -1 || soft_limit_in_bytes_ != -1 || limit_in_bytes_ != -1 || limit_percent_ != -1 || !limit_property_.empty(); - errno = -createProcessGroup(uid_, pid_, use_memcg); + errno = -createProcessGroup(proc_attr_.uid, pid_, use_memcg); if (errno != 0) { - PLOG(ERROR) << "createProcessGroup(" << uid_ << ", " << pid_ << ") failed for service '" - << name_ << "'"; + PLOG(ERROR) << "createProcessGroup(" << proc_attr_.uid << ", " << pid_ + << ") failed for service '" << name_ << "'"; } else if (use_memcg) { if (swappiness_ != -1) { - if (!setProcessGroupSwappiness(uid_, pid_, swappiness_)) { + if (!setProcessGroupSwappiness(proc_attr_.uid, pid_, swappiness_)) { PLOG(ERROR) << "setProcessGroupSwappiness failed"; } } if (soft_limit_in_bytes_ != -1) { - if (!setProcessGroupSoftLimit(uid_, pid_, soft_limit_in_bytes_)) { + if (!setProcessGroupSoftLimit(proc_attr_.uid, pid_, soft_limit_in_bytes_)) { PLOG(ERROR) << "setProcessGroupSoftLimit failed"; } } @@ -1129,7 +953,7 @@ Result Service::Start() { } if (computed_limit_in_bytes != size_t(-1)) { - if (!setProcessGroupLimit(uid_, pid_, computed_limit_in_bytes)) { + if (!setProcessGroupLimit(proc_attr_.uid, pid_, computed_limit_in_bytes)) { PLOG(ERROR) << "setProcessGroupLimit failed"; } } @@ -1249,25 +1073,6 @@ void Service::StopOrReset(int how) { } } -void Service::ZapStdio() const { - int fd; - fd = open("/dev/null", O_RDWR); - dup2(fd, 0); - dup2(fd, 1); - dup2(fd, 2); - close(fd); -} - -void Service::OpenConsole() const { - int fd = open(console_.c_str(), O_RDWR); - if (fd == -1) fd = open("/dev/null", O_RDWR); - ioctl(fd, TIOCSCTTY, 0); - dup2(fd, 0); - dup2(fd, 1); - dup2(fd, 2); - close(fd); -} - ServiceList::ServiceList() {} ServiceList& ServiceList::GetInstance() { diff --git a/init/service.h 
b/init/service.h index ae29f28c3..93b5a5c3e 100644 --- a/init/service.h +++ b/init/service.h @@ -36,6 +36,7 @@ #include "descriptors.h" #include "keyword_map.h" #include "parser.h" +#include "service_utils.h" #include "subcontext.h" #define SVC_DISABLED 0x001 // do not autostart with class @@ -107,16 +108,16 @@ class Service { pid_t pid() const { return pid_; } android::base::boot_clock::time_point time_started() const { return time_started_; } int crash_count() const { return crash_count_; } - uid_t uid() const { return uid_; } - gid_t gid() const { return gid_; } - unsigned namespace_flags() const { return namespace_flags_; } - const std::vector& supp_gids() const { return supp_gids_; } + uid_t uid() const { return proc_attr_.uid; } + gid_t gid() const { return proc_attr_.gid; } + unsigned namespace_flags() const { return namespaces_.flags; } + const std::vector& supp_gids() const { return proc_attr_.supp_gids; } const std::string& seclabel() const { return seclabel_; } const std::vector& keycodes() const { return keycodes_; } - IoSchedClass ioprio_class() const { return ioprio_class_; } - int ioprio_pri() const { return ioprio_pri_; } + IoSchedClass ioprio_class() const { return proc_attr_.ioprio_class; } + int ioprio_pri() const { return proc_attr_.ioprio_pri; } const std::set& interfaces() const { return interfaces_; } - int priority() const { return priority_; } + int priority() const { return proc_attr_.priority; } int oom_score_adjust() const { return oom_score_adjust_; } bool is_override() const { return override_; } bool process_cgroup_empty() const { return process_cgroup_empty_; } @@ -132,15 +133,10 @@ class Service { using OptionParser = Result (Service::*)(std::vector&& args); class OptionParserMap; - Result SetUpMountNamespace() const; - Result SetUpPidNamespace() const; - Result EnterNamespaces() const; void NotifyStateChange(const std::string& new_state) const; void StopOrReset(int how); - void ZapStdio() const; - void OpenConsole() const; void 
KillProcessGroup(int signal); - void SetProcessAttributes(); + void SetProcessAttributesAndCaps(); Result ParseCapabilities(std::vector&& args); Result ParseClass(std::vector&& args); @@ -184,7 +180,6 @@ class Service { std::string name_; std::set classnames_; - std::string console_; unsigned flags_; pid_t pid_; @@ -192,13 +187,9 @@ class Service { android::base::boot_clock::time_point time_crashed_; // first crash within inspection window int crash_count_; // number of times crashed within window - uid_t uid_; - gid_t gid_; - std::vector supp_gids_; std::optional capabilities_; - unsigned namespace_flags_; - // Pair of namespace type, path to namespace. - std::vector> namespaces_to_enter_; + ProcessAttributes proc_attr_; + NamespaceInfo namespaces_; std::string seclabel_; @@ -214,10 +205,6 @@ class Service { // keycodes for triggering this service via /dev/input/input* std::vector keycodes_; - IoSchedClass ioprio_class_; - int ioprio_pri_; - int priority_; - int oom_score_adjust_; int swappiness_ = -1; @@ -233,8 +220,6 @@ class Service { unsigned long start_order_; - std::vector> rlimits_; - bool sigstop_ = false; std::chrono::seconds restart_period_ = 5s; diff --git a/init/service_utils.cpp b/init/service_utils.cpp new file mode 100644 index 000000000..17fc9c8b0 --- /dev/null +++ b/init/service_utils.cpp @@ -0,0 +1,265 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "service_utils.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "mount_namespace.h" + +using android::base::GetProperty; +using android::base::StartsWith; +using android::base::StringPrintf; +using android::base::unique_fd; +using android::base::WriteStringToFile; + +namespace android { +namespace init { + +namespace { + +Result EnterNamespace(int nstype, const char* path) { + auto fd = unique_fd{open(path, O_RDONLY | O_CLOEXEC)}; + if (fd == -1) { + return ErrnoError() << "Could not open namespace at " << path; + } + if (setns(fd, nstype) == -1) { + return ErrnoError() << "Could not setns() namespace at " << path; + } + return Success(); +} + +Result SetUpMountNamespace(bool remount_proc, bool remount_sys) { + constexpr unsigned int kSafeFlags = MS_NODEV | MS_NOEXEC | MS_NOSUID; + + // Recursively remount / as slave like zygote does so unmounting and mounting /proc + // doesn't interfere with the parent namespace's /proc mount. This will also + // prevent any other mounts/unmounts initiated by the service from interfering + // with the parent namespace but will still allow mount events from the parent + // namespace to propagate to the child. + if (mount("rootfs", "/", nullptr, (MS_SLAVE | MS_REC), nullptr) == -1) { + return ErrnoError() << "Could not remount(/) recursively as slave"; + } + + // umount() then mount() /proc and/or /sys + // Note that it is not sufficient to mount with MS_REMOUNT. 
+ if (remount_proc) { + if (umount("/proc") == -1) { + return ErrnoError() << "Could not umount(/proc)"; + } + if (mount("", "/proc", "proc", kSafeFlags, "") == -1) { + return ErrnoError() << "Could not mount(/proc)"; + } + } + if (remount_sys) { + if (umount2("/sys", MNT_DETACH) == -1) { + return ErrnoError() << "Could not umount(/sys)"; + } + if (mount("", "/sys", "sysfs", kSafeFlags, "") == -1) { + return ErrnoError() << "Could not mount(/sys)"; + } + } + return Success(); +} + +Result SetUpPidNamespace(const char* name) { + if (prctl(PR_SET_NAME, name) == -1) { + return ErrnoError() << "Could not set name"; + } + + pid_t child_pid = fork(); + if (child_pid == -1) { + return ErrnoError() << "Could not fork init inside the PID namespace"; + } + + if (child_pid > 0) { + // So that we exit with the right status. + static int init_exitstatus = 0; + signal(SIGTERM, [](int) { _exit(init_exitstatus); }); + + pid_t waited_pid; + int status; + while ((waited_pid = wait(&status)) > 0) { + // This loop will end when there are no processes left inside the + // PID namespace or when the init process inside the PID namespace + // gets a signal. 
+ if (waited_pid == child_pid) { + init_exitstatus = status; + } + } + if (!WIFEXITED(init_exitstatus)) { + _exit(EXIT_FAILURE); + } + _exit(WEXITSTATUS(init_exitstatus)); + } + return Success(); +} + +void ZapStdio() { + int fd; + fd = open("/dev/null", O_RDWR); + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); +} + +void OpenConsole(const std::string& console) { + int fd = open(console.c_str(), O_RDWR); + if (fd == -1) fd = open("/dev/null", O_RDWR); + ioctl(fd, TIOCSCTTY, 0); + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); +} + +} // namespace + +Result EnterNamespaces(const NamespaceInfo& info, const std::string& name, + bool pre_apexd) { + for (const auto& [nstype, path] : info.namespaces_to_enter) { + if (auto result = EnterNamespace(nstype, path.c_str()); !result) { + return result; + } + } + +#if defined(__ANDROID__) + if (pre_apexd) { + if (!SwitchToBootstrapMountNamespaceIfNeeded()) { + return Error() << "could not enter into the bootstrap mount namespace"; + } + } +#endif + + if (info.flags & CLONE_NEWNS) { + bool remount_proc = info.flags & CLONE_NEWPID; + bool remount_sys = + std::any_of(info.namespaces_to_enter.begin(), info.namespaces_to_enter.end(), + [](const auto& entry) { return entry.first == CLONE_NEWNET; }); + if (auto result = SetUpMountNamespace(remount_proc, remount_sys); !result) { + return result; + } + } + + if (info.flags & CLONE_NEWPID) { + // This will fork again to run an init process inside the PID namespace. 
+ if (auto result = SetUpPidNamespace(name.c_str()); !result) { + return result; + } + } + + return Success(); +} + +Result SetProcessAttributes(const ProcessAttributes& attr) { + if (attr.ioprio_class != IoSchedClass_NONE) { + if (android_set_ioprio(getpid(), attr.ioprio_class, attr.ioprio_pri)) { + PLOG(ERROR) << "failed to set pid " << getpid() << " ioprio=" << attr.ioprio_class + << "," << attr.ioprio_pri; + } + } + + if (!attr.console.empty()) { + setsid(); + OpenConsole(attr.console); + } else { + if (setpgid(0, getpid()) == -1) { + return ErrnoError() << "setpgid failed"; + } + ZapStdio(); + } + + for (const auto& rlimit : attr.rlimits) { + if (setrlimit(rlimit.first, &rlimit.second) == -1) { + return ErrnoError() << StringPrintf( + "setrlimit(%d, {rlim_cur=%ld, rlim_max=%ld}) failed", rlimit.first, + rlimit.second.rlim_cur, rlimit.second.rlim_max); + } + } + + if (attr.gid) { + if (setgid(attr.gid) != 0) { + return ErrnoError() << "setgid failed"; + } + } + if (setgroups(attr.supp_gids.size(), const_cast(&attr.supp_gids[0])) != 0) { + return ErrnoError() << "setgroups failed"; + } + if (attr.uid) { + if (setuid(attr.uid) != 0) { + return ErrnoError() << "setuid failed"; + } + } + + if (attr.priority != 0) { + if (setpriority(PRIO_PROCESS, 0, attr.priority) != 0) { + return ErrnoError() << "setpriority failed"; + } + } + return Success(); +} + +Result WritePidToFiles(std::vector* files) { + // See if there were "writepid" instructions to write to files under cpuset path. + std::string cpuset_path; + if (CgroupGetControllerPath("cpuset", &cpuset_path)) { + auto cpuset_predicate = [&cpuset_path](const std::string& path) { + return StartsWith(path, cpuset_path + "/"); + }; + auto iter = std::find_if(files->begin(), files->end(), cpuset_predicate); + if (iter == files->end()) { + // There were no "writepid" instructions for cpusets, check if the system default + // cpuset is specified to be used for the process. 
+ std::string default_cpuset = GetProperty("ro.cpuset.default", ""); + if (!default_cpuset.empty()) { + // Make sure the cpuset name starts and ends with '/'. + // A single '/' means the 'root' cpuset. + if (default_cpuset.front() != '/') { + default_cpuset.insert(0, 1, '/'); + } + if (default_cpuset.back() != '/') { + default_cpuset.push_back('/'); + } + files->push_back( + StringPrintf("%s%stasks", cpuset_path.c_str(), default_cpuset.c_str())); + } + } + } else { + LOG(ERROR) << "cpuset cgroup controller is not mounted!"; + } + std::string pid_str = std::to_string(getpid()); + for (const auto& file : *files) { + if (!WriteStringToFile(pid_str, file)) { + return ErrnoError() << "couldn't write " << pid_str << " to " << file; + } + } + return Success(); +} + +} // namespace init +} // namespace android diff --git a/init/service_utils.h b/init/service_utils.h new file mode 100644 index 000000000..f7502a9b6 --- /dev/null +++ b/init/service_utils.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2019 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include + +#include "result.h" + +namespace android { +namespace init { + +struct NamespaceInfo { + unsigned flags; + // Pair of namespace type, path to namespace. 
+ std::vector> namespaces_to_enter; +}; +Result EnterNamespaces(const NamespaceInfo& info, const std::string& name, bool pre_apexd); + +struct ProcessAttributes { + std::string console; + IoSchedClass ioprio_class; + int ioprio_pri; + std::vector> rlimits; + uid_t uid; + gid_t gid; + std::vector supp_gids; + int priority; +}; +Result SetProcessAttributes(const ProcessAttributes& attr); + +Result WritePidToFiles(std::vector* files); + +} // namespace init +} // namespace android