tombstoned: Support java trace dumps.

The changes here involve :
- Creating and opening a new socket to receive trace dump requests on. Having
  different sockets allows us to install different sets of access control rules.

- A minor refactor to allow us to share common pieces of implementation
  between the java and native dumping code. This will also allow us to
  add a unit test for all file / directory related logic.

There are two java trace specific additions here :
- We use SO_PEERCRED instead of trusting the PID written to the socket
  because requests come in from untrusted processes.
- Java trace dumps are not interceptible.

kJavaTraceDumpsEnabled is set to false for now but the value of the flag
will be flipped in a future change.

Bug: 32064548
Test: Manual; Currently working on a unit_test for CrashType.

Change-Id: I1d62cc7a7035fd500c3e2b831704a2934d725e35
This commit is contained in:
Narayan Kamath 2017-05-15 15:59:30 +01:00
parent 9397adabd8
commit 922f6b22fc
5 changed files with 183 additions and 93 deletions

View File

@ -21,6 +21,7 @@
// Sockets in the ANDROID_SOCKET_NAMESPACE_RESERVED namespace.
// All of these sockets are SOCK_SEQPACKET sockets, so no explicit length field is needed.
constexpr char kTombstonedCrashSocketName[] = "tombstoned_crash";
constexpr char kTombstonedJavaTraceSocketName[] = "tombstoned_java_trace";
constexpr char kTombstonedInterceptSocketName[] = "tombstoned_intercept";
enum class CrashPacketType : uint8_t {

View File

@ -21,6 +21,6 @@
#include <android-base/unique_fd.h>
bool tombstoned_connect(pid_t pid, android::base::unique_fd* tombstoned_socket,
android::base::unique_fd* output_fd);
android::base::unique_fd* output_fd, bool is_native_crash = true);
bool tombstoned_notify_completion(int tombstoned_socket);

View File

@ -50,86 +50,154 @@ enum CrashStatus {
kCrashStatusQueued,
};
struct Crash;
// Bookkeeping for one category of dump output (native tombstones or java traces).
//
// Each instance owns:
//   - a directory in which a fixed-size ring of numbered output files
//     ("<prefix>NN") is recycled, oldest first;
//   - a counter of dumps currently in flight, capped at |max_concurrent_dumps|;
//   - a FIFO of requests that arrived while the cap was reached.
//
// The class does no locking of its own.
class CrashType {
 public:
  // Opens |dir_path| and picks the first artifact slot to (re)use.
  //
  // dir_path:             directory that holds the artifacts.
  // file_name_prefix:     file name prefix; the slot number is appended to it.
  // max_artifacts:        number of slots in the ring.
  // max_concurrent_dumps: number of dumps allowed to be in flight at once.
  //
  // Aborts (PLOG(FATAL) / CHECK) if the directory cannot be opened or if
  // max_artifacts <= max_concurrent_dumps.
  CrashType(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
            size_t max_concurrent_dumps)
      : file_name_prefix_(file_name_prefix),
        dir_path_(dir_path),
        dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
        max_artifacts_(max_artifacts),
        next_artifact_(0),
        max_concurrent_dumps_(max_concurrent_dumps),
        num_concurrent_dumps_(0) {
    if (dir_fd_ == -1) {
      PLOG(FATAL) << "failed to open directory: " << dir_path;
    }

    // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
    // same filename could be handed out to multiple processes.
    CHECK(max_artifacts_ > max_concurrent_dumps_);

    find_oldest_artifact();
  }

  // Creates the output file for the next slot in the ring and returns a
  // write-only, append-mode descriptor for it. Any existing file in that slot
  // is removed first. Advances the ring. Aborts on unlink/create failure.
  unique_fd get_output_fd() {
    unique_fd result;
    char buf[PATH_MAX];
    snprintf(buf, sizeof(buf), "%s%02zu", file_name_prefix_.c_str(), next_artifact_);

    // Unlink and create the file, instead of using O_TRUNC, to avoid two processes
    // interleaving their output in case we ever get into that situation.
    if (unlinkat(dir_fd_, buf, 0) != 0 && errno != ENOENT) {
      // dir_path_ carries no trailing slash, so add the separator explicitly.
      PLOG(FATAL) << "failed to unlink tombstone at " << dir_path_ << "/" << buf;
    }

    result.reset(openat(dir_fd_, buf, O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640));
    if (result == -1) {
      PLOG(FATAL) << "failed to create tombstone at " << dir_path_ << "/" << buf;
    }

    next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
    return result;
  }

  // Queues |crash| if the concurrency cap has been reached.
  // Returns true when the request was queued, false when the caller should
  // service it immediately.
  bool maybe_enqueue_crash(Crash* crash) {
    if (num_concurrent_dumps_ == max_concurrent_dumps_) {
      queued_requests_.push_back(crash);
      return true;
    }

    return false;
  }

  // Pops queued requests in FIFO order and passes each to |handler| for as
  // long as the queue is non-empty and the in-flight count is below the cap.
  // The count is only changed through on_crash_started()/on_crash_completed().
  void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
    while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
      Crash* next_crash = queued_requests_.front();
      queued_requests_.pop_front();
      handler(next_crash);
    }
  }

  // Records that a dump has started (one more in flight).
  void on_crash_started() { ++num_concurrent_dumps_; }

  // Records that a dump has finished (one fewer in flight).
  void on_crash_completed() { --num_concurrent_dumps_; }

  // Process-wide instances, defined outside the class.
  static CrashType* const tombstone;
  static CrashType* const java_trace;

 private:
  // Scans the ring and sets next_artifact_ to the first slot whose file does
  // not exist or, if every slot exists, to the slot with the oldest mtime.
  // Slots that fail to stat for any other reason are logged and skipped.
  void find_oldest_artifact() {
    size_t oldest_tombstone = 0;
    time_t oldest_time = std::numeric_limits<time_t>::max();

    for (size_t i = 0; i < max_artifacts_; ++i) {
      std::string path = android::base::StringPrintf("%s/%s%02zu", dir_path_.c_str(),
                                                     file_name_prefix_.c_str(), i);
      struct stat st;
      if (stat(path.c_str(), &st) != 0) {
        if (errno == ENOENT) {
          // A free slot beats any existing file: use it right away.
          oldest_tombstone = i;
          break;
        } else {
          PLOG(ERROR) << "failed to stat " << path;
          continue;
        }
      }

      if (st.st_mtime < oldest_time) {
        oldest_tombstone = i;
        oldest_time = st.st_mtime;
      }
    }

    next_artifact_ = oldest_tombstone;
  }

  const std::string file_name_prefix_;

  const std::string dir_path_;
  const int dir_fd_;

  const size_t max_artifacts_;
  // Index of the slot the next call to get_output_fd() will use. Kept as
  // size_t so it matches max_artifacts_ and the slot indices computed above.
  size_t next_artifact_;

  const size_t max_concurrent_dumps_;
  size_t num_concurrent_dumps_;

  std::deque<Crash*> queued_requests_;

  DISALLOW_COPY_AND_ASSIGN(CrashType);
};
// Whether java trace dumps are produced via tombstoned.
// While this is false, CrashType::java_trace below is nullptr and no CrashType
// is constructed for "/data/anr".
static constexpr bool kJavaTraceDumpsEnabled = false;
// CrashType instance for native tombstones: files named "tombstone_NN" under
// "/data/tombstones", 10 slots, one dump at a time. Constructing it opens the
// directory, and the constructor aborts if that fails.
/* static */ CrashType* const CrashType::tombstone =
new CrashType("/data/tombstones", "tombstone_" /* file_name_prefix */, 10 /* max_artifacts */,
1 /* max_concurrent_dumps */);
// CrashType instance for java traces: files named "anr_NN" under "/data/anr",
// 64 slots, up to 4 dumps at a time. Only created when
// kJavaTraceDumpsEnabled is true; nullptr otherwise.
/* static */ CrashType* const CrashType::java_trace =
(kJavaTraceDumpsEnabled ? new CrashType("/data/anr", "anr_" /* file_name_prefix */,
64 /* max_artifacts */, 4 /* max_concurrent_dumps */)
: nullptr);
// Ownership of Crash is a bit messy.
// It's either owned by an active event that must have a timeout, or owned by
// queued_requests, in the case that multiple crashes come in at the same time.
//
// One Crash is allocated per accepted connection; it carries the connection's
// socket, the pid being dumped and the libevent event that drives it.
// NOTE(review): two destructor definitions appear below; presumably these are
// the pre-change and post-change lines of the diff shown together - confirm
// against the real file.
struct Crash {
~Crash() {
event_free(crash_event);
}
// Frees the libevent event associated with this request.
~Crash() { event_free(crash_event); }
// Socket connected to the requesting process; closed when the Crash is destroyed.
unique_fd crash_fd;
// PID the dump is for.
pid_t crash_pid;
// Event registered for this request's socket.
event* crash_event = nullptr;
// Not owned by |Crash|.
CrashType* crash_type = nullptr;
};
// Directory the tombstone files live in. The trailing slash matters: the path
// is concatenated directly with file names in format strings and log messages.
static constexpr char kTombstoneDirectory[] = "/data/tombstones/";
// Number of tombstone slots; next_tombstone wraps around at this value.
static constexpr size_t kTombstoneCount = 10;
// Descriptor for kTombstoneDirectory, used as the base for unlinkat()/openat().
// -1 until it has been opened.
static int tombstone_directory_fd = -1;
// Index of the tombstone slot that will be handed out next.
static int next_tombstone = 0;
// Upper bound on dumps in flight; further requests are queued.
static constexpr size_t kMaxConcurrentDumps = 1;
// Number of dumps currently in flight.
static size_t num_concurrent_dumps = 0;
// Requests waiting for a free dump slot, serviced in FIFO order.
static std::deque<Crash*> queued_requests;
// Forward declare the callbacks so they can be placed in a sensible order.
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
static void find_oldest_tombstone() {
size_t oldest_tombstone = 0;
time_t oldest_time = std::numeric_limits<time_t>::max();
for (size_t i = 0; i < kTombstoneCount; ++i) {
std::string path = android::base::StringPrintf("%stombstone_%02zu", kTombstoneDirectory, i);
struct stat st;
if (stat(path.c_str(), &st) != 0) {
if (errno == ENOENT) {
oldest_tombstone = i;
break;
} else {
PLOG(ERROR) << "failed to stat " << path;
continue;
}
}
if (st.st_mtime < oldest_time) {
oldest_tombstone = i;
oldest_time = st.st_mtime;
}
}
next_tombstone = oldest_tombstone;
}
// Creates the file for the current tombstone slot and returns a write-only,
// append-mode descriptor for it, then advances next_tombstone (wrapping at
// kTombstoneCount). Aborts if the old file cannot be removed or the new one
// cannot be created.
static unique_fd get_tombstone_fd() {
  char name[PATH_MAX];
  snprintf(name, sizeof(name), "tombstone_%02d", next_tombstone);

  // If kMaxConcurrentDumps is greater than 1, then theoretically the same
  // filename could be handed out to multiple processes. Removing the old file
  // and creating a fresh one with O_EXCL (rather than truncating in place)
  // keeps two processes from interleaving their output.
  const int unlink_rc = unlinkat(tombstone_directory_fd, name, 0);
  if (unlink_rc != 0 && errno != ENOENT) {
    PLOG(FATAL) << "failed to unlink tombstone at " << kTombstoneDirectory << name;
  }

  const int create_flags = O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC;
  unique_fd tombstone_fd;
  tombstone_fd.reset(openat(tombstone_directory_fd, name, create_flags, 0640));
  if (tombstone_fd == -1) {
    PLOG(FATAL) << "failed to create tombstone at " << kTombstoneDirectory << name;
  }

  next_tombstone = (next_tombstone + 1) % kTombstoneCount;
  return tombstone_fd;
}
static void perform_request(Crash* crash) {
unique_fd output_fd;
if (!intercept_manager->GetIntercept(crash->crash_pid, &output_fd)) {
output_fd = get_tombstone_fd();
// Note that java traces are not interceptible.
if ((crash->crash_type == CrashType::java_trace) ||
!intercept_manager->GetIntercept(crash->crash_pid, &output_fd)) {
output_fd = crash->crash_type->get_output_fd();
}
TombstonedCrashPacket response = {
@ -152,23 +220,15 @@ static void perform_request(Crash* crash) {
event_add(crash->crash_event, &timeout);
}
++num_concurrent_dumps;
crash->crash_type->on_crash_started();
return;
fail:
delete crash;
}
static void dequeue_requests() {
while (!queued_requests.empty() && num_concurrent_dumps < kMaxConcurrentDumps) {
Crash* next_crash = queued_requests.front();
queued_requests.pop_front();
perform_request(next_crash);
}
}
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
void*) {
void* crash_type) {
event_base* base = evconnlistener_get_base(listener);
Crash* crash = new Crash();
@ -176,12 +236,15 @@ static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, so
event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
crash->crash_fd.reset(sockfd);
crash->crash_event = crash_event;
crash->crash_type = static_cast<CrashType*>(crash_type);
event_add(crash_event, &timeout);
}
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
ssize_t rc;
Crash* crash = static_cast<Crash*>(arg);
CrashType* type = crash->crash_type;
TombstonedCrashPacket request = {};
if ((ev & EV_TIMEOUT) != 0) {
@ -208,12 +271,27 @@ static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
goto fail;
}
crash->crash_pid = request.packet.dump_request.pid;
if (type == CrashType::tombstone) {
crash->crash_pid = request.packet.dump_request.pid;
} else {
// Requests for java traces are sent from untrusted processes, so we
// must not trust the PID sent down with the request. Instead, we ask the
// kernel.
ucred cr = {};
socklen_t len = sizeof(cr);
int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
if (ret != 0) {
PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
goto fail;
}
crash->crash_pid = cr.pid;
}
LOG(INFO) << "received crash request for pid " << crash->crash_pid;
if (num_concurrent_dumps == kMaxConcurrentDumps) {
if (type->maybe_enqueue_crash(crash)) {
LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
queued_requests.push_back(crash);
} else {
perform_request(crash);
}
@ -229,7 +307,7 @@ static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
Crash* crash = static_cast<Crash*>(arg);
TombstonedCrashPacket request = {};
--num_concurrent_dumps;
crash->crash_type->on_crash_completed();
if ((ev & EV_READ) == 0) {
goto fail;
@ -252,10 +330,11 @@ static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
}
fail:
CrashType* type = crash->crash_type;
delete crash;
// If there's something queued up, let them proceed.
dequeue_requests();
type->maybe_dequeue_crashes(perform_request);
}
int main(int, char* []) {
@ -269,13 +348,6 @@ int main(int, char* []) {
};
debuggerd_register_handlers(&action);
tombstone_directory_fd = open(kTombstoneDirectory, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
if (tombstone_directory_fd == -1) {
PLOG(FATAL) << "failed to open tombstone directory";
}
find_oldest_tombstone();
int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
@ -293,10 +365,24 @@ int main(int, char* []) {
intercept_manager = new InterceptManager(base, intercept_socket);
evconnlistener* listener =
evconnlistener_new(base, crash_accept_cb, nullptr, -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
if (!listener) {
LOG(FATAL) << "failed to create evconnlistener";
evconnlistener* tombstone_listener = evconnlistener_new(
base, crash_accept_cb, CrashType::tombstone, -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
if (!tombstone_listener) {
LOG(FATAL) << "failed to create evconnlistener for tombstones.";
}
if (kJavaTraceDumpsEnabled) {
const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
if (java_trace_socket == -1) {
PLOG(FATAL) << "failed to get socket from init";
}
evutil_make_socket_nonblocking(java_trace_socket);
evconnlistener* java_trace_listener = evconnlistener_new(
base, crash_accept_cb, CrashType::java_trace, -1, LEV_OPT_CLOSE_ON_FREE, java_trace_socket);
if (!java_trace_listener) {
LOG(FATAL) << "failed to create evconnlistener for java traces.";
}
}
LOG(INFO) << "tombstoned successfully initialized";

View File

@ -7,4 +7,5 @@ service tombstoned /system/bin/tombstoned
socket tombstoned_crash seqpacket 0666 system system
socket tombstoned_intercept seqpacket 0666 system system
socket tombstoned_java_trace seqpacket 0666 system system
writepid /dev/cpuset/system-background/tasks

View File

@ -30,9 +30,11 @@
using android::base::unique_fd;
bool tombstoned_connect(pid_t pid, unique_fd* tombstoned_socket, unique_fd* output_fd) {
unique_fd sockfd(socket_local_client(kTombstonedCrashSocketName,
ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_SEQPACKET));
bool tombstoned_connect(pid_t pid, unique_fd* tombstoned_socket, unique_fd* output_fd,
bool is_native_crash) {
unique_fd sockfd(socket_local_client(
(is_native_crash ? kTombstonedCrashSocketName : kTombstonedJavaTraceSocketName),
ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_SEQPACKET));
if (sockfd == -1) {
async_safe_format_log(ANDROID_LOG_ERROR, "libc", "failed to connect to tombstoned: %s",
strerror(errno));