Modify the code to avoid potential deadlocks.

If the signal to dump a thread is never delivered, then it's
possible for a deadlock. The signal handler is responsible for
unlocking and deleting the ThreadEntry created for the pid/tid
combination. This means if the signal is lost, the ThreadEntry
gets stuck locked and never deleted. If a second attempt to get
a backtrace of this thread occurs, there is a deadlock.

Also, decrease the timeout from 10 seconds to 5 seconds. The original
10 seconds was because the unwind was actually done in the signal
handler. Now the signal handler does nothing but copy the ucontext
structure and let the caller do the unwind.

Bug: 21086132
(cherry picked from commit 2d09171758)

Change-Id: I414c500eb08983a5017caf3fce4f499465575a9d
This commit is contained in:
Christopher Ferris 2015-05-28 16:10:33 -07:00
parent e2452b4bf3
commit 7e2cb84e9c
4 changed files with 40 additions and 17 deletions

View File

@ -96,7 +96,7 @@ static pthread_mutex_t g_sigaction_mutex = PTHREAD_MUTEX_INITIALIZER;
static void SignalHandler(int, siginfo_t*, void* sigcontext) {
ThreadEntry* entry = ThreadEntry::Get(getpid(), gettid(), false);
if (!entry) {
BACK_LOGW("Unable to find pid %d tid %d information", getpid(), gettid());
BACK_LOGE("pid %d, tid %d entry not found", getpid(), gettid());
return;
}
@ -109,9 +109,14 @@ static void SignalHandler(int, siginfo_t*, void* sigcontext) {
// the thread run ahead causing problems.
// The number indicates that we are waiting for the second Wake() call
// overall which is made by the thread requesting an unwind.
entry->Wait(2);
ThreadEntry::Remove(entry);
if (entry->Wait(2)) {
// Do not remove the entry here because that can result in a deadlock
// if the code cannot properly send a signal to the thread under test.
entry->Wake();
} else {
// At this point, it is possible that entry has been freed, so just exit.
BACK_LOGE("Timed out waiting for unwind thread to indicate it completed.");
}
}
bool BacktraceCurrent::UnwindThread(size_t num_ignore_frames) {
@ -128,17 +133,15 @@ bool BacktraceCurrent::UnwindThread(size_t num_ignore_frames) {
act.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
sigemptyset(&act.sa_mask);
if (sigaction(THREAD_SIGNAL, &act, &oldact) != 0) {
BACK_LOGW("sigaction failed %s", strerror(errno));
entry->Unlock();
BACK_LOGE("sigaction failed: %s", strerror(errno));
ThreadEntry::Remove(entry);
pthread_mutex_unlock(&g_sigaction_mutex);
return false;
}
if (tgkill(Pid(), Tid(), THREAD_SIGNAL) != 0) {
BACK_LOGW("tgkill %d failed: %s", Tid(), strerror(errno));
BACK_LOGE("tgkill %d failed: %s", Tid(), strerror(errno));
sigaction(THREAD_SIGNAL, &oldact, nullptr);
entry->Unlock();
ThreadEntry::Remove(entry);
pthread_mutex_unlock(&g_sigaction_mutex);
return false;
@ -146,17 +149,30 @@ bool BacktraceCurrent::UnwindThread(size_t num_ignore_frames) {
// Wait for the thread to get the ucontext. The number indicates
// that we are waiting for the first Wake() call made by the thread.
entry->Wait(1);
bool wait_completed = entry->Wait(1);
// After the thread has received the signal, allow other unwinders to
// continue.
sigaction(THREAD_SIGNAL, &oldact, nullptr);
pthread_mutex_unlock(&g_sigaction_mutex);
bool unwind_done = UnwindFromContext(num_ignore_frames, entry->GetUcontext());
bool unwind_done = false;
if (wait_completed) {
unwind_done = UnwindFromContext(num_ignore_frames, entry->GetUcontext());
// Tell the signal handler to exit and release the entry.
entry->Wake();
// Tell the signal handler to exit and release the entry.
entry->Wake();
// Wait for the thread to indicate it is done with the ThreadEntry.
if (!entry->Wait(3)) {
// Send a warning, but do not mark as a failure to unwind.
BACK_LOGW("Timed out waiting for signal handler to indicate it finished.");
}
} else {
BACK_LOGE("Timed out waiting for signal handler to get ucontext data.");
}
ThreadEntry::Remove(entry);
return unwind_done;
}

View File

@ -25,4 +25,7 @@
#define BACK_LOGW(format, ...) \
ALOGW("%s: " format, __PRETTY_FUNCTION__, ##__VA_ARGS__)
#define BACK_LOGE(format, ...) \
ALOGE("%s: " format, __PRETTY_FUNCTION__, ##__VA_ARGS__)
#endif // _LIBBACKTRACE_BACKTRACE_LOG_H

View File

@ -69,7 +69,7 @@ ThreadEntry* ThreadEntry::Get(pid_t pid, pid_t tid, bool create) {
}
void ThreadEntry::Remove(ThreadEntry* entry) {
pthread_mutex_unlock(&entry->mutex_);
entry->Unlock();
pthread_mutex_lock(&ThreadEntry::list_mutex_);
if (--entry->ref_count_ == 0) {
@ -96,20 +96,24 @@ ThreadEntry::~ThreadEntry() {
pthread_cond_destroy(&wait_cond_);
}
void ThreadEntry::Wait(int value) {
bool ThreadEntry::Wait(int value) {
timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
ts.tv_sec += 10;
ts.tv_sec += 5;
bool wait_completed = true;
pthread_mutex_lock(&wait_mutex_);
while (wait_value_ != value) {
int ret = pthread_cond_timedwait(&wait_cond_, &wait_mutex_, &ts);
if (ret != 0) {
BACK_LOGW("pthread_cond_timedwait failed: %s", strerror(ret));
BACK_LOGW("pthread_cond_timedwait for value %d failed: %s", value, strerror(ret));
wait_completed = false;
break;
}
}
pthread_mutex_unlock(&wait_mutex_);
return wait_completed;
}
void ThreadEntry::Wake() {

View File

@ -29,7 +29,7 @@ public:
void Wake();
void Wait(int);
bool Wait(int);
void CopyUcontextFromSigcontext(void*);