From d0e8451d59e41a8435daaca6c1702154f3d418de Mon Sep 17 00:00:00 2001
From: Tom Cherry <tomcherry@google.com>
Date: Wed, 12 Aug 2020 09:44:52 -0700
Subject: [PATCH 1/2] logd: always wake 'wrapped' readers on prune

See the comment in the code for more details.  'wrapped' readers are
uncommon and error-prone, and this change makes them more reliable.  Its
side effect is that wrapped readers will wake more often, but they'll
still be batched to a large degree.

Bug: 163617910
Test: logging unit tests
Test: logcat --wrap does the right thing
Change-Id: I4b6f8331ff7854787c97f821b2a5bf8d7da321c6
---
 logd/SerializedLogBuffer.cpp | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/logd/SerializedLogBuffer.cpp b/logd/SerializedLogBuffer.cpp
index 65fedb026..f4bf37f66 100644
--- a/logd/SerializedLogBuffer.cpp
+++ b/logd/SerializedLogBuffer.cpp
@@ -143,9 +143,16 @@ bool SerializedLogBuffer::Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid
         if (!reader_thread->IsWatching(log_id)) {
             continue;
         }
-        if (!oldest || oldest->start() > reader_thread->start() ||
-            (oldest->start() == reader_thread->start() &&
-             reader_thread->deadline().time_since_epoch().count() != 0)) {
+        if (reader_thread->deadline().time_since_epoch().count() != 0) {
+            // Always wake up wrapped readers when pruning.  'Wrapped' readers are an optimization
+            // that allows the reader to wait until logs starting at a specified time stamp are
+            // about to be pruned.  This is error-prone however, since if that timestamp is about
+            // to be pruned, the reader is not likely to read the messages fast enough to not back
+            // up logd.  Instead, we can achieve a nearly-as-efficient but not error-prone batching
+            // effect by waking the reader whenever any chunk is about to be pruned.
+            reader_thread->triggerReader_Locked();
+        }
+        if (!oldest || oldest->start() > reader_thread->start()) {
             oldest = reader_thread.get();
         }
     }
@@ -202,9 +209,6 @@ void SerializedLogBuffer::KickReader(LogReaderThread* reader, log_id_t id, size_
         LOG(WARNING) << "Kicking blocked reader, " << reader->name()
                      << ", from LogBuffer::kickMe()";
         reader->release_Locked();
-    } else if (reader->deadline().time_since_epoch().count() != 0) {
-        // Allow a blocked WRAP deadline reader to trigger and start reporting the log data.
-        reader->triggerReader_Locked();
     } else {
         // Tell slow reader to skip entries to catch up.
         unsigned long prune_rows = bytes_to_free / 300;

From 3932a9c0747b6aece517652ea0aba4be6c8df9b6 Mon Sep 17 00:00:00 2001
From: Tom Cherry <tomcherry@google.com>
Date: Wed, 12 Aug 2020 09:36:15 -0700
Subject: [PATCH 2/2] logd: SerializedLogBuffer: never wait for a reader during prune/clear

Previously, chatty had logic that would skip a certain number of log
entries to satisfy pruning, but otherwise keep the reader connected.
This was a best-effort attempt at helping pruning and had additional
logic that handled further disruptions, if logd's memory was 2x the
allotted memory.

The new logic has two components:
1) memcpy() each individual log message in FlushTo(), such that there
   are no references to the underlying log data without a lock held.
   Note that this memcpy() is completely negligible for performance.
2) In Prune(), immediately delete all log chunks required to reduce
   memory to the allotted amount, which is now safe given 1).  If
   readers will lose logs, continue to print a warning.

This additionally makes the Clear() logic deterministic.  It was
previously best-effort in chatty, but will immediately and always clear
all logs for SerializedLogBuffer.
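A minimal sketch of component 1), using hypothetical stand-ins (Entry,
Writer, FlushOne, kMaxPayload) rather than the real logd types, to show
why a slow reader can no longer block pruning:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <mutex>

    // Hypothetical stand-ins for the serialized entry header and writer.
    struct Entry {
        uint16_t msg_len;  // message bytes follow this header in the chunk
    };

    struct Writer {
        // May block indefinitely on a slow reader socket.
        bool Write(const void* /*data*/, size_t /*len*/) { return true; }
    };

    constexpr size_t kMaxPayload = 4068;  // mirrors LOGGER_ENTRY_MAX_PAYLOAD

    bool FlushOne(std::unique_lock<std::mutex>& lock, const Entry* entry,
                  Writer& writer) {
        // Copy header + payload onto the stack while the buffer lock is held...
        unsigned char copy[sizeof(Entry) + kMaxPayload];
        size_t len = sizeof(Entry) + entry->msg_len;
        std::memcpy(copy, entry, len);

        lock.unlock();  // ...so Prune() may free the chunk holding `entry`.
        bool ok = writer.Write(copy, len);  // slow write no longer blocks pruning
        lock.lock();
        return ok;
    }

The copy is one bounded memcpy() into a fixed-size stack buffer per
flushed entry, which is what makes the immediate deletion in 2) safe.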
Bug: 163617910
Test: logging unit tests
Test: Prune() immediately frees buffers during high log pressure
Test: Clear() immediately frees buffers during high log pressure
Change-Id: I40fe9b791312af3dc256b166e5c34425f4ca51ac
---
 logd/SerializedLogBuffer.cpp | 131 ++++++++++++-----------------------
 logd/SerializedLogBuffer.h   |   4 +-
 2 files changed, 45 insertions(+), 90 deletions(-)

diff --git a/logd/SerializedLogBuffer.cpp b/logd/SerializedLogBuffer.cpp
index f4bf37f66..5012d3d2b 100644
--- a/logd/SerializedLogBuffer.cpp
+++ b/logd/SerializedLogBuffer.cpp
@@ -75,6 +75,10 @@ int SerializedLogBuffer::Log(log_id_t log_id, log_time realtime, uid_t uid, pid_
         return -EINVAL;
     }
 
+    if (len > LOGGER_ENTRY_MAX_PAYLOAD) {
+        len = LOGGER_ENTRY_MAX_PAYLOAD;
+    }
+
     if (!ShouldLog(log_id, msg, len)) {
         stats_->AddTotal(log_id, len);
         return -EACCES;
@@ -135,40 +139,42 @@ void SerializedLogBuffer::NotifyReadersOfPrune(
     }
 }
 
-bool SerializedLogBuffer::Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid) {
-    // Don't prune logs that are newer than the point at which any reader threads are reading from.
-    LogReaderThread* oldest = nullptr;
+void SerializedLogBuffer::Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid) {
     auto reader_threads_lock = std::lock_guard{reader_list_->reader_threads_lock()};
-    for (const auto& reader_thread : reader_list_->reader_threads()) {
-        if (!reader_thread->IsWatching(log_id)) {
-            continue;
-        }
-        if (reader_thread->deadline().time_since_epoch().count() != 0) {
-            // Always wake up wrapped readers when pruning.  'Wrapped' readers are an optimization
-            // that allows the reader to wait until logs starting at a specified time stamp are
-            // about to be pruned.  This is error-prone however, since if that timestamp is about
-            // to be pruned, the reader is not likely to read the messages fast enough to not back
-            // up logd.  Instead, we can achieve a nearly-as-efficient but not error-prone batching
-            // effect by waking the reader whenever any chunk is about to be pruned.
-            reader_thread->triggerReader_Locked();
-        }
-        if (!oldest || oldest->start() > reader_thread->start()) {
-            oldest = reader_thread.get();
-        }
-    }
 
     auto& log_buffer = logs_[log_id];
     auto it = log_buffer.begin();
     while (it != log_buffer.end()) {
-        if (oldest != nullptr && it->highest_sequence_number() >= oldest->start()) {
-            break;
+        for (const auto& reader_thread : reader_list_->reader_threads()) {
+            if (!reader_thread->IsWatching(log_id)) {
+                continue;
+            }
+
+            if (reader_thread->deadline().time_since_epoch().count() != 0) {
+                // Always wake up wrapped readers when pruning.  'Wrapped' readers are an
+                // optimization that allows the reader to wait until logs starting at a specified
+                // time stamp are about to be pruned.  This is error-prone however, since if that
+                // timestamp is about to be pruned, the reader is not likely to read the messages
+                // fast enough to not back up logd.  Instead, we can achieve a nearly-as-efficient
+                // but not error-prone batching effect by waking the reader whenever any chunk is
+                // about to be pruned.
+                reader_thread->triggerReader_Locked();
+            }
+
+            // Some readers may still be reading from this log chunk; log a warning that they
+            // are about to lose logs.
+            // TODO: We should forcefully disconnect the reader instead, such that the reader
+            // itself has an indication that it has lost logs.
+            if (reader_thread->start() <= it->highest_sequence_number()) {
+                LOG(WARNING) << "Skipping entries from slow reader, " << reader_thread->name()
+                             << ", from LogBuffer::Prune()";
+            }
         }
 
         // Increment ahead of time since we're going to erase this iterator from the list.
         auto it_to_prune = it++;
 
-        // The sequence number check ensures that all readers have read all logs in this chunk, but
-        // they may still hold a reference to the chunk to track their last read log_position.
+        // Readers may have a reference to the chunk to track their last read log_position.
         // Notify them to delete the reference.
         NotifyReadersOfPrune(log_id, it_to_prune);
 
@@ -183,39 +189,11 @@ bool SerializedLogBuffer::Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid
         RemoveChunkFromStats(log_id, *it_to_prune);
         log_buffer.erase(it_to_prune);
         if (buffer_size >= bytes_to_free) {
-            return true;
+            return;
         }
         bytes_to_free -= buffer_size;
     }
-
-    // If we've deleted all buffers without bytes_to_free hitting 0, then we're called by Clear()
-    // and should return true.
-    if (it == log_buffer.end()) {
-        return true;
-    }
-
-    // Otherwise we are stuck due to a reader, so mitigate it.
-    CHECK(oldest != nullptr);
-    KickReader(oldest, log_id, bytes_to_free);
-    return false;
-}
-
-// If the selected reader is blocking our pruning progress, decide on
-// what kind of mitigation is necessary to unblock the situation.
-void SerializedLogBuffer::KickReader(LogReaderThread* reader, log_id_t id, size_t bytes_to_free) {
-    if (bytes_to_free >= max_size_[id]) {  // +100%
-        // A misbehaving or slow reader is dropped if we hit too much memory pressure.
-        LOG(WARNING) << "Kicking blocked reader, " << reader->name()
-                     << ", from LogBuffer::kickMe()";
-        reader->release_Locked();
-    } else {
-        // Tell slow reader to skip entries to catch up.
-        unsigned long prune_rows = bytes_to_free / 300;
-        LOG(WARNING) << "Skipping " << prune_rows << " entries from slow reader, " << reader->name()
-                     << ", from LogBuffer::kickMe()";
-        reader->triggerSkip_Locked(id, prune_rows);
-    }
 }
 
 std::unique_ptr<FlushToState> SerializedLogBuffer::CreateFlushToState(uint64_t start,
@@ -256,12 +234,18 @@ bool SerializedLogBuffer::FlushTo(
             }
         }
 
+        // We copy the log entry such that we can flush it without the lock.  We never block
+        // pruning waiting for this Flush() to complete.
+        constexpr size_t kMaxEntrySize = sizeof(*entry) + LOGGER_ENTRY_MAX_PAYLOAD + 1;
+        unsigned char entry_copy[kMaxEntrySize] __attribute__((uninitialized));
+        CHECK_LT(entry->msg_len(), LOGGER_ENTRY_MAX_PAYLOAD + 1);
+        memcpy(entry_copy, entry, sizeof(*entry) + entry->msg_len());
         lock.unlock();
-        // We never prune logs equal to or newer than any LogReaderThreads' `start` value, so the
-        // `entry` pointer is safe here without the lock
-        if (!entry->Flush(writer, log_id)) {
+
+        if (!reinterpret_cast<SerializedLogEntry*>(entry_copy)->Flush(writer, log_id)) {
             return false;
         }
+
         lock.lock();
     }
 
@@ -270,38 +254,11 @@ bool SerializedLogBuffer::FlushTo(
 }
 
 bool SerializedLogBuffer::Clear(log_id_t id, uid_t uid) {
-    // Try three times to clear, then disconnect the readers and try one final time.
-    for (int retry = 0; retry < 3; ++retry) {
-        {
-            auto lock = std::lock_guard{lock_};
-            bool prune_success = Prune(id, ULONG_MAX, uid);
-            if (prune_success) {
-                return true;
-            }
-        }
-        sleep(1);
-    }
-
-    // Check if it is still busy after the sleep, we try to prune one entry, not another clear
-    // run, so we are looking for the quick side effect of the return value to tell us if we
-    // have a _blocked_ reader.
-    bool busy = false;
-    {
-        auto lock = std::lock_guard{lock_};
-        busy = !Prune(id, 1, uid);
-    }
-
-    // It is still busy, disconnect all readers.
-    if (busy) {
-        auto reader_threads_lock = std::lock_guard{reader_list_->reader_threads_lock()};
-        for (const auto& reader_thread : reader_list_->reader_threads()) {
-            if (reader_thread->IsWatching(id)) {
-                LOG(WARNING) << "Kicking blocked reader, " << reader_thread->name()
-                             << ", from LogBuffer::clear()";
-                reader_thread->release_Locked();
-            }
-        }
-    }
-
     auto lock = std::lock_guard{lock_};
-    return Prune(id, ULONG_MAX, uid);
+    Prune(id, ULONG_MAX, uid);
+
+    // Clearing SerializedLogBuffer never waits for readers and therefore is always successful.
+    return true;
 }
 
 size_t SerializedLogBuffer::GetSizeUsed(log_id_t id) {
diff --git a/logd/SerializedLogBuffer.h b/logd/SerializedLogBuffer.h
index 32fbe5e98..294cfe6f1 100644
--- a/logd/SerializedLogBuffer.h
+++ b/logd/SerializedLogBuffer.h
@@ -55,9 +55,7 @@ class SerializedLogBuffer final : public LogBuffer {
   private:
     bool ShouldLog(log_id_t log_id, const char* msg, uint16_t len);
    void MaybePrune(log_id_t log_id) REQUIRES(lock_);
-    bool Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid) REQUIRES(lock_);
-    void KickReader(LogReaderThread* reader, log_id_t id, size_t bytes_to_free)
-            REQUIRES_SHARED(lock_);
+    void Prune(log_id_t log_id, size_t bytes_to_free, uid_t uid) REQUIRES(lock_);
     void NotifyReadersOfPrune(log_id_t log_id, const std::list<SerializedLogChunk>::iterator& chunk)
             REQUIRES(reader_list_->reader_threads_lock());
     void RemoveChunkFromStats(log_id_t log_id, SerializedLogChunk& chunk);
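For context on patch 1's change, a condition-variable sketch of the
'wrapped' reader handshake it describes (hypothetical class and method
names, not the LogReaderThread API): after the patch, Prune() triggers
every deadline-holding reader whenever any chunk is about to be
discarded, rather than only once the reader's start point is reached.

    #include <condition_variable>
    #include <mutex>

    // Hypothetical reduction of the wrapped-reader wait/wake handshake.
    class WrappedReader {
      public:
        // Reader side: sleep until pruning is about to discard data.
        void WaitForPrune() {
            std::unique_lock<std::mutex> lock(mutex_);
            cv_.wait(lock, [this] { return triggered_; });
            triggered_ = false;  // consume the wake-up and resume reading
        }

        // Prune side: after patch 1, called for every reader holding a
        // deadline each time any chunk is about to be pruned.
        void TriggerReader() {
            std::lock_guard<std::mutex> lock(mutex_);
            triggered_ = true;
            cv_.notify_one();
        }

      private:
        std::mutex mutex_;
        std::condition_variable cv_;
        bool triggered_ = false;
    };

Waking on every pruned chunk trades a few extra wake-ups (still batched
per chunk) for never leaving a wrapped reader racing to drain messages
that are already being deleted.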