From 247d682fe1b0dd4c8f149b7f5c89c546df17873a Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 3 Jan 2017 14:00:19 -0800 Subject: [PATCH] logd: sepolicy dynamic rate limiting Processing overhead for selinux violation messages is costly. We want to deal with bursts of violations, but we have no intent of allowing that sustained burst to go unabated as there is a cost of processing and battery usage. Tunables in libaudit.h are: AUDIT_RATE_LIMIT_DEFAULT 20 /* acceptable burst rate */ AUDIT_RATE_LIMIT_BURST_DURATION 10 /* number of seconds of burst */ AUDIT_RATE_LIMIT_MAX 5 /* acceptable sustained rate */ Since we can only asymptotically handle DEFAULT rate, we set an upper threshold of half way between the MAX and DEFAULT rate. Default kernel audit subsystem message rate is set to 20 a second. If sepolicy exceeds 125 violation messages over up to ten seconds (>=~12/s), tell kernel audit subsystem to drop the rate to 5 messages a second. If rate drops below 50 messages over the past ten seconds (<5/s), tell kernel it is ok to increase the burst rate back to 20 messages a second. Test: gTest logd-unit-tests --gtest_filter=logd.sepolicy_rate_limiter_* Bug: 27878170 Change-Id: I843f8dcfbb3ecfbbe94a4865ea332c858e3be7f2 --- logd/LogAudit.cpp | 51 +++++++++++- logd/LogAudit.h | 7 ++ logd/libaudit.c | 26 ++++++- logd/libaudit.h | 31 ++++++-- logd/tests/Android.mk | 5 +- logd/tests/logd_test.cpp | 163 ++++++++++++++++++++++++++++++++++++++- 6 files changed, 266 insertions(+), 17 deletions(-) diff --git a/logd/LogAudit.cpp b/logd/LogAudit.cpp index 11ffcb785..92d957fb1 100644 --- a/logd/LogAudit.cpp +++ b/logd/LogAudit.cpp @@ -43,7 +43,7 @@ '>' LogAudit::LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg) : - SocketListener(getLogSocket(), false), + SocketListener(mSock = getLogSocket(), false), logbuf(buf), reader(reader), fdDmesg(fdDmesg), @@ -51,19 +51,62 @@ LogAudit::LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg) : BOOL_DEFAULT_TRUE)), events(__android_logger_property_get_bool("ro.logd.auditd.events", BOOL_DEFAULT_TRUE)), - initialized(false) { + initialized(false), + tooFast(false) { static const char auditd_message[] = { KMSG_PRIORITY(LOG_INFO), 'l', 'o', 'g', 'd', '.', 'a', 'u', 'd', 'i', 't', 'd', ':', ' ', 's', 't', 'a', 'r', 't', '\n' }; write(fdDmesg, auditd_message, sizeof(auditd_message)); } +void LogAudit::checkRateLimit() { + + // trim list for AUDIT_RATE_LIMIT_BURST_DURATION of history + log_time oldest(AUDIT_RATE_LIMIT_BURST_DURATION, 0); + bucket.emplace(android_log_clockid()); + oldest = bucket.back() - oldest; + while (bucket.front() < oldest) bucket.pop(); + + static const size_t upperThreshold = + ((AUDIT_RATE_LIMIT_BURST_DURATION * + (AUDIT_RATE_LIMIT_DEFAULT + AUDIT_RATE_LIMIT_MAX)) + 1) / + 2; + if (bucket.size() >= upperThreshold) { + // Hit peak, slow down source + if (!tooFast) { + tooFast = true; + audit_rate_limit(mSock, AUDIT_RATE_LIMIT_MAX); + } + + // We do not need to hold on to the full set of timing data history, + // let's ensure it does not grow without bounds. This also ensures + // that std::dequeue underneath behaves almost like a ring buffer. + do { + bucket.pop(); + } while (bucket.size() >= upperThreshold); + return; + } + + if (!tooFast) return; + + static const size_t lowerThreshold = AUDIT_RATE_LIMIT_BURST_DURATION * + AUDIT_RATE_LIMIT_MAX; + + if (bucket.size() >= lowerThreshold) return; + + tooFast = false; + // Went below max sustained rate, allow source to speed up + audit_rate_limit(mSock, AUDIT_RATE_LIMIT_DEFAULT); +} + bool LogAudit::onDataAvailable(SocketClient *cli) { if (!initialized) { prctl(PR_SET_NAME, "logd.auditd"); initialized = true; } + checkRateLimit(); + struct audit_message rep; rep.nlh.nlmsg_type = 0; @@ -75,8 +118,7 @@ bool LogAudit::onDataAvailable(SocketClient *cli) { return false; } - logPrint("type=%d %.*s", - rep.nlh.nlmsg_type, rep.nlh.nlmsg_len, rep.data); + logPrint("type=%d %.*s", rep.nlh.nlmsg_type, rep.nlh.nlmsg_len, rep.data); return true; } @@ -351,5 +393,6 @@ int LogAudit::getLogSocket() { audit_close(fd); fd = -1; } + (void)audit_rate_limit(fd, AUDIT_RATE_LIMIT_DEFAULT); return fd; } diff --git a/logd/LogAudit.h b/logd/LogAudit.h index 844951dae..045d6314f 100644 --- a/logd/LogAudit.h +++ b/logd/LogAudit.h @@ -17,6 +17,8 @@ #ifndef _LOGD_LOG_AUDIT_H__ #define _LOGD_LOG_AUDIT_H__ +#include + #include #include "LogBuffer.h" @@ -31,6 +33,11 @@ class LogAudit : public SocketListener { bool events; bool initialized; + bool tooFast; + int mSock; + std::queue bucket; + void checkRateLimit(); + public: LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg); int log(char *buf, size_t len); diff --git a/logd/libaudit.c b/logd/libaudit.c index d2b212ee4..216f1a18c 100644 --- a/logd/libaudit.c +++ b/logd/libaudit.c @@ -149,7 +149,7 @@ out: return rc; } -int audit_setup(int fd, uint32_t pid) +int audit_setup(int fd, pid_t pid) { int rc; struct audit_message rep; @@ -163,8 +163,7 @@ int audit_setup(int fd, uint32_t pid) * and the the mask set to AUDIT_STATUS_PID */ status.pid = pid; - status.mask = AUDIT_STATUS_PID | AUDIT_STATUS_RATE_LIMIT; - status.rate_limit = 20; // audit entries per second + status.mask = AUDIT_STATUS_PID; /* Let the kernel know this pid will be registering for audit events */ rc = audit_send(fd, AUDIT_SET, &status, sizeof(status)); @@ -187,6 +186,27 @@ int audit_setup(int fd, uint32_t pid) return 0; } +int audit_rate_limit(int fd, unsigned rate_limit) +{ + int rc; + struct audit_message rep; + struct audit_status status; + + memset(&status, 0, sizeof(status)); + + status.mask = AUDIT_STATUS_RATE_LIMIT; + status.rate_limit = rate_limit; /* audit entries per second */ + + rc = audit_send(fd, AUDIT_SET, &status, sizeof(status)); + if (rc < 0) { + return rc; + } + + audit_get_reply(fd, &rep, GET_REPLY_NONBLOCKING, 0); + + return 0; +} + int audit_open() { return socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_AUDIT); diff --git a/logd/libaudit.h b/logd/libaudit.h index b9e330d97..9865d4304 100644 --- a/logd/libaudit.h +++ b/logd/libaudit.h @@ -33,7 +33,7 @@ __BEGIN_DECLS #define MAX_AUDIT_MESSAGE_LENGTH 8970 typedef enum { - GET_REPLY_BLOCKING=0, + GET_REPLY_BLOCKING = 0, GET_REPLY_NONBLOCKING } reply_t; @@ -55,7 +55,7 @@ struct audit_message { * A valid fd on success or < 0 on error with errno set. * Returns the same errors as man 2 socket. */ -extern int audit_open(void); +extern int audit_open(void); /** * Closes the fd returned from audit_open() @@ -78,19 +78,36 @@ extern void audit_close(int fd); * @return * This function returns 0 on success, else -errno. */ -extern int audit_get_reply(int fd, struct audit_message *rep, reply_t block, - int peek); +extern int audit_get_reply(int fd, struct audit_message *rep, reply_t block, + int peek); /** - * Sets a pid to recieve audit netlink events from the kernel + * Sets a pid to receive audit netlink events from the kernel * @param fd * The fd returned by a call to audit_open() * @param pid - * The pid whom to set as the reciever of audit messages + * The pid whom to set as the receiver of audit messages * @return * This function returns 0 on success, -errno on error. */ -extern int audit_setup(int fd, uint32_t pid); +extern int audit_setup(int fd, pid_t pid); + +/** + * Sets the rate limit to receive audit netlink events from the kernel + * @param fd + * The fd returned by a call to audit_open() + * @param max_rate + * The cap of the maximum number of audit messages a second + * @return + * This function returns 0 on success, -errno on error. + */ + +/* Guidelines to follow for dynamic rate_limit */ +#define AUDIT_RATE_LIMIT_DEFAULT 20 /* acceptable burst rate */ +#define AUDIT_RATE_LIMIT_BURST_DURATION 10 /* number of seconds of burst */ +#define AUDIT_RATE_LIMIT_MAX 5 /* acceptable sustained rate */ + +extern int audit_rate_limit(int fd, unsigned rate_limit); __END_DECLS diff --git a/logd/tests/Android.mk b/logd/tests/Android.mk index 808087a9b..c05399336 100644 --- a/logd/tests/Android.mk +++ b/logd/tests/Android.mk @@ -27,12 +27,15 @@ test_tags := tests # Unit tests. # ----------------------------------------------------------------------------- +event_flag := -DAUDITD_LOG_TAG=1003 -DCHATTY_LOG_TAG=1004 + test_c_flags := \ -fstack-protector-all \ -g \ -Wall -Wextra \ -Werror \ -fno-builtin \ + $(event_flag) test_src_files := \ logd_test.cpp @@ -43,6 +46,6 @@ include $(CLEAR_VARS) LOCAL_MODULE := $(test_module_prefix)unit-tests LOCAL_MODULE_TAGS := $(test_tags) LOCAL_CFLAGS += $(test_c_flags) -LOCAL_SHARED_LIBRARIES := libbase libcutils liblog +LOCAL_SHARED_LIBRARIES := libbase libcutils liblog libselinux LOCAL_SRC_FILES := $(test_src_files) include $(BUILD_NATIVE_TEST) diff --git a/logd/tests/logd_test.cpp b/logd/tests/logd_test.cpp index 5cd9fc8f2..703c0fb9d 100644 --- a/logd/tests/logd_test.cpp +++ b/logd/tests/logd_test.cpp @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include @@ -28,7 +31,12 @@ #include #include #include +#include +#ifdef __ANDROID__ +#include +#endif +#include "../libaudit.h" // pickup AUDIT_RATE_LIMIT_* #include "../LogReader.h" // pickup LOGD_SNDTIMEO /* @@ -873,10 +881,10 @@ void __android_log_btwrite_multiple__helper(int count) { ++second_count; } } else if (eventData[4] == EVENT_TYPE_STRING) { - // chatty - if (tag != 1004) continue; + if (tag != CHATTY_LOG_TAG) continue; ++chatty_count; // int len = get4LE(eventData + 4 + 1); + log_msg.buf[LOGGER_ENTRY_MAX_LEN] = '\0'; const char *cp = strstr(eventData + 4 + 1 + 4, " expire "); if (!cp) continue; unsigned val = 0; @@ -908,3 +916,154 @@ TEST(logd, multiple_test_3) { TEST(logd, multiple_test_10) { __android_log_btwrite_multiple__helper(10); } + +#ifdef __ANDROID__ +// returns violating pid +static pid_t sepolicy_rate(unsigned rate, unsigned num) { + pid_t pid = fork(); + + if (pid) { + siginfo_t info = {}; + if (TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED))) return 0; + if (info.si_status) return 0; + return pid; + } + + // We may have DAC, but let's not have MAC + if (setcon("u:object_r:shell:s0") < 0) { + int save_errno = errno; + security_context_t context; + getcon(&context); + fprintf(stderr, "setcon(\"u:r:shell:s0\") failed @\"%s\" %s\n", + context, strerror(save_errno)); + freecon(context); + _exit(-1); + // NOTREACHED + return 0; + } + + // Requests dac_read_search, falls back to request dac_override + rate /= 2; + useconds_t usec; + if (rate == 0) { + rate = 1; + usec = 2000000; + } else { + usec = (1000000 + (rate / 2)) / rate; + } + num = (num + 1) / 2; + + if (usec < 2) usec = 2; + + while (num > 0) { + if (access(android::base::StringPrintf( + "/data/misc/logd/cannot_access_directory_%u", + num).c_str(), + F_OK) == 0) { + _exit(-1); + // NOTREACHED + return 0; + } + usleep(usec); + --num; + } + _exit(0); + // NOTREACHED + return 0; +} + +static int count_avc(pid_t pid) { + int count = 0; + + if (pid == 0) return count; + + struct logger_list *logger_list; + if (!(logger_list = android_logger_list_open(LOG_ID_EVENTS, + ANDROID_LOG_RDONLY | + ANDROID_LOG_NONBLOCK, + 0, + pid))) return count; + for (;;) { + log_msg log_msg; + + if (android_logger_list_read(logger_list, &log_msg) <= 0) break; + + if ((log_msg.entry.pid != pid) || + (log_msg.entry.len < (4 + 1 + 8)) || + (log_msg.id() != LOG_ID_EVENTS)) continue; + + char *eventData = log_msg.msg(); + if (!eventData) continue; + + uint32_t tag = get4LE(eventData); + if (tag != AUDITD_LOG_TAG) continue; + + if (eventData[4] != EVENT_TYPE_STRING) continue; + + // int len = get4LE(eventData + 4 + 1); + log_msg.buf[LOGGER_ENTRY_MAX_LEN] = '\0'; + const char *cp = strstr(eventData + 4 + 1 + 4, "): avc: "); + if (!cp) continue; + + ++count; + } + + android_logger_list_close(logger_list); + + return count; +} +#endif + +TEST(logd, sepolicy_rate_limiter_maximum) { +#ifdef __ANDROID__ + static const int rate = AUDIT_RATE_LIMIT_MAX; + static const int duration = 2; + // Two seconds of a liveable sustained rate + EXPECT_EQ(rate * duration, count_avc(sepolicy_rate(rate, rate * duration))); +#else + GTEST_LOG_(INFO) << "This test does nothing.\n"; +#endif +} + +TEST(logd, sepolicy_rate_limiter_sub_burst) { +#ifdef __ANDROID__ + // maximum period below half way between sustainable and burst rate. + static const int threshold = ((AUDIT_RATE_LIMIT_BURST_DURATION * + (AUDIT_RATE_LIMIT_DEFAULT + + AUDIT_RATE_LIMIT_MAX)) + + 1) / 2; + static const int rate = (threshold / AUDIT_RATE_LIMIT_BURST_DURATION) - 1; + static const int duration = AUDIT_RATE_LIMIT_BURST_DURATION; + EXPECT_EQ(rate * duration, count_avc(sepolicy_rate(rate, rate * duration))); +#else + GTEST_LOG_(INFO) << "This test does nothing.\n"; +#endif +} + +TEST(logd, sepolicy_rate_limiter_spam) { +#ifdef __ANDROID__ + // maximum period of double the maximum burst rate + static const int threshold = ((AUDIT_RATE_LIMIT_BURST_DURATION * + (AUDIT_RATE_LIMIT_DEFAULT + + AUDIT_RATE_LIMIT_MAX)) + + 1) / 2; + static const int rate = AUDIT_RATE_LIMIT_DEFAULT * 2; + static const int duration = threshold / AUDIT_RATE_LIMIT_DEFAULT; + EXPECT_GE(((AUDIT_RATE_LIMIT_DEFAULT * duration) * 115) / + 100, // +15% margin + count_avc(sepolicy_rate(rate, rate * duration))); + // give logd another 3 seconds to react to the burst before checking + sepolicy_rate(rate, rate * 3); + // maximum period at double the maximum burst rate (spam filter kicked in) + EXPECT_GE(((AUDIT_RATE_LIMIT_MAX * AUDIT_RATE_LIMIT_BURST_DURATION) * 130) / + 100, // +30% margin + count_avc(sepolicy_rate(rate, + rate * AUDIT_RATE_LIMIT_BURST_DURATION))); + // cool down, and check unspammy rate still works + sleep(2); + EXPECT_LE(AUDIT_RATE_LIMIT_BURST_DURATION - 1, // allow _one_ to be lost + count_avc(sepolicy_rate(1, AUDIT_RATE_LIMIT_BURST_DURATION))); +#else + GTEST_LOG_(INFO) << "This test does nothing.\n"; +#endif +}