logd: sepolicy dynamic rate limiting

Processing overhead for selinux violation messages is costly. We want
to deal with bursts of violations, but we have no intent of allowing
that sustained burst to go unabated as there is a cost of processing
and battery usage.

Tunables in libaudit.h are:

AUDIT_RATE_LIMIT_DEFAULT 20        /* acceptable burst rate      */
AUDIT_RATE_LIMIT_BURST_DURATION 10 /* number of seconds of burst */
AUDIT_RATE_LIMIT_MAX     5         /* acceptable sustained rate  */

Since we can only asymptotically handle DEFAULT rate, we set an upper
threshold of half way between the MAX and DEFAULT rate.

Default kernel audit subsystem message rate is set to 20 a second.
If sepolicy exceeds 125 violation messages over up to ten seconds
(>=~12/s), tell kernel audit subsystem to drop the rate to 5 messages
a second.  If rate drops below 50 messages over the past ten seconds
(<5/s), tell kernel it is ok to increase the burst rate back to 20
messages a second.

Test: gTest logd-unit-tests --gtest_filter=logd.sepolicy_rate_limiter_*
Bug: 27878170
Change-Id: I843f8dcfbb3ecfbbe94a4865ea332c858e3be7f2
This commit is contained in:
Mark Salyzyn 2017-01-03 14:00:19 -08:00
parent fe05f1cde4
commit 247d682fe1
6 changed files with 266 additions and 17 deletions

View File

@ -43,7 +43,7 @@
'>'
LogAudit::LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg) :
SocketListener(getLogSocket(), false),
SocketListener(mSock = getLogSocket(), false),
logbuf(buf),
reader(reader),
fdDmesg(fdDmesg),
@ -51,19 +51,62 @@ LogAudit::LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg) :
BOOL_DEFAULT_TRUE)),
events(__android_logger_property_get_bool("ro.logd.auditd.events",
BOOL_DEFAULT_TRUE)),
initialized(false) {
initialized(false),
tooFast(false) {
static const char auditd_message[] = { KMSG_PRIORITY(LOG_INFO),
'l', 'o', 'g', 'd', '.', 'a', 'u', 'd', 'i', 't', 'd', ':',
' ', 's', 't', 'a', 'r', 't', '\n' };
write(fdDmesg, auditd_message, sizeof(auditd_message));
}
void LogAudit::checkRateLimit() {
// trim list for AUDIT_RATE_LIMIT_BURST_DURATION of history
log_time oldest(AUDIT_RATE_LIMIT_BURST_DURATION, 0);
bucket.emplace(android_log_clockid());
oldest = bucket.back() - oldest;
while (bucket.front() < oldest) bucket.pop();
static const size_t upperThreshold =
((AUDIT_RATE_LIMIT_BURST_DURATION *
(AUDIT_RATE_LIMIT_DEFAULT + AUDIT_RATE_LIMIT_MAX)) + 1) /
2;
if (bucket.size() >= upperThreshold) {
// Hit peak, slow down source
if (!tooFast) {
tooFast = true;
audit_rate_limit(mSock, AUDIT_RATE_LIMIT_MAX);
}
// We do not need to hold on to the full set of timing data history,
// let's ensure it does not grow without bounds. This also ensures
// that std::dequeue underneath behaves almost like a ring buffer.
do {
bucket.pop();
} while (bucket.size() >= upperThreshold);
return;
}
if (!tooFast) return;
static const size_t lowerThreshold = AUDIT_RATE_LIMIT_BURST_DURATION *
AUDIT_RATE_LIMIT_MAX;
if (bucket.size() >= lowerThreshold) return;
tooFast = false;
// Went below max sustained rate, allow source to speed up
audit_rate_limit(mSock, AUDIT_RATE_LIMIT_DEFAULT);
}
bool LogAudit::onDataAvailable(SocketClient *cli) {
if (!initialized) {
prctl(PR_SET_NAME, "logd.auditd");
initialized = true;
}
checkRateLimit();
struct audit_message rep;
rep.nlh.nlmsg_type = 0;
@ -75,8 +118,7 @@ bool LogAudit::onDataAvailable(SocketClient *cli) {
return false;
}
logPrint("type=%d %.*s",
rep.nlh.nlmsg_type, rep.nlh.nlmsg_len, rep.data);
logPrint("type=%d %.*s", rep.nlh.nlmsg_type, rep.nlh.nlmsg_len, rep.data);
return true;
}
@ -351,5 +393,6 @@ int LogAudit::getLogSocket() {
audit_close(fd);
fd = -1;
}
(void)audit_rate_limit(fd, AUDIT_RATE_LIMIT_DEFAULT);
return fd;
}

View File

@ -17,6 +17,8 @@
#ifndef _LOGD_LOG_AUDIT_H__
#define _LOGD_LOG_AUDIT_H__
#include <queue>
#include <sysutils/SocketListener.h>
#include "LogBuffer.h"
@ -31,6 +33,11 @@ class LogAudit : public SocketListener {
bool events;
bool initialized;
bool tooFast;
int mSock;
std::queue<log_time> bucket;
void checkRateLimit();
public:
LogAudit(LogBuffer *buf, LogReader *reader, int fdDmesg);
int log(char *buf, size_t len);

View File

@ -149,7 +149,7 @@ out:
return rc;
}
int audit_setup(int fd, uint32_t pid)
int audit_setup(int fd, pid_t pid)
{
int rc;
struct audit_message rep;
@ -163,8 +163,7 @@ int audit_setup(int fd, uint32_t pid)
* and the the mask set to AUDIT_STATUS_PID
*/
status.pid = pid;
status.mask = AUDIT_STATUS_PID | AUDIT_STATUS_RATE_LIMIT;
status.rate_limit = 20; // audit entries per second
status.mask = AUDIT_STATUS_PID;
/* Let the kernel know this pid will be registering for audit events */
rc = audit_send(fd, AUDIT_SET, &status, sizeof(status));
@ -187,6 +186,27 @@ int audit_setup(int fd, uint32_t pid)
return 0;
}
int audit_rate_limit(int fd, unsigned rate_limit)
{
int rc;
struct audit_message rep;
struct audit_status status;
memset(&status, 0, sizeof(status));
status.mask = AUDIT_STATUS_RATE_LIMIT;
status.rate_limit = rate_limit; /* audit entries per second */
rc = audit_send(fd, AUDIT_SET, &status, sizeof(status));
if (rc < 0) {
return rc;
}
audit_get_reply(fd, &rep, GET_REPLY_NONBLOCKING, 0);
return 0;
}
int audit_open()
{
return socket(PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_AUDIT);

View File

@ -33,7 +33,7 @@ __BEGIN_DECLS
#define MAX_AUDIT_MESSAGE_LENGTH 8970
typedef enum {
GET_REPLY_BLOCKING=0,
GET_REPLY_BLOCKING = 0,
GET_REPLY_NONBLOCKING
} reply_t;
@ -55,7 +55,7 @@ struct audit_message {
* A valid fd on success or < 0 on error with errno set.
* Returns the same errors as man 2 socket.
*/
extern int audit_open(void);
extern int audit_open(void);
/**
* Closes the fd returned from audit_open()
@ -78,19 +78,36 @@ extern void audit_close(int fd);
* @return
* This function returns 0 on success, else -errno.
*/
extern int audit_get_reply(int fd, struct audit_message *rep, reply_t block,
int peek);
extern int audit_get_reply(int fd, struct audit_message *rep, reply_t block,
int peek);
/**
* Sets a pid to recieve audit netlink events from the kernel
* Sets a pid to receive audit netlink events from the kernel
* @param fd
* The fd returned by a call to audit_open()
* @param pid
* The pid whom to set as the reciever of audit messages
* The pid whom to set as the receiver of audit messages
* @return
* This function returns 0 on success, -errno on error.
*/
extern int audit_setup(int fd, uint32_t pid);
extern int audit_setup(int fd, pid_t pid);
/**
* Sets the rate limit to receive audit netlink events from the kernel
* @param fd
* The fd returned by a call to audit_open()
* @param max_rate
* The cap of the maximum number of audit messages a second
* @return
* This function returns 0 on success, -errno on error.
*/
/* Guidelines to follow for dynamic rate_limit */
#define AUDIT_RATE_LIMIT_DEFAULT 20 /* acceptable burst rate */
#define AUDIT_RATE_LIMIT_BURST_DURATION 10 /* number of seconds of burst */
#define AUDIT_RATE_LIMIT_MAX 5 /* acceptable sustained rate */
extern int audit_rate_limit(int fd, unsigned rate_limit);
__END_DECLS

View File

@ -27,12 +27,15 @@ test_tags := tests
# Unit tests.
# -----------------------------------------------------------------------------
event_flag := -DAUDITD_LOG_TAG=1003 -DCHATTY_LOG_TAG=1004
test_c_flags := \
-fstack-protector-all \
-g \
-Wall -Wextra \
-Werror \
-fno-builtin \
$(event_flag)
test_src_files := \
logd_test.cpp
@ -43,6 +46,6 @@ include $(CLEAR_VARS)
LOCAL_MODULE := $(test_module_prefix)unit-tests
LOCAL_MODULE_TAGS := $(test_tags)
LOCAL_CFLAGS += $(test_c_flags)
LOCAL_SHARED_LIBRARIES := libbase libcutils liblog
LOCAL_SHARED_LIBRARIES := libbase libcutils liblog libselinux
LOCAL_SRC_FILES := $(test_src_files)
include $(BUILD_NATIVE_TEST)

View File

@ -20,6 +20,9 @@
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <string>
@ -28,7 +31,12 @@
#include <cutils/sockets.h>
#include <gtest/gtest.h>
#include <log/log.h>
#include <private/android_filesystem_config.h>
#ifdef __ANDROID__
#include <selinux/selinux.h>
#endif
#include "../libaudit.h" // pickup AUDIT_RATE_LIMIT_*
#include "../LogReader.h" // pickup LOGD_SNDTIMEO
/*
@ -873,10 +881,10 @@ void __android_log_btwrite_multiple__helper(int count) {
++second_count;
}
} else if (eventData[4] == EVENT_TYPE_STRING) {
// chatty
if (tag != 1004) continue;
if (tag != CHATTY_LOG_TAG) continue;
++chatty_count;
// int len = get4LE(eventData + 4 + 1);
log_msg.buf[LOGGER_ENTRY_MAX_LEN] = '\0';
const char *cp = strstr(eventData + 4 + 1 + 4, " expire ");
if (!cp) continue;
unsigned val = 0;
@ -908,3 +916,154 @@ TEST(logd, multiple_test_3) {
TEST(logd, multiple_test_10) {
__android_log_btwrite_multiple__helper(10);
}
#ifdef __ANDROID__
// returns violating pid
static pid_t sepolicy_rate(unsigned rate, unsigned num) {
pid_t pid = fork();
if (pid) {
siginfo_t info = {};
if (TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED))) return 0;
if (info.si_status) return 0;
return pid;
}
// We may have DAC, but let's not have MAC
if (setcon("u:object_r:shell:s0") < 0) {
int save_errno = errno;
security_context_t context;
getcon(&context);
fprintf(stderr, "setcon(\"u:r:shell:s0\") failed @\"%s\" %s\n",
context, strerror(save_errno));
freecon(context);
_exit(-1);
// NOTREACHED
return 0;
}
// Requests dac_read_search, falls back to request dac_override
rate /= 2;
useconds_t usec;
if (rate == 0) {
rate = 1;
usec = 2000000;
} else {
usec = (1000000 + (rate / 2)) / rate;
}
num = (num + 1) / 2;
if (usec < 2) usec = 2;
while (num > 0) {
if (access(android::base::StringPrintf(
"/data/misc/logd/cannot_access_directory_%u",
num).c_str(),
F_OK) == 0) {
_exit(-1);
// NOTREACHED
return 0;
}
usleep(usec);
--num;
}
_exit(0);
// NOTREACHED
return 0;
}
static int count_avc(pid_t pid) {
int count = 0;
if (pid == 0) return count;
struct logger_list *logger_list;
if (!(logger_list = android_logger_list_open(LOG_ID_EVENTS,
ANDROID_LOG_RDONLY |
ANDROID_LOG_NONBLOCK,
0,
pid))) return count;
for (;;) {
log_msg log_msg;
if (android_logger_list_read(logger_list, &log_msg) <= 0) break;
if ((log_msg.entry.pid != pid) ||
(log_msg.entry.len < (4 + 1 + 8)) ||
(log_msg.id() != LOG_ID_EVENTS)) continue;
char *eventData = log_msg.msg();
if (!eventData) continue;
uint32_t tag = get4LE(eventData);
if (tag != AUDITD_LOG_TAG) continue;
if (eventData[4] != EVENT_TYPE_STRING) continue;
// int len = get4LE(eventData + 4 + 1);
log_msg.buf[LOGGER_ENTRY_MAX_LEN] = '\0';
const char *cp = strstr(eventData + 4 + 1 + 4, "): avc: ");
if (!cp) continue;
++count;
}
android_logger_list_close(logger_list);
return count;
}
#endif
TEST(logd, sepolicy_rate_limiter_maximum) {
#ifdef __ANDROID__
static const int rate = AUDIT_RATE_LIMIT_MAX;
static const int duration = 2;
// Two seconds of a liveable sustained rate
EXPECT_EQ(rate * duration, count_avc(sepolicy_rate(rate, rate * duration)));
#else
GTEST_LOG_(INFO) << "This test does nothing.\n";
#endif
}
TEST(logd, sepolicy_rate_limiter_sub_burst) {
#ifdef __ANDROID__
// maximum period below half way between sustainable and burst rate.
static const int threshold = ((AUDIT_RATE_LIMIT_BURST_DURATION *
(AUDIT_RATE_LIMIT_DEFAULT +
AUDIT_RATE_LIMIT_MAX)) +
1) / 2;
static const int rate = (threshold / AUDIT_RATE_LIMIT_BURST_DURATION) - 1;
static const int duration = AUDIT_RATE_LIMIT_BURST_DURATION;
EXPECT_EQ(rate * duration, count_avc(sepolicy_rate(rate, rate * duration)));
#else
GTEST_LOG_(INFO) << "This test does nothing.\n";
#endif
}
TEST(logd, sepolicy_rate_limiter_spam) {
#ifdef __ANDROID__
// maximum period of double the maximum burst rate
static const int threshold = ((AUDIT_RATE_LIMIT_BURST_DURATION *
(AUDIT_RATE_LIMIT_DEFAULT +
AUDIT_RATE_LIMIT_MAX)) +
1) / 2;
static const int rate = AUDIT_RATE_LIMIT_DEFAULT * 2;
static const int duration = threshold / AUDIT_RATE_LIMIT_DEFAULT;
EXPECT_GE(((AUDIT_RATE_LIMIT_DEFAULT * duration) * 115) /
100, // +15% margin
count_avc(sepolicy_rate(rate, rate * duration)));
// give logd another 3 seconds to react to the burst before checking
sepolicy_rate(rate, rate * 3);
// maximum period at double the maximum burst rate (spam filter kicked in)
EXPECT_GE(((AUDIT_RATE_LIMIT_MAX * AUDIT_RATE_LIMIT_BURST_DURATION) * 130) /
100, // +30% margin
count_avc(sepolicy_rate(rate,
rate * AUDIT_RATE_LIMIT_BURST_DURATION)));
// cool down, and check unspammy rate still works
sleep(2);
EXPECT_LE(AUDIT_RATE_LIMIT_BURST_DURATION - 1, // allow _one_ to be lost
count_avc(sepolicy_rate(1, AUDIT_RATE_LIMIT_BURST_DURATION)));
#else
GTEST_LOG_(INFO) << "This test does nothing.\n";
#endif
}