lmkd: Add optional kill reason description into kill reports

Allow kill report to be appended with the explanation of the reasons
killing has been done. This would help identify kill reasons while
troubleshooting lmkd kills.

Change-Id: Ie5dd7a44e51d04c43c2492be8c1bc964d1b03555
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
This commit is contained in:
Suren Baghdasaryan 2019-07-15 15:50:17 -07:00
parent 844f26bc24
commit fd7518f8c1
1 changed files with 26 additions and 8 deletions

View File

@ -1743,7 +1743,7 @@ static void set_process_group_and_prio(int pid, SchedPolicy sp, int prio) {
static int last_killed_pid = -1;
/* Kill one process specified by procp. Returns the size of the process killed */
static int kill_one_process(struct proc* procp, int min_oom_score) {
static int kill_one_process(struct proc* procp, int min_oom_score, const char *reason) {
int pid = procp->pid;
uid_t uid = procp->uid;
int tgid;
@ -1794,8 +1794,13 @@ static int kill_one_process(struct proc* procp, int min_oom_score) {
set_process_group_and_prio(pid, SP_FOREGROUND, ANDROID_PRIORITY_HIGHEST);
inc_killcnt(procp->oomadj);
ALOGE("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid, uid, procp->oomadj,
tasksize * page_k);
if (reason) {
ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB; reason: %s", taskname, pid,
uid, procp->oomadj, tasksize * page_k, reason);
} else {
ALOGI("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid,
uid, procp->oomadj, tasksize * page_k);
}
TRACE_KILL_END();
@ -1833,7 +1838,7 @@ out:
* Find one process to kill at or above the given oom_adj level.
* Returns size of the killed process.
*/
static int find_and_kill_process(int min_score_adj) {
static int find_and_kill_process(int min_score_adj, const char *reason) {
int i;
int killed_size = 0;
@ -1851,7 +1856,7 @@ static int find_and_kill_process(int min_score_adj) {
if (!procp)
break;
killed_size = kill_one_process(procp, min_score_adj);
killed_size = kill_one_process(procp, min_score_adj, reason);
if (killed_size >= 0) {
#ifdef LMKD_LOG_STATS
if (enable_stats_log && !lmk_state_change_start) {
@ -2040,6 +2045,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
bool cycle_after_kill = false;
enum reclaim_state reclaim = NO_RECLAIM;
enum zone_watermark wmark = WMARK_NONE;
char kill_desc[LINE_MAX];
/* Skip while still killing a process */
if (is_kill_pending()) {
@ -2135,6 +2141,7 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
* free even after a kill. Mostly happens when running memory stress tests.
*/
kill_reason = PRESSURE_AFTER_KILL;
strncpy(kill_desc, "min watermark is breached even after kill", sizeof(kill_desc));
} else if (level == VMPRESS_LEVEL_CRITICAL && events != 0) {
/*
* Device is too busy reclaiming memory which might lead to ANR.
@ -2142,25 +2149,36 @@ static void mp_event_psi(int data, uint32_t events, struct polling_params *poll_
* of the memory congestion) breaches the configured threshold.
*/
kill_reason = NOT_RESPONDING;
strncpy(kill_desc, "device is not responding", sizeof(kill_desc));
} else if (swap_is_low && thrashing > thrashing_limit_pct) {
/* Page cache is thrashing while swap is low */
kill_reason = LOW_SWAP_AND_THRASHING;
snprintf(kill_desc, sizeof(kill_desc), "device is low on swap (%" PRId64
"kB < %" PRId64 "kB) and thrashing (%" PRId64 "%%)",
mi.field.free_swap * page_k, swap_low_threshold * page_k, thrashing);
} else if (swap_is_low && wmark < WMARK_HIGH) {
/* Both free memory and swap are low */
kill_reason = LOW_MEM_AND_SWAP;
snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and swap is low (%"
PRId64 "kB < %" PRId64 "kB)", wmark > WMARK_LOW ? "min" : "low",
mi.field.free_swap * page_k, swap_low_threshold * page_k);
} else if (wmark < WMARK_HIGH && thrashing > thrashing_limit) {
/* Page cache is thrashing while memory is low */
thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100;
kill_reason = LOW_MEM_AND_THRASHING;
snprintf(kill_desc, sizeof(kill_desc), "%s watermark is breached and thrashing (%"
PRId64 "%%)", wmark > WMARK_LOW ? "min" : "low", thrashing);
} else if (reclaim == DIRECT_RECLAIM && thrashing > thrashing_limit) {
/* Page cache is thrashing while in direct reclaim (mostly happens on lowram devices) */
thrashing_limit = (thrashing_limit * (100 - thrashing_limit_decay_pct)) / 100;
kill_reason = DIRECT_RECL_AND_THRASHING;
snprintf(kill_desc, sizeof(kill_desc), "device is in direct reclaim and thrashing (%"
PRId64 "%%)", thrashing);
}
/* Kill a process if necessary */
if (kill_reason != NONE) {
int pages_freed = find_and_kill_process(0);
int pages_freed = find_and_kill_process(0, kill_desc);
killing = (pages_freed > 0);
meminfo_log(&mi);
}
@ -2351,7 +2369,7 @@ static void mp_event_common(int data, uint32_t events, struct polling_params *po
do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
if (find_and_kill_process(level_oomadj[level]) == 0) {
if (find_and_kill_process(level_oomadj[level], NULL) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
@ -2376,7 +2394,7 @@ do_kill:
min_score_adj = level_oomadj[level];
}
pages_freed = find_and_kill_process(min_score_adj);
pages_freed = find_and_kill_process(min_score_adj, NULL);
if (pages_freed == 0) {
/* Rate limit kill reports when nothing was reclaimed */