From 9175e0311ec9e6d1bf1f6dfecf9268baf08765e6 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 7 Feb 2008 00:14:08 -0800 Subject: [PATCH] bugfix for memory controller: add helper function for assigning cgroup to page This patch adds following functions. - clear_page_cgroup(page, pc) - page_cgroup_assign_new_page_group(page, pc) Mainly for cleanup. A manner "check page->cgroup again after lock_page_cgroup()" is implemented in straight way. A comment in mem_cgroup_uncharge() will be removed by force-empty patch Signed-off-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Pavel Emelianov Cc: Paul Menage Cc: Peter Zijlstra Cc: "Eric W. Biederman" Cc: Nick Piggin Cc: Kirill Korotaev Cc: Herbert Poetzl Cc: David Rientjes Cc: Vaidyanathan Srinivasan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 105 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 29 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2fadd4896a14..3270ce7375db 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -162,6 +162,48 @@ static void __always_inline unlock_page_cgroup(struct page *page) bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); } +/* + * Tie new page_cgroup to struct page under lock_page_cgroup() + * This can fail if the page has been tied to a page_cgroup. + * If success, returns 0. + */ +static inline int +page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc) +{ + int ret = 0; + + lock_page_cgroup(page); + if (!page_get_page_cgroup(page)) + page_assign_page_cgroup(page, pc); + else /* A page is tied to other pc. */ + ret = 1; + unlock_page_cgroup(page); + return ret; +} + +/* + * Clear page->page_cgroup member under lock_page_cgroup(). + * If given "pc" value is different from one page->page_cgroup, + * page->cgroup is not cleared. + * Returns a value of page->page_cgroup at lock taken. + * A can can detect failure of clearing by following + * clear_page_cgroup(page, pc) == pc + */ + +static inline struct page_cgroup * +clear_page_cgroup(struct page *page, struct page_cgroup *pc) +{ + struct page_cgroup *ret; + /* lock and clear */ + lock_page_cgroup(page); + ret = page_get_page_cgroup(page); + if (likely(ret == pc)) + page_assign_page_cgroup(page, NULL); + unlock_page_cgroup(page); + return ret; +} + + static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active) { if (active) @@ -270,7 +312,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { struct mem_cgroup *mem; - struct page_cgroup *pc, *race_pc; + struct page_cgroup *pc; unsigned long flags; unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES; @@ -293,8 +335,10 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, unlock_page_cgroup(page); cpu_relax(); goto retry; - } else + } else { + unlock_page_cgroup(page); goto done; + } } unlock_page_cgroup(page); @@ -364,31 +408,26 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, goto free_pc; } - lock_page_cgroup(page); - /* - * Check if somebody else beat us to allocating the page_cgroup - */ - race_pc = page_get_page_cgroup(page); - if (race_pc) { - kfree(pc); - pc = race_pc; - atomic_inc(&pc->ref_cnt); - res_counter_uncharge(&mem->res, PAGE_SIZE); - css_put(&mem->css); - goto done; - } - atomic_set(&pc->ref_cnt, 1); pc->mem_cgroup = mem; pc->page = page; - page_assign_page_cgroup(page, pc); + if (page_cgroup_assign_new_page_cgroup(page, pc)) { + /* + * an another charge is added to this page already. + * we do take lock_page_cgroup(page) again and read + * page->cgroup, increment refcnt.... just retry is OK. + */ + res_counter_uncharge(&mem->res, PAGE_SIZE); + css_put(&mem->css); + kfree(pc); + goto retry; + } spin_lock_irqsave(&mem->lru_lock, flags); list_add(&pc->lru, &mem->active_list); spin_unlock_irqrestore(&mem->lru_lock, flags); done: - unlock_page_cgroup(page); return 0; free_pc: kfree(pc); @@ -432,17 +471,25 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) if (atomic_dec_and_test(&pc->ref_cnt)) { page = pc->page; - lock_page_cgroup(page); - mem = pc->mem_cgroup; - css_put(&mem->css); - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); - res_counter_uncharge(&mem->res, PAGE_SIZE); - - spin_lock_irqsave(&mem->lru_lock, flags); - list_del_init(&pc->lru); - spin_unlock_irqrestore(&mem->lru_lock, flags); - kfree(pc); + /* + * get page->cgroup and clear it under lock. + */ + if (clear_page_cgroup(page, pc) == pc) { + mem = pc->mem_cgroup; + css_put(&mem->css); + res_counter_uncharge(&mem->res, PAGE_SIZE); + spin_lock_irqsave(&mem->lru_lock, flags); + list_del_init(&pc->lru); + spin_unlock_irqrestore(&mem->lru_lock, flags); + kfree(pc); + } else { + /* + * Note:This will be removed when force-empty patch is + * applied. just show warning here. + */ + printk(KERN_ERR "Race in mem_cgroup_uncharge() ?"); + dump_stack(); + } } }