diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d9b8df5ef212..667790636cd7 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -442,6 +442,7 @@ static inline struct obj_cgroup *__page_objcg(struct page *page)
  * - LRU isolation
  * - lock_page_memcg()
  * - exclusive reference
+ * - mem_cgroup_trylock_pages()
  *
  * For a kmem page a caller should hold an rcu read lock to protect memcg
  * associated with a kmem page from being released.
@@ -497,6 +498,7 @@ static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
  * - LRU isolation
  * - lock_page_memcg()
  * - exclusive reference
+ * - mem_cgroup_trylock_pages()
  *
  * For a kmem page a caller should hold an rcu read lock to protect memcg
  * associated with a kmem page from being released.
@@ -953,6 +955,23 @@ void unlock_page_memcg(struct page *page);
 
 void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val);
 
+/* try to stabilize page_memcg() for all the pages in a memcg */
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+	rcu_read_lock();
+
+	if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account))
+		return true;
+
+	rcu_read_unlock();
+	return false;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+	rcu_read_unlock();
+}
+
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void mod_memcg_state(struct mem_cgroup *memcg,
 				   int idx, int val)
@@ -1357,6 +1376,18 @@ static inline void unlock_page_memcg(struct page *page)
 {
 }
 
+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg)
+{
+	/* to match page_memcg_rcu() */
+	rcu_read_lock();
+	return true;
+}
+
+static inline void mem_cgroup_unlock_pages(void)
+{
+	rcu_read_unlock();
+}
+
 static inline void mem_cgroup_handle_over_high(void)
 {
 }
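
[Editorial note] The memcontrol.h hunks above add a fourth way to pin page_memcg(): mem_cgroup_trylock_pages() takes the RCU read lock and succeeds unless the memcg is in the middle of moving accounts; on failure it drops the RCU lock before returning. A minimal sketch of the intended calling pattern, assuming a hypothetical update_pages_locked() helper; this sketch is not part of the patch:

/*
 * Editorial sketch: how a caller is expected to pair the new helpers.
 * update_pages_locked() is a hypothetical placeholder for work that
 * depends on a stable page_memcg(), e.g. page_update_gen() below.
 */
static void update_pages_in_memcg(struct mem_cgroup *memcg)
{
	/* On failure the RCU read lock has already been dropped. */
	if (!mem_cgroup_trylock_pages(memcg))
		return;

	/* page_memcg() of pages charged to @memcg is stable here. */
	update_pages_locked(memcg);

	mem_cgroup_unlock_pages();
}
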
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 05ac15663773..94765e2ed6b6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -350,6 +350,7 @@ enum lruvec_flags {
 #ifndef __GENERATING_BOUNDS_H
 
 struct lruvec;
+struct page_vma_mapped_walk;
 
 #define LRU_GEN_MASK	((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
 #define LRU_REFS_MASK	((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF)
@@ -403,6 +404,7 @@ struct lru_gen_struct {
 };
 
 void lru_gen_init_lruvec(struct lruvec *lruvec);
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
 
 #ifdef CONFIG_MEMCG
 void lru_gen_init_memcg(struct mem_cgroup *memcg);
@@ -415,6 +417,10 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
 {
 }
 
+static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+{
+}
+
 #ifdef CONFIG_MEMCG
 static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index fce3a966718d..5e20b5b39d57 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -368,6 +368,7 @@ extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_cpu_zone(struct zone *zone);
 extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
+extern void activate_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1598162cfc5a..c27ea96124dd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2788,6 +2788,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg)
 	 * - LRU isolation
 	 * - lock_page_memcg()
 	 * - exclusive reference
+	 * - mem_cgroup_trylock_pages()
 	 */
 	page->memcg_data = (unsigned long)memcg;
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 6aebd1747251..d759222ba040 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -73,6 +73,7 @@
 #include <linux/page_idle.h>
 #include <linux/memremap.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/mm_inline.h>
 
 #include <asm/tlbflush.h>
 
@@ -790,6 +791,12 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
 		}
 
 		if (pvmw.pte) {
+			if (lru_gen_enabled() && pte_young(*pvmw.pte) &&
+			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
+				lru_gen_look_around(&pvmw);
+				referenced++;
+			}
+
 			if (ptep_clear_flush_young_notify(vma, address,
 						pvmw.pte)) {
 				/*
diff --git a/mm/swap.c b/mm/swap.c
index d301ef965ebf..74f7c5e1b675 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -325,7 +325,7 @@ static bool need_activate_page_drain(int cpu)
 	return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
 }
 
-static void activate_page(struct page *page)
+void activate_page(struct page *page)
 {
 	page = compound_head(page);
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
@@ -345,7 +345,7 @@ static inline void activate_page_drain(int cpu)
 {
 }
 
-static void activate_page(struct page *page)
+void activate_page(struct page *page)
 {
 	struct lruvec *lruvec;
 
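
[Editorial note] The rmap.c and swap.c hunks above wire the feature up: page_referenced_one() hands a young PTE to lru_gen_look_around() unless the VMA advertises sequential or random access, and activate_page() is no longer static so the look-around can fall back to the regular activation path. A hedged restatement of the gating condition as a standalone predicate; the helper name is invented for illustration and is not part of the patch:

/* Editorial sketch: when page_referenced_one() triggers the look-around. */
static bool should_look_around(struct vm_area_struct *vma, pte_t pte)
{
	/* Access hints that defeat spatial locality disable the scan. */
	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
		return false;

	/* Only a young PTE makes scanning its neighbours worthwhile. */
	return lru_gen_enabled() && pte_young(pte);
}
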
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 670a46dff1af..5f4b0873112b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1412,6 +1412,11 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 		if (!sc->may_unmap && page_mapped(page))
 			goto keep_locked;
 
+		/* page_update_gen() tried to promote this page? */
+		if (lru_gen_enabled() && !ignore_references &&
+		    page_mapped(page) && PageReferenced(page))
+			goto keep_locked;
+
 		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
 			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
@@ -3079,6 +3084,31 @@ static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv)
  *                          the aging
  ******************************************************************************/
 
+static int page_update_gen(struct page *page, int gen)
+{
+	unsigned long old_flags, new_flags;
+
+	VM_BUG_ON(gen >= MAX_NR_GENS);
+	VM_BUG_ON(!rcu_read_lock_held());
+
+	do {
+		new_flags = old_flags = READ_ONCE(page->flags);
+
+		/* for shrink_page_list() */
+		if (!(new_flags & LRU_GEN_MASK)) {
+			new_flags |= BIT(PG_referenced);
+			continue;
+		}
+
+		new_flags &= ~LRU_GEN_MASK;
+		new_flags |= (gen + 1UL) << LRU_GEN_PGOFF;
+		new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS);
+	} while (new_flags != old_flags &&
+		 cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+
+	return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+}
+
 static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
 {
 	unsigned long old_flags, new_flags;
@@ -3090,6 +3120,11 @@ static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming)
 		new_flags = old_flags = READ_ONCE(page->flags);
 		VM_BUG_ON_PAGE(!(new_flags & LRU_GEN_MASK), page);
 
+		new_gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+		/* page_update_gen() has promoted this page? */
+		if (new_gen >= 0 && new_gen != old_gen)
+			return new_gen;
+
 		new_gen = (old_gen + 1) % MAX_NR_GENS;
 
 		new_flags &= ~LRU_GEN_MASK;
@@ -3311,6 +3346,122 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
 }
 
+/*
+ * This function exploits spatial locality when shrink_page_list() walks the
+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages.
+ */
+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+{
+	int i;
+	pte_t *pte;
+	unsigned long start;
+	unsigned long end;
+	unsigned long addr;
+	struct page *page;
+	unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {};
+	struct mem_cgroup *memcg = page_memcg(pvmw->page);
+	struct pglist_data *pgdat = page_pgdat(pvmw->page);
+	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+	DEFINE_MAX_SEQ(lruvec);
+	int old_gen, new_gen = lru_gen_from_seq(max_seq);
+
+	lockdep_assert_held(pvmw->ptl);
+	VM_BUG_ON_PAGE(PageLRU(pvmw->page), pvmw->page);
+
+	if (spin_is_contended(pvmw->ptl))
+		return;
+
+	start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
+	end = pmd_addr_end(pvmw->address, pvmw->vma->vm_end);
+
+	if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
+		if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
+			end = start + MIN_LRU_BATCH * PAGE_SIZE;
+		else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2)
+			start = end - MIN_LRU_BATCH * PAGE_SIZE;
+		else {
+			start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2;
+			end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2;
+		}
+	}
+
+	pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE;
+
+	rcu_read_lock();
+	arch_enter_lazy_mmu_mode();
+
+	for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) {
+		unsigned long pfn = pte_pfn(pte[i]);
+
+		VM_BUG_ON(addr < pvmw->vma->vm_start || addr >= pvmw->vma->vm_end);
+
+		if (!pte_present(pte[i]) || is_zero_pfn(pfn))
+			continue;
+
+		if (WARN_ON_ONCE(pte_devmap(pte[i]) || pte_special(pte[i])))
+			continue;
+
+		if (!pte_young(pte[i]))
+			continue;
+
+		VM_BUG_ON(!pfn_valid(pfn));
+		if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+			continue;
+
+		page = compound_head(pfn_to_page(pfn));
+		if (page_to_nid(page) != pgdat->node_id)
+			continue;
+
+		if (page_memcg_rcu(page) != memcg)
+			continue;
+
+		if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i))
+			continue;
+
+		if (pte_dirty(pte[i]) && !PageDirty(page) &&
+		    !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page)))
+			set_page_dirty(page);
+
+		old_gen = page_lru_gen(page);
+		if (old_gen < 0)
+			SetPageReferenced(page);
+		else if (old_gen != new_gen)
+			__set_bit(i, bitmap);
+	}
+
+	arch_leave_lazy_mmu_mode();
+	rcu_read_unlock();
+
+	if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) {
+		for_each_set_bit(i, bitmap, MIN_LRU_BATCH)
+			activate_page(pte_page(pte[i]));
+		return;
+	}
+
+	/* page_update_gen() requires stable page_memcg() */
+	if (!mem_cgroup_trylock_pages(memcg))
+		return;
+
+	spin_lock_irq(&lruvec->lru_lock);
+	new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq);
+
+	for_each_set_bit(i, bitmap, MIN_LRU_BATCH) {
+		page = compound_head(pte_page(pte[i]));
+		if (page_memcg_rcu(page) != memcg)
+			continue;
+
+		old_gen = page_update_gen(page, new_gen);
+		if (old_gen < 0 || old_gen == new_gen)
+			continue;
+
+		lru_gen_update_size(lruvec, page, old_gen, new_gen);
+	}
+
+	spin_unlock_irq(&lruvec->lru_lock);
+
+	mem_cgroup_unlock_pages();
+}
+
 /******************************************************************************
  *                            the eviction
  ******************************************************************************/
@@ -3344,6 +3495,11 @@ static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx)
 		return true;
 	}
 
+	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
+		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
+		return true;
+	}
+
 	if (tier > tier_idx) {
 		int hist = lru_hist_from_seq(lrugen->min_seq[type]);
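
[Editorial note] In lru_gen_look_around() above, the scan window is first bounded by the PMD and the VMA and then, if larger than MIN_LRU_BATCH pages, clamped and re-centred on the faulting address. The same arithmetic is pulled out below into a standalone helper for readability; the function name and explicit parameters are illustrative only and not part of the patch:

/*
 * Editorial sketch: compute the [start, end) window scanned around @addr.
 * @batch corresponds to MIN_LRU_BATCH * PAGE_SIZE in the patch.
 */
static void clamp_lookaround_window(unsigned long addr, unsigned long vm_start,
				    unsigned long vm_end, unsigned long batch,
				    unsigned long *start, unsigned long *end)
{
	*start = max(addr & PMD_MASK, vm_start);
	*end = pmd_addr_end(addr, vm_end);

	if (*end - *start <= batch)
		return;

	if (addr - *start < batch / 2)		/* near the start of the window */
		*end = *start + batch;
	else if (*end - addr < batch / 2)	/* near the end of the window */
		*start = *end - batch;
	else {					/* centre the window on @addr */
		*start = addr - batch / 2;
		*end = addr + batch / 2;
	}
}
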