diff --git a/mm/internal.h b/mm/internal.h index b3f0dd3ffba2..18af980bb1b8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -402,7 +402,8 @@ extern int mlock_future_check(struct mm_struct *mm, unsigned long flags, * * mlock is usually called at the end of page_add_*_rmap(), * munlock at the end of page_remove_rmap(); but new anon - * pages are managed in lru_cache_add_inactive_or_unevictable(). + * pages are managed by lru_cache_add_inactive_or_unevictable() + * calling mlock_new_page(). * * @compound is used to include pmd mappings of THPs, but filter out * pte mappings of THPs, which cannot be consistently counted: a pte @@ -425,6 +426,9 @@ static inline void munlock_vma_page(struct page *page, (compound || !PageTransCompound(page))) munlock_page(page); } +void mlock_new_page(struct page *page); +bool need_mlock_page_drain(int cpu); +void mlock_page_drain(int cpu); extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); @@ -503,6 +507,9 @@ static inline void mlock_vma_page(struct page *page, struct vm_area_struct *vma, bool compound) { } static inline void munlock_vma_page(struct page *page, struct vm_area_struct *vma, bool compound) { } +static inline void mlock_new_page(struct page *page) { } +static inline bool need_mlock_page_drain(int cpu) { return false; } +static inline void mlock_page_drain(int cpu) { } static inline void vunmap_range_noflush(unsigned long start, unsigned long end) { } diff --git a/mm/mlock.c b/mm/mlock.c index 581ea8bf1b83..d28e56529e5b 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -28,6 +28,8 @@ #include "internal.h" +static DEFINE_PER_CPU(struct pagevec, mlock_pvec); + bool can_do_mlock(void) { if (rlimit(RLIMIT_MEMLOCK) != 0) @@ -49,57 +51,79 @@ EXPORT_SYMBOL(can_do_mlock); * PageUnevictable is set to indicate the unevictable state. */ -/** - * mlock_page - mlock a page - * @page: page to be mlocked, either a normal page or a THP head. - */ -void mlock_page(struct page *page) +static struct lruvec *__mlock_page(struct page *page, struct lruvec *lruvec) { - struct lruvec *lruvec; - int nr_pages = thp_nr_pages(page); - - VM_BUG_ON_PAGE(PageTail(page), page); - - if (!TestSetPageMlocked(page)) { - mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); - __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); - } - /* There is nothing more we can do while it's off LRU */ if (!TestClearPageLRU(page)) - return; + return lruvec; + + lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec); + + if (unlikely(page_evictable(page))) { + /* + * This is a little surprising, but quite possible: + * PageMlocked must have got cleared already by another CPU. + * Could this page be on the Unevictable LRU? I'm not sure, + * but move it now if so. + */ + if (PageUnevictable(page)) { + del_page_from_lru_list(page, lruvec); + ClearPageUnevictable(page); + add_page_to_lru_list(page, lruvec); + __count_vm_events(UNEVICTABLE_PGRESCUED, + thp_nr_pages(page)); + } + goto out; + } - lruvec = folio_lruvec_lock_irq(page_folio(page)); if (PageUnevictable(page)) { - page->mlock_count++; + if (PageMlocked(page)) + page->mlock_count++; goto out; } del_page_from_lru_list(page, lruvec); ClearPageActive(page); SetPageUnevictable(page); - page->mlock_count = 1; + page->mlock_count = !!PageMlocked(page); add_page_to_lru_list(page, lruvec); - __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); + __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page)); out: SetPageLRU(page); - unlock_page_lruvec_irq(lruvec); + return lruvec; } -/** - * munlock_page - munlock a page - * @page: page to be munlocked, either a normal page or a THP head. - */ -void munlock_page(struct page *page) +static struct lruvec *__mlock_new_page(struct page *page, struct lruvec *lruvec) +{ + VM_BUG_ON_PAGE(PageLRU(page), page); + + lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec); + + /* As above, this is a little surprising, but possible */ + if (unlikely(page_evictable(page))) + goto out; + + SetPageUnevictable(page); + page->mlock_count = !!PageMlocked(page); + __count_vm_events(UNEVICTABLE_PGCULLED, thp_nr_pages(page)); +out: + add_page_to_lru_list(page, lruvec); + SetPageLRU(page); + return lruvec; +} + +static struct lruvec *__munlock_page(struct page *page, struct lruvec *lruvec) { - struct lruvec *lruvec; int nr_pages = thp_nr_pages(page); + bool isolated = false; - VM_BUG_ON_PAGE(PageTail(page), page); + if (!TestClearPageLRU(page)) + goto munlock; - lock_page_memcg(page); - lruvec = folio_lruvec_lock_irq(page_folio(page)); - if (PageLRU(page) && PageUnevictable(page)) { + isolated = true; + lruvec = folio_lruvec_relock_irq(page_folio(page), lruvec); + + if (PageUnevictable(page)) { /* Then mlock_count is maintained, but might undercount */ if (page->mlock_count) page->mlock_count--; @@ -108,24 +132,151 @@ void munlock_page(struct page *page) } /* else assume that was the last mlock: reclaim will fix it if not */ +munlock: if (TestClearPageMlocked(page)) { __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); - if (PageLRU(page) || !PageUnevictable(page)) + if (isolated || !PageUnevictable(page)) __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages); else __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages); } /* page_evictable() has to be checked *after* clearing Mlocked */ - if (PageLRU(page) && PageUnevictable(page) && page_evictable(page)) { + if (isolated && PageUnevictable(page) && page_evictable(page)) { del_page_from_lru_list(page, lruvec); ClearPageUnevictable(page); add_page_to_lru_list(page, lruvec); __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } out: - unlock_page_lruvec_irq(lruvec); - unlock_page_memcg(page); + if (isolated) + SetPageLRU(page); + return lruvec; +} + +/* + * Flags held in the low bits of a struct page pointer on the mlock_pvec. + */ +#define LRU_PAGE 0x1 +#define NEW_PAGE 0x2 +static inline struct page *mlock_lru(struct page *page) +{ + return (struct page *)((unsigned long)page + LRU_PAGE); +} + +static inline struct page *mlock_new(struct page *page) +{ + return (struct page *)((unsigned long)page + NEW_PAGE); +} + +/* + * mlock_pagevec() is derived from pagevec_lru_move_fn(): + * perhaps that can make use of such page pointer flags in future, + * but for now just keep it for mlock. We could use three separate + * pagevecs instead, but one feels better (munlocking a full pagevec + * does not need to drain mlocking pagevecs first). + */ +static void mlock_pagevec(struct pagevec *pvec) +{ + struct lruvec *lruvec = NULL; + unsigned long mlock; + struct page *page; + int i; + + for (i = 0; i < pagevec_count(pvec); i++) { + page = pvec->pages[i]; + mlock = (unsigned long)page & (LRU_PAGE | NEW_PAGE); + page = (struct page *)((unsigned long)page - mlock); + pvec->pages[i] = page; + + if (mlock & LRU_PAGE) + lruvec = __mlock_page(page, lruvec); + else if (mlock & NEW_PAGE) + lruvec = __mlock_new_page(page, lruvec); + else + lruvec = __munlock_page(page, lruvec); + } + + if (lruvec) + unlock_page_lruvec_irq(lruvec); + release_pages(pvec->pages, pvec->nr); + pagevec_reinit(pvec); +} + +void mlock_page_drain(int cpu) +{ + struct pagevec *pvec; + + pvec = &per_cpu(mlock_pvec, cpu); + if (pagevec_count(pvec)) + mlock_pagevec(pvec); +} + +bool need_mlock_page_drain(int cpu) +{ + return pagevec_count(&per_cpu(mlock_pvec, cpu)); +} + +/** + * mlock_page - mlock a page already on (or temporarily off) LRU + * @page: page to be mlocked, either a normal page or a THP head. + */ +void mlock_page(struct page *page) +{ + struct pagevec *pvec = &get_cpu_var(mlock_pvec); + + if (!TestSetPageMlocked(page)) { + int nr_pages = thp_nr_pages(page); + + mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); + __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); + } + + get_page(page); + if (!pagevec_add(pvec, mlock_lru(page)) || + PageHead(page) || lru_cache_disabled()) + mlock_pagevec(pvec); + put_cpu_var(mlock_pvec); +} + +/** + * mlock_new_page - mlock a newly allocated page not yet on LRU + * @page: page to be mlocked, either a normal page or a THP head. + */ +void mlock_new_page(struct page *page) +{ + struct pagevec *pvec = &get_cpu_var(mlock_pvec); + int nr_pages = thp_nr_pages(page); + + SetPageMlocked(page); + mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); + __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); + + get_page(page); + if (!pagevec_add(pvec, mlock_new(page)) || + PageHead(page) || lru_cache_disabled()) + mlock_pagevec(pvec); + put_cpu_var(mlock_pvec); +} + +/** + * munlock_page - munlock a page + * @page: page to be munlocked, either a normal page or a THP head. + */ +void munlock_page(struct page *page) +{ + struct pagevec *pvec = &get_cpu_var(mlock_pvec); + + /* + * TestClearPageMlocked(page) must be left to __munlock_page(), + * which will check whether the page is multiply mlocked. + */ + + get_page(page); + if (!pagevec_add(pvec, page) || + PageHead(page) || lru_cache_disabled()) + mlock_pagevec(pvec); + put_cpu_var(mlock_pvec); } static int mlock_pte_range(pmd_t *pmd, unsigned long addr, diff --git a/mm/swap.c b/mm/swap.c index 3f770b1ea2c1..842d5cd92cf6 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -490,18 +490,12 @@ EXPORT_SYMBOL(folio_add_lru); void lru_cache_add_inactive_or_unevictable(struct page *page, struct vm_area_struct *vma) { - bool unevictable; - VM_BUG_ON_PAGE(PageLRU(page), page); - unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED; - if (unlikely(unevictable) && !TestSetPageMlocked(page)) { - int nr_pages = thp_nr_pages(page); - - mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); - count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); - } - lru_cache_add(page); + if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED)) + mlock_new_page(page); + else + lru_cache_add(page); } /* @@ -640,6 +634,7 @@ void lru_add_drain_cpu(int cpu) pagevec_lru_move_fn(pvec, lru_lazyfree_fn); activate_page_drain(cpu); + mlock_page_drain(cpu); } /** @@ -842,6 +837,7 @@ inline void __lru_add_drain_all(bool force_all_cpus) pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) || pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) || need_activate_page_drain(cpu) || + need_mlock_page_drain(cpu) || has_bh_in_lru(cpu, NULL)) { INIT_WORK(work, lru_add_drain_per_cpu); queue_work_on(cpu, mm_percpu_wq, work); @@ -1030,7 +1026,7 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec) * Is an smp_mb__after_atomic() still required here, before * folio_evictable() tests PageMlocked, to rule out the possibility * of stranding an evictable folio on an unevictable LRU? I think - * not, because munlock_page() only clears PageMlocked while the LRU + * not, because __munlock_page() only clears PageMlocked while the LRU * lock is held. * * (That is not true of __page_cache_release(), and not necessarily @@ -1043,7 +1039,14 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec) } else { folio_clear_active(folio); folio_set_unevictable(folio); - folio->mlock_count = !!folio_test_mlocked(folio); + /* + * folio->mlock_count = !!folio_test_mlocked(folio)? + * But that leaves __mlock_page() in doubt whether another + * actor has already counted the mlock or not. Err on the + * safe side, underestimate, let page reclaim fix it, rather + * than leaving a page on the unevictable LRU indefinitely. + */ + folio->mlock_count = 0; if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); }