mm/munlock: maintain page->mlock_count while unevictable
Previous patches have been preparatory: now implement page->mlock_count. The ordering of the "Unevictable LRU" is of no significance, and there is no point holding unevictable pages on a list: place page->mlock_count to overlay page->lru.prev (since page->lru.next is overlaid by compound_head, which needs to be even so as not to satisfy PageTail - though 2 could be added instead of 1 for each mlock, if that's ever an improvement).

But it's only safe to rely on or modify page->mlock_count while lruvec lock is held and page is on unevictable "LRU" - we can save lots of edits by continuing to pretend that there's an imaginary LRU here (there is an unevictable count which still needs to be maintained, but not a list).

The mlock_count technique suffers from an unreliability much like with page_mlock(): while someone else has the page off LRU, not much can be done. As before, err on the safe side (behave as if mlock_count 0), and let try_to_unlock_one() move the page to unevictable if reclaim finds out later on - a few misplaced pages don't matter, what we want to avoid is imbalancing reclaim by flooding evictable lists with unevictable pages.

I am not a fan of "if (!isolate_lru_page(page)) putback_lru_page(page);": if we have taken lruvec lock to get the page off its present list, then we save everyone trouble (and however many extra atomic ops) by putting it on its destination list immediately.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
This commit is contained in:
parent
b109b87050
commit
07ca760673
|
@ -99,7 +99,8 @@ void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
|
|||
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
folio_nr_pages(folio));
|
||||
list_add(&folio->lru, &lruvec->lists[lru]);
|
||||
if (lru != LRU_UNEVICTABLE)
|
||||
list_add(&folio->lru, &lruvec->lists[lru]);
|
||||
}
|
||||
|
||||
static __always_inline void add_page_to_lru_list(struct page *page,
|
||||
|
@ -115,6 +116,7 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
|
|||
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
folio_nr_pages(folio));
|
||||
/* This is not expected to be used on LRU_UNEVICTABLE */
|
||||
list_add_tail(&folio->lru, &lruvec->lists[lru]);
|
||||
}
|
||||
|
||||
|
@ -127,8 +129,11 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page,
|
|||
static __always_inline
|
||||
void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
|
||||
{
|
||||
list_del(&folio->lru);
|
||||
update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio),
|
||||
enum lru_list lru = folio_lru_list(folio);
|
||||
|
||||
if (lru != LRU_UNEVICTABLE)
|
||||
list_del(&folio->lru);
|
||||
update_lru_size(lruvec, lru, folio_zonenum(folio),
|
||||
-folio_nr_pages(folio));
|
||||
}
|
||||
|
||||
|
|
|
@ -85,7 +85,16 @@ struct page {
|
|||
* lruvec->lru_lock. Sometimes used as a generic list
|
||||
* by the page owner.
|
||||
*/
|
||||
struct list_head lru;
|
||||
union {
|
||||
struct list_head lru;
|
||||
/* Or, for the Unevictable "LRU list" slot */
|
||||
struct {
|
||||
/* Always even, to negate PageTail */
|
||||
void *__filler;
|
||||
/* Count page's or folio's mlocks */
|
||||
unsigned int mlock_count;
|
||||
};
|
||||
};
|
||||
/* See page-flags.h for PAGE_MAPPING_FLAGS */
|
||||
struct address_space *mapping;
|
||||
pgoff_t index; /* Our offset within mapping. */
|
||||
|
@ -241,7 +250,13 @@ struct folio {
|
|||
struct {
|
||||
/* public: */
|
||||
unsigned long flags;
|
||||
struct list_head lru;
|
||||
union {
|
||||
struct list_head lru;
|
||||
struct {
|
||||
void *__filler;
|
||||
unsigned int mlock_count;
|
||||
};
|
||||
};
|
||||
struct address_space *mapping;
|
||||
pgoff_t index;
|
||||
void *private;
|
||||
|
|
|
@ -2300,8 +2300,11 @@ static void lru_add_page_tail(struct page *head, struct page *tail,
|
|||
} else {
|
||||
/* head is still on lru (and we have it frozen) */
|
||||
VM_WARN_ON(!PageLRU(head));
|
||||
if (PageUnevictable(tail))
|
||||
tail->mlock_count = 0;
|
||||
else
|
||||
list_add_tail(&tail->lru, &head->lru);
|
||||
SetPageLRU(tail);
|
||||
list_add_tail(&tail->lru, &head->lru);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1257,8 +1257,7 @@ struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
|
|||
* @nr_pages: positive when adding or negative when removing
|
||||
*
|
||||
* This function must be called under lru_lock, just before a page is added
|
||||
* to or just after a page is removed from an lru list (that ordering being
|
||||
* so as to allow it to check that lru_size 0 is consistent with list_empty).
|
||||
* to or just after a page is removed from an lru list.
|
||||
*/
|
||||
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||
int zid, int nr_pages)
|
||||
|
|
70
mm/mlock.c
70
mm/mlock.c
|
@ -54,16 +54,35 @@ EXPORT_SYMBOL(can_do_mlock);
|
|||
*/
|
||||
void mlock_page(struct page *page)
|
||||
{
|
||||
struct lruvec *lruvec;
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
|
||||
if (!TestSetPageMlocked(page)) {
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
|
||||
count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
|
||||
if (!isolate_lru_page(page))
|
||||
putback_lru_page(page);
|
||||
__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
|
||||
}
|
||||
|
||||
/* There is nothing more we can do while it's off LRU */
|
||||
if (!TestClearPageLRU(page))
|
||||
return;
|
||||
|
||||
lruvec = folio_lruvec_lock_irq(page_folio(page));
|
||||
if (PageUnevictable(page)) {
|
||||
page->mlock_count++;
|
||||
goto out;
|
||||
}
|
||||
|
||||
del_page_from_lru_list(page, lruvec);
|
||||
ClearPageActive(page);
|
||||
SetPageUnevictable(page);
|
||||
page->mlock_count = 1;
|
||||
add_page_to_lru_list(page, lruvec);
|
||||
__count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
|
||||
out:
|
||||
SetPageLRU(page);
|
||||
unlock_page_lruvec_irq(lruvec);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -72,19 +91,40 @@ void mlock_page(struct page *page)
|
|||
*/
|
||||
void munlock_page(struct page *page)
|
||||
{
|
||||
struct lruvec *lruvec;
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
|
||||
if (TestClearPageMlocked(page)) {
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
|
||||
if (!isolate_lru_page(page)) {
|
||||
putback_lru_page(page);
|
||||
count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
|
||||
} else if (PageUnevictable(page)) {
|
||||
count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
|
||||
}
|
||||
lock_page_memcg(page);
|
||||
lruvec = folio_lruvec_lock_irq(page_folio(page));
|
||||
if (PageLRU(page) && PageUnevictable(page)) {
|
||||
/* Then mlock_count is maintained, but might undercount */
|
||||
if (page->mlock_count)
|
||||
page->mlock_count--;
|
||||
if (page->mlock_count)
|
||||
goto out;
|
||||
}
|
||||
/* else assume that was the last mlock: reclaim will fix it if not */
|
||||
|
||||
if (TestClearPageMlocked(page)) {
|
||||
__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
|
||||
if (PageLRU(page) || !PageUnevictable(page))
|
||||
__count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
|
||||
else
|
||||
__count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
|
||||
}
|
||||
|
||||
/* page_evictable() has to be checked *after* clearing Mlocked */
|
||||
if (PageLRU(page) && PageUnevictable(page) && page_evictable(page)) {
|
||||
del_page_from_lru_list(page, lruvec);
|
||||
ClearPageUnevictable(page);
|
||||
add_page_to_lru_list(page, lruvec);
|
||||
__count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
|
||||
}
|
||||
out:
|
||||
unlock_page_lruvec_irq(lruvec);
|
||||
unlock_page_memcg(page);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -81,6 +81,13 @@ void lruvec_init(struct lruvec *lruvec)
|
|||
|
||||
for_each_lru(lru)
|
||||
INIT_LIST_HEAD(&lruvec->lists[lru]);
|
||||
/*
|
||||
* The "Unevictable LRU" is imaginary: though its size is maintained,
|
||||
* it is never scanned, and unevictable pages are not threaded on it
|
||||
* (so that their lru fields can be reused to hold mlock_count).
|
||||
* Poison its list head, so that any operations on it would crash.
|
||||
*/
|
||||
list_del(&lruvec->lists[LRU_UNEVICTABLE]);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS)
|
||||
|
|
|
@ -1062,6 +1062,7 @@ static void __pagevec_lru_add_fn(struct folio *folio, struct lruvec *lruvec)
|
|||
} else {
|
||||
folio_clear_active(folio);
|
||||
folio_set_unevictable(folio);
|
||||
folio->mlock_count = !!folio_test_mlocked(folio);
|
||||
if (!was_unevictable)
|
||||
__count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue