mirror of https://gitee.com/openkylin/linux.git
mm: replace vma prio_tree with an interval tree
Implement an interval tree as a replacement for the VMA prio_tree. The algorithms are similar to lib/interval_tree.c; however that code can't be directly reused as the interval endpoints are not explicitly stored in the VMA. So instead, the common algorithm is moved into a template and the details (node type, how to get interval endpoints from the node, etc) are filled in using the C preprocessor. Once the interval tree functions are available, using them as a replacement to the VMA prio tree is a relatively simple, mechanical job. Signed-off-by: Michel Lespinasse <walken@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Hillf Danton <dhillf@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
fff3fd8a12
commit
6b2dbba8b6
|
@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
|
|||
{
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
struct vm_area_struct *mpnt;
|
||||
struct prio_tree_iter iter;
|
||||
unsigned long offset;
|
||||
pgoff_t pgoff;
|
||||
int aliases = 0;
|
||||
|
@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
|
|||
* cache coherency.
|
||||
*/
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
|
||||
/*
|
||||
* If this VMA is not in our MM, we can ignore it.
|
||||
* Note that we intentionally mask out the VMA
|
||||
|
|
|
@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
|
|||
{
|
||||
struct mm_struct *mm = current->active_mm;
|
||||
struct vm_area_struct *mpnt;
|
||||
struct prio_tree_iter iter;
|
||||
pgoff_t pgoff;
|
||||
|
||||
/*
|
||||
|
@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
|
|||
pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
|
||||
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
|
||||
unsigned long offset;
|
||||
|
||||
/*
|
||||
|
|
|
@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
|
|||
{
|
||||
struct address_space *mapping = page_mapping(page);
|
||||
struct vm_area_struct *mpnt;
|
||||
struct prio_tree_iter iter;
|
||||
unsigned long offset;
|
||||
unsigned long addr, old_addr = 0;
|
||||
pgoff_t pgoff;
|
||||
|
@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
|
|||
* to flush one address here for them all to become coherent */
|
||||
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
|
||||
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
|
||||
addr = mpnt->vm_start + offset;
|
||||
|
||||
|
|
|
@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
|
|||
struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
|
||||
vma->vm_pgoff;
|
||||
struct prio_tree_iter iter;
|
||||
struct vm_area_struct *svma;
|
||||
unsigned long saddr;
|
||||
pte_t *spte = NULL;
|
||||
|
@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
|
|||
return (pte_t *)pmd_alloc(mm, pud, addr);
|
||||
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
|
||||
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
|
||||
if (svma == vma)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode)
|
|||
}
|
||||
|
||||
static inline void
|
||||
hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
|
||||
hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct prio_tree_iter iter;
|
||||
|
||||
vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
|
||||
vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) {
|
||||
unsigned long v_offset;
|
||||
|
||||
/*
|
||||
* Can the expression below overflow on 32-bit arches?
|
||||
* No, because the prio_tree returns us only those vmas
|
||||
* No, because the interval tree returns us only those vmas
|
||||
* which overlap the truncated area starting at pgoff,
|
||||
* and no vma on a 32-bit arch can span beyond the 4GB.
|
||||
*/
|
||||
|
@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
|
|||
|
||||
i_size_write(inode, offset);
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
if (!prio_tree_empty(&mapping->i_mmap))
|
||||
if (!RB_EMPTY_ROOT(&mapping->i_mmap))
|
||||
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
truncate_hugepages(inode, offset);
|
||||
|
|
|
@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping)
|
|||
mutex_init(&mapping->i_mmap_mutex);
|
||||
INIT_LIST_HEAD(&mapping->private_list);
|
||||
spin_lock_init(&mapping->private_lock);
|
||||
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
|
||||
mapping->i_mmap = RB_ROOT;
|
||||
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
|
||||
}
|
||||
EXPORT_SYMBOL(address_space_init_once);
|
||||
|
|
|
@ -401,7 +401,7 @@ struct inodes_stat_t {
|
|||
#include <linux/cache.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/prio_tree.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pid.h>
|
||||
#include <linux/bug.h>
|
||||
|
@ -669,7 +669,7 @@ struct address_space {
|
|||
struct radix_tree_root page_tree; /* radix tree of all pages */
|
||||
spinlock_t tree_lock; /* and lock protecting it */
|
||||
unsigned int i_mmap_writable;/* count VM_SHARED mappings */
|
||||
struct prio_tree_root i_mmap; /* tree of private and shared mappings */
|
||||
struct rb_root i_mmap; /* tree of private and shared mappings */
|
||||
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
|
||||
struct mutex i_mmap_mutex; /* protect tree, count, list */
|
||||
/* Protected by tree_lock together with the radix tree */
|
||||
|
@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
|
|||
*/
|
||||
static inline int mapping_mapped(struct address_space *mapping)
|
||||
{
|
||||
return !prio_tree_empty(&mapping->i_mmap) ||
|
||||
return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
|
||||
!list_empty(&mapping->i_mmap_nonlinear);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,215 @@
|
|||
/*
|
||||
Interval Trees
|
||||
(C) 2012 Michel Lespinasse <walken@google.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
include/linux/interval_tree_tmpl.h
|
||||
*/
|
||||
|
||||
/*
|
||||
* Template for implementing interval trees
|
||||
*
|
||||
* ITSTRUCT: struct type of the interval tree nodes
|
||||
* ITRB: name of struct rb_node field within ITSTRUCT
|
||||
* ITTYPE: type of the interval endpoints
|
||||
* ITSUBTREE: name of ITTYPE field within ITSTRUCT holding last-in-subtree
|
||||
* ITSTART(n): start endpoint of ITSTRUCT node n
|
||||
* ITLAST(n): last endpoing of ITSTRUCT node n
|
||||
* ITSTATIC: 'static' or empty
|
||||
* ITPREFIX: prefix to use for the inline tree definitions
|
||||
*/
|
||||
|
||||
/* IT(name) -> ITPREFIX_name */
|
||||
#define _ITNAME(prefix, name) prefix ## _ ## name
|
||||
#define ITNAME(prefix, name) _ITNAME(prefix, name)
|
||||
#define IT(name) ITNAME(ITPREFIX, name)
|
||||
|
||||
/* Callbacks for augmented rbtree insert and remove */
|
||||
|
||||
static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
|
||||
{
|
||||
ITTYPE max = ITLAST(node), subtree_last;
|
||||
if (node->ITRB.rb_left) {
|
||||
subtree_last = rb_entry(node->ITRB.rb_left,
|
||||
ITSTRUCT, ITRB)->ITSUBTREE;
|
||||
if (max < subtree_last)
|
||||
max = subtree_last;
|
||||
}
|
||||
if (node->ITRB.rb_right) {
|
||||
subtree_last = rb_entry(node->ITRB.rb_right,
|
||||
ITSTRUCT, ITRB)->ITSUBTREE;
|
||||
if (max < subtree_last)
|
||||
max = subtree_last;
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
|
||||
{
|
||||
while (rb != stop) {
|
||||
ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB);
|
||||
ITTYPE subtree_last = IT(compute_subtree_last)(node);
|
||||
if (node->ITSUBTREE == subtree_last)
|
||||
break;
|
||||
node->ITSUBTREE = subtree_last;
|
||||
rb = rb_parent(&node->ITRB);
|
||||
}
|
||||
}
|
||||
|
||||
static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
|
||||
{
|
||||
ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
|
||||
ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
|
||||
|
||||
new->ITSUBTREE = old->ITSUBTREE;
|
||||
}
|
||||
|
||||
static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new)
|
||||
{
|
||||
ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
|
||||
ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
|
||||
|
||||
new->ITSUBTREE = old->ITSUBTREE;
|
||||
old->ITSUBTREE = IT(compute_subtree_last)(old);
|
||||
}
|
||||
|
||||
static const struct rb_augment_callbacks IT(augment_callbacks) = {
|
||||
IT(augment_propagate), IT(augment_copy), IT(augment_rotate)
|
||||
};
|
||||
|
||||
/* Insert / remove interval nodes from the tree */
|
||||
|
||||
ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root)
|
||||
{
|
||||
struct rb_node **link = &root->rb_node, *rb_parent = NULL;
|
||||
ITTYPE start = ITSTART(node), last = ITLAST(node);
|
||||
ITSTRUCT *parent;
|
||||
|
||||
while (*link) {
|
||||
rb_parent = *link;
|
||||
parent = rb_entry(rb_parent, ITSTRUCT, ITRB);
|
||||
if (parent->ITSUBTREE < last)
|
||||
parent->ITSUBTREE = last;
|
||||
if (start < ITSTART(parent))
|
||||
link = &parent->ITRB.rb_left;
|
||||
else
|
||||
link = &parent->ITRB.rb_right;
|
||||
}
|
||||
|
||||
node->ITSUBTREE = last;
|
||||
rb_link_node(&node->ITRB, rb_parent, link);
|
||||
rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks));
|
||||
}
|
||||
|
||||
ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root)
|
||||
{
|
||||
rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks));
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over intervals intersecting [start;last]
|
||||
*
|
||||
* Note that a node's interval intersects [start;last] iff:
|
||||
* Cond1: ITSTART(node) <= last
|
||||
* and
|
||||
* Cond2: start <= ITLAST(node)
|
||||
*/
|
||||
|
||||
static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
|
||||
{
|
||||
while (true) {
|
||||
/*
|
||||
* Loop invariant: start <= node->ITSUBTREE
|
||||
* (Cond2 is satisfied by one of the subtree nodes)
|
||||
*/
|
||||
if (node->ITRB.rb_left) {
|
||||
ITSTRUCT *left = rb_entry(node->ITRB.rb_left,
|
||||
ITSTRUCT, ITRB);
|
||||
if (start <= left->ITSUBTREE) {
|
||||
/*
|
||||
* Some nodes in left subtree satisfy Cond2.
|
||||
* Iterate to find the leftmost such node N.
|
||||
* If it also satisfies Cond1, that's the match
|
||||
* we are looking for. Otherwise, there is no
|
||||
* matching interval as nodes to the right of N
|
||||
* can't satisfy Cond1 either.
|
||||
*/
|
||||
node = left;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (ITSTART(node) <= last) { /* Cond1 */
|
||||
if (start <= ITLAST(node)) /* Cond2 */
|
||||
return node; /* node is leftmost match */
|
||||
if (node->ITRB.rb_right) {
|
||||
node = rb_entry(node->ITRB.rb_right,
|
||||
ITSTRUCT, ITRB);
|
||||
if (start <= node->ITSUBTREE)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return NULL; /* No match */
|
||||
}
|
||||
}
|
||||
|
||||
ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root,
|
||||
ITTYPE start, ITTYPE last)
|
||||
{
|
||||
ITSTRUCT *node;
|
||||
|
||||
if (!root->rb_node)
|
||||
return NULL;
|
||||
node = rb_entry(root->rb_node, ITSTRUCT, ITRB);
|
||||
if (node->ITSUBTREE < start)
|
||||
return NULL;
|
||||
return IT(subtree_search)(node, start, last);
|
||||
}
|
||||
|
||||
ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
|
||||
{
|
||||
struct rb_node *rb = node->ITRB.rb_right, *prev;
|
||||
|
||||
while (true) {
|
||||
/*
|
||||
* Loop invariants:
|
||||
* Cond1: ITSTART(node) <= last
|
||||
* rb == node->ITRB.rb_right
|
||||
*
|
||||
* First, search right subtree if suitable
|
||||
*/
|
||||
if (rb) {
|
||||
ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB);
|
||||
if (start <= right->ITSUBTREE)
|
||||
return IT(subtree_search)(right, start, last);
|
||||
}
|
||||
|
||||
/* Move up the tree until we come from a node's left child */
|
||||
do {
|
||||
rb = rb_parent(&node->ITRB);
|
||||
if (!rb)
|
||||
return NULL;
|
||||
prev = &node->ITRB;
|
||||
node = rb_entry(rb, ITSTRUCT, ITRB);
|
||||
rb = node->ITRB.rb_right;
|
||||
} while (prev == rb);
|
||||
|
||||
/* Check if the node intersects [start;last] */
|
||||
if (last < ITSTART(node)) /* !Cond1 */
|
||||
return NULL;
|
||||
else if (start <= ITLAST(node)) /* Cond2 */
|
||||
return node;
|
||||
}
|
||||
}
|
|
@ -10,7 +10,6 @@
|
|||
#include <linux/list.h>
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/prio_tree.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/debug_locks.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone);
|
|||
extern atomic_long_t mmap_pages_allocated;
|
||||
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
|
||||
|
||||
/* prio_tree.c */
|
||||
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
|
||||
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
|
||||
void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
|
||||
struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
|
||||
struct prio_tree_iter *iter);
|
||||
/* interval_tree.c */
|
||||
void vma_interval_tree_add(struct vm_area_struct *vma,
|
||||
struct vm_area_struct *old,
|
||||
struct address_space *mapping);
|
||||
void vma_interval_tree_insert(struct vm_area_struct *node,
|
||||
struct rb_root *root);
|
||||
void vma_interval_tree_remove(struct vm_area_struct *node,
|
||||
struct rb_root *root);
|
||||
struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
|
||||
unsigned long start, unsigned long last);
|
||||
struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
|
||||
unsigned long start, unsigned long last);
|
||||
|
||||
#define vma_prio_tree_foreach(vma, iter, root, begin, end) \
|
||||
for (prio_tree_iter_init(iter, root, begin, end), vma = NULL; \
|
||||
(vma = vma_prio_tree_next(vma, iter)); )
|
||||
#define vma_interval_tree_foreach(vma, root, start, last) \
|
||||
for (vma = vma_interval_tree_iter_first(root, start, last); \
|
||||
vma; vma = vma_interval_tree_iter_next(vma, start, last))
|
||||
|
||||
static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
|
||||
struct list_head *list)
|
||||
{
|
||||
vma->shared.vm_set.parent = NULL;
|
||||
list_add_tail(&vma->shared.vm_set.list, list);
|
||||
list_add_tail(&vma->shared.nonlinear, list);
|
||||
}
|
||||
|
||||
/* mmap.c */
|
||||
|
|
|
@ -6,7 +6,6 @@
|
|||
#include <linux/threads.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/prio_tree.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/completion.h>
|
||||
|
@ -240,18 +239,15 @@ struct vm_area_struct {
|
|||
|
||||
/*
|
||||
* For areas with an address space and backing store,
|
||||
* linkage into the address_space->i_mmap prio tree, or
|
||||
* linkage to the list of like vmas hanging off its node, or
|
||||
* linkage into the address_space->i_mmap interval tree, or
|
||||
* linkage of vma in the address_space->i_mmap_nonlinear list.
|
||||
*/
|
||||
union {
|
||||
struct {
|
||||
struct list_head list;
|
||||
void *parent; /* aligns with prio_tree_node parent */
|
||||
struct vm_area_struct *head;
|
||||
} vm_set;
|
||||
|
||||
struct raw_prio_tree_node prio_tree_node;
|
||||
struct rb_node rb;
|
||||
unsigned long rb_subtree_last;
|
||||
} linear;
|
||||
struct list_head nonlinear;
|
||||
} shared;
|
||||
|
||||
/*
|
||||
|
|
|
@ -735,7 +735,6 @@ static struct map_info *
|
|||
build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
|
||||
{
|
||||
unsigned long pgoff = offset >> PAGE_SHIFT;
|
||||
struct prio_tree_iter iter;
|
||||
struct vm_area_struct *vma;
|
||||
struct map_info *curr = NULL;
|
||||
struct map_info *prev = NULL;
|
||||
|
@ -744,7 +743,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
|
|||
|
||||
again:
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
if (!valid_vma(vma, is_register))
|
||||
continue;
|
||||
|
||||
|
|
|
@ -423,7 +423,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
|
|||
mapping->i_mmap_writable++;
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
/* insert tmp into the share list, just after mpnt */
|
||||
vma_prio_tree_add(tmp, mpnt);
|
||||
vma_interval_tree_add(tmp, mpnt, mapping);
|
||||
flush_dcache_mmap_unlock(mapping);
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
}
|
||||
|
|
|
@ -1,159 +1,13 @@
|
|||
#include <linux/init.h>
|
||||
#include <linux/interval_tree.h>
|
||||
|
||||
/* Callbacks for augmented rbtree insert and remove */
|
||||
#define ITSTRUCT struct interval_tree_node
|
||||
#define ITRB rb
|
||||
#define ITTYPE unsigned long
|
||||
#define ITSUBTREE __subtree_last
|
||||
#define ITSTART(n) ((n)->start)
|
||||
#define ITLAST(n) ((n)->last)
|
||||
#define ITSTATIC
|
||||
#define ITPREFIX interval_tree
|
||||
|
||||
static inline unsigned long
|
||||
compute_subtree_last(struct interval_tree_node *node)
|
||||
{
|
||||
unsigned long max = node->last, subtree_last;
|
||||
if (node->rb.rb_left) {
|
||||
subtree_last = rb_entry(node->rb.rb_left,
|
||||
struct interval_tree_node, rb)->__subtree_last;
|
||||
if (max < subtree_last)
|
||||
max = subtree_last;
|
||||
}
|
||||
if (node->rb.rb_right) {
|
||||
subtree_last = rb_entry(node->rb.rb_right,
|
||||
struct interval_tree_node, rb)->__subtree_last;
|
||||
if (max < subtree_last)
|
||||
max = subtree_last;
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
RB_DECLARE_CALLBACKS(static, augment_callbacks, struct interval_tree_node, rb,
|
||||
unsigned long, __subtree_last, compute_subtree_last)
|
||||
|
||||
/* Insert / remove interval nodes from the tree */
|
||||
|
||||
void interval_tree_insert(struct interval_tree_node *node,
|
||||
struct rb_root *root)
|
||||
{
|
||||
struct rb_node **link = &root->rb_node, *rb_parent = NULL;
|
||||
unsigned long start = node->start, last = node->last;
|
||||
struct interval_tree_node *parent;
|
||||
|
||||
while (*link) {
|
||||
rb_parent = *link;
|
||||
parent = rb_entry(rb_parent, struct interval_tree_node, rb);
|
||||
if (parent->__subtree_last < last)
|
||||
parent->__subtree_last = last;
|
||||
if (start < parent->start)
|
||||
link = &parent->rb.rb_left;
|
||||
else
|
||||
link = &parent->rb.rb_right;
|
||||
}
|
||||
|
||||
node->__subtree_last = last;
|
||||
rb_link_node(&node->rb, rb_parent, link);
|
||||
rb_insert_augmented(&node->rb, root, &augment_callbacks);
|
||||
}
|
||||
|
||||
void interval_tree_remove(struct interval_tree_node *node,
|
||||
struct rb_root *root)
|
||||
{
|
||||
rb_erase_augmented(&node->rb, root, &augment_callbacks);
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over intervals intersecting [start;last]
|
||||
*
|
||||
* Note that a node's interval intersects [start;last] iff:
|
||||
* Cond1: node->start <= last
|
||||
* and
|
||||
* Cond2: start <= node->last
|
||||
*/
|
||||
|
||||
static struct interval_tree_node *
|
||||
subtree_search(struct interval_tree_node *node,
|
||||
unsigned long start, unsigned long last)
|
||||
{
|
||||
while (true) {
|
||||
/*
|
||||
* Loop invariant: start <= node->__subtree_last
|
||||
* (Cond2 is satisfied by one of the subtree nodes)
|
||||
*/
|
||||
if (node->rb.rb_left) {
|
||||
struct interval_tree_node *left =
|
||||
rb_entry(node->rb.rb_left,
|
||||
struct interval_tree_node, rb);
|
||||
if (start <= left->__subtree_last) {
|
||||
/*
|
||||
* Some nodes in left subtree satisfy Cond2.
|
||||
* Iterate to find the leftmost such node N.
|
||||
* If it also satisfies Cond1, that's the match
|
||||
* we are looking for. Otherwise, there is no
|
||||
* matching interval as nodes to the right of N
|
||||
* can't satisfy Cond1 either.
|
||||
*/
|
||||
node = left;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (node->start <= last) { /* Cond1 */
|
||||
if (start <= node->last) /* Cond2 */
|
||||
return node; /* node is leftmost match */
|
||||
if (node->rb.rb_right) {
|
||||
node = rb_entry(node->rb.rb_right,
|
||||
struct interval_tree_node, rb);
|
||||
if (start <= node->__subtree_last)
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return NULL; /* No match */
|
||||
}
|
||||
}
|
||||
|
||||
struct interval_tree_node *
|
||||
interval_tree_iter_first(struct rb_root *root,
|
||||
unsigned long start, unsigned long last)
|
||||
{
|
||||
struct interval_tree_node *node;
|
||||
|
||||
if (!root->rb_node)
|
||||
return NULL;
|
||||
node = rb_entry(root->rb_node, struct interval_tree_node, rb);
|
||||
if (node->__subtree_last < start)
|
||||
return NULL;
|
||||
return subtree_search(node, start, last);
|
||||
}
|
||||
|
||||
struct interval_tree_node *
|
||||
interval_tree_iter_next(struct interval_tree_node *node,
|
||||
unsigned long start, unsigned long last)
|
||||
{
|
||||
struct rb_node *rb = node->rb.rb_right, *prev;
|
||||
|
||||
while (true) {
|
||||
/*
|
||||
* Loop invariants:
|
||||
* Cond1: node->start <= last
|
||||
* rb == node->rb.rb_right
|
||||
*
|
||||
* First, search right subtree if suitable
|
||||
*/
|
||||
if (rb) {
|
||||
struct interval_tree_node *right =
|
||||
rb_entry(rb, struct interval_tree_node, rb);
|
||||
if (start <= right->__subtree_last)
|
||||
return subtree_search(right, start, last);
|
||||
}
|
||||
|
||||
/* Move up the tree until we come from a node's left child */
|
||||
do {
|
||||
rb = rb_parent(&node->rb);
|
||||
if (!rb)
|
||||
return NULL;
|
||||
prev = &node->rb;
|
||||
node = rb_entry(rb, struct interval_tree_node, rb);
|
||||
rb = node->rb.rb_right;
|
||||
} while (prev == rb);
|
||||
|
||||
/* Check if the node intersects [start;last] */
|
||||
if (last < node->start) /* !Cond1 */
|
||||
return NULL;
|
||||
else if (start <= node->last) /* Cond2 */
|
||||
return node;
|
||||
}
|
||||
}
|
||||
#include <linux/interval_tree_tmpl.h>
|
||||
|
|
|
@ -44,28 +44,13 @@
|
|||
* The following macros are used for implementing prio_tree for i_mmap
|
||||
*/
|
||||
|
||||
#define RADIX_INDEX(vma) ((vma)->vm_pgoff)
|
||||
#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
|
||||
/* avoid overflow */
|
||||
#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
|
||||
|
||||
|
||||
static void get_index(const struct prio_tree_root *root,
|
||||
const struct prio_tree_node *node,
|
||||
unsigned long *radix, unsigned long *heap)
|
||||
{
|
||||
if (root->raw) {
|
||||
struct vm_area_struct *vma = prio_tree_entry(
|
||||
node, struct vm_area_struct, shared.prio_tree_node);
|
||||
|
||||
*radix = RADIX_INDEX(vma);
|
||||
*heap = HEAP_INDEX(vma);
|
||||
}
|
||||
else {
|
||||
*radix = node->start;
|
||||
*heap = node->last;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
|
||||
|
||||
|
|
|
@ -14,9 +14,9 @@ endif
|
|||
obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
|
||||
maccess.o page_alloc.o page-writeback.o \
|
||||
readahead.o swap.o truncate.o vmscan.o shmem.o \
|
||||
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
|
||||
util.o mmzone.o vmstat.o backing-dev.o \
|
||||
mm_init.o mmu_context.o percpu.o slab_common.o \
|
||||
compaction.o $(mmu-y)
|
||||
compaction.o interval_tree.o $(mmu-y)
|
||||
|
||||
obj-y += init-mm.o
|
||||
|
||||
|
|
|
@ -167,7 +167,6 @@ __xip_unmap (struct address_space * mapping,
|
|||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm;
|
||||
struct prio_tree_iter iter;
|
||||
unsigned long address;
|
||||
pte_t *pte;
|
||||
pte_t pteval;
|
||||
|
@ -184,7 +183,7 @@ __xip_unmap (struct address_space * mapping,
|
|||
|
||||
retry:
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
mm = vma->vm_mm;
|
||||
address = vma->vm_start +
|
||||
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
|
||||
|
|
|
@ -214,7 +214,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
|
|||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
vma->vm_flags |= VM_NONLINEAR;
|
||||
vma_prio_tree_remove(vma, &mapping->i_mmap);
|
||||
vma_interval_tree_remove(vma, &mapping->i_mmap);
|
||||
vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
|
||||
flush_dcache_mmap_unlock(mapping);
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
|
|
|
@ -2474,7 +2474,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
struct hstate *h = hstate_vma(vma);
|
||||
struct vm_area_struct *iter_vma;
|
||||
struct address_space *mapping;
|
||||
struct prio_tree_iter iter;
|
||||
pgoff_t pgoff;
|
||||
|
||||
/*
|
||||
|
@ -2491,7 +2490,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
* __unmap_hugepage_range() is called as the lock is already held
|
||||
*/
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
/* Do not unmap the current VMA */
|
||||
if (iter_vma == vma)
|
||||
continue;
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* mm/interval_tree.c - interval tree for mapping->i_mmap
|
||||
*
|
||||
* Copyright (C) 2012, Michel Lespinasse <walken@google.com>
|
||||
*
|
||||
* This file is released under the GPL v2.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
#define ITSTRUCT struct vm_area_struct
|
||||
#define ITRB shared.linear.rb
|
||||
#define ITTYPE unsigned long
|
||||
#define ITSUBTREE shared.linear.rb_subtree_last
|
||||
#define ITSTART(n) ((n)->vm_pgoff)
|
||||
#define ITLAST(n) ((n)->vm_pgoff + \
|
||||
(((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1)
|
||||
#define ITSTATIC
|
||||
#define ITPREFIX vma_interval_tree
|
||||
|
||||
#include <linux/interval_tree_tmpl.h>
|
||||
|
||||
/* Insert old immediately after vma in the interval tree */
|
||||
void vma_interval_tree_add(struct vm_area_struct *vma,
|
||||
struct vm_area_struct *old,
|
||||
struct address_space *mapping)
|
||||
{
|
||||
struct rb_node **link;
|
||||
struct vm_area_struct *parent;
|
||||
unsigned long last;
|
||||
|
||||
if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
|
||||
list_add(&vma->shared.nonlinear, &old->shared.nonlinear);
|
||||
return;
|
||||
}
|
||||
|
||||
last = ITLAST(vma);
|
||||
|
||||
if (!old->shared.linear.rb.rb_right) {
|
||||
parent = old;
|
||||
link = &old->shared.linear.rb.rb_right;
|
||||
} else {
|
||||
parent = rb_entry(old->shared.linear.rb.rb_right,
|
||||
struct vm_area_struct, shared.linear.rb);
|
||||
if (parent->shared.linear.rb_subtree_last < last)
|
||||
parent->shared.linear.rb_subtree_last = last;
|
||||
while (parent->shared.linear.rb.rb_left) {
|
||||
parent = rb_entry(parent->shared.linear.rb.rb_left,
|
||||
struct vm_area_struct, shared.linear.rb);
|
||||
if (parent->shared.linear.rb_subtree_last < last)
|
||||
parent->shared.linear.rb_subtree_last = last;
|
||||
}
|
||||
link = &parent->shared.linear.rb.rb_left;
|
||||
}
|
||||
|
||||
vma->shared.linear.rb_subtree_last = last;
|
||||
rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link);
|
||||
rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap,
|
||||
&vma_interval_tree_augment_callbacks);
|
||||
}
|
|
@ -431,7 +431,6 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
|
|||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct task_struct *tsk;
|
||||
struct prio_tree_iter iter;
|
||||
struct address_space *mapping = page->mapping;
|
||||
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
|
@ -442,7 +441,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
|
|||
if (!task_early_kill(tsk))
|
||||
continue;
|
||||
|
||||
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
|
||||
pgoff) {
|
||||
/*
|
||||
* Send early kill signal to tasks where a vma covers
|
||||
|
|
|
@ -2801,14 +2801,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
|
|||
zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
|
||||
}
|
||||
|
||||
static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
|
||||
static inline void unmap_mapping_range_tree(struct rb_root *root,
|
||||
struct zap_details *details)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct prio_tree_iter iter;
|
||||
pgoff_t vba, vea, zba, zea;
|
||||
|
||||
vma_prio_tree_foreach(vma, &iter, root,
|
||||
vma_interval_tree_foreach(vma, root,
|
||||
details->first_index, details->last_index) {
|
||||
|
||||
vba = vma->vm_pgoff;
|
||||
|
@ -2839,7 +2838,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
|
|||
* across *all* the pages in each nonlinear VMA, not just the pages
|
||||
* whose virtual address lies outside the file truncation point.
|
||||
*/
|
||||
list_for_each_entry(vma, head, shared.vm_set.list) {
|
||||
list_for_each_entry(vma, head, shared.nonlinear) {
|
||||
details->nonlinear_vma = vma;
|
||||
unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
|
||||
}
|
||||
|
@ -2883,7 +2882,7 @@ void unmap_mapping_range(struct address_space *mapping,
|
|||
|
||||
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
|
||||
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
|
||||
unmap_mapping_range_tree(&mapping->i_mmap, &details);
|
||||
if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
|
||||
unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
|
||||
|
|
22
mm/mmap.c
22
mm/mmap.c
|
@ -199,14 +199,14 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
|
|||
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
if (unlikely(vma->vm_flags & VM_NONLINEAR))
|
||||
list_del_init(&vma->shared.vm_set.list);
|
||||
list_del_init(&vma->shared.nonlinear);
|
||||
else
|
||||
vma_prio_tree_remove(vma, &mapping->i_mmap);
|
||||
vma_interval_tree_remove(vma, &mapping->i_mmap);
|
||||
flush_dcache_mmap_unlock(mapping);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlink a file-based vm structure from its prio_tree, to hide
|
||||
* Unlink a file-based vm structure from its interval tree, to hide
|
||||
* vma from rmap and vmtruncate before freeing its page tables.
|
||||
*/
|
||||
void unlink_file_vma(struct vm_area_struct *vma)
|
||||
|
@ -411,7 +411,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
|
|||
if (unlikely(vma->vm_flags & VM_NONLINEAR))
|
||||
vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
|
||||
else
|
||||
vma_prio_tree_insert(vma, &mapping->i_mmap);
|
||||
vma_interval_tree_insert(vma, &mapping->i_mmap);
|
||||
flush_dcache_mmap_unlock(mapping);
|
||||
}
|
||||
}
|
||||
|
@ -449,7 +449,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
|
||||
/*
|
||||
* Helper for vma_adjust() in the split_vma insert case: insert a vma into the
|
||||
* mm's list and rbtree. It has already been inserted into the prio_tree.
|
||||
* mm's list and rbtree. It has already been inserted into the interval tree.
|
||||
*/
|
||||
static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
|
||||
{
|
||||
|
@ -491,7 +491,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
|
|||
struct vm_area_struct *next = vma->vm_next;
|
||||
struct vm_area_struct *importer = NULL;
|
||||
struct address_space *mapping = NULL;
|
||||
struct prio_tree_root *root = NULL;
|
||||
struct rb_root *root = NULL;
|
||||
struct anon_vma *anon_vma = NULL;
|
||||
struct file *file = vma->vm_file;
|
||||
long adjust_next = 0;
|
||||
|
@ -554,7 +554,7 @@ again: remove_next = 1 + (end > next->vm_end);
|
|||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
if (insert) {
|
||||
/*
|
||||
* Put into prio_tree now, so instantiated pages
|
||||
* Put into interval tree now, so instantiated pages
|
||||
* are visible to arm/parisc __flush_dcache_page
|
||||
* throughout; but we cannot insert into address
|
||||
* space until vma start or end is updated.
|
||||
|
@ -582,9 +582,9 @@ again: remove_next = 1 + (end > next->vm_end);
|
|||
|
||||
if (root) {
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
vma_prio_tree_remove(vma, root);
|
||||
vma_interval_tree_remove(vma, root);
|
||||
if (adjust_next)
|
||||
vma_prio_tree_remove(next, root);
|
||||
vma_interval_tree_remove(next, root);
|
||||
}
|
||||
|
||||
vma->vm_start = start;
|
||||
|
@ -597,8 +597,8 @@ again: remove_next = 1 + (end > next->vm_end);
|
|||
|
||||
if (root) {
|
||||
if (adjust_next)
|
||||
vma_prio_tree_insert(next, root);
|
||||
vma_prio_tree_insert(vma, root);
|
||||
vma_interval_tree_insert(next, root);
|
||||
vma_interval_tree_insert(vma, root);
|
||||
flush_dcache_mmap_unlock(mapping);
|
||||
}
|
||||
|
||||
|
|
10
mm/nommu.c
10
mm/nommu.c
|
@ -698,7 +698,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
|
|||
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
vma_prio_tree_insert(vma, &mapping->i_mmap);
|
||||
vma_interval_tree_insert(vma, &mapping->i_mmap);
|
||||
flush_dcache_mmap_unlock(mapping);
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
}
|
||||
|
@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
|
|||
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
flush_dcache_mmap_lock(mapping);
|
||||
vma_prio_tree_remove(vma, &mapping->i_mmap);
|
||||
vma_interval_tree_remove(vma, &mapping->i_mmap);
|
||||
flush_dcache_mmap_unlock(mapping);
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
}
|
||||
|
@ -2044,7 +2044,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
|
|||
size_t newsize)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
struct prio_tree_iter iter;
|
||||
struct vm_region *region;
|
||||
pgoff_t low, high;
|
||||
size_t r_size, r_top;
|
||||
|
@ -2056,8 +2055,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
|
|||
mutex_lock(&inode->i_mapping->i_mmap_mutex);
|
||||
|
||||
/* search for VMAs that fall within the dead zone */
|
||||
vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
|
||||
low, high) {
|
||||
vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
|
||||
/* found one - only interested if it's shared out of the page
|
||||
* cache */
|
||||
if (vma->vm_flags & VM_SHARED) {
|
||||
|
@ -2073,7 +2071,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
|
|||
* we don't check for any regions that start beyond the EOF as there
|
||||
* shouldn't be any
|
||||
*/
|
||||
vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
|
||||
vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap,
|
||||
0, ULONG_MAX) {
|
||||
if (!(vma->vm_flags & VM_SHARED))
|
||||
continue;
|
||||
|
|
208
mm/prio_tree.c
208
mm/prio_tree.c
|
@ -1,208 +0,0 @@
|
|||
/*
|
||||
* mm/prio_tree.c - priority search tree for mapping->i_mmap
|
||||
*
|
||||
* Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
|
||||
*
|
||||
* This file is released under the GPL v2.
|
||||
*
|
||||
* Based on the radix priority search tree proposed by Edward M. McCreight
|
||||
* SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
|
||||
*
|
||||
* 02Feb2004 Initial version
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/prio_tree.h>
|
||||
#include <linux/prefetch.h>
|
||||
|
||||
/*
|
||||
* See lib/prio_tree.c for details on the general radix priority search tree
|
||||
* code.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The following #defines are mirrored from lib/prio_tree.c. They're only used
|
||||
* for debugging, and should be removed (along with the debugging code using
|
||||
* them) when switching also VMAs to the regular prio_tree code.
|
||||
*/
|
||||
|
||||
#define RADIX_INDEX(vma) ((vma)->vm_pgoff)
|
||||
#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
|
||||
/* avoid overflow */
|
||||
#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
|
||||
|
||||
/*
|
||||
* Radix priority search tree for address_space->i_mmap
|
||||
*
|
||||
* For each vma that map a unique set of file pages i.e., unique [radix_index,
|
||||
* heap_index] value, we have a corresponding priority search tree node. If
|
||||
* multiple vmas have identical [radix_index, heap_index] value, then one of
|
||||
* them is used as a tree node and others are stored in a vm_set list. The tree
|
||||
* node points to the first vma (head) of the list using vm_set.head.
|
||||
*
|
||||
* prio_tree_root
|
||||
* |
|
||||
* A vm_set.head
|
||||
* / \ /
|
||||
* L R -> H-I-J-K-M-N-O-P-Q-S
|
||||
* ^ ^ <-- vm_set.list -->
|
||||
* tree nodes
|
||||
*
|
||||
* We need some way to identify whether a vma is a tree node, head of a vm_set
|
||||
* list, or just a member of a vm_set list. We cannot use vm_flags to store
|
||||
* such information. The reason is, in the above figure, it is possible that
|
||||
* vm_flags' of R and H are covered by the different mmap_sems. When R is
|
||||
* removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
|
||||
* H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
|
||||
* That's why some trick involving shared.vm_set.parent is used for identifying
|
||||
* tree nodes and list head nodes.
|
||||
*
|
||||
* vma radix priority search tree node rules:
|
||||
*
|
||||
* vma->shared.vm_set.parent != NULL ==> a tree node
|
||||
* vma->shared.vm_set.head != NULL ==> list of others mapping same range
|
||||
* vma->shared.vm_set.head == NULL ==> no others map the same range
|
||||
*
|
||||
* vma->shared.vm_set.parent == NULL
|
||||
* vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
|
||||
* vma->shared.vm_set.head == NULL ==> a list node
|
||||
*/
|
||||
|
||||
/*
|
||||
* Add a new vma known to map the same set of pages as the old vma:
|
||||
* useful for fork's dup_mmap as well as vma_prio_tree_insert below.
|
||||
* Note that it just happens to work correctly on i_mmap_nonlinear too.
|
||||
*/
|
||||
void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
|
||||
{
|
||||
/* Leave these BUG_ONs till prio_tree patch stabilizes */
|
||||
BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
|
||||
BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
|
||||
|
||||
vma->shared.vm_set.head = NULL;
|
||||
vma->shared.vm_set.parent = NULL;
|
||||
|
||||
if (!old->shared.vm_set.parent)
|
||||
list_add(&vma->shared.vm_set.list,
|
||||
&old->shared.vm_set.list);
|
||||
else if (old->shared.vm_set.head)
|
||||
list_add_tail(&vma->shared.vm_set.list,
|
||||
&old->shared.vm_set.head->shared.vm_set.list);
|
||||
else {
|
||||
INIT_LIST_HEAD(&vma->shared.vm_set.list);
|
||||
vma->shared.vm_set.head = old;
|
||||
old->shared.vm_set.head = vma;
|
||||
}
|
||||
}
|
||||
|
||||
void vma_prio_tree_insert(struct vm_area_struct *vma,
|
||||
struct prio_tree_root *root)
|
||||
{
|
||||
struct prio_tree_node *ptr;
|
||||
struct vm_area_struct *old;
|
||||
|
||||
vma->shared.vm_set.head = NULL;
|
||||
|
||||
ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node);
|
||||
if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) {
|
||||
old = prio_tree_entry(ptr, struct vm_area_struct,
|
||||
shared.prio_tree_node);
|
||||
vma_prio_tree_add(vma, old);
|
||||
}
|
||||
}
|
||||
|
||||
void vma_prio_tree_remove(struct vm_area_struct *vma,
|
||||
struct prio_tree_root *root)
|
||||
{
|
||||
struct vm_area_struct *node, *head, *new_head;
|
||||
|
||||
if (!vma->shared.vm_set.head) {
|
||||
if (!vma->shared.vm_set.parent)
|
||||
list_del_init(&vma->shared.vm_set.list);
|
||||
else
|
||||
raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
|
||||
} else {
|
||||
/* Leave this BUG_ON till prio_tree patch stabilizes */
|
||||
BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
|
||||
if (vma->shared.vm_set.parent) {
|
||||
head = vma->shared.vm_set.head;
|
||||
if (!list_empty(&head->shared.vm_set.list)) {
|
||||
new_head = list_entry(
|
||||
head->shared.vm_set.list.next,
|
||||
struct vm_area_struct,
|
||||
shared.vm_set.list);
|
||||
list_del_init(&head->shared.vm_set.list);
|
||||
} else
|
||||
new_head = NULL;
|
||||
|
||||
raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
|
||||
&head->shared.prio_tree_node);
|
||||
head->shared.vm_set.head = new_head;
|
||||
if (new_head)
|
||||
new_head->shared.vm_set.head = head;
|
||||
|
||||
} else {
|
||||
node = vma->shared.vm_set.head;
|
||||
if (!list_empty(&vma->shared.vm_set.list)) {
|
||||
new_head = list_entry(
|
||||
vma->shared.vm_set.list.next,
|
||||
struct vm_area_struct,
|
||||
shared.vm_set.list);
|
||||
list_del_init(&vma->shared.vm_set.list);
|
||||
node->shared.vm_set.head = new_head;
|
||||
new_head->shared.vm_set.head = node;
|
||||
} else
|
||||
node->shared.vm_set.head = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function to enumerate vmas that map a given file page or a set of
|
||||
* contiguous file pages. The function returns vmas that at least map a single
|
||||
* page in the given range of contiguous file pages.
|
||||
*/
|
||||
struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
|
||||
struct prio_tree_iter *iter)
|
||||
{
|
||||
struct prio_tree_node *ptr;
|
||||
struct vm_area_struct *next;
|
||||
|
||||
if (!vma) {
|
||||
/*
|
||||
* First call is with NULL vma
|
||||
*/
|
||||
ptr = prio_tree_next(iter);
|
||||
if (ptr) {
|
||||
next = prio_tree_entry(ptr, struct vm_area_struct,
|
||||
shared.prio_tree_node);
|
||||
prefetch(next->shared.vm_set.head);
|
||||
return next;
|
||||
} else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (vma->shared.vm_set.parent) {
|
||||
if (vma->shared.vm_set.head) {
|
||||
next = vma->shared.vm_set.head;
|
||||
prefetch(next->shared.vm_set.list.next);
|
||||
return next;
|
||||
}
|
||||
} else {
|
||||
next = list_entry(vma->shared.vm_set.list.next,
|
||||
struct vm_area_struct, shared.vm_set.list);
|
||||
if (!next->shared.vm_set.head) {
|
||||
prefetch(next->shared.vm_set.list.next);
|
||||
return next;
|
||||
}
|
||||
}
|
||||
|
||||
ptr = prio_tree_next(iter);
|
||||
if (ptr) {
|
||||
next = prio_tree_entry(ptr, struct vm_area_struct,
|
||||
shared.prio_tree_node);
|
||||
prefetch(next->shared.vm_set.head);
|
||||
return next;
|
||||
} else
|
||||
return NULL;
|
||||
}
|
18
mm/rmap.c
18
mm/rmap.c
|
@ -820,7 +820,6 @@ static int page_referenced_file(struct page *page,
|
|||
struct address_space *mapping = page->mapping;
|
||||
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
|
||||
struct vm_area_struct *vma;
|
||||
struct prio_tree_iter iter;
|
||||
int referenced = 0;
|
||||
|
||||
/*
|
||||
|
@ -846,7 +845,7 @@ static int page_referenced_file(struct page *page,
|
|||
*/
|
||||
mapcount = page_mapcount(page);
|
||||
|
||||
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
unsigned long address = vma_address(page, vma);
|
||||
if (address == -EFAULT)
|
||||
continue;
|
||||
|
@ -945,13 +944,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
|
|||
{
|
||||
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
|
||||
struct vm_area_struct *vma;
|
||||
struct prio_tree_iter iter;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(PageAnon(page));
|
||||
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
if (vma->vm_flags & VM_SHARED) {
|
||||
unsigned long address = vma_address(page, vma);
|
||||
if (address == -EFAULT)
|
||||
|
@ -1547,7 +1545,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
|
|||
struct address_space *mapping = page->mapping;
|
||||
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
|
||||
struct vm_area_struct *vma;
|
||||
struct prio_tree_iter iter;
|
||||
int ret = SWAP_AGAIN;
|
||||
unsigned long cursor;
|
||||
unsigned long max_nl_cursor = 0;
|
||||
|
@ -1555,7 +1552,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
|
|||
unsigned int mapcount;
|
||||
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
unsigned long address = vma_address(page, vma);
|
||||
if (address == -EFAULT)
|
||||
continue;
|
||||
|
@ -1576,7 +1573,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
|
|||
goto out;
|
||||
|
||||
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
|
||||
shared.vm_set.list) {
|
||||
shared.nonlinear) {
|
||||
cursor = (unsigned long) vma->vm_private_data;
|
||||
if (cursor > max_nl_cursor)
|
||||
max_nl_cursor = cursor;
|
||||
|
@ -1608,7 +1605,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
|
|||
|
||||
do {
|
||||
list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
|
||||
shared.vm_set.list) {
|
||||
shared.nonlinear) {
|
||||
cursor = (unsigned long) vma->vm_private_data;
|
||||
while ( cursor < max_nl_cursor &&
|
||||
cursor < vma->vm_end - vma->vm_start) {
|
||||
|
@ -1631,7 +1628,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
|
|||
* in locked vmas). Reset cursor on all unreserved nonlinear
|
||||
* vmas, now forgetting on which ones it had fallen behind.
|
||||
*/
|
||||
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
|
||||
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
|
||||
vma->vm_private_data = NULL;
|
||||
out:
|
||||
mutex_unlock(&mapping->i_mmap_mutex);
|
||||
|
@ -1748,13 +1745,12 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
|
|||
struct address_space *mapping = page->mapping;
|
||||
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
|
||||
struct vm_area_struct *vma;
|
||||
struct prio_tree_iter iter;
|
||||
int ret = SWAP_AGAIN;
|
||||
|
||||
if (!mapping)
|
||||
return ret;
|
||||
mutex_lock(&mapping->i_mmap_mutex);
|
||||
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
unsigned long address = vma_address(page, vma);
|
||||
if (address == -EFAULT)
|
||||
continue;
|
||||
|
|
Loading…
Reference in New Issue