[PATCH] NUMA slab locking fixes: move color_next to l3

colour_next is used as an index to apply a colouring offset to a new slab in the
cache (offset = colour_off * colour_next).  With the NUMA-aware slab allocator,
it makes sense to colour slabs added on the same node sequentially with
colour_next.
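
For illustration only, a minimal C sketch of that arithmetic (hypothetical helper
and parameter names, not the kernel code): the offset handed to the next slab is
the running index scaled by the colour unit, wrapping once the cache's colour
range is exhausted.

#include <stddef.h>

/* Illustrative: return the placement offset for the next slab and advance
 * the colouring index, wrapping after 'colour' distinct values. */
static size_t slab_colour_offset(unsigned int *colour_next,
				 size_t colour, unsigned int colour_off)
{
	size_t offset = (size_t)*colour_next * colour_off;

	if (++(*colour_next) >= colour)
		*colour_next = 0;
	return offset;
}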

This patch makes the colouring index "colour_next" per-node by moving it from
struct kmem_cache to struct kmem_list3.

This also helps simplify locking for CPU up and down paths.
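
As a locking sketch (userspace stand-in only: a pthread mutex plays the role of
l3->list_lock, and the struct and function names below are made up), the point
is that the colouring index is bumped under the node's own lock, so a cache-wide
lock no longer needs to be taken on this path:

#include <pthread.h>
#include <stddef.h>

/* Illustrative per-node state: colour_next sits next to the node's lock. */
struct node_state {
	pthread_mutex_t list_lock;	/* stand-in for l3->list_lock */
	unsigned int colour_next;
};

/* Advance the node's colouring index under its own lock and return the
 * byte offset for the next slab; the multiply happens outside the lock. */
static size_t grow_pick_colour(struct node_state *node,
			       size_t colour, unsigned int colour_off)
{
	unsigned int idx;

	pthread_mutex_lock(&node->list_lock);
	idx = node->colour_next++;
	if (node->colour_next >= colour)
		node->colour_next = 0;
	pthread_mutex_unlock(&node->list_lock);

	return (size_t)idx * colour_off;
}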

Signed-off-by: Alok N Kataria <alokk@calsoftinc.com>
Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Shai Fultheim <shai@scalex86.org>
Cc: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
commit 2e1217cf96 (parent 64b4a954b0)
Author:    Ravikiran G Thirumalai, 2006-02-04 23:27:56 -08:00
Committer: Linus Torvalds

 1 file changed, 11 insertions(+), 11 deletions(-)

--- a/mm/slab.c
+++ b/mm/slab.c
@@ -294,6 +294,7 @@ struct kmem_list3 {
 	unsigned long next_reap;
 	int free_touched;
 	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
 	spinlock_t list_lock;
 	struct array_cache *shared;	/* shared per node */
 	struct array_cache **alien;	/* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	INIT_LIST_HEAD(&parent->slabs_free);
 	parent->shared = NULL;
 	parent->alien = NULL;
+	parent->colour_next = 0;
 	spin_lock_init(&parent->list_lock);
 	parent->free_objects = 0;
 	parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
 
 	size_t colour;			/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
-	unsigned int colour_next;	/* cache colouring */
 	struct kmem_cache *slabp_cache;
 	unsigned int slab_size;
 	unsigned int dflags;		/* dynamic flags */
@@ -1119,7 +1120,6 @@ void __init kmem_cache_init(void)
 		BUG();
 
 	cache_cache.colour = left_over / cache_cache.colour_off;
-	cache_cache.colour_next = 0;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
 				      sizeof(struct slab), cache_line_size());
 
@@ -2324,18 +2324,19 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		 */
 		ctor_flags |= SLAB_CTOR_ATOMIC;
 
-	/* About to mess with non-constant members - lock. */
+	/* Take the l3 list lock to change the colour_next on this node */
 	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	l3 = cachep->nodelists[nodeid];
+	spin_lock(&l3->list_lock);
 
 	/* Get colour for the slab, and cal the next value. */
-	offset = cachep->colour_next;
-	cachep->colour_next++;
-	if (cachep->colour_next >= cachep->colour)
-		cachep->colour_next = 0;
-	offset *= cachep->colour_off;
-	spin_unlock(&cachep->spinlock);
+	offset = l3->colour_next;
+	l3->colour_next++;
+	if (l3->colour_next >= cachep->colour)
+		l3->colour_next = 0;
+	spin_unlock(&l3->list_lock);
 
+	offset *= cachep->colour_off;
 	check_irq_off();
 	if (local_flags & __GFP_WAIT)
 		local_irq_enable();
@@ -2367,7 +2368,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
-	l3 = cachep->nodelists[nodeid];
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */