IB/qib: RCU locking for MR validation

Profiling indicates that MR validation locking is expensive.  The MR
table is largely read-only and is a suitable candidate for RCU locking.

The patch uses RCU read-side locking during validation, eliminating one
lock/unlock pair per validation.

Reviewed-by: Mike Heinz <michael.william.heinz@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
commit 8aac4cc3a9 (parent 6a82649f21)
Mike Marciniszyn, 2012-06-27 18:33:19 -04:00; committed by Roland Dreier
4 changed files with 66 additions and 50 deletions
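
Note on the pattern: the reader side of this conversion follows the standard RCU
lookup idiom — dereference the table slot inside an RCU read-side critical
section, then pin the object with atomic_inc_not_zero() so an MR whose refcount
has already reached zero is never resurrected. The following is only an
illustrative sketch of that idiom; struct my_obj, my_table and my_lookup() are
hypothetical names standing in for qib_mregion, the lkey table and
qib_lkey_ok()/qib_rkey_ok(), not code from this patch.

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/rcupdate.h>

/* Hypothetical object mirroring the fields qib_mregion needs for RCU. */
struct my_obj {
	atomic_t refcount;
	struct completion comp;		/* signalled once the object is truly idle */
	struct rcu_head rcu;		/* used by call_rcu() at teardown */
};

static struct my_obj __rcu *my_table[256];	/* stand-in for rkt->table[] */

/* Reader: validate/look up a slot without taking the table spinlock. */
static struct my_obj *my_lookup(unsigned int idx)
{
	struct my_obj *obj;

	rcu_read_lock();
	obj = rcu_dereference(my_table[idx]);
	/* Keep the object only if its refcount has not already hit zero. */
	if (obj && !atomic_inc_not_zero(&obj->refcount))
		obj = NULL;
	rcu_read_unlock();
	return obj;	/* caller drops the reference with my_put() when done */
}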

drivers/infiniband/hw/qib/qib_keys.c

@@ -40,8 +40,7 @@
  *
  * Returns 0 if successful, otherwise returns -errno.
  *
- * Increments mr reference count and sets published
- * as required.
+ * Increments mr reference count as required.
  *
  * Sets the lkey field mr for non-dma regions.
  *
@@ -60,10 +59,12 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 	/* special case for dma_mr lkey == 0 */
 	if (dma_region) {
-		/* should the dma_mr be relative to the pd? */
-		if (!dev->dma_mr) {
+		struct qib_mregion *tmr;
+
+		tmr = rcu_dereference(dev->dma_mr);
+		if (!tmr) {
 			qib_get_mr(mr);
-			dev->dma_mr = mr;
+			rcu_assign_pointer(dev->dma_mr, mr);
 			mr->lkey_published = 1;
 		}
 		goto success;
@@ -93,7 +94,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 		rkt->gen++;
 	}
 	qib_get_mr(mr);
-	rkt->table[r] = mr;
+	rcu_assign_pointer(rkt->table[r], mr);
 	mr->lkey_published = 1;
 success:
 	spin_unlock_irqrestore(&rkt->lock, flags);
@@ -120,33 +121,30 @@ void qib_free_lkey(struct qib_mregion *mr)
 	spin_lock_irqsave(&rkt->lock, flags);
 	if (!mr->lkey_published)
 		goto out;
-	mr->lkey_published = 0;
-	spin_lock_irqsave(&dev->lk_table.lock, flags);
-	if (lkey == 0) {
-		if (dev->dma_mr && dev->dma_mr == mr) {
-			qib_put_mr(dev->dma_mr);
-			dev->dma_mr = NULL;
-		}
-	} else {
+	if (lkey == 0)
+		rcu_assign_pointer(dev->dma_mr, NULL);
+	else {
 		r = lkey >> (32 - ib_qib_lkey_table_size);
-		qib_put_mr(dev->dma_mr);
-		rkt->table[r] = NULL;
+		rcu_assign_pointer(rkt->table[r], NULL);
 	}
+	qib_put_mr(mr);
+	mr->lkey_published = 0;
 out:
-	spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+	spin_unlock_irqrestore(&rkt->lock, flags);
 }
 
 /**
  * qib_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
  * @isge: outgoing internal SGE
  * @sge: SGE to check
  * @acc: access flags
  *
  * Return 1 if valid and successful, otherwise returns 0.
  *
+ * increments the reference count upon success
+ *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
@@ -156,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	unsigned long flags;
 
 	/*
 	 * We use LKEY == zero for kernel virtual addresses
 	 * (see qib_get_dma_mr and qib_dma.c).
 	 */
-	spin_lock_irqsave(&rkt->lock, flags);
+	rcu_read_lock();
 	if (sge->lkey == 0) {
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
 			goto bail;
-		if (!dev->dma_mr)
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
 			goto bail;
-		qib_get_mr(dev->dma_mr);
-		spin_unlock_irqrestore(&rkt->lock, flags);
+		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+			goto bail;
+		rcu_read_unlock();
 
-		isge->mr = dev->dma_mr;
+		isge->mr = mr;
 		isge->vaddr = (void *) sge->addr;
 		isge->length = sge->length;
 		isge->sge_length = sge->length;
@@ -181,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 		isge->n = 0;
 		goto ok;
 	}
-	mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-		     mr->pd != &pd->ibpd))
+	mr = rcu_dereference(
+		rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
 		goto bail;
 
 	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
+	if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
+		     (mr->access_flags & acc) == 0))
 		goto bail;
-	qib_get_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+		goto bail;
+	rcu_read_unlock();
 
 	off += mr->offset;
 	if (mr->page_shift) {
@@ -228,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 ok:
 	return 1;
 bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	rcu_read_unlock();
 	return 0;
 }
 
 /**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
  * @len: length of data
  * @vaddr: virtual address to place data
  * @rkey: rkey to check
 * @acc: access flags
  *
  * Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
  */
 int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		u32 len, u64 vaddr, u32 rkey, int acc)
@@ -250,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	struct qib_mregion *mr;
 	unsigned n, m;
 	size_t off;
-	unsigned long flags;
 
 	/*
 	 * We use RKEY == zero for kernel virtual addresses
 	 * (see qib_get_dma_mr and qib_dma.c).
 	 */
-	spin_lock_irqsave(&rkt->lock, flags);
+	rcu_read_lock();
 	if (rkey == 0) {
 		struct qib_pd *pd = to_ipd(qp->ibqp.pd);
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
 			goto bail;
-		if (!dev->dma_mr)
+		mr = rcu_dereference(dev->dma_mr);
+		if (!mr)
 			goto bail;
-		qib_get_mr(dev->dma_mr);
-		spin_unlock_irqrestore(&rkt->lock, flags);
+		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+			goto bail;
+		rcu_read_unlock();
 
-		sge->mr = dev->dma_mr;
+		sge->mr = mr;
 		sge->vaddr = (void *) vaddr;
 		sge->length = len;
 		sge->sge_length = len;
@@ -277,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 		goto ok;
 	}
 
-	mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
-	if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+	mr = rcu_dereference(
+		rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
 
 	off = vaddr - mr->iova;
 	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
 		     (mr->access_flags & acc) == 0))
 		goto bail;
-	qib_get_mr(mr);
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+		goto bail;
+	rcu_read_unlock();
 
 	off += mr->offset;
 	if (mr->page_shift) {
@@ -322,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 ok:
 	return 1;
 bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
+	rcu_read_unlock();
 	return 0;
 }
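
The update side above (qib_alloc_lkey()/qib_free_lkey()) still serializes
writers on rkt->lock; what changes is only how the pointer is published and
retired: rcu_assign_pointer() orders the MR's initialization before the store
that makes it visible, and retiring a slot is a NULL assignment followed by
dropping the table's reference. A sketch of that publish/retire pairing,
continuing the hypothetical my_obj/my_table example above (my_publish(),
my_retire() and my_put() are illustrative names, not driver functions):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_table_lock);		/* stand-in for rkt->lock */

static void my_put(struct my_obj *obj);		/* teardown sketched further below */

/* Writer: publish a new object into an empty slot. */
static void my_publish(unsigned int idx, struct my_obj *obj)
{
	unsigned long flags;

	atomic_set(&obj->refcount, 1);		/* the table's reference */
	init_completion(&obj->comp);
	spin_lock_irqsave(&my_table_lock, flags);
	rcu_assign_pointer(my_table[idx], obj);	/* orders init before publish */
	spin_unlock_irqrestore(&my_table_lock, flags);
}

/* Writer: retire a slot; readers already inside rcu_read_lock() may still
 * hold the old pointer, so the object itself is not freed here. */
static void my_retire(unsigned int idx)
{
	struct my_obj *obj;
	unsigned long flags;

	spin_lock_irqsave(&my_table_lock, flags);
	obj = rcu_dereference_protected(my_table[idx],
					lockdep_is_held(&my_table_lock));
	rcu_assign_pointer(my_table[idx], NULL);
	spin_unlock_irqrestore(&my_table_lock, flags);
	if (obj)
		my_put(obj);	/* freeing is deferred past a grace period */
}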

drivers/infiniband/hw/qib/qib_mr.c

@@ -527,3 +527,10 @@ int qib_dealloc_fmr(struct ib_fmr *ibfmr)
 out:
 	return ret;
 }
+
+void mr_rcu_callback(struct rcu_head *list)
+{
+	struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
+
+	complete(&mr->comp);
+}
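
mr_rcu_callback() is the piece that lets qib_put_mr() (see the qib_verbs.h hunk
below) hand the final wakeup to the RCU core: when the last reference is
dropped, call_rcu() queues the callback, and complete() only runs after a grace
period, i.e. after every rcu_read_lock() section that could still hold the
pointer has finished. A sketch of that teardown, continuing the hypothetical
my_obj example (my_obj_rcu_cb(), my_put() and my_destroy() are illustrative
names; the driver's deregister path is assumed, not shown here, to wait on the
completion):

#include <linux/kernel.h>
#include <linux/slab.h>

static void my_obj_rcu_cb(struct rcu_head *head)
{
	struct my_obj *obj = container_of(head, struct my_obj, rcu);

	complete(&obj->comp);	/* runs only after the RCU grace period */
}

static void my_put(struct my_obj *obj)
{
	/* Safe from atomic context: the grace-period wait is deferred. */
	if (atomic_dec_and_test(&obj->refcount))
		call_rcu(&obj->rcu, my_obj_rcu_cb);
}

/* Deregister path: drop the last reference, then sleep until every
 * RCU reader is done before actually freeing the memory. */
static void my_destroy(struct my_obj *obj)
{
	my_put(obj);
	wait_for_completion(&obj->comp);
	kfree(obj);
}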

drivers/infiniband/hw/qib/qib_verbs.c

@@ -2066,7 +2066,9 @@ int qib_register_ib_device(struct qib_devdata *dd)
 		ret = -ENOMEM;
 		goto err_lk;
 	}
-	memset(dev->lk_table.table, 0, lk_tab_size);
+	RCU_INIT_POINTER(dev->dma_mr, NULL);
+	for (i = 0; i < dev->lk_table.max; i++)
+		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
 	INIT_LIST_HEAD(&dev->pending_mmaps);
 	spin_lock_init(&dev->pending_lock);
 	dev->mmap_offset = PAGE_SIZE;
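
RCU_INIT_POINTER() replaces the old memset() because the table slots and
dev->dma_mr are now accessed through RCU: it initializes an RCU-protected
pointer without the memory barrier implied by rcu_assign_pointer(), which is
sufficient here because the stored value is NULL and no readers can reach the
table yet. A minimal sketch, again using the hypothetical my_table:

#include <linux/kernel.h>

/* One-time setup: no readers exist yet and the value is NULL, so the
 * cheaper RCU_INIT_POINTER() is enough (no publish barrier needed). */
static void my_table_init(void)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(my_table); i++)
		RCU_INIT_POINTER(my_table[i], NULL);
}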

drivers/infiniband/hw/qib/qib_verbs.h

@@ -303,8 +303,9 @@ struct qib_mregion {
 	u32 max_segs;           /* number of qib_segs in all the arrays */
 	u32 mapsz;              /* size of the map array */
 	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
 	u8  lkey_published;     /* in global table */
 	struct completion comp; /* complete when refcount goes to zero */
+	struct rcu_head list;
 	atomic_t refcount;
 	struct qib_segarray *map[0];    /* the segments */
 };
@@ -1022,10 +1023,12 @@ static inline void qib_get_mr(struct qib_mregion *mr)
 	atomic_inc(&mr->refcount);
 }
 
+void mr_rcu_callback(struct rcu_head *list);
+
 static inline void qib_put_mr(struct qib_mregion *mr)
 {
 	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		complete(&mr->comp);
+		call_rcu(&mr->list, mr_rcu_callback);
 }
 
 static inline void qib_put_ss(struct qib_sge_state *ss)
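
Putting the pieces together, a reader's lifetime under this scheme looks like
the sketch below: take the reference during validation, use the MR with the RCU
read lock already dropped (the elevated refcount is what keeps it alive), and
release it with the put helper, which is safe even from IRQ-disabled context
because it only queues an RCU callback. my_use() is an illustrative caller
built from the hypothetical helpers sketched earlier, not a function in the
driver:

#include <linux/errno.h>

static int my_use(unsigned int idx)
{
	struct my_obj *obj = my_lookup(idx);

	if (!obj)
		return -EINVAL;
	/*
	 * ... use the object; rcu_read_lock() is no longer held here,
	 * the reference taken in my_lookup() keeps it from being freed ...
	 */
	my_put(obj);
	return 0;
}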