mirror of https://gitee.com/openkylin/linux.git
IB/qib: RCU locking for MR validation

Profiling indicates that MR validation locking is expensive. The MR table is
largely read-only and is a suitable candidate for RCU locking.

The patch uses RCU locking during validation to eliminate one lock/unlock
during that validation.

Reviewed-by: Mike Heinz <michael.william.heinz@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>

parent 6a82649f21
commit 8aac4cc3a9
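At its core the patch converts key validation from a per-call spin_lock/spin_unlock on the lkey table into an RCU read-side critical section plus a conditional reference grab. As a minimal sketch of that read-side pattern (the foo_* names are hypothetical, for illustration only, not code from this patch):

    #include <linux/rcupdate.h>
    #include <linux/atomic.h>

    struct foo {
            atomic_t refcount;
            /* ... payload ... */
    };

    #define FOO_TABLE_SIZE 256
    static struct foo __rcu *foo_table[FOO_TABLE_SIZE];

    /* Lock-free lookup: find an entry and take a reference on it. */
    static struct foo *foo_lookup_get(unsigned int idx)
    {
            struct foo *p;

            rcu_read_lock();
            p = rcu_dereference(foo_table[idx]);
            /*
             * The entry may be concurrently unpublished and queued for
             * freeing; claim it only while its refcount is non-zero.
             */
            if (p && !atomic_inc_not_zero(&p->refcount))
                    p = NULL;
            rcu_read_unlock();
            return p;       /* caller owns a reference, or NULL */
    }

Updaters still serialize among themselves on the existing spinlock; only the hot validation (read) path goes lock-free.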
drivers/infiniband/hw/qib/qib_keys.c

@@ -40,8 +40,7 @@
  *
  * Returns 0 if successful, otherwise returns -errno.
  *
- * Increments mr reference count and sets published
- * as required.
+ * Increments mr reference count as required.
  *
  * Sets the lkey field mr for non-dma regions.
  *
@@ -60,10 +59,12 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 
         /* special case for dma_mr lkey == 0 */
         if (dma_region) {
-                /* should the dma_mr be relative to the pd? */
-                if (!dev->dma_mr) {
+                struct qib_mregion *tmr;
+
+                tmr = rcu_dereference(dev->dma_mr);
+                if (!tmr) {
                         qib_get_mr(mr);
-                        dev->dma_mr = mr;
+                        rcu_assign_pointer(dev->dma_mr, mr);
                         mr->lkey_published = 1;
                 }
                 goto success;
@@ -93,7 +94,7 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
                 rkt->gen++;
         }
         qib_get_mr(mr);
-        rkt->table[r] = mr;
+        rcu_assign_pointer(rkt->table[r], mr);
         mr->lkey_published = 1;
 success:
         spin_unlock_irqrestore(&rkt->lock, flags);
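Both converted stores publish an entry to lock-free readers, so the plain assignments become rcu_assign_pointer(), whose write barrier guarantees that a reader using rcu_dereference() sees a fully initialized qib_mregion. Continuing the hypothetical foo sketch, the publish side keeps its spinlock, which still orders writer against writer:

    static DEFINE_SPINLOCK(foo_lock);

    static void foo_publish(unsigned int idx, struct foo *p)
    {
            unsigned long flags;

            spin_lock_irqsave(&foo_lock, flags);
            atomic_inc(&p->refcount);       /* reference held by the table */
            /*
             * rcu_assign_pointer() orders all prior initialization of *p
             * before the pointer store, pairing with the reader's
             * rcu_dereference().
             */
            rcu_assign_pointer(foo_table[idx], p);
            spin_unlock_irqrestore(&foo_lock, flags);
    }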
@@ -120,33 +121,30 @@ void qib_free_lkey(struct qib_mregion *mr)
         spin_lock_irqsave(&rkt->lock, flags);
         if (!mr->lkey_published)
                 goto out;
-        mr->lkey_published = 0;
-
-
-        spin_lock_irqsave(&dev->lk_table.lock, flags);
-        if (lkey == 0) {
-                if (dev->dma_mr && dev->dma_mr == mr) {
-                        qib_put_mr(dev->dma_mr);
-                        dev->dma_mr = NULL;
-                }
-        } else {
+        if (lkey == 0)
+                rcu_assign_pointer(dev->dma_mr, NULL);
+        else {
                 r = lkey >> (32 - ib_qib_lkey_table_size);
-                qib_put_mr(dev->dma_mr);
-                rkt->table[r] = NULL;
+                rcu_assign_pointer(rkt->table[r], NULL);
         }
+        qib_put_mr(mr);
+        mr->lkey_published = 0;
 out:
-        spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+        spin_unlock_irqrestore(&rkt->lock, flags);
 }
 
 /**
  * qib_lkey_ok - check IB SGE for validity and initialize
  * @rkt: table containing lkey to check SGE against
+ * @pd: protection domain
  * @isge: outgoing internal SGE
  * @sge: SGE to check
  * @acc: access flags
  *
  * Return 1 if valid and successful, otherwise returns 0.
  *
+ * increments the reference count upon success
+ *
  * Check the IB SGE for validity and initialize our internal version
  * of it.
  */
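The free path now unpublishes the pointer under the table lock and drops the table's reference, rather than freeing anything in place; readers that already fetched the old pointer inside rcu_read_lock() stay safe because reclamation is deferred by a grace period (see the call_rcu() change at the end of the diff). The removed lines also took &rkt->lock and then &dev->lk_table.lock, which appear to be the same lock, and put a reference on dev->dma_mr even in the table branch; the rewrite collapses this to one lock and one qib_put_mr(mr). Continuing the sketch (foo_put() is defined after the last hunk):

    static void foo_put(struct foo *p);     /* sketched after the last hunk */

    static void foo_unpublish(unsigned int idx)
    {
            struct foo *p;
            unsigned long flags;

            spin_lock_irqsave(&foo_lock, flags);
            p = rcu_dereference_protected(foo_table[idx],
                                          lockdep_is_held(&foo_lock));
            rcu_assign_pointer(foo_table[idx], NULL);
            spin_unlock_irqrestore(&foo_lock, flags);

            if (p)
                    foo_put(p);     /* drop the table's reference */
    }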
@@ -156,24 +154,25 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
         struct qib_mregion *mr;
         unsigned n, m;
         size_t off;
-        unsigned long flags;
 
         /*
          * We use LKEY == zero for kernel virtual addresses
          * (see qib_get_dma_mr and qib_dma.c).
          */
-        spin_lock_irqsave(&rkt->lock, flags);
+        rcu_read_lock();
         if (sge->lkey == 0) {
                 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
                 if (pd->user)
                         goto bail;
-                if (!dev->dma_mr)
+                mr = rcu_dereference(dev->dma_mr);
+                if (!mr)
                         goto bail;
-                qib_get_mr(dev->dma_mr);
-                spin_unlock_irqrestore(&rkt->lock, flags);
+                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                        goto bail;
+                rcu_read_unlock();
 
-                isge->mr = dev->dma_mr;
+                isge->mr = mr;
                 isge->vaddr = (void *) sge->addr;
                 isge->length = sge->length;
                 isge->sge_length = sge->length;
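The read side swaps qib_get_mr() (a plain atomic_inc()) for atomic_inc_not_zero(). The difference only matters under RCU, where a reader can still hold a pointer to an object whose final reference has already been dropped; an illustrative interleaving (not from the patch):

    /*
     *     reader                           writer
     *     ------                           ------
     *     rcu_read_lock();
     *     mr = rcu_dereference(...);
     *                                      rcu_assign_pointer(..., NULL);
     *                                      qib_put_mr(mr);
     *                                        refcount hits 0, free queued
     *     atomic_inc(&mr->refcount);       resurrects a dying object:
     *                                        use-after-free
     *
     * atomic_inc_not_zero() refuses to take a reference once the count
     * has reached zero, so the reader fails validation (goto bail)
     * instead of touching soon-to-be-freed memory.
     */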
@@ -181,18 +180,18 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
                 isge->n = 0;
                 goto ok;
         }
-        mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
-        if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
-                     mr->pd != &pd->ibpd))
+        mr = rcu_dereference(
+                rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
+        if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
                 goto bail;
 
         off = sge->addr - mr->user_base;
-        if (unlikely(sge->addr < mr->user_base ||
-                     off + sge->length > mr->length ||
-                     (mr->access_flags & acc) != acc))
+        if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
+                     (mr->access_flags & acc) == 0))
                 goto bail;
-        qib_get_mr(mr);
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                goto bail;
+        rcu_read_unlock();
 
         off += mr->offset;
         if (mr->page_shift) {
@@ -228,20 +227,22 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
 ok:
         return 1;
 bail:
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        rcu_read_unlock();
         return 0;
 }
 
 /**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
- * @dev: infiniband device
- * @ss: SGE state
+ * @qp: qp for validation
+ * @sge: SGE state
  * @len: length of data
  * @vaddr: virtual address to place data
  * @rkey: rkey to check
  * @acc: access flags
  *
  * Return 1 if successful, otherwise 0.
+ *
+ * increments the reference count upon success
  */
 int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
                 u32 len, u64 vaddr, u32 rkey, int acc)
@@ -250,25 +251,26 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
         struct qib_mregion *mr;
         unsigned n, m;
         size_t off;
-        unsigned long flags;
 
         /*
          * We use RKEY == zero for kernel virtual addresses
         * (see qib_get_dma_mr and qib_dma.c).
          */
-        spin_lock_irqsave(&rkt->lock, flags);
+        rcu_read_lock();
         if (rkey == 0) {
                 struct qib_pd *pd = to_ipd(qp->ibqp.pd);
                 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
                 if (pd->user)
                         goto bail;
-                if (!dev->dma_mr)
+                mr = rcu_dereference(dev->dma_mr);
+                if (!mr)
                         goto bail;
-                qib_get_mr(dev->dma_mr);
-                spin_unlock_irqrestore(&rkt->lock, flags);
+                if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                        goto bail;
+                rcu_read_unlock();
 
-                sge->mr = dev->dma_mr;
+                sge->mr = mr;
                 sge->vaddr = (void *) vaddr;
                 sge->length = len;
                 sge->sge_length = len;
@@ -277,16 +279,18 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
                 goto ok;
         }
 
-        mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
-        if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
+        mr = rcu_dereference(
+                rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+        if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                 goto bail;
 
         off = vaddr - mr->iova;
         if (unlikely(vaddr < mr->iova || off + len > mr->length ||
                      (mr->access_flags & acc) == 0))
                 goto bail;
-        qib_get_mr(mr);
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
+                goto bail;
+        rcu_read_unlock();
 
         off += mr->offset;
         if (mr->page_shift) {
@@ -322,7 +326,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 ok:
         return 1;
 bail:
-        spin_unlock_irqrestore(&rkt->lock, flags);
+        rcu_read_unlock();
         return 0;
 }
drivers/infiniband/hw/qib/qib_mr.c

@@ -527,3 +527,10 @@ int qib_dealloc_fmr(struct ib_fmr *ibfmr)
 out:
         return ret;
 }
+
+void mr_rcu_callback(struct rcu_head *list)
+{
+        struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
+
+        complete(&mr->comp);
+}
drivers/infiniband/hw/qib/qib_verbs.c

@@ -2066,7 +2066,9 @@ int qib_register_ib_device(struct qib_devdata *dd)
                 ret = -ENOMEM;
                 goto err_lk;
         }
-        memset(dev->lk_table.table, 0, lk_tab_size);
+        RCU_INIT_POINTER(dev->dma_mr, NULL);
+        for (i = 0; i < dev->lk_table.max; i++)
+                RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
         INIT_LIST_HEAD(&dev->pending_mmaps);
         spin_lock_init(&dev->pending_lock);
         dev->mmap_offset = PAGE_SIZE;
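Initialization switches from memset() to RCU_INIT_POINTER(): at registration time no readers exist yet, so the publish barrier of rcu_assign_pointer() is unnecessary, and the macro is a plain store that also satisfies sparse's __rcu annotation checking. The same idea on the hypothetical table:

    #include <linux/kernel.h>       /* ARRAY_SIZE */

    static void foo_table_init(void)
    {
            unsigned int i;

            /* No concurrent readers can exist yet; a plain store is enough. */
            for (i = 0; i < ARRAY_SIZE(foo_table); i++)
                    RCU_INIT_POINTER(foo_table[i], NULL);
    }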
drivers/infiniband/hw/qib/qib_verbs.h

@@ -303,8 +303,9 @@ struct qib_mregion {
         u32 max_segs;           /* number of qib_segs in all the arrays */
         u32 mapsz;              /* size of the map array */
         u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
         u8  lkey_published;     /* in global table */
         struct completion comp; /* complete when refcount goes to zero */
+        struct rcu_head list;
         atomic_t refcount;
         struct qib_segarray *map[0];    /* the segments */
 };
@@ -1022,10 +1023,12 @@ static inline void qib_get_mr(struct qib_mregion *mr)
         atomic_inc(&mr->refcount);
 }
 
+void mr_rcu_callback(struct rcu_head *list);
+
 static inline void qib_put_mr(struct qib_mregion *mr)
 {
         if (unlikely(atomic_dec_and_test(&mr->refcount)))
-                complete(&mr->comp);
+                call_rcu(&mr->list, mr_rcu_callback);
 }
 
 static inline void qib_put_ss(struct qib_sge_state *ss)
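The last hunks close the lifecycle: the final qib_put_mr() no longer completes &mr->comp directly but queues mr_rcu_callback() with call_rcu(), so the completion fires only after a grace period, once no rcu_dereference() reader can still see the region. A synchronous teardown built on this pattern might look like the sketch below; it assumes struct foo gained a struct rcu_head rcu and a struct completion comp (initialized with init_completion() at allocation), mirroring qib_mregion:

    #include <linux/completion.h>
    #include <linux/slab.h>

    static void foo_rcu_callback(struct rcu_head *head)
    {
            struct foo *p = container_of(head, struct foo, rcu);

            /* A grace period has elapsed: no RCU reader still sees p. */
            complete(&p->comp);
    }

    static void foo_put(struct foo *p)
    {
            if (atomic_dec_and_test(&p->refcount))
                    call_rcu(&p->rcu, foo_rcu_callback);
    }

    static void foo_destroy(struct foo *p, unsigned int idx)
    {
            foo_unpublish(idx);             /* no new lookups succeed */
            foo_put(p);                     /* drop the caller's reference */
            wait_for_completion(&p->comp);  /* last put + grace period */
            kfree(p);
    }

The completion-based deregistration itself comes from the parent commit (6a82649f21); this patch only inserts the grace period in front of it.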