mirror of https://gitee.com/openkylin/linux.git
{net,IB}/mlx5: Refactor page fault handling
* Update page fault event according to last specification. * Separate code path for page fault EQ, completion EQ and async EQ. * Move page fault handling work queue from mlx5_ib static variable into mlx5_core page fault EQ. * Allocate memory to store ODP event dynamically as the events arrive, since in atomic context - use mempool. * Make mlx5_ib page fault handler run in process context. Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com> Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
223cdc7242
commit
d9aaed8387
|
@ -3319,6 +3319,9 @@ static struct mlx5_interface mlx5_ib_interface = {
|
|||
.add = mlx5_ib_add,
|
||||
.remove = mlx5_ib_remove,
|
||||
.event = mlx5_ib_event,
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
.pfault = mlx5_ib_pfault,
|
||||
#endif
|
||||
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
|
||||
};
|
||||
|
||||
|
@ -3329,25 +3332,14 @@ static int __init mlx5_ib_init(void)
|
|||
if (deprecated_prof_sel != 2)
|
||||
pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
|
||||
|
||||
err = mlx5_ib_odp_init();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = mlx5_register_interface(&mlx5_ib_interface);
|
||||
if (err)
|
||||
goto clean_odp;
|
||||
|
||||
return err;
|
||||
|
||||
clean_odp:
|
||||
mlx5_ib_odp_cleanup();
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __exit mlx5_ib_cleanup(void)
|
||||
{
|
||||
mlx5_unregister_interface(&mlx5_ib_interface);
|
||||
mlx5_ib_odp_cleanup();
|
||||
}
|
||||
|
||||
module_init(mlx5_ib_init);
|
||||
|
|
|
@ -277,29 +277,6 @@ struct mlx5_ib_rwq_ind_table {
|
|||
u32 rqtn;
|
||||
};
|
||||
|
||||
/*
|
||||
* Connect-IB can trigger up to four concurrent pagefaults
|
||||
* per-QP.
|
||||
*/
|
||||
enum mlx5_ib_pagefault_context {
|
||||
MLX5_IB_PAGEFAULT_RESPONDER_READ,
|
||||
MLX5_IB_PAGEFAULT_REQUESTOR_READ,
|
||||
MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
|
||||
MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
|
||||
MLX5_IB_PAGEFAULT_CONTEXTS
|
||||
};
|
||||
|
||||
static inline enum mlx5_ib_pagefault_context
|
||||
mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
|
||||
{
|
||||
return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
|
||||
}
|
||||
|
||||
struct mlx5_ib_pfault {
|
||||
struct work_struct work;
|
||||
struct mlx5_pagefault mpfault;
|
||||
};
|
||||
|
||||
struct mlx5_ib_ubuffer {
|
||||
struct ib_umem *umem;
|
||||
int buf_size;
|
||||
|
@ -385,20 +362,6 @@ struct mlx5_ib_qp {
|
|||
/* Store signature errors */
|
||||
bool signature_en;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
/*
|
||||
* A flag that is true for QP's that are in a state that doesn't
|
||||
* allow page faults, and shouldn't schedule any more faults.
|
||||
*/
|
||||
int disable_page_faults;
|
||||
/*
|
||||
* The disable_page_faults_lock protects a QP's disable_page_faults
|
||||
* field, allowing for a thread to atomically check whether the QP
|
||||
* allows page faults, and if so schedule a page fault.
|
||||
*/
|
||||
spinlock_t disable_page_faults_lock;
|
||||
struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
|
||||
#endif
|
||||
struct list_head qps_list;
|
||||
struct list_head cq_recv_list;
|
||||
struct list_head cq_send_list;
|
||||
|
@ -869,18 +832,13 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
|
|||
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
extern struct workqueue_struct *mlx5_ib_page_fault_wq;
|
||||
|
||||
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
|
||||
void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
|
||||
struct mlx5_ib_pfault *pfault);
|
||||
void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
|
||||
void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
|
||||
struct mlx5_pagefault *pfault);
|
||||
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
|
||||
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
|
||||
int __init mlx5_ib_odp_init(void);
|
||||
void mlx5_ib_odp_cleanup(void);
|
||||
void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
|
||||
void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
|
||||
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
|
||||
unsigned long end);
|
||||
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
|
||||
|
@ -889,13 +847,10 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
|
|||
return;
|
||||
}
|
||||
|
||||
static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
|
||||
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
|
||||
static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
|
||||
static inline int mlx5_ib_odp_init(void) { return 0; }
|
||||
static inline void mlx5_ib_odp_cleanup(void) {}
|
||||
static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
|
||||
static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
|
||||
|
||||
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
|
||||
|
||||
|
|
|
@ -41,8 +41,6 @@
|
|||
* a pagefault. */
|
||||
#define MMU_NOTIFIER_TIMEOUT 1000
|
||||
|
||||
struct workqueue_struct *mlx5_ib_page_fault_wq;
|
||||
|
||||
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
|
@ -162,38 +160,38 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
|
|||
return container_of(mmkey, struct mlx5_ib_mr, mmkey);
|
||||
}
|
||||
|
||||
static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
|
||||
struct mlx5_ib_pfault *pfault,
|
||||
static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_pagefault *pfault,
|
||||
int error)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
|
||||
u32 qpn = qp->trans_qp.base.mqp.qpn;
|
||||
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
|
||||
pfault->wqe.wq_num : pfault->token;
|
||||
int ret = mlx5_core_page_fault_resume(dev->mdev,
|
||||
qpn,
|
||||
pfault->mpfault.flags,
|
||||
pfault->token,
|
||||
wq_num,
|
||||
pfault->type,
|
||||
error);
|
||||
if (ret)
|
||||
pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
|
||||
mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
|
||||
wq_num);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle a single data segment in a page-fault WQE.
|
||||
* Handle a single data segment in a page-fault WQE or RDMA region.
|
||||
*
|
||||
* Returns number of pages retrieved on success. The caller will continue to
|
||||
* Returns number of pages retrieved on success. The caller may continue to
|
||||
* the next data segment.
|
||||
* Can return the following error codes:
|
||||
* -EAGAIN to designate a temporary error. The caller will abort handling the
|
||||
* page fault and resolve it.
|
||||
* -EFAULT when there's an error mapping the requested pages. The caller will
|
||||
* abort the page fault handling and possibly move the QP to an error state.
|
||||
* On other errors the QP should also be closed with an error.
|
||||
* abort the page fault handling.
|
||||
*/
|
||||
static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
|
||||
struct mlx5_ib_pfault *pfault,
|
||||
static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
|
||||
u32 key, u64 io_virt, size_t bcnt,
|
||||
u32 *bytes_committed,
|
||||
u32 *bytes_mapped)
|
||||
{
|
||||
struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
|
||||
int srcu_key;
|
||||
unsigned int current_seq;
|
||||
u64 start_idx;
|
||||
|
@ -219,12 +217,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
|
|||
key);
|
||||
if (bytes_mapped)
|
||||
*bytes_mapped +=
|
||||
(bcnt - pfault->mpfault.bytes_committed);
|
||||
goto srcu_unlock;
|
||||
}
|
||||
if (mr->ibmr.pd != qp->ibqp.pd) {
|
||||
pr_err("Page-fault with different PDs for QP and MR.\n");
|
||||
ret = -EFAULT;
|
||||
(bcnt - *bytes_committed);
|
||||
goto srcu_unlock;
|
||||
}
|
||||
|
||||
|
@ -240,8 +233,8 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
|
|||
* in all iterations (in iteration 2 and above,
|
||||
* bytes_committed == 0).
|
||||
*/
|
||||
io_virt += pfault->mpfault.bytes_committed;
|
||||
bcnt -= pfault->mpfault.bytes_committed;
|
||||
io_virt += *bytes_committed;
|
||||
bcnt -= *bytes_committed;
|
||||
|
||||
start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
|
||||
|
||||
|
@ -300,7 +293,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
|
|||
}
|
||||
}
|
||||
srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
|
||||
pfault->mpfault.bytes_committed = 0;
|
||||
*bytes_committed = 0;
|
||||
return ret ? ret : npages;
|
||||
}
|
||||
|
||||
|
@ -322,8 +315,9 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
|
|||
* Returns the number of pages loaded if positive, zero for an empty WQE, or a
|
||||
* negative error code.
|
||||
*/
|
||||
static int pagefault_data_segments(struct mlx5_ib_qp *qp,
|
||||
struct mlx5_ib_pfault *pfault, void *wqe,
|
||||
static int pagefault_data_segments(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_pagefault *pfault,
|
||||
struct mlx5_ib_qp *qp, void *wqe,
|
||||
void *wqe_end, u32 *bytes_mapped,
|
||||
u32 *total_wqe_bytes, int receive_queue)
|
||||
{
|
||||
|
@ -367,22 +361,23 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
|
|||
|
||||
if (!inline_segment && total_wqe_bytes) {
|
||||
*total_wqe_bytes += bcnt - min_t(size_t, bcnt,
|
||||
pfault->mpfault.bytes_committed);
|
||||
pfault->bytes_committed);
|
||||
}
|
||||
|
||||
/* A zero length data segment designates a length of 2GB. */
|
||||
if (bcnt == 0)
|
||||
bcnt = 1U << 31;
|
||||
|
||||
if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
|
||||
pfault->mpfault.bytes_committed -=
|
||||
if (inline_segment || bcnt <= pfault->bytes_committed) {
|
||||
pfault->bytes_committed -=
|
||||
min_t(size_t, bcnt,
|
||||
pfault->mpfault.bytes_committed);
|
||||
pfault->bytes_committed);
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
|
||||
bcnt, bytes_mapped);
|
||||
ret = pagefault_single_data_segment(dev, key, io_virt, bcnt,
|
||||
&pfault->bytes_committed,
|
||||
bytes_mapped);
|
||||
if (ret < 0)
|
||||
break;
|
||||
npages += ret;
|
||||
|
@ -396,12 +391,11 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
|
|||
* scatter-gather list, and set wqe_end to the end of the WQE.
|
||||
*/
|
||||
static int mlx5_ib_mr_initiator_pfault_handler(
|
||||
struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
|
||||
void **wqe, void **wqe_end, int wqe_length)
|
||||
struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
|
||||
struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
|
||||
struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
|
||||
u16 wqe_index = pfault->mpfault.wqe.wqe_index;
|
||||
u16 wqe_index = pfault->wqe.wqe_index;
|
||||
unsigned ds, opcode;
|
||||
#if defined(DEBUG)
|
||||
u32 ctrl_wqe_index, ctrl_qpn;
|
||||
|
@ -502,10 +496,9 @@ static int mlx5_ib_mr_initiator_pfault_handler(
|
|||
* scatter-gather list, and set wqe_end to the end of the WQE.
|
||||
*/
|
||||
static int mlx5_ib_mr_responder_pfault_handler(
|
||||
struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
|
||||
void **wqe, void **wqe_end, int wqe_length)
|
||||
struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
|
||||
struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
|
||||
struct mlx5_ib_wq *wq = &qp->rq;
|
||||
int wqe_size = 1 << wq->wqe_shift;
|
||||
|
||||
|
@ -542,70 +535,83 @@ static int mlx5_ib_mr_responder_pfault_handler(
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
|
||||
struct mlx5_ib_pfault *pfault)
|
||||
static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
|
||||
u32 wq_num)
|
||||
{
|
||||
struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
|
||||
|
||||
if (!mqp) {
|
||||
mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return to_mibqp(mqp);
|
||||
}
|
||||
|
||||
static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_pagefault *pfault)
|
||||
{
|
||||
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
|
||||
int ret;
|
||||
void *wqe, *wqe_end;
|
||||
u32 bytes_mapped, total_wqe_bytes;
|
||||
char *buffer = NULL;
|
||||
int resume_with_error = 0;
|
||||
u16 wqe_index = pfault->mpfault.wqe.wqe_index;
|
||||
int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
|
||||
u32 qpn = qp->trans_qp.base.mqp.qpn;
|
||||
int resume_with_error = 1;
|
||||
u16 wqe_index = pfault->wqe.wqe_index;
|
||||
int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
|
||||
struct mlx5_ib_qp *qp;
|
||||
|
||||
buffer = (char *)__get_free_page(GFP_KERNEL);
|
||||
if (!buffer) {
|
||||
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
|
||||
resume_with_error = 1;
|
||||
goto resolve_page_fault;
|
||||
}
|
||||
|
||||
qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
|
||||
if (!qp)
|
||||
goto resolve_page_fault;
|
||||
|
||||
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
|
||||
PAGE_SIZE, &qp->trans_qp.base);
|
||||
if (ret < 0) {
|
||||
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
|
||||
-ret, wqe_index, qpn);
|
||||
resume_with_error = 1;
|
||||
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n",
|
||||
ret, wqe_index, pfault->token);
|
||||
goto resolve_page_fault;
|
||||
}
|
||||
|
||||
wqe = buffer;
|
||||
if (requestor)
|
||||
ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
|
||||
ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe,
|
||||
&wqe_end, ret);
|
||||
else
|
||||
ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
|
||||
ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe,
|
||||
&wqe_end, ret);
|
||||
if (ret < 0) {
|
||||
resume_with_error = 1;
|
||||
if (ret < 0)
|
||||
goto resolve_page_fault;
|
||||
}
|
||||
|
||||
if (wqe >= wqe_end) {
|
||||
mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
|
||||
resume_with_error = 1;
|
||||
goto resolve_page_fault;
|
||||
}
|
||||
|
||||
ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
|
||||
&total_wqe_bytes, !requestor);
|
||||
ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end,
|
||||
&bytes_mapped, &total_wqe_bytes,
|
||||
!requestor);
|
||||
if (ret == -EAGAIN) {
|
||||
resume_with_error = 0;
|
||||
goto resolve_page_fault;
|
||||
} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
|
||||
mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
|
||||
-ret);
|
||||
resume_with_error = 1;
|
||||
if (ret != -ENOENT)
|
||||
mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n",
|
||||
ret);
|
||||
goto resolve_page_fault;
|
||||
}
|
||||
|
||||
resume_with_error = 0;
|
||||
resolve_page_fault:
|
||||
mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
|
||||
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
|
||||
qpn, resume_with_error,
|
||||
pfault->mpfault.flags);
|
||||
|
||||
mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
|
||||
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
|
||||
pfault->token, resume_with_error,
|
||||
pfault->type);
|
||||
free_page((unsigned long)buffer);
|
||||
}
|
||||
|
||||
|
@ -615,15 +621,14 @@ static int pages_in_range(u64 address, u32 length)
|
|||
(address & PAGE_MASK)) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
|
||||
struct mlx5_ib_pfault *pfault)
|
||||
static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
|
||||
struct mlx5_pagefault *pfault)
|
||||
{
|
||||
struct mlx5_pagefault *mpfault = &pfault->mpfault;
|
||||
u64 address;
|
||||
u32 length;
|
||||
u32 prefetch_len = mpfault->bytes_committed;
|
||||
u32 prefetch_len = pfault->bytes_committed;
|
||||
int prefetch_activated = 0;
|
||||
u32 rkey = mpfault->rdma.r_key;
|
||||
u32 rkey = pfault->rdma.r_key;
|
||||
int ret;
|
||||
|
||||
/* The RDMA responder handler handles the page fault in two parts.
|
||||
|
@ -632,38 +637,40 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
|
|||
* prefetches more pages. The second operation cannot use the pfault
|
||||
* context and therefore uses the dummy_pfault context allocated on
|
||||
* the stack */
|
||||
struct mlx5_ib_pfault dummy_pfault = {};
|
||||
pfault->rdma.rdma_va += pfault->bytes_committed;
|
||||
pfault->rdma.rdma_op_len -= min(pfault->bytes_committed,
|
||||
pfault->rdma.rdma_op_len);
|
||||
pfault->bytes_committed = 0;
|
||||
|
||||
dummy_pfault.mpfault.bytes_committed = 0;
|
||||
|
||||
mpfault->rdma.rdma_va += mpfault->bytes_committed;
|
||||
mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
|
||||
mpfault->rdma.rdma_op_len);
|
||||
mpfault->bytes_committed = 0;
|
||||
|
||||
address = mpfault->rdma.rdma_va;
|
||||
length = mpfault->rdma.rdma_op_len;
|
||||
address = pfault->rdma.rdma_va;
|
||||
length = pfault->rdma.rdma_op_len;
|
||||
|
||||
/* For some operations, the hardware cannot tell the exact message
|
||||
* length, and in those cases it reports zero. Use prefetch
|
||||
* logic. */
|
||||
if (length == 0) {
|
||||
prefetch_activated = 1;
|
||||
length = mpfault->rdma.packet_size;
|
||||
length = pfault->rdma.packet_size;
|
||||
prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
|
||||
}
|
||||
|
||||
ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
|
||||
NULL);
|
||||
ret = pagefault_single_data_segment(dev, rkey, address, length,
|
||||
&pfault->bytes_committed, NULL);
|
||||
if (ret == -EAGAIN) {
|
||||
/* We're racing with an invalidation, don't prefetch */
|
||||
prefetch_activated = 0;
|
||||
} else if (ret < 0 || pages_in_range(address, length) > ret) {
|
||||
mlx5_ib_page_fault_resume(qp, pfault, 1);
|
||||
mlx5_ib_page_fault_resume(dev, pfault, 1);
|
||||
if (ret != -ENOENT)
|
||||
mlx5_ib_warn(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
|
||||
ret, pfault->token, pfault->type);
|
||||
return;
|
||||
}
|
||||
|
||||
mlx5_ib_page_fault_resume(qp, pfault, 0);
|
||||
mlx5_ib_page_fault_resume(dev, pfault, 0);
|
||||
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x, type: 0x%x, prefetch_activated: %d\n",
|
||||
pfault->token, pfault->type,
|
||||
prefetch_activated);
|
||||
|
||||
/* At this point, there might be a new pagefault already arriving in
|
||||
* the eq, switch to the dummy pagefault for the rest of the
|
||||
|
@ -671,112 +678,39 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
|
|||
* work-queue is being fenced. */
|
||||
|
||||
if (prefetch_activated) {
|
||||
ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
|
||||
address,
|
||||
u32 bytes_committed = 0;
|
||||
|
||||
ret = pagefault_single_data_segment(dev, rkey, address,
|
||||
prefetch_len,
|
||||
NULL);
|
||||
&bytes_committed, NULL);
|
||||
if (ret < 0) {
|
||||
pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
|
||||
ret, prefetch_activated,
|
||||
qp->ibqp.qp_num, address, prefetch_len);
|
||||
mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
|
||||
ret, pfault->token, address,
|
||||
prefetch_len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
|
||||
struct mlx5_ib_pfault *pfault)
|
||||
void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
|
||||
struct mlx5_pagefault *pfault)
|
||||
{
|
||||
u8 event_subtype = pfault->mpfault.event_subtype;
|
||||
struct mlx5_ib_dev *dev = context;
|
||||
u8 event_subtype = pfault->event_subtype;
|
||||
|
||||
switch (event_subtype) {
|
||||
case MLX5_PFAULT_SUBTYPE_WQE:
|
||||
mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
|
||||
mlx5_ib_mr_wqe_pfault_handler(dev, pfault);
|
||||
break;
|
||||
case MLX5_PFAULT_SUBTYPE_RDMA:
|
||||
mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
|
||||
mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
|
||||
break;
|
||||
default:
|
||||
pr_warn("Invalid page fault event subtype: 0x%x\n",
|
||||
event_subtype);
|
||||
mlx5_ib_page_fault_resume(qp, pfault, 1);
|
||||
break;
|
||||
mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
|
||||
event_subtype);
|
||||
mlx5_ib_page_fault_resume(dev, pfault, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void mlx5_ib_qp_pfault_action(struct work_struct *work)
|
||||
{
|
||||
struct mlx5_ib_pfault *pfault = container_of(work,
|
||||
struct mlx5_ib_pfault,
|
||||
work);
|
||||
enum mlx5_ib_pagefault_context context =
|
||||
mlx5_ib_get_pagefault_context(&pfault->mpfault);
|
||||
struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
|
||||
pagefaults[context]);
|
||||
mlx5_ib_mr_pfault_handler(qp, pfault);
|
||||
}
|
||||
|
||||
void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
|
||||
qp->disable_page_faults = 1;
|
||||
spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
|
||||
|
||||
/*
|
||||
* Note that at this point, we are guarenteed that no more
|
||||
* work queue elements will be posted to the work queue with
|
||||
* the QP we are closing.
|
||||
*/
|
||||
flush_workqueue(mlx5_ib_page_fault_wq);
|
||||
}
|
||||
|
||||
void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
|
||||
qp->disable_page_faults = 0;
|
||||
spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
|
||||
}
|
||||
|
||||
static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
|
||||
struct mlx5_pagefault *pfault)
|
||||
{
|
||||
/*
|
||||
* Note that we will only get one fault event per QP per context
|
||||
* (responder/initiator, read/write), until we resolve the page fault
|
||||
* with the mlx5_ib_page_fault_resume command. Since this function is
|
||||
* called from within the work element, there is no risk of missing
|
||||
* events.
|
||||
*/
|
||||
struct mlx5_ib_qp *mibqp = to_mibqp(qp);
|
||||
enum mlx5_ib_pagefault_context context =
|
||||
mlx5_ib_get_pagefault_context(pfault);
|
||||
struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
|
||||
|
||||
qp_pfault->mpfault = *pfault;
|
||||
|
||||
/* No need to stop interrupts here since we are in an interrupt */
|
||||
spin_lock(&mibqp->disable_page_faults_lock);
|
||||
if (!mibqp->disable_page_faults)
|
||||
queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
|
||||
spin_unlock(&mibqp->disable_page_faults_lock);
|
||||
}
|
||||
|
||||
void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
|
||||
{
|
||||
int i;
|
||||
|
||||
qp->disable_page_faults = 1;
|
||||
spin_lock_init(&qp->disable_page_faults_lock);
|
||||
|
||||
qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
|
||||
|
||||
for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
|
||||
INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
|
||||
}
|
||||
|
||||
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
|
||||
{
|
||||
int ret;
|
||||
|
@ -793,17 +727,3 @@ void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
|
|||
cleanup_srcu_struct(&ibdev->mr_srcu);
|
||||
}
|
||||
|
||||
int __init mlx5_ib_odp_init(void)
|
||||
{
|
||||
mlx5_ib_page_fault_wq = alloc_ordered_workqueue("mlx5_ib_page_faults",
|
||||
WQ_MEM_RECLAIM);
|
||||
if (!mlx5_ib_page_fault_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mlx5_ib_odp_cleanup(void)
|
||||
{
|
||||
destroy_workqueue(mlx5_ib_page_fault_wq);
|
||||
}
|
||||
|
|
|
@ -1526,9 +1526,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
|
|||
&qp->raw_packet_qp.rq.base :
|
||||
&qp->trans_qp.base;
|
||||
|
||||
if (init_attr->qp_type != IB_QPT_RAW_PACKET)
|
||||
mlx5_ib_odp_create_qp(qp);
|
||||
|
||||
mutex_init(&qp->mutex);
|
||||
spin_lock_init(&qp->sq.lock);
|
||||
spin_lock_init(&qp->rq.lock);
|
||||
|
@ -1923,7 +1920,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
|
|||
|
||||
if (qp->state != IB_QPS_RESET) {
|
||||
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
|
||||
mlx5_ib_qp_disable_pagefaults(qp);
|
||||
err = mlx5_core_qp_modify(dev->mdev,
|
||||
MLX5_CMD_OP_2RST_QP, 0,
|
||||
NULL, &base->mqp);
|
||||
|
@ -2823,16 +2819,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
|
|||
if (mlx5_st < 0)
|
||||
goto out;
|
||||
|
||||
/* If moving to a reset or error state, we must disable page faults on
|
||||
* this QP and flush all current page faults. Otherwise a stale page
|
||||
* fault may attempt to work on this QP after it is reset and moved
|
||||
* again to RTS, and may cause the driver and the device to get out of
|
||||
* sync. */
|
||||
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
|
||||
(new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
|
||||
(qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
|
||||
mlx5_ib_qp_disable_pagefaults(qp);
|
||||
|
||||
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
|
||||
!optab[mlx5_cur][mlx5_new])
|
||||
goto out;
|
||||
|
@ -2864,10 +2850,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
|
|||
if (err)
|
||||
goto out;
|
||||
|
||||
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
|
||||
(qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
|
||||
mlx5_ib_qp_enable_pagefaults(qp);
|
||||
|
||||
qp->state = new_state;
|
||||
|
||||
if (attr_mask & IB_QP_ACCESS_FLAGS)
|
||||
|
@ -4533,14 +4515,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
|
|||
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
|
||||
qp_init_attr);
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
/*
|
||||
* Wait for any outstanding page faults, in case the user frees memory
|
||||
* based upon this query's result.
|
||||
*/
|
||||
flush_workqueue(mlx5_ib_page_fault_wq);
|
||||
#endif
|
||||
|
||||
mutex_lock(&qp->mutex);
|
||||
|
||||
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
|
||||
|
|
|
@ -71,6 +71,16 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
|
|||
if (dev_ctx->context) {
|
||||
spin_lock_irq(&priv->ctx_lock);
|
||||
list_add_tail(&dev_ctx->list, &priv->ctx_list);
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
if (dev_ctx->intf->pfault) {
|
||||
if (priv->pfault) {
|
||||
mlx5_core_err(dev, "multiple page fault handlers not supported");
|
||||
} else {
|
||||
priv->pfault_ctx = dev_ctx->context;
|
||||
priv->pfault = dev_ctx->intf->pfault;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
spin_unlock_irq(&priv->ctx_lock);
|
||||
} else {
|
||||
kfree(dev_ctx);
|
||||
|
@ -97,6 +107,15 @@ void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
|
|||
if (!dev_ctx)
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
spin_lock_irq(&priv->ctx_lock);
|
||||
if (priv->pfault == dev_ctx->intf->pfault)
|
||||
priv->pfault = NULL;
|
||||
spin_unlock_irq(&priv->ctx_lock);
|
||||
|
||||
synchronize_srcu(&priv->pfault_srcu);
|
||||
#endif
|
||||
|
||||
spin_lock_irq(&priv->ctx_lock);
|
||||
list_del(&dev_ctx->list);
|
||||
spin_unlock_irq(&priv->ctx_lock);
|
||||
|
@ -329,6 +348,20 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
|
|||
spin_unlock_irqrestore(&priv->ctx_lock, flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
|
||||
struct mlx5_pagefault *pfault)
|
||||
{
|
||||
struct mlx5_priv *priv = &dev->priv;
|
||||
int srcu_idx;
|
||||
|
||||
srcu_idx = srcu_read_lock(&priv->pfault_srcu);
|
||||
if (priv->pfault)
|
||||
priv->pfault(dev, priv->pfault_ctx, pfault);
|
||||
srcu_read_unlock(&priv->pfault_srcu, srcu_idx);
|
||||
}
|
||||
#endif
|
||||
|
||||
void mlx5_dev_list_lock(void)
|
||||
{
|
||||
mutex_lock(&mlx5_intf_mutex);
|
||||
|
|
|
@ -54,6 +54,7 @@ enum {
|
|||
MLX5_NUM_SPARE_EQE = 0x80,
|
||||
MLX5_NUM_ASYNC_EQE = 0x100,
|
||||
MLX5_NUM_CMD_EQE = 32,
|
||||
MLX5_NUM_PF_DRAIN = 64,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -188,10 +189,193 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm)
|
|||
mb();
|
||||
}
|
||||
|
||||
static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
static void eqe_pf_action(struct work_struct *work)
|
||||
{
|
||||
struct mlx5_pagefault *pfault = container_of(work,
|
||||
struct mlx5_pagefault,
|
||||
work);
|
||||
struct mlx5_eq *eq = pfault->eq;
|
||||
|
||||
mlx5_core_page_fault(eq->dev, pfault);
|
||||
mempool_free(pfault, eq->pf_ctx.pool);
|
||||
}
|
||||
|
||||
static void eq_pf_process(struct mlx5_eq *eq)
|
||||
{
|
||||
struct mlx5_core_dev *dev = eq->dev;
|
||||
struct mlx5_eqe_page_fault *pf_eqe;
|
||||
struct mlx5_pagefault *pfault;
|
||||
struct mlx5_eqe *eqe;
|
||||
int set_ci = 0;
|
||||
|
||||
while ((eqe = next_eqe_sw(eq))) {
|
||||
pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
|
||||
if (!pfault) {
|
||||
schedule_work(&eq->pf_ctx.work);
|
||||
break;
|
||||
}
|
||||
|
||||
dma_rmb();
|
||||
pf_eqe = &eqe->data.page_fault;
|
||||
pfault->event_subtype = eqe->sub_type;
|
||||
pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
|
||||
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
|
||||
eqe->sub_type, pfault->bytes_committed);
|
||||
|
||||
switch (eqe->sub_type) {
|
||||
case MLX5_PFAULT_SUBTYPE_RDMA:
|
||||
/* RDMA based event */
|
||||
pfault->type =
|
||||
be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
|
||||
pfault->token =
|
||||
be32_to_cpu(pf_eqe->rdma.pftype_token) &
|
||||
MLX5_24BIT_MASK;
|
||||
pfault->rdma.r_key =
|
||||
be32_to_cpu(pf_eqe->rdma.r_key);
|
||||
pfault->rdma.packet_size =
|
||||
be16_to_cpu(pf_eqe->rdma.packet_length);
|
||||
pfault->rdma.rdma_op_len =
|
||||
be32_to_cpu(pf_eqe->rdma.rdma_op_len);
|
||||
pfault->rdma.rdma_va =
|
||||
be64_to_cpu(pf_eqe->rdma.rdma_va);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
|
||||
pfault->type, pfault->token,
|
||||
pfault->rdma.r_key);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
|
||||
pfault->rdma.rdma_op_len,
|
||||
pfault->rdma.rdma_va);
|
||||
break;
|
||||
|
||||
case MLX5_PFAULT_SUBTYPE_WQE:
|
||||
/* WQE based event */
|
||||
pfault->type =
|
||||
be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24;
|
||||
pfault->token =
|
||||
be32_to_cpu(pf_eqe->wqe.token);
|
||||
pfault->wqe.wq_num =
|
||||
be32_to_cpu(pf_eqe->wqe.pftype_wq) &
|
||||
MLX5_24BIT_MASK;
|
||||
pfault->wqe.wqe_index =
|
||||
be16_to_cpu(pf_eqe->wqe.wqe_index);
|
||||
pfault->wqe.packet_size =
|
||||
be16_to_cpu(pf_eqe->wqe.packet_length);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
|
||||
pfault->type, pfault->token,
|
||||
pfault->wqe.wq_num,
|
||||
pfault->wqe.wqe_index);
|
||||
break;
|
||||
|
||||
default:
|
||||
mlx5_core_warn(dev,
|
||||
"Unsupported page fault event sub-type: 0x%02hhx\n",
|
||||
eqe->sub_type);
|
||||
/* Unsupported page faults should still be
|
||||
* resolved by the page fault handler
|
||||
*/
|
||||
}
|
||||
|
||||
pfault->eq = eq;
|
||||
INIT_WORK(&pfault->work, eqe_pf_action);
|
||||
queue_work(eq->pf_ctx.wq, &pfault->work);
|
||||
|
||||
++eq->cons_index;
|
||||
++set_ci;
|
||||
|
||||
if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
|
||||
eq_update_ci(eq, 0);
|
||||
set_ci = 0;
|
||||
}
|
||||
}
|
||||
|
||||
eq_update_ci(eq, 1);
|
||||
}
|
||||
|
||||
static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
|
||||
{
|
||||
struct mlx5_eq *eq = eq_ptr;
|
||||
unsigned long flags;
|
||||
|
||||
if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
|
||||
eq_pf_process(eq);
|
||||
spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
|
||||
} else {
|
||||
schedule_work(&eq->pf_ctx.work);
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
/* mempool_refill() was proposed but unfortunately wasn't accepted
|
||||
* http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
|
||||
* Chip workaround.
|
||||
*/
|
||||
static void mempool_refill(mempool_t *pool)
|
||||
{
|
||||
while (pool->curr_nr < pool->min_nr)
|
||||
mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
|
||||
}
|
||||
|
||||
static void eq_pf_action(struct work_struct *work)
|
||||
{
|
||||
struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);
|
||||
|
||||
mempool_refill(eq->pf_ctx.pool);
|
||||
|
||||
spin_lock_irq(&eq->pf_ctx.lock);
|
||||
eq_pf_process(eq);
|
||||
spin_unlock_irq(&eq->pf_ctx.lock);
|
||||
}
|
||||
|
||||
static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
|
||||
{
|
||||
spin_lock_init(&pf_ctx->lock);
|
||||
INIT_WORK(&pf_ctx->work, eq_pf_action);
|
||||
|
||||
pf_ctx->wq = alloc_ordered_workqueue(name,
|
||||
WQ_MEM_RECLAIM);
|
||||
if (!pf_ctx->wq)
|
||||
return -ENOMEM;
|
||||
|
||||
pf_ctx->pool = mempool_create_kmalloc_pool
|
||||
(MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
|
||||
if (!pf_ctx->pool)
|
||||
goto err_wq;
|
||||
|
||||
return 0;
|
||||
err_wq:
|
||||
destroy_workqueue(pf_ctx->wq);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
|
||||
u32 wq_num, u8 type, int error)
|
||||
{
|
||||
u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
|
||||
u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
|
||||
|
||||
MLX5_SET(page_fault_resume_in, in, opcode,
|
||||
MLX5_CMD_OP_PAGE_FAULT_RESUME);
|
||||
MLX5_SET(page_fault_resume_in, in, error, !!error);
|
||||
MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
|
||||
MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
|
||||
MLX5_SET(page_fault_resume_in, in, token, token);
|
||||
|
||||
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
|
||||
#endif
|
||||
|
||||
static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
|
||||
{
|
||||
struct mlx5_eq *eq = eq_ptr;
|
||||
struct mlx5_core_dev *dev = eq->dev;
|
||||
struct mlx5_eqe *eqe;
|
||||
int eqes_found = 0;
|
||||
int set_ci = 0;
|
||||
u32 cqn = -1;
|
||||
u32 rsn;
|
||||
|
@ -276,12 +460,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
|
|||
}
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
case MLX5_EVENT_TYPE_PAGE_FAULT:
|
||||
mlx5_eq_pagefault(dev, eqe);
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MLX5_CORE_EN
|
||||
case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
|
||||
mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
|
||||
|
@ -299,7 +477,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
|
|||
}
|
||||
|
||||
++eq->cons_index;
|
||||
eqes_found = 1;
|
||||
++set_ci;
|
||||
|
||||
/* The HCA will think the queue has overflowed if we
|
||||
|
@ -319,17 +496,6 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
|
|||
if (cqn != -1)
|
||||
tasklet_schedule(&eq->tasklet_ctx.task);
|
||||
|
||||
return eqes_found;
|
||||
}
|
||||
|
||||
static irqreturn_t mlx5_msix_handler(int irq, void *eq_ptr)
|
||||
{
|
||||
struct mlx5_eq *eq = eq_ptr;
|
||||
struct mlx5_core_dev *dev = eq->dev;
|
||||
|
||||
mlx5_eq_int(dev, eq);
|
||||
|
||||
/* MSI-X vectors always belong to us */
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
|
@ -345,22 +511,32 @@ static void init_eq_buf(struct mlx5_eq *eq)
|
|||
}
|
||||
|
||||
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
|
||||
int nent, u64 mask, const char *name, struct mlx5_uar *uar)
|
||||
int nent, u64 mask, const char *name,
|
||||
struct mlx5_uar *uar, enum mlx5_eq_type type)
|
||||
{
|
||||
u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
|
||||
struct mlx5_priv *priv = &dev->priv;
|
||||
irq_handler_t handler;
|
||||
__be64 *pas;
|
||||
void *eqc;
|
||||
int inlen;
|
||||
u32 *in;
|
||||
int err;
|
||||
|
||||
eq->type = type;
|
||||
eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
|
||||
eq->cons_index = 0;
|
||||
err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
if (type == MLX5_EQ_TYPE_PF)
|
||||
handler = mlx5_eq_pf_int;
|
||||
else
|
||||
#endif
|
||||
handler = mlx5_eq_int;
|
||||
|
||||
init_eq_buf(eq);
|
||||
|
||||
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
|
||||
|
@ -396,7 +572,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
|
|||
eq->irqn = priv->msix_arr[vecidx].vector;
|
||||
eq->dev = dev;
|
||||
eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
|
||||
err = request_irq(eq->irqn, mlx5_msix_handler, 0,
|
||||
err = request_irq(eq->irqn, handler, 0,
|
||||
priv->irq_info[vecidx].name, eq);
|
||||
if (err)
|
||||
goto err_eq;
|
||||
|
@ -405,11 +581,20 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
|
|||
if (err)
|
||||
goto err_irq;
|
||||
|
||||
INIT_LIST_HEAD(&eq->tasklet_ctx.list);
|
||||
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
|
||||
spin_lock_init(&eq->tasklet_ctx.lock);
|
||||
tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
|
||||
(unsigned long)&eq->tasklet_ctx);
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
if (type == MLX5_EQ_TYPE_PF) {
|
||||
err = init_pf_ctx(&eq->pf_ctx, name);
|
||||
if (err)
|
||||
goto err_irq;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
INIT_LIST_HEAD(&eq->tasklet_ctx.list);
|
||||
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
|
||||
spin_lock_init(&eq->tasklet_ctx.lock);
|
||||
tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
|
||||
(unsigned long)&eq->tasklet_ctx);
|
||||
}
|
||||
|
||||
/* EQs are created in ARMED state
|
||||
*/
|
||||
|
@ -444,7 +629,16 @@ int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
|
|||
mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
|
||||
eq->eqn);
|
||||
synchronize_irq(eq->irqn);
|
||||
tasklet_disable(&eq->tasklet_ctx.task);
|
||||
|
||||
if (eq->type == MLX5_EQ_TYPE_COMP) {
|
||||
tasklet_disable(&eq->tasklet_ctx.task);
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
} else if (eq->type == MLX5_EQ_TYPE_PF) {
|
||||
cancel_work_sync(&eq->pf_ctx.work);
|
||||
destroy_workqueue(eq->pf_ctx.wq);
|
||||
mempool_destroy(eq->pf_ctx.pool);
|
||||
#endif
|
||||
}
|
||||
mlx5_buf_free(dev, &eq->buf);
|
||||
|
||||
return err;
|
||||
|
@ -479,8 +673,6 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
|
|||
u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
|
||||
int err;
|
||||
|
||||
if (MLX5_CAP_GEN(dev, pg))
|
||||
async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
|
||||
|
||||
if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
|
||||
MLX5_CAP_GEN(dev, vport_group_manager) &&
|
||||
|
@ -494,7 +686,8 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
|
|||
|
||||
err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
|
||||
MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
|
||||
"mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
|
||||
"mlx5_cmd_eq", &dev->priv.uuari.uars[0],
|
||||
MLX5_EQ_TYPE_ASYNC);
|
||||
if (err) {
|
||||
mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
|
||||
return err;
|
||||
|
@ -504,7 +697,8 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
|
|||
|
||||
err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
|
||||
MLX5_NUM_ASYNC_EQE, async_event_mask,
|
||||
"mlx5_async_eq", &dev->priv.uuari.uars[0]);
|
||||
"mlx5_async_eq", &dev->priv.uuari.uars[0],
|
||||
MLX5_EQ_TYPE_ASYNC);
|
||||
if (err) {
|
||||
mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
|
||||
goto err1;
|
||||
|
@ -514,13 +708,35 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
|
|||
MLX5_EQ_VEC_PAGES,
|
||||
/* TODO: sriov max_vf + */ 1,
|
||||
1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
|
||||
&dev->priv.uuari.uars[0]);
|
||||
&dev->priv.uuari.uars[0],
|
||||
MLX5_EQ_TYPE_ASYNC);
|
||||
if (err) {
|
||||
mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
|
||||
goto err2;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
if (MLX5_CAP_GEN(dev, pg)) {
|
||||
err = mlx5_create_map_eq(dev, &table->pfault_eq,
|
||||
MLX5_EQ_VEC_PFAULT,
|
||||
MLX5_NUM_ASYNC_EQE,
|
||||
1 << MLX5_EVENT_TYPE_PAGE_FAULT,
|
||||
"mlx5_page_fault_eq",
|
||||
&dev->priv.uuari.uars[0],
|
||||
MLX5_EQ_TYPE_PF);
|
||||
if (err) {
|
||||
mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
|
||||
err);
|
||||
goto err3;
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
err3:
|
||||
mlx5_destroy_unmap_eq(dev, &table->pages_eq);
|
||||
#else
|
||||
return err;
|
||||
#endif
|
||||
|
||||
err2:
|
||||
mlx5_destroy_unmap_eq(dev, &table->async_eq);
|
||||
|
@ -536,6 +752,14 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
|
|||
struct mlx5_eq_table *table = &dev->priv.eq_table;
|
||||
int err;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
if (MLX5_CAP_GEN(dev, pg)) {
|
||||
err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
|
||||
if (err)
|
||||
return err;
|
||||
|
|
|
@ -753,7 +753,8 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev)
|
|||
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
|
||||
err = mlx5_create_map_eq(dev, eq,
|
||||
i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
|
||||
name, &dev->priv.uuari.uars[0]);
|
||||
name, &dev->priv.uuari.uars[0],
|
||||
MLX5_EQ_TYPE_COMP);
|
||||
if (err) {
|
||||
kfree(eq);
|
||||
goto clean;
|
||||
|
@ -1295,10 +1296,19 @@ static int init_one(struct pci_dev *pdev,
|
|||
spin_lock_init(&priv->ctx_lock);
|
||||
mutex_init(&dev->pci_status_mutex);
|
||||
mutex_init(&dev->intf_state_mutex);
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
err = init_srcu_struct(&priv->pfault_srcu);
|
||||
if (err) {
|
||||
dev_err(&pdev->dev, "init_srcu_struct failed with error code %d\n",
|
||||
err);
|
||||
goto clean_dev;
|
||||
}
|
||||
#endif
|
||||
err = mlx5_pci_init(dev, priv);
|
||||
if (err) {
|
||||
dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
|
||||
goto clean_dev;
|
||||
goto clean_srcu;
|
||||
}
|
||||
|
||||
err = mlx5_health_init(dev);
|
||||
|
@ -1332,7 +1342,11 @@ static int init_one(struct pci_dev *pdev,
|
|||
mlx5_health_cleanup(dev);
|
||||
close_pci:
|
||||
mlx5_pci_close(dev, priv);
|
||||
clean_srcu:
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
cleanup_srcu_struct(&priv->pfault_srcu);
|
||||
clean_dev:
|
||||
#endif
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
devlink_free(devlink);
|
||||
|
||||
|
@ -1357,6 +1371,9 @@ static void remove_one(struct pci_dev *pdev)
|
|||
mlx5_pagealloc_cleanup(dev);
|
||||
mlx5_health_cleanup(dev);
|
||||
mlx5_pci_close(dev, priv);
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
cleanup_srcu_struct(&priv->pfault_srcu);
|
||||
#endif
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
devlink_free(devlink);
|
||||
}
|
||||
|
|
|
@ -86,6 +86,8 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
|
|||
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
|
||||
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
|
||||
unsigned long param);
|
||||
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
|
||||
struct mlx5_pagefault *pfault);
|
||||
void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
|
||||
void mlx5_enter_error_state(struct mlx5_core_dev *dev);
|
||||
void mlx5_disable_device(struct mlx5_core_dev *dev);
|
||||
|
|
|
@ -143,95 +143,6 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
|
|||
mlx5_core_put_rsc(common);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
|
||||
{
|
||||
struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
|
||||
int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
|
||||
struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
|
||||
struct mlx5_core_qp *qp =
|
||||
container_of(common, struct mlx5_core_qp, common);
|
||||
struct mlx5_pagefault pfault;
|
||||
|
||||
if (!qp) {
|
||||
mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
|
||||
qpn);
|
||||
return;
|
||||
}
|
||||
|
||||
pfault.event_subtype = eqe->sub_type;
|
||||
pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
|
||||
(MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
|
||||
pfault.bytes_committed = be32_to_cpu(
|
||||
pf_eqe->bytes_committed);
|
||||
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
|
||||
eqe->sub_type, pfault.flags);
|
||||
|
||||
switch (eqe->sub_type) {
|
||||
case MLX5_PFAULT_SUBTYPE_RDMA:
|
||||
/* RDMA based event */
|
||||
pfault.rdma.r_key =
|
||||
be32_to_cpu(pf_eqe->rdma.r_key);
|
||||
pfault.rdma.packet_size =
|
||||
be16_to_cpu(pf_eqe->rdma.packet_length);
|
||||
pfault.rdma.rdma_op_len =
|
||||
be32_to_cpu(pf_eqe->rdma.rdma_op_len);
|
||||
pfault.rdma.rdma_va =
|
||||
be64_to_cpu(pf_eqe->rdma.rdma_va);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
|
||||
qpn, pfault.rdma.r_key);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: rdma_op_len: 0x%08x,\n",
|
||||
pfault.rdma.rdma_op_len);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: rdma_va: 0x%016llx,\n",
|
||||
pfault.rdma.rdma_va);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: bytes_committed: 0x%06x\n",
|
||||
pfault.bytes_committed);
|
||||
break;
|
||||
|
||||
case MLX5_PFAULT_SUBTYPE_WQE:
|
||||
/* WQE based event */
|
||||
pfault.wqe.wqe_index =
|
||||
be16_to_cpu(pf_eqe->wqe.wqe_index);
|
||||
pfault.wqe.packet_size =
|
||||
be16_to_cpu(pf_eqe->wqe.packet_length);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
|
||||
qpn, pfault.wqe.wqe_index);
|
||||
mlx5_core_dbg(dev,
|
||||
"PAGE_FAULT: bytes_committed: 0x%06x\n",
|
||||
pfault.bytes_committed);
|
||||
break;
|
||||
|
||||
default:
|
||||
mlx5_core_warn(dev,
|
||||
"Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
|
||||
eqe->sub_type, qpn);
|
||||
/* Unsupported page faults should still be resolved by the
|
||||
* page fault handler
|
||||
*/
|
||||
}
|
||||
|
||||
if (qp->pfault_handler) {
|
||||
qp->pfault_handler(qp, &pfault);
|
||||
} else {
|
||||
mlx5_core_err(dev,
|
||||
"ODP event for QP %08x, without a fault handler in QP\n",
|
||||
qpn);
|
||||
/* Page fault will remain unresolved. QP will hang until it is
|
||||
* destroyed
|
||||
*/
|
||||
}
|
||||
|
||||
mlx5_core_put_rsc(common);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int create_qprqsq_common(struct mlx5_core_dev *dev,
|
||||
struct mlx5_core_qp *qp,
|
||||
int rsc_type)
|
||||
|
@ -506,25 +417,6 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
|
||||
u8 flags, int error)
|
||||
{
|
||||
u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
|
||||
u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};
|
||||
|
||||
MLX5_SET(page_fault_resume_in, in, opcode,
|
||||
MLX5_CMD_OP_PAGE_FAULT_RESUME);
|
||||
MLX5_SET(page_fault_resume_in, in, wq_number, qpn);
|
||||
MLX5_SET(page_fault_resume_in, in, page_fault_type, flags);
|
||||
if (error)
|
||||
MLX5_SET(page_fault_resume_in, in, error, 1);
|
||||
|
||||
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
|
||||
#endif
|
||||
|
||||
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
|
||||
struct mlx5_core_qp *rq)
|
||||
{
|
||||
|
|
|
@ -534,7 +534,9 @@ struct mlx5_eqe_page_fault {
|
|||
__be16 wqe_index;
|
||||
u16 reserved2;
|
||||
__be16 packet_length;
|
||||
u8 reserved3[12];
|
||||
__be32 token;
|
||||
u8 reserved4[8];
|
||||
__be32 pftype_wq;
|
||||
} __packed wqe;
|
||||
struct {
|
||||
__be32 r_key;
|
||||
|
@ -542,9 +544,9 @@ struct mlx5_eqe_page_fault {
|
|||
__be16 packet_length;
|
||||
__be32 rdma_op_len;
|
||||
__be64 rdma_va;
|
||||
__be32 pftype_token;
|
||||
} __packed rdma;
|
||||
} __packed;
|
||||
__be32 flags_qpn;
|
||||
} __packed;
|
||||
|
||||
struct mlx5_eqe_vport_change {
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
#include <linux/vmalloc.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/mempool.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
#include <linux/mlx5/device.h>
|
||||
|
@ -83,6 +84,7 @@ enum {
|
|||
MLX5_EQ_VEC_PAGES = 0,
|
||||
MLX5_EQ_VEC_CMD = 1,
|
||||
MLX5_EQ_VEC_ASYNC = 2,
|
||||
MLX5_EQ_VEC_PFAULT = 3,
|
||||
MLX5_EQ_VEC_COMP_BASE,
|
||||
};
|
||||
|
||||
|
@ -178,6 +180,14 @@ enum mlx5_port_status {
|
|||
MLX5_PORT_DOWN = 2,
|
||||
};
|
||||
|
||||
enum mlx5_eq_type {
|
||||
MLX5_EQ_TYPE_COMP,
|
||||
MLX5_EQ_TYPE_ASYNC,
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
MLX5_EQ_TYPE_PF,
|
||||
#endif
|
||||
};
|
||||
|
||||
struct mlx5_uuar_info {
|
||||
struct mlx5_uar *uars;
|
||||
int num_uars;
|
||||
|
@ -333,6 +343,14 @@ struct mlx5_eq_tasklet {
|
|||
spinlock_t lock;
|
||||
};
|
||||
|
||||
struct mlx5_eq_pagefault {
|
||||
struct work_struct work;
|
||||
/* Pagefaults lock */
|
||||
spinlock_t lock;
|
||||
struct workqueue_struct *wq;
|
||||
mempool_t *pool;
|
||||
};
|
||||
|
||||
struct mlx5_eq {
|
||||
struct mlx5_core_dev *dev;
|
||||
__be32 __iomem *doorbell;
|
||||
|
@ -346,7 +364,13 @@ struct mlx5_eq {
|
|||
struct list_head list;
|
||||
int index;
|
||||
struct mlx5_rsc_debug *dbg;
|
||||
struct mlx5_eq_tasklet tasklet_ctx;
|
||||
enum mlx5_eq_type type;
|
||||
union {
|
||||
struct mlx5_eq_tasklet tasklet_ctx;
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
struct mlx5_eq_pagefault pf_ctx;
|
||||
#endif
|
||||
};
|
||||
};
|
||||
|
||||
struct mlx5_core_psv {
|
||||
|
@ -377,6 +401,8 @@ struct mlx5_core_mkey {
|
|||
u32 pd;
|
||||
};
|
||||
|
||||
#define MLX5_24BIT_MASK ((1 << 24) - 1)
|
||||
|
||||
enum mlx5_res_type {
|
||||
MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
|
||||
MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
|
||||
|
@ -411,6 +437,9 @@ struct mlx5_eq_table {
|
|||
struct mlx5_eq pages_eq;
|
||||
struct mlx5_eq async_eq;
|
||||
struct mlx5_eq cmd_eq;
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
struct mlx5_eq pfault_eq;
|
||||
#endif
|
||||
int num_comp_vectors;
|
||||
/* protect EQs list
|
||||
*/
|
||||
|
@ -497,6 +526,7 @@ struct mlx5_fc_stats {
|
|||
|
||||
struct mlx5_eswitch;
|
||||
struct mlx5_lag;
|
||||
struct mlx5_pagefault;
|
||||
|
||||
struct mlx5_rl_entry {
|
||||
u32 rate;
|
||||
|
@ -601,6 +631,14 @@ struct mlx5_priv {
|
|||
struct mlx5_rl_table rl_table;
|
||||
|
||||
struct mlx5_port_module_event_stats pme_stats;
|
||||
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
void (*pfault)(struct mlx5_core_dev *dev,
|
||||
void *context,
|
||||
struct mlx5_pagefault *pfault);
|
||||
void *pfault_ctx;
|
||||
struct srcu_struct pfault_srcu;
|
||||
#endif
|
||||
};
|
||||
|
||||
enum mlx5_device_state {
|
||||
|
@ -619,6 +657,50 @@ enum mlx5_pci_status {
|
|||
MLX5_PCI_STATUS_ENABLED,
|
||||
};
|
||||
|
||||
enum mlx5_pagefault_type_flags {
|
||||
MLX5_PFAULT_REQUESTOR = 1 << 0,
|
||||
MLX5_PFAULT_WRITE = 1 << 1,
|
||||
MLX5_PFAULT_RDMA = 1 << 2,
|
||||
};
|
||||
|
||||
/* Contains the details of a pagefault. */
|
||||
struct mlx5_pagefault {
|
||||
u32 bytes_committed;
|
||||
u32 token;
|
||||
u8 event_subtype;
|
||||
u8 type;
|
||||
union {
|
||||
/* Initiator or send message responder pagefault details. */
|
||||
struct {
|
||||
/* Received packet size, only valid for responders. */
|
||||
u32 packet_size;
|
||||
/*
|
||||
* Number of resource holding WQE, depends on type.
|
||||
*/
|
||||
u32 wq_num;
|
||||
/*
|
||||
* WQE index. Refers to either the send queue or
|
||||
* receive queue, according to event_subtype.
|
||||
*/
|
||||
u16 wqe_index;
|
||||
} wqe;
|
||||
/* RDMA responder pagefault details */
|
||||
struct {
|
||||
u32 r_key;
|
||||
/*
|
||||
* Received packet size, minimal size page fault
|
||||
* resolution required for forward progress.
|
||||
*/
|
||||
u32 packet_size;
|
||||
u32 rdma_op_len;
|
||||
u64 rdma_va;
|
||||
} rdma;
|
||||
};
|
||||
|
||||
struct mlx5_eq *eq;
|
||||
struct work_struct work;
|
||||
};
|
||||
|
||||
struct mlx5_td {
|
||||
struct list_head tirs_list;
|
||||
u32 tdn;
|
||||
|
@ -879,15 +961,13 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
|
|||
void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
|
||||
void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
|
||||
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
|
||||
#endif
|
||||
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
|
||||
struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
|
||||
void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec);
|
||||
void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
|
||||
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
|
||||
int nent, u64 mask, const char *name, struct mlx5_uar *uar);
|
||||
int nent, u64 mask, const char *name,
|
||||
struct mlx5_uar *uar, enum mlx5_eq_type type);
|
||||
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
|
||||
int mlx5_start_eqs(struct mlx5_core_dev *dev);
|
||||
int mlx5_stop_eqs(struct mlx5_core_dev *dev);
|
||||
|
@ -926,6 +1006,10 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
|
|||
struct mlx5_odp_caps *odp_caps);
|
||||
int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
|
||||
u8 port_num, void *out, size_t sz);
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
|
||||
u32 wq_num, u8 type, int error);
|
||||
#endif
|
||||
|
||||
int mlx5_init_rl_table(struct mlx5_core_dev *dev);
|
||||
void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
|
||||
|
@ -974,6 +1058,9 @@ struct mlx5_interface {
|
|||
void (*detach)(struct mlx5_core_dev *dev, void *context);
|
||||
void (*event)(struct mlx5_core_dev *dev, void *context,
|
||||
enum mlx5_dev_event event, unsigned long param);
|
||||
void (*pfault)(struct mlx5_core_dev *dev,
|
||||
void *context,
|
||||
struct mlx5_pagefault *pfault);
|
||||
void * (*get_dev)(void *context);
|
||||
int protocol;
|
||||
struct list_head list;
|
||||
|
|
|
@ -50,9 +50,6 @@
|
|||
#define MLX5_BSF_APPTAG_ESCAPE 0x1
|
||||
#define MLX5_BSF_APPREF_ESCAPE 0x2
|
||||
|
||||
#define MLX5_QPN_BITS 24
|
||||
#define MLX5_QPN_MASK ((1 << MLX5_QPN_BITS) - 1)
|
||||
|
||||
enum mlx5_qp_optpar {
|
||||
MLX5_QP_OPTPAR_ALT_ADDR_PATH = 1 << 0,
|
||||
MLX5_QP_OPTPAR_RRE = 1 << 1,
|
||||
|
@ -418,46 +415,9 @@ struct mlx5_stride_block_ctrl_seg {
|
|||
__be16 num_entries;
|
||||
};
|
||||
|
||||
enum mlx5_pagefault_flags {
|
||||
MLX5_PFAULT_REQUESTOR = 1 << 0,
|
||||
MLX5_PFAULT_WRITE = 1 << 1,
|
||||
MLX5_PFAULT_RDMA = 1 << 2,
|
||||
};
|
||||
|
||||
/* Contains the details of a pagefault. */
|
||||
struct mlx5_pagefault {
|
||||
u32 bytes_committed;
|
||||
u8 event_subtype;
|
||||
enum mlx5_pagefault_flags flags;
|
||||
union {
|
||||
/* Initiator or send message responder pagefault details. */
|
||||
struct {
|
||||
/* Received packet size, only valid for responders. */
|
||||
u32 packet_size;
|
||||
/*
|
||||
* WQE index. Refers to either the send queue or
|
||||
* receive queue, according to event_subtype.
|
||||
*/
|
||||
u16 wqe_index;
|
||||
} wqe;
|
||||
/* RDMA responder pagefault details */
|
||||
struct {
|
||||
u32 r_key;
|
||||
/*
|
||||
* Received packet size, minimal size page fault
|
||||
* resolution required for forward progress.
|
||||
*/
|
||||
u32 packet_size;
|
||||
u32 rdma_op_len;
|
||||
u64 rdma_va;
|
||||
} rdma;
|
||||
};
|
||||
};
|
||||
|
||||
struct mlx5_core_qp {
|
||||
struct mlx5_core_rsc_common common; /* must be first */
|
||||
void (*event) (struct mlx5_core_qp *, int);
|
||||
void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
|
||||
int qpn;
|
||||
struct mlx5_rsc_debug *dbg;
|
||||
int pid;
|
||||
|
@ -557,10 +517,6 @@ void mlx5_init_qp_table(struct mlx5_core_dev *dev);
|
|||
void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
|
||||
int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
|
||||
void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
|
||||
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
|
||||
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
|
||||
u8 context, int error);
|
||||
#endif
|
||||
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
|
||||
struct mlx5_core_qp *rq);
|
||||
void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
|
||||
|
|
Loading…
Reference in New Issue