mirror of https://gitee.com/openkylin/linux.git
RDMA subsystem updates for 5.4-rc

Bug fixes for old bugs in the hns and hfi1 drivers:

 - Calculate various values in hns properly to avoid over/underflows in
   some cases

 - Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
   retries

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull RDMA fixes from Jason Gunthorpe:
 "Bug fixes for old bugs in the hns and hfi1 drivers:

  - Calculate various values in hns properly to avoid over/underflows
    in some cases

  - Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
    retries"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Correct the value of srq_desc_size
  RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN
  IB/hfi1: TID RDMA WRITE should not return IB_WC_RNR_RETRY_EXC_ERR
  IB/hfi1: Calculate flow weight based on QP MTU for TID RDMA
  IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet
  IB/hfi1: Ensure full Gen3 speed in a Gen4 system
commit 4e84608c78
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
 		goto bail_dev;
 	}
 
-	hfi1_compute_tid_rdma_flow_wt();
 	/*
 	 * These must be called before the driver is registered with
 	 * the PCI subsystem.
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
 	/*
 	 * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
 	 */
-	if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+	if (parent &&
+	    (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+	     dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
 		dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
 		dd->link_gen3_capable = 0;
 	}
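Note on the pcie.c hunk: the old test treated any parent-bridge speed other than PCIE_SPEED_8_0GT as "does not support Gen3", so a Gen4 (16 GT/s) bridge wrongly disabled Gen3 training and capped the hfi1 link below its rated speed. A minimal userspace sketch of the two predicates; the enum values are illustrative stand-ins, not copied from include/linux/pci.h:

#include <stdbool.h>
#include <stdio.h>

enum pci_bus_speed {
	PCIE_SPEED_2_5GT,	/* Gen1 */
	PCIE_SPEED_5_0GT,	/* Gen2 */
	PCIE_SPEED_8_0GT,	/* Gen3 */
	PCIE_SPEED_16_0GT,	/* Gen4 */
};

/* Old check: anything that is not exactly Gen3 is "not Gen3-capable". */
static bool old_no_gen3(enum pci_bus_speed s)
{
	return s != PCIE_SPEED_8_0GT;
}

/* New check: only bridges capped at Gen1/Gen2 really lack Gen3 support. */
static bool new_no_gen3(enum pci_bus_speed s)
{
	return s == PCIE_SPEED_2_5GT || s == PCIE_SPEED_5_0GT;
}

int main(void)
{
	/* A Gen4 bridge: the old predicate wrongly disables Gen3 training. */
	printf("Gen4 bridge: old=%d new=%d\n",
	       old_no_gen3(PCIE_SPEED_16_0GT), new_no_gen3(PCIE_SPEED_16_0GT));
	return 0;
}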
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		if (qp->s_flags & RVT_S_WAIT_RNR)
 			goto bail_stop;
 		rdi = ib_to_rvt(qp->ibqp.device);
-		if (qp->s_rnr_retry == 0 &&
-		    !((rdi->post_parms[wqe->wr.opcode].flags &
-		      RVT_OPERATION_IGN_RNR_CNT) &&
-		      qp->s_rnr_retry_cnt == 0)) {
-			status = IB_WC_RNR_RETRY_EXC_ERR;
-			goto class_b;
+		if (!(rdi->post_parms[wqe->wr.opcode].flags &
+		      RVT_OPERATION_IGN_RNR_CNT)) {
+			if (qp->s_rnr_retry == 0) {
+				status = IB_WC_RNR_RETRY_EXC_ERR;
+				goto class_b;
+			}
+			if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+				qp->s_rnr_retry--;
 		}
-		if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
-			qp->s_rnr_retry--;
 
 		/*
 		 * The last valid PSN is the previous PSN. For TID RDMA WRITE
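Note on the rc.c hunk: TID RDMA WRITE opcodes carry RVT_OPERATION_IGN_RNR_CNT, meaning RNR NAKs must not be counted against the QP. The old condition could still fail such a QP with IB_WC_RNR_RETRY_EXC_ERR once s_rnr_retry dropped to zero (it only skipped the error when s_rnr_retry_cnt happened to be exactly 0), and the unconditional decrement below it kept draining the retry budget. A simplified userspace sketch of the old and new predicates; the flag bit value is illustrative:

#include <stdbool.h>
#include <stdio.h>

#define RVT_OPERATION_IGN_RNR_CNT 0x1	/* illustrative bit value */

static bool old_rnr_exhausted(unsigned int flags, int s_rnr_retry,
			      int s_rnr_retry_cnt)
{
	return s_rnr_retry == 0 &&
	       !((flags & RVT_OPERATION_IGN_RNR_CNT) && s_rnr_retry_cnt == 0);
}

static bool new_rnr_exhausted(unsigned int flags, int s_rnr_retry,
			      int s_rnr_retry_cnt)
{
	(void)s_rnr_retry_cnt;	/* no longer consulted for the error path */
	return !(flags & RVT_OPERATION_IGN_RNR_CNT) && s_rnr_retry == 0;
}

int main(void)
{
	/* TID RDMA WRITE with a finite configured retry count: the old
	 * predicate fails the QP once s_rnr_retry hits 0; the new one keeps
	 * retrying because the opcode ignores the RNR counter entirely. */
	printf("old=%d new=%d\n",
	       old_rnr_exhausted(RVT_OPERATION_IGN_RNR_CNT, 0, 3),
	       new_rnr_exhausted(RVT_OPERATION_IGN_RNR_CNT, 0, 3));
	return 0;
}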
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
  * C - Capcode
  */
 
-static u32 tid_rdma_flow_wt;
-
 static void tid_rdma_trigger_resume(struct work_struct *work);
 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
 				   struct tid_rdma_flow *flow,
 				   bool fecn);
 
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+	if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+		priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+
+	priv->s_flags |= RVT_S_ACK_PENDING;
+	hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+	validate_r_tid_ack(qp->priv);
+	tid_rdma_schedule_ack(qp);
+}
+
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
 	return
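Note: these three helpers fold a sequence that was previously open-coded at the call sites changed in the hunks below. Per the commit subject ("IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet"), some of those sites scheduled the TID ACK without first checking r_tid_ack, so the ACK could be built while r_tid_ack was still HFI1_QP_WQE_INVALID; routing every trigger through validate_r_tid_ack() closes that hole.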
@@ -3005,10 +3023,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 			qpriv->s_nak_state = IB_NAK_PSN_ERROR;
 			/* We are NAK'ing the next expected PSN */
 			qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
-			qpriv->s_flags |= RVT_S_ACK_PENDING;
-			if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
-				qpriv->r_tid_ack = qpriv->r_tid_tail;
-			hfi1_schedule_tid_send(qp);
+			tid_rdma_trigger_ack(qp);
 		}
 		goto unlock;
 	}
@@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
 	return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
 }
 
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
 {
 	/*
 	 * Heuristic for computing the RNR timeout when waiting on the flow
 	 * queue. Rather than a computationaly expensive exact estimate of when
 	 * a flow will be available, we assume that if a QP is at position N in
 	 * the flow queue it has to wait approximately (N + 1) * (number of
-	 * segments between two sync points), assuming PMTU of 4K. The rationale
-	 * for this is that flows are released and recycled at each sync point.
+	 * segments between two sync points). The rationale for this is that
+	 * flows are released and recycled at each sync point.
 	 */
-	tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
-			   TID_RDMA_MAX_SEGMENT_SIZE;
+	return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
 }
 
 static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
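Note: the weight was previously computed once at module init and assumed a 4 KiB PMTU for every QP; it is now computed per QP from qp->pmtu, and the division by TID_RDMA_MAX_SEGMENT_SIZE (BIT(18)) becomes a right shift by the new TID_RDMA_SEGMENT_SHIFT. A userspace sketch of the arithmetic; the MAX_TID_FLOW_PSN value below is an assumption for the demo, not taken from the driver:

#include <stdio.h>

#define TID_RDMA_SEGMENT_SHIFT	18	/* 256 KiB segments, from the patch */
#define MAX_TID_FLOW_PSN	2048	/* assumed value, for illustration */

static unsigned int flow_wt(unsigned int pmtu)
{
	return (MAX_TID_FLOW_PSN * pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}

int main(void)
{
	/* The old code always used 4096; a QP with a 2 KiB PMTU now gets a
	 * proportionally smaller wait per queue position. */
	printf("pmtu 4096 -> wt %u\n", flow_wt(4096));	/* prints 32 */
	printf("pmtu 2048 -> wt %u\n", flow_wt(2048));	/* prints 16 */
	return 0;
}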
@@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
 		if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
 			ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
 			if (ret) {
-				to_seg = tid_rdma_flow_wt *
+				to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
 					position_in_queue(qpriv,
 							  &rcd->flow_queue);
 				break;
@@ -3526,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
 		/*
 		 * If overtaking req->acked_tail, send an RNR NAK. Because the
 		 * QP is not queued in this case, and the issue can only be
-		 * caused due a delay in scheduling the second leg which we
+		 * caused by a delay in scheduling the second leg which we
 		 * cannot estimate, we use a rather arbitrary RNR timeout of
 		 * (MAX_FLOWS / 2) segments
 		 */
@@ -3534,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
 				MAX_FLOWS)) {
 			ret = -EAGAIN;
 			to_seg = MAX_FLOWS >> 1;
-			qpriv->s_flags |= RVT_S_ACK_PENDING;
-			hfi1_schedule_tid_send(qp);
+			tid_rdma_trigger_ack(qp);
 			break;
 		}
 
@@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
 	trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
 					  req);
 	trace_hfi1_tid_write_rsp_rcv_data(qp);
-	if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-		priv->r_tid_ack = priv->r_tid_tail;
+	validate_r_tid_ack(priv);
 
 	if (opcode == TID_OP(WRITE_DATA_LAST)) {
 		release_rdma_sge_mr(e);
@@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
 	}
 
 done:
-	priv->s_flags |= RVT_S_ACK_PENDING;
-	hfi1_schedule_tid_send(qp);
+	tid_rdma_schedule_ack(qp);
 exit:
 	priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
 	if (fecn)
@@ -4388,10 +4399,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
 	if (!priv->s_nak_state) {
 		priv->s_nak_state = IB_NAK_PSN_ERROR;
 		priv->s_nak_psn = flow->flow_state.r_next_psn;
-		priv->s_flags |= RVT_S_ACK_PENDING;
-		if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-			priv->r_tid_ack = priv->r_tid_tail;
-		hfi1_schedule_tid_send(qp);
+		tid_rdma_trigger_ack(qp);
 	}
 	goto done;
 }
@@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
 	qpriv->resync = true;
 	/* RESYNC request always gets a TID RDMA ACK. */
 	qpriv->s_nak_state = 0;
-	qpriv->s_flags |= RVT_S_ACK_PENDING;
-	hfi1_schedule_tid_send(qp);
+	tid_rdma_trigger_ack(qp);
 bail:
 	if (fecn)
 		qp->s_flags |= RVT_S_ECN;
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,7 @@
 #define TID_RDMA_MIN_SEGMENT_SIZE	BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_SEGMENT_SIZE	BIT(18)   /* 256 KiB (for now) */
 #define TID_RDMA_MAX_PAGES		(BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT		18
 
 /*
  * Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
 				  struct ib_other_headers *ohdr,
 				  u32 *bth1, u32 *bth2, u32 *len);
 
-void hfi1_compute_tid_rdma_flow_wt(void);
-
 void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
 
 u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -59,7 +59,7 @@ enum {
 
 #define HNS_ROCE_HEM_CHUNK_LEN	\
 	 ((256 - sizeof(struct list_head) - 2 * sizeof(int)) /	 \
-	 (sizeof(struct scatterlist)))
+	 (sizeof(struct scatterlist) + sizeof(void *)))
 
 #define check_whether_bt_num_3(type, hop_num) \
 	(type < HEM_TYPE_MTT && hop_num == 2)
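Note on the HNS_ROCE_HEM_CHUNK_LEN hunk: a HEM chunk stores, per entry, both a struct scatterlist and a kernel virtual address, so dividing the 256-byte budget by sizeof(struct scatterlist) alone over-counted how many entries fit and let the chunk outgrow that budget. A standalone sketch of the arithmetic, with simplified stand-ins for the kernel structs (the real struct scatterlist varies with config options):

#include <stdio.h>

struct list_head { void *prev, *next; };
struct scatterlist {
	unsigned long page_link;
	unsigned int offset, length;
	unsigned long long dma_address;
};

#define OLD_LEN ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
		 (sizeof(struct scatterlist)))
#define NEW_LEN ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
		 (sizeof(struct scatterlist) + sizeof(void *)))

int main(void)
{
	/* Chunk size = header + per-entry (scatterlist + void *) for each
	 * slot.  With the old divisor the total can exceed 256 bytes. */
	size_t hdr = sizeof(struct list_head) + 2 * sizeof(int);
	size_t per = sizeof(struct scatterlist) + sizeof(void *);

	printf("old len %zu -> total %zu bytes\n", OLD_LEN, hdr + OLD_LEN * per);
	printf("new len %zu -> total %zu bytes\n", NEW_LEN, hdr + NEW_LEN * per);
	return 0;
}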
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
 	srq->max_gs = srq_init_attr->attr.max_sge;
 
-	srq_desc_size = max(16, 16 * srq->max_gs);
+	srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
 
 	srq->wqe_shift = ilog2(srq_desc_size);
 
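Note on the srq_desc_size hunk: srq->wqe_shift is derived with ilog2(), which rounds down, so a non-power-of-two descriptor size (e.g. max_gs = 3 gives 48 bytes) yielded a 32-byte WQE stride, smaller than the descriptor it must hold; rounding the size up to a power of two first makes the shift exact. A userspace sketch, with ilog2()/roundup_pow_of_two() reimplemented for illustration:

#include <stdio.h>

static unsigned int ilog2_u(unsigned int v)	/* floor(log2(v)) */
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static unsigned int roundup_pow_of_two_u(unsigned int v)
{
	unsigned int r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int desc = 48;	/* max(16, 16 * max_gs) with max_gs = 3 */

	unsigned int old_stride = 1u << ilog2_u(desc);			/* 32 */
	unsigned int new_stride = 1u << ilog2_u(roundup_pow_of_two_u(desc)); /* 64 */

	/* Old stride (32) < descriptor (48): WQEs overflowed their slots. */
	printf("desc %u: old stride %u, new stride %u\n",
	       desc, old_stride, new_stride);
	return 0;
}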