Second -rc update for 4.14 kernel

- a few core fixes
 - a few ipoib fixes
 - a few mlx5 fixes
 - a 7 patch hfi1 related series
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJZy8HKAAoJELgmozMOVy/d/3YP/RtJ4I+7dlHAdTrUsLkNIXzj
 6e2sc5A7JQRvhbWa6ZfqkbD4DBz2gkz9bXmlYotP1nVfunBie9xQPi+nN39YNnTv
 VPYa0G7RD53APw71ETCGh0uBBAjc8lGm0AOPj+HpSP7PvrLdH6B68IcAeXCSOf8D
 orzXI0bRpRnLsW4IJ0zN09zShigYuCJVl0Wf59QB0Wrbw4veQD4W7bLSCAUTmuZk
 TPb8bPlXY64Bf731HRftxIRl3HwUrpTPv5DuHcASAbVL/KeucWpPmOAj9XqhXTQp
 tnqtiwBWYDcsLBwS/IS40B2gfN1BCh6hn03pSVbPj+HD/FLY7x8Gf/Lu0qQNmklz
 9nvgMKHL/2h+T4M7DulhS7DTP58bvtkyKG+j77gjEmKX1OI0NXHOntKZDSjGAT2J
 zw2dNx4Y/Sgng1HBCbHAAHMrFUdyj7XpQNR8mzdGvDcwtRfrDKmchGtvhVclPsbl
 R3U9GN2NcAwg2+bIN96hTzUMB10QOZdvddGFvbxuB7FaWkskPaN52O1ptT3+MyWt
 xccZp0iYu40zV80mEm+nF/kZwR8omfE6xM1ujQdIhMHstGe+z29BhqsaQ8Zw1qEG
 oaU7+9m2aK57SvcSimR2S4kdK7Gxw9+BIVKdRREJwe9xvWVf96OvJnhnh5t5Fs56
 BTN1mBn+7LxlK9eDVler
 =HbhA
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Second -rc update for 4.14.

  Both Mellanox and Intel had a series of -rc fixes that landed this
  week. The Mellanox bunch is spread throughout the stack and not just
  in their driver, whereas the Intel bunch was mostly in the hfi1
  driver. And, several of the fixes in the hfi1 driver were more than
  just simple 5-line fixes. As a result, the hfi1 driver fixes have a
  sizable LOC count.

  Everything else is as one would expect in an RC cycle in terms of LOC
  count. One item that might jump out and make you think "That's not an
  rc item" is the fix that corrects a typo. But, that change fixes a
  typo in a user visible API that was just added in this merge window,
  so if we fix it now, we can still fix it. If we don't, the typo is in the
  API forever. Another that might not appear to be a fix at first glance
  is the Simplify mlx5_ib_cont_pages patch, but the simplification
  allows them to fix a bug in the existing function whenever the length
  of an SGE exceeded page size. We also had to revert one patch from the
  merge window that was wrong.

  Summary:

   - a few core fixes
   - a few ipoib fixes
   - a few mlx5 fixes
   - a 7-patch hfi1 related series"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/hfi1: Unsuccessful PCIe caps tuning should not fail driver load
  IB/hfi1: On error, fix use after free during user context setup
  Revert "IB/ipoib: Update broadcast object if PKey value was changed in index 0"
  IB/hfi1: Return correct value in general interrupt handler
  IB/hfi1: Check eeprom config partition validity
  IB/hfi1: Only reset QSFP after link up and turn off AOC TX
  IB/hfi1: Turn off AOC TX after offline substates
  IB/mlx5: Fix NULL deference on mlx5_ib_update_xlt failure
  IB/mlx5: Simplify mlx5_ib_cont_pages
  IB/ipoib: Fix inconsistency with free_netdev and free_rdma_netdev
  IB/ipoib: Fix sysfs Pkey create<->remove possible deadlock
  IB: Correct MR length field to be 64-bit
  IB/core: Fix qp_sec use after free access
  IB/core: Fix typo in the name of the tag-matching cap struct
This commit is contained in:
Linus Torvalds 2017-09-28 12:12:51 -07:00
commit 9173583226
19 changed files with 231 additions and 164 deletions

View File

@ -432,8 +432,10 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
atomic_set(&qp->qp_sec->error_list_count, 0); atomic_set(&qp->qp_sec->error_list_count, 0);
init_completion(&qp->qp_sec->error_complete); init_completion(&qp->qp_sec->error_complete);
ret = security_ib_alloc_security(&qp->qp_sec->security); ret = security_ib_alloc_security(&qp->qp_sec->security);
if (ret) if (ret) {
kfree(qp->qp_sec); kfree(qp->qp_sec);
qp->qp_sec = NULL;
}
return ret; return ret;
} }

View File

@ -3869,15 +3869,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.raw_packet_caps = attr.raw_packet_caps; resp.raw_packet_caps = attr.raw_packet_caps;
resp.response_length += sizeof(resp.raw_packet_caps); resp.response_length += sizeof(resp.raw_packet_caps);
if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps)) if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
goto end; goto end;
resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size; resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags; resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
resp.xrq_caps.max_ops = attr.xrq_caps.max_ops; resp.tm_caps.max_ops = attr.tm_caps.max_ops;
resp.xrq_caps.max_sge = attr.xrq_caps.max_sge; resp.tm_caps.max_sge = attr.tm_caps.max_sge;
resp.xrq_caps.flags = attr.xrq_caps.flags; resp.tm_caps.flags = attr.tm_caps.flags;
resp.response_length += sizeof(resp.xrq_caps); resp.response_length += sizeof(resp.tm_caps);
end: end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length); err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err; return err;

View File

@ -1066,6 +1066,8 @@ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
static int thermal_init(struct hfi1_devdata *dd); static int thermal_init(struct hfi1_devdata *dd);
static void update_statusp(struct hfi1_pportdata *ppd, u32 state); static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
int msecs);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs); int msecs);
static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
@ -8238,6 +8240,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
u64 regs[CCE_NUM_INT_CSRS]; u64 regs[CCE_NUM_INT_CSRS];
u32 bit; u32 bit;
int i; int i;
irqreturn_t handled = IRQ_NONE;
this_cpu_inc(*dd->int_counter); this_cpu_inc(*dd->int_counter);
@ -8258,9 +8261,10 @@ static irqreturn_t general_interrupt(int irq, void *data)
for_each_set_bit(bit, (unsigned long *)&regs[0], for_each_set_bit(bit, (unsigned long *)&regs[0],
CCE_NUM_INT_CSRS * 64) { CCE_NUM_INT_CSRS * 64) {
is_interrupt(dd, bit); is_interrupt(dd, bit);
handled = IRQ_HANDLED;
} }
return IRQ_HANDLED; return handled;
} }
static irqreturn_t sdma_interrupt(int irq, void *data) static irqreturn_t sdma_interrupt(int irq, void *data)
@ -9413,7 +9417,7 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask); write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
} }
void reset_qsfp(struct hfi1_pportdata *ppd) int reset_qsfp(struct hfi1_pportdata *ppd)
{ {
struct hfi1_devdata *dd = ppd->dd; struct hfi1_devdata *dd = ppd->dd;
u64 mask, qsfp_mask; u64 mask, qsfp_mask;
@ -9443,6 +9447,13 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
* for alarms and warnings * for alarms and warnings
*/ */
set_qsfp_int_n(ppd, 1); set_qsfp_int_n(ppd, 1);
/*
* After the reset, AOC transmitters are enabled by default. They need
* to be turned off to complete the QSFP setup before they can be
* enabled again.
*/
return set_qsfp_tx(ppd, 0);
} }
static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@ -10305,6 +10316,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{ {
struct hfi1_devdata *dd = ppd->dd; struct hfi1_devdata *dd = ppd->dd;
u32 previous_state; u32 previous_state;
int offline_state_ret;
int ret; int ret;
update_lcb_cache(dd); update_lcb_cache(dd);
@ -10326,28 +10338,11 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
ppd->offline_disabled_reason = ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
/* offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
* Wait for offline transition. It can take a while for if (offline_state_ret < 0)
* the link to go down. return offline_state_ret;
*/
ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
if (ret < 0)
return ret;
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
/* Disabling AOC transmitters */
if (ppd->port_type == PORT_TYPE_QSFP && if (ppd->port_type == PORT_TYPE_QSFP &&
ppd->qsfp_info.limiting_active && ppd->qsfp_info.limiting_active &&
qsfp_mod_present(ppd)) { qsfp_mod_present(ppd)) {
@ -10364,6 +10359,30 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
} }
} }
/*
* Wait for the offline.Quiet transition if it hasn't happened yet. It
* can take a while for the link to go down.
*/
if (offline_state_ret != PLS_OFFLINE_QUIET) {
ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
if (ret < 0)
return ret;
}
/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);
ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
/* /*
* The LNI has a mandatory wait time after the physical state * The LNI has a mandatory wait time after the physical state
* moves to Offline.Quiet. The wait time may be different * moves to Offline.Quiet. The wait time may be different
@ -10396,6 +10415,9 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
/* went down while attempting link up */ /* went down while attempting link up */
check_lni_states(ppd); check_lni_states(ppd);
/* The QSFP doesn't need to be reset on LNI failure */
ppd->qsfp_info.reset_needed = 0;
} }
/* the active link width (downgrade) is 0 on link down */ /* the active link width (downgrade) is 0 on link down */
@ -12804,6 +12826,39 @@ static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
return 0; return 0;
} }
/*
 * wait_phys_link_offline_substates - wait for any offline substate
 * @ppd: port device
 * @msecs: the number of milliseconds to wait
 *
 * Poll the physical link state until it reports any offline substate,
 * or until roughly msecs milliseconds have elapsed. The state is always
 * read at least once, because the timeout is only checked after a read.
 *
 * Returns the physical state that was read (an offline substate) on
 * success, otherwise -ETIMEDOUT. Callers should test for "< 0" rather
 * than "!= 0", since the success value is the state itself.
 */
static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
int msecs)
{
u32 read_state;
unsigned long timeout;
timeout = jiffies + msecs_to_jiffies(msecs);
while (1) {
read_state = read_physical_state(ppd->dd);
/*
 * Compare only the upper nibble, so every substate that shares
 * PLS_OFFLINE's upper nibble ends the wait.
 */
if ((read_state & 0xF0) == PLS_OFFLINE)
break;
if (time_after(jiffies, timeout)) {
dd_dev_err(ppd->dd,
"timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
read_state, msecs);
return -ETIMEDOUT;
}
usleep_range(1950, 2050); /* sleep 2ms-ish */
}
/* Record the substate that was actually observed before returning it */
log_state_transition(ppd, read_state);
return read_state;
}
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)

View File

@ -204,6 +204,7 @@
#define PLS_OFFLINE_READY_TO_QUIET_LT 0x92 #define PLS_OFFLINE_READY_TO_QUIET_LT 0x92
#define PLS_OFFLINE_REPORT_FAILURE 0x93 #define PLS_OFFLINE_REPORT_FAILURE 0x93
#define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94 #define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94
#define PLS_OFFLINE_QUIET_DURATION 0x95
#define PLS_POLLING 0x20 #define PLS_POLLING 0x20
#define PLS_POLLING_QUIET 0x20 #define PLS_POLLING_QUIET 0x20
#define PLS_POLLING_ACTIVE 0x21 #define PLS_POLLING_ACTIVE 0x21
@ -722,7 +723,7 @@ void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work); void handle_link_bounce(struct work_struct *work);
void handle_start_link(struct work_struct *work); void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work); void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd); int reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work); void qsfp_event(struct work_struct *work);
void start_freeze_handling(struct hfi1_pportdata *ppd, int flags); void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
int send_idle_sma(struct hfi1_devdata *dd, u64 message); int send_idle_sma(struct hfi1_devdata *dd, u64 message);

View File

@ -204,7 +204,10 @@ int eprom_init(struct hfi1_devdata *dd)
return ret; return ret;
} }
/* magic character sequence that trails an image */ /* magic character sequence that begins an image */
#define IMAGE_START_MAGIC "APO="
/* magic character sequence that might trail an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO" #define IMAGE_TRAIL_MAGIC "egamiAPO"
/* EPROM file types */ /* EPROM file types */
@ -250,6 +253,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
{ {
void *buffer; void *buffer;
void *p; void *p;
u32 length;
int ret; int ret;
buffer = kmalloc(P1_SIZE, GFP_KERNEL); buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@ -262,15 +266,21 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
return ret; return ret;
} }
/* scan for image magic that may trail the actual data */ /* config partition is valid only if it starts with IMAGE_START_MAGIC */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
if (!p) {
kfree(buffer); kfree(buffer);
return -ENOENT; return -ENOENT;
} }
/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (p)
length = p - buffer;
else
length = P1_SIZE;
*data = buffer; *data = buffer;
*size = p - buffer; *size = length;
return 0; return 0;
} }

View File

@ -930,15 +930,8 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
switch (ret) { switch (ret) {
case 0: case 0:
ret = setup_base_ctxt(fd, uctxt); ret = setup_base_ctxt(fd, uctxt);
if (uctxt->subctxt_cnt) { if (ret)
/* deallocate_ctxt(uctxt);
* Base context is done (successfully or not), notify
* anybody using a sub-context that is waiting for
* this completion.
*/
clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
break; break;
case 1: case 1:
ret = complete_subctxt(fd); ret = complete_subctxt(fd);
@ -1305,25 +1298,25 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
/* Now allocate the RcvHdr queue and eager buffers. */ /* Now allocate the RcvHdr queue and eager buffers. */
ret = hfi1_create_rcvhdrq(dd, uctxt); ret = hfi1_create_rcvhdrq(dd, uctxt);
if (ret) if (ret)
return ret; goto done;
ret = hfi1_setup_eagerbufs(uctxt); ret = hfi1_setup_eagerbufs(uctxt);
if (ret) if (ret)
goto setup_failed; goto done;
/* If sub-contexts are enabled, do the appropriate setup */ /* If sub-contexts are enabled, do the appropriate setup */
if (uctxt->subctxt_cnt) if (uctxt->subctxt_cnt)
ret = setup_subctxt(uctxt); ret = setup_subctxt(uctxt);
if (ret) if (ret)
goto setup_failed; goto done;
ret = hfi1_alloc_ctxt_rcv_groups(uctxt); ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
if (ret) if (ret)
goto setup_failed; goto done;
ret = init_user_ctxt(fd, uctxt); ret = init_user_ctxt(fd, uctxt);
if (ret) if (ret)
goto setup_failed; goto done;
user_init(uctxt); user_init(uctxt);
@ -1331,12 +1324,22 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
fd->uctxt = uctxt; fd->uctxt = uctxt;
hfi1_rcd_get(uctxt); hfi1_rcd_get(uctxt);
return 0; done:
if (uctxt->subctxt_cnt) {
/*
* On error, set the failed bit so sub-contexts will clean up
* correctly.
*/
if (ret)
set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
setup_failed: /*
/* Set the failed bit so sub-context init can do the right thing */ * Base context is done (successfully or not), notify anybody
set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); * using a sub-context that is waiting for this completion.
deallocate_ctxt(uctxt); */
clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
return ret; return ret;
} }

View File

@ -68,7 +68,7 @@
/* /*
* Code to adjust PCIe capabilities. * Code to adjust PCIe capabilities.
*/ */
static int tune_pcie_caps(struct hfi1_devdata *); static void tune_pcie_caps(struct hfi1_devdata *);
/* /*
* Do all the common PCIe setup and initialization. * Do all the common PCIe setup and initialization.
@ -351,7 +351,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
*/ */
int request_msix(struct hfi1_devdata *dd, u32 msireq) int request_msix(struct hfi1_devdata *dd, u32 msireq)
{ {
int nvec, ret; int nvec;
nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
PCI_IRQ_MSIX | PCI_IRQ_LEGACY); PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
@ -360,12 +360,7 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
return nvec; return nvec;
} }
ret = tune_pcie_caps(dd); tune_pcie_caps(dd);
if (ret) {
dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
pci_free_irq_vectors(dd->pcidev);
return ret;
}
/* check for legacy IRQ */ /* check for legacy IRQ */
if (nvec == 1 && !dd->pcidev->msix_enabled) if (nvec == 1 && !dd->pcidev->msix_enabled)
@ -502,7 +497,7 @@ uint aspm_mode = ASPM_MODE_DISABLED;
module_param_named(aspm, aspm_mode, uint, S_IRUGO); module_param_named(aspm, aspm_mode, uint, S_IRUGO);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
static int tune_pcie_caps(struct hfi1_devdata *dd) static void tune_pcie_caps(struct hfi1_devdata *dd)
{ {
struct pci_dev *parent; struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps; u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@ -513,22 +508,14 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
* Turn on extended tags in DevCtl in case the BIOS has turned it off * Turn on extended tags in DevCtl in case the BIOS has turned it off
* to improve WFR SDMA bandwidth * to improve WFR SDMA bandwidth
*/ */
ret = pcie_capability_read_word(dd->pcidev, ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
PCI_EXP_DEVCTL, &ectl); if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
if (ret) {
dd_dev_err(dd, "Unable to read from PCI config\n");
return ret;
}
if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
dd_dev_info(dd, "Enabling PCIe extended tags\n"); dd_dev_info(dd, "Enabling PCIe extended tags\n");
ectl |= PCI_EXP_DEVCTL_EXT_TAG; ectl |= PCI_EXP_DEVCTL_EXT_TAG;
ret = pcie_capability_write_word(dd->pcidev, ret = pcie_capability_write_word(dd->pcidev,
PCI_EXP_DEVCTL, ectl); PCI_EXP_DEVCTL, ectl);
if (ret) { if (ret)
dd_dev_err(dd, "Unable to write to PCI config\n"); dd_dev_info(dd, "Unable to write to PCI config\n");
return ret;
}
} }
/* Find out supported and configured values for parent (root) */ /* Find out supported and configured values for parent (root) */
parent = dd->pcidev->bus->self; parent = dd->pcidev->bus->self;
@ -536,15 +523,22 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
* The driver cannot perform the tuning if it does not have * The driver cannot perform the tuning if it does not have
* access to the upstream component. * access to the upstream component.
*/ */
if (!parent) if (!parent) {
return -EINVAL; dd_dev_info(dd, "Parent not found\n");
return;
}
if (!pci_is_root_bus(parent->bus)) { if (!pci_is_root_bus(parent->bus)) {
dd_dev_info(dd, "Parent not root\n"); dd_dev_info(dd, "Parent not root\n");
return -EINVAL; return;
}
if (!pci_is_pcie(parent)) {
dd_dev_info(dd, "Parent is not PCI Express capable\n");
return;
}
if (!pci_is_pcie(dd->pcidev)) {
dd_dev_info(dd, "PCI device is not PCI Express capable\n");
return;
} }
if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
return -EINVAL;
rc_mpss = parent->pcie_mpss; rc_mpss = parent->pcie_mpss;
rc_mps = ffs(pcie_get_mps(parent)) - 8; rc_mps = ffs(pcie_get_mps(parent)) - 8;
/* Find out supported and configured values for endpoint (us) */ /* Find out supported and configured values for endpoint (us) */
@ -590,8 +584,6 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
ep_mrrs = max_mrrs; ep_mrrs = max_mrrs;
pcie_set_readrq(dd->pcidev, ep_mrrs); pcie_set_readrq(dd->pcidev, ep_mrrs);
} }
return 0;
} }
/* End of PCIe capability tuning */ /* End of PCIe capability tuning */

View File

@ -790,7 +790,9 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
* reuse of stale settings established in our previous pass through. * reuse of stale settings established in our previous pass through.
*/ */
if (ppd->qsfp_info.reset_needed) { if (ppd->qsfp_info.reset_needed) {
reset_qsfp(ppd); ret = reset_qsfp(ppd);
if (ret)
return ret;
refresh_qsfp_cache(ppd, &ppd->qsfp_info); refresh_qsfp_cache(ppd, &ppd->qsfp_info);
} else { } else {
ppd->qsfp_info.reset_needed = 1; ppd->qsfp_info.reset_needed = 1;

View File

@ -778,13 +778,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
} }
if (MLX5_CAP_GEN(mdev, tag_matching)) { if (MLX5_CAP_GEN(mdev, tag_matching)) {
props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
props->xrq_caps.max_num_tags = props->tm_caps.max_num_tags =
(1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1; (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
props->xrq_caps.flags = IB_TM_CAP_RC; props->tm_caps.flags = IB_TM_CAP_RC;
props->xrq_caps.max_ops = props->tm_caps.max_ops =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
props->xrq_caps.max_sge = MLX5_TM_MAX_SGE; props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
} }
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {

View File

@ -50,13 +50,9 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
{ {
unsigned long tmp; unsigned long tmp;
unsigned long m; unsigned long m;
int i, k; u64 base = ~0, p = 0;
u64 base = 0; u64 len, pfn;
int p = 0; int i = 0;
int skip;
int mask;
u64 len;
u64 pfn;
struct scatterlist *sg; struct scatterlist *sg;
int entry; int entry;
unsigned long page_shift = umem->page_shift; unsigned long page_shift = umem->page_shift;
@ -76,33 +72,24 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
m = find_first_bit(&tmp, BITS_PER_LONG); m = find_first_bit(&tmp, BITS_PER_LONG);
if (max_page_shift) if (max_page_shift)
m = min_t(unsigned long, max_page_shift - page_shift, m); m = min_t(unsigned long, max_page_shift - page_shift, m);
skip = 1 << m;
mask = skip - 1;
i = 0;
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> page_shift; len = sg_dma_len(sg) >> page_shift;
pfn = sg_dma_address(sg) >> page_shift; pfn = sg_dma_address(sg) >> page_shift;
for (k = 0; k < len; k++) { if (base + p != pfn) {
if (!(i & mask)) { /* If either the offset or the new
tmp = (unsigned long)pfn; * base are unaligned update m
m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG)); */
skip = 1 << m; tmp = (unsigned long)(pfn | p);
mask = skip - 1; if (!IS_ALIGNED(tmp, 1 << m))
base = pfn; m = find_first_bit(&tmp, BITS_PER_LONG);
p = 0;
} else { base = pfn;
if (base + p != pfn) { p = 0;
tmp = (unsigned long)p;
m = find_first_bit(&tmp, BITS_PER_LONG);
skip = 1 << m;
mask = skip - 1;
base = pfn;
p = 0;
}
}
p++;
i++;
} }
p += len;
i += len;
} }
if (i) { if (i) {

View File

@ -47,7 +47,8 @@ enum {
#define MLX5_UMR_ALIGN 2048 #define MLX5_UMR_ALIGN 2048
static int clean_mr(struct mlx5_ib_mr *mr); static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev); static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
@ -1270,8 +1271,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
update_xlt_flags); update_xlt_flags);
if (err) { if (err) {
mlx5_ib_dereg_mr(&mr->ibmr); dereg_mr(dev, mr);
return ERR_PTR(err); return ERR_PTR(err);
} }
} }
@ -1356,7 +1358,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order); &npages, &page_shift, &ncont, &order);
if (err < 0) { if (err < 0) {
clean_mr(mr); clean_mr(dev, mr);
return err; return err;
} }
} }
@ -1410,7 +1412,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (err) { if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n"); mlx5_ib_warn(dev, "Failed to rereg UMR\n");
ib_umem_release(mr->umem); ib_umem_release(mr->umem);
clean_mr(mr); clean_mr(dev, mr);
return err; return err;
} }
} }
@ -1469,9 +1471,8 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
} }
} }
static int clean_mr(struct mlx5_ib_mr *mr) static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{ {
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
int allocated_from_cache = mr->allocated_from_cache; int allocated_from_cache = mr->allocated_from_cache;
int err; int err;
@ -1507,10 +1508,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
return 0; return 0;
} }
int mlx5_ib_dereg_mr(struct ib_mr *ibmr) static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{ {
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
int npages = mr->npages; int npages = mr->npages;
struct ib_umem *umem = mr->umem; struct ib_umem *umem = mr->umem;
@ -1539,7 +1538,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
} }
#endif #endif
clean_mr(mr); clean_mr(dev, mr);
if (umem) { if (umem) {
ib_umem_release(umem); ib_umem_release(umem);
@ -1549,6 +1548,14 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
return 0; return 0;
} }
/*
 * Non-static entry point taking a generic ib_mr: derive the mlx5 device
 * (from ibmr->device) and the mlx5 MR (from ibmr), then delegate to
 * dereg_mr() and return its result unchanged.
 */
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
struct mlx5_ib_mr *mr = to_mmr(ibmr);
return dereg_mr(dev, mr);
}
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type, enum ib_mr_type mr_type,
u32 max_num_sg) u32 max_num_sg)

View File

@ -3232,7 +3232,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
mr->ibmr.iova); mr->ibmr.iova);
set_wqe_32bit_value(wqe->wqe_words, set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
mr->ibmr.length); lower_32_bits(mr->ibmr.length));
set_wqe_32bit_value(wqe->wqe_words, set_wqe_32bit_value(wqe->wqe_words,
NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
set_wqe_32bit_value(wqe->wqe_words, set_wqe_32bit_value(wqe->wqe_words,
@ -3274,7 +3274,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
mr->npages * 8); mr->npages * 8);
nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, " nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
"length: %d, rkey: %0x, pgl_paddr: %llx, " "length: %lld, rkey: %0x, pgl_paddr: %llx, "
"page_list_len: %u, wqe_misc: %x\n", "page_list_len: %u, wqe_misc: %x\n",
(unsigned long long) mr->ibmr.iova, (unsigned long long) mr->ibmr.iova,
mr->ibmr.length, mr->ibmr.length,

View File

@ -1000,19 +1000,6 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
*/ */
priv->dev->broadcast[8] = priv->pkey >> 8; priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff; priv->dev->broadcast[9] = priv->pkey & 0xff;
/*
* Update the broadcast address in the priv->broadcast object,
* in case it already exists, otherwise no one will do that.
*/
if (priv->broadcast) {
spin_lock_irq(&priv->lock);
memcpy(priv->broadcast->mcmember.mgid.raw,
priv->dev->broadcast + 4,
sizeof(union ib_gid));
spin_unlock_irq(&priv->lock);
}
return 0; return 0;
} }

View File

@ -2180,6 +2180,7 @@ static struct net_device *ipoib_add_port(const char *format,
{ {
struct ipoib_dev_priv *priv; struct ipoib_dev_priv *priv;
struct ib_port_attr attr; struct ib_port_attr attr;
struct rdma_netdev *rn;
int result = -ENOMEM; int result = -ENOMEM;
priv = ipoib_intf_alloc(hca, port, format); priv = ipoib_intf_alloc(hca, port, format);
@ -2279,7 +2280,8 @@ static struct net_device *ipoib_add_port(const char *format,
ipoib_dev_cleanup(priv->dev); ipoib_dev_cleanup(priv->dev);
device_init_failed: device_init_failed:
free_netdev(priv->dev); rn = netdev_priv(priv->dev);
rn->free_rdma_netdev(priv->dev);
kfree(priv); kfree(priv);
alloc_mem_failed: alloc_mem_failed:
@ -2328,7 +2330,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
return; return;
list_for_each_entry_safe(priv, tmp, dev_list, list) { list_for_each_entry_safe(priv, tmp, dev_list, list) {
struct rdma_netdev *rn = netdev_priv(priv->dev); struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
ib_unregister_event_handler(&priv->event_handler); ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue); flush_workqueue(ipoib_workqueue);
@ -2350,10 +2352,15 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
unregister_netdev(priv->dev); unregister_netdev(priv->dev);
mutex_unlock(&priv->sysfs_mutex); mutex_unlock(&priv->sysfs_mutex);
rn->free_rdma_netdev(priv->dev); parent_rn->free_rdma_netdev(priv->dev);
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
struct rdma_netdev *child_rn;
child_rn = netdev_priv(cpriv->dev);
child_rn->free_rdma_netdev(cpriv->dev);
kfree(cpriv); kfree(cpriv);
}
kfree(priv); kfree(priv);
} }

View File

@ -141,14 +141,17 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
return restart_syscall(); return restart_syscall();
} }
priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (!down_write_trylock(&ppriv->vlan_rwsem)) {
if (!priv) {
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex); mutex_unlock(&ppriv->sysfs_mutex);
return -ENOMEM; return restart_syscall();
} }
down_write(&ppriv->vlan_rwsem); priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv) {
result = -ENOMEM;
goto out;
}
/* /*
* First ensure this isn't a duplicate. We check the parent device and * First ensure this isn't a duplicate. We check the parent device and
@ -175,8 +178,11 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
rtnl_unlock(); rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex); mutex_unlock(&ppriv->sysfs_mutex);
if (result) { if (result && priv) {
free_netdev(priv->dev); struct rdma_netdev *rn;
rn = netdev_priv(priv->dev);
rn->free_rdma_netdev(priv->dev);
kfree(priv); kfree(priv);
} }
@ -204,7 +210,12 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
return restart_syscall(); return restart_syscall();
} }
down_write(&ppriv->vlan_rwsem); if (!down_write_trylock(&ppriv->vlan_rwsem)) {
rtnl_unlock();
mutex_unlock(&ppriv->sysfs_mutex);
return restart_syscall();
}
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey && if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) { priv->child_type == IPOIB_LEGACY_CHILD) {
@ -224,7 +235,10 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
mutex_unlock(&ppriv->sysfs_mutex); mutex_unlock(&ppriv->sysfs_mutex);
if (dev) { if (dev) {
free_netdev(dev); struct rdma_netdev *rn;
rn = netdev_priv(dev);
rn->free_rdma_netdev(priv->dev);
kfree(priv); kfree(priv);
return 0; return 0;
} }

View File

@ -154,7 +154,7 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{ {
int i; int i;
iser_err("page vec npages %d data length %d\n", iser_err("page vec npages %d data length %lld\n",
page_vec->npages, page_vec->fake_mr.length); page_vec->npages, page_vec->fake_mr.length);
for (i = 0; i < page_vec->npages; i++) for (i = 0; i < page_vec->npages; i++)
iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]); iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);

View File

@ -285,7 +285,7 @@ enum ib_tm_cap_flags {
IB_TM_CAP_RC = 1 << 0, IB_TM_CAP_RC = 1 << 0,
}; };
struct ib_xrq_caps { struct ib_tm_caps {
/* Max size of RNDV header */ /* Max size of RNDV header */
u32 max_rndv_hdr_size; u32 max_rndv_hdr_size;
/* Max number of entries in tag matching list */ /* Max number of entries in tag matching list */
@ -358,7 +358,7 @@ struct ib_device_attr {
struct ib_rss_caps rss_caps; struct ib_rss_caps rss_caps;
u32 max_wq_type_rq; u32 max_wq_type_rq;
u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */
struct ib_xrq_caps xrq_caps; struct ib_tm_caps tm_caps;
}; };
enum ib_mtu { enum ib_mtu {
@ -1739,7 +1739,7 @@ struct ib_mr {
u32 lkey; u32 lkey;
u32 rkey; u32 rkey;
u64 iova; u64 iova;
u32 length; u64 length;
unsigned int page_size; unsigned int page_size;
bool need_inval; bool need_inval;
union { union {

View File

@ -261,7 +261,7 @@ struct ib_uverbs_ex_query_device_resp {
struct ib_uverbs_rss_caps rss_caps; struct ib_uverbs_rss_caps rss_caps;
__u32 max_wq_type_rq; __u32 max_wq_type_rq;
__u32 raw_packet_caps; __u32 raw_packet_caps;
struct ib_uverbs_tm_caps xrq_caps; struct ib_uverbs_tm_caps tm_caps;
}; };
struct ib_uverbs_query_port { struct ib_uverbs_query_port {

View File

@ -401,7 +401,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (unlikely(n != mw->mw_nents)) if (unlikely(n != mw->mw_nents))
goto out_mapmr_err; goto out_mapmr_err;
dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
__func__, frmr, mw->mw_nents, mr->length); __func__, frmr, mw->mw_nents, mr->length);
key = (u8)(mr->rkey & 0x000000FF); key = (u8)(mr->rkey & 0x000000FF);