2018-03-23 01:08:48 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
2018-04-26 23:08:09 +08:00
|
|
|
/* Copyright(c) 2013 - 2018 Intel Corporation. */
|
2014-09-21 07:46:05 +08:00
|
|
|
|
|
|
|
#include <linux/module.h>
|
2017-03-29 23:17:31 +08:00
|
|
|
#include <linux/interrupt.h>
|
2014-09-21 07:50:27 +08:00
|
|
|
#include <linux/aer.h>
|
2014-09-21 07:46:05 +08:00
|
|
|
|
|
|
|
#include "fm10k.h"
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
static const struct fm10k_info *fm10k_info_tbl[] = {
|
|
|
|
[fm10k_device_pf] = &fm10k_pf_info,
|
2014-09-21 07:51:40 +08:00
|
|
|
[fm10k_device_vf] = &fm10k_vf_info,
|
2014-09-21 07:48:10 +08:00
|
|
|
};
|
|
|
|
|
2018-01-17 03:20:52 +08:00
|
|
|
/*
|
2014-09-21 07:46:05 +08:00
|
|
|
* fm10k_pci_tbl - PCI Device ID Table
|
|
|
|
*
|
|
|
|
* Wildcard entries (PCI_ANY_ID) should come last
|
|
|
|
* Last entry must be all 0s
|
|
|
|
*
|
|
|
|
* { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
|
|
|
|
* Class, Class Mask, private data (not used) }
|
|
|
|
*/
|
|
|
|
static const struct pci_device_id fm10k_pci_tbl[] = {
|
2014-09-21 07:48:10 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_PF), fm10k_device_pf },
|
2018-10-16 03:18:29 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2), fm10k_device_pf },
|
|
|
|
{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_DA2), fm10k_device_pf },
|
2014-09-21 07:51:40 +08:00
|
|
|
{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_VF), fm10k_device_vf },
|
2014-09-21 07:46:05 +08:00
|
|
|
/* required last entry */
|
|
|
|
{ 0, }
|
|
|
|
};
|
|
|
|
MODULE_DEVICE_TABLE(pci, fm10k_pci_tbl);
|
|
|
|
|
2014-09-21 07:46:45 +08:00
|
|
|
/**
 * fm10k_read_pci_cfg_word - Read a 16-bit word from PCI configuration space
 * @hw: pointer to hardware structure
 * @reg: configuration space offset to read
 *
 * Returns 0xFFFF without accessing the device when FM10K_REMOVED() flags
 * hw->hw_addr. Otherwise returns the word obtained from
 * pci_read_config_word().
 */
u16 fm10k_read_pci_cfg_word(struct fm10k_hw *hw, u32 reg)
{
	struct fm10k_intfc *intfc = hw->back;
	u16 word = 0;

	/* the device is already marked as gone; report all ones */
	if (FM10K_REMOVED(hw->hw_addr))
		return ~word;

	pci_read_config_word(intfc->pdev, reg, &word);

	/* An all-ones result triggers a write flush.
	 * NOTE(review): presumably this lets the register access path notice
	 * a lost link - confirm against fm10k_write_flush().
	 */
	if (word == 0xFFFF)
		fm10k_write_flush(hw);

	return word;
}
|
|
|
|
|
|
|
|
u32 fm10k_read_reg(struct fm10k_hw *hw, int reg)
|
|
|
|
{
|
2016-06-18 07:21:11 +08:00
|
|
|
u32 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
|
2014-09-21 07:46:45 +08:00
|
|
|
u32 value = 0;
|
|
|
|
|
|
|
|
if (FM10K_REMOVED(hw_addr))
|
|
|
|
return ~value;
|
|
|
|
|
|
|
|
value = readl(&hw_addr[reg]);
|
2014-09-21 07:48:10 +08:00
|
|
|
if (!(~value) && (!reg || !(~readl(hw_addr)))) {
|
|
|
|
struct fm10k_intfc *interface = hw->back;
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
|
2014-09-21 07:46:45 +08:00
|
|
|
hw->hw_addr = NULL;
|
2014-09-21 07:48:10 +08:00
|
|
|
netif_device_detach(netdev);
|
|
|
|
netdev_err(netdev, "PCIe link lost, device now detached\n");
|
|
|
|
}
|
2014-09-21 07:46:45 +08:00
|
|
|
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
static int fm10k_hw_ready(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
|
|
|
|
fm10k_write_flush(hw);
|
|
|
|
|
|
|
|
return FM10K_REMOVED(hw->hw_addr) ? -ENODEV : 0;
|
|
|
|
}
|
|
|
|
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
/**
|
|
|
|
* fm10k_macvlan_schedule - Schedule MAC/VLAN queue task
|
|
|
|
* @interface: fm10k private interface structure
|
|
|
|
*
|
|
|
|
* Schedule the MAC/VLAN queue monitor task. If the MAC/VLAN task cannot be
|
|
|
|
* started immediately, request that it be restarted when possible.
|
|
|
|
*/
|
|
|
|
void fm10k_macvlan_schedule(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
/* Avoid processing the MAC/VLAN queue when the service task is
|
|
|
|
* disabled, or when we're resetting the device.
|
|
|
|
*/
|
|
|
|
if (!test_bit(__FM10K_MACVLAN_DISABLE, interface->state) &&
|
|
|
|
!test_and_set_bit(__FM10K_MACVLAN_SCHED, interface->state)) {
|
|
|
|
clear_bit(__FM10K_MACVLAN_REQUEST, interface->state);
|
|
|
|
/* We delay the actual start of execution in order to allow
|
|
|
|
* multiple MAC/VLAN updates to accumulate before handling
|
|
|
|
* them, and to allow some time to let the mailbox drain
|
|
|
|
* between runs.
|
|
|
|
*/
|
|
|
|
queue_delayed_work(fm10k_workqueue,
|
|
|
|
&interface->macvlan_task, 10);
|
|
|
|
} else {
|
|
|
|
set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_stop_macvlan_task - Stop the MAC/VLAN queue monitor
|
|
|
|
* @interface: fm10k private interface structure
|
|
|
|
*
|
|
|
|
* Wait until the MAC/VLAN queue task has stopped, and cancel any future
|
|
|
|
* requests.
|
|
|
|
*/
|
|
|
|
static void fm10k_stop_macvlan_task(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
/* Disable the MAC/VLAN work item */
|
|
|
|
set_bit(__FM10K_MACVLAN_DISABLE, interface->state);
|
|
|
|
|
|
|
|
/* Make sure we waited until any current invocations have stopped */
|
|
|
|
cancel_delayed_work_sync(&interface->macvlan_task);
|
|
|
|
|
|
|
|
/* We set the __FM10K_MACVLAN_SCHED bit when we schedule the task.
|
|
|
|
* However, it may not be unset of the MAC/VLAN task never actually
|
|
|
|
* got a chance to run. Since we've canceled the task here, and it
|
|
|
|
* cannot be rescheuled right now, we need to ensure the scheduled bit
|
|
|
|
* gets unset.
|
|
|
|
*/
|
|
|
|
clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_resume_macvlan_task - Restart the MAC/VLAN queue monitor
|
|
|
|
* @interface: fm10k private interface structure
|
|
|
|
*
|
|
|
|
* Clear the __FM10K_MACVLAN_DISABLE bit and, if a request occurred, schedule
|
|
|
|
* the MAC/VLAN work monitor.
|
|
|
|
*/
|
|
|
|
static void fm10k_resume_macvlan_task(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
/* Re-enable the MAC/VLAN work item */
|
|
|
|
clear_bit(__FM10K_MACVLAN_DISABLE, interface->state);
|
|
|
|
|
|
|
|
/* We might have received a MAC/VLAN request while disabled. If so,
|
|
|
|
* kick off the queue now.
|
|
|
|
*/
|
|
|
|
if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
|
|
|
|
fm10k_macvlan_schedule(interface);
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
void fm10k_service_event_schedule(struct fm10k_intfc *interface)
|
|
|
|
{
|
2017-01-13 07:59:39 +08:00
|
|
|
if (!test_bit(__FM10K_SERVICE_DISABLE, interface->state) &&
|
fm10k: allow service task to reschedule itself
If some code path executes fm10k_service_event_schedule(), it is
guaranteed that we only queue the service task once, since we use
__FM10K_SERVICE_SCHED flag. Unfortunately this has a side effect that if
a service request occurs while we are currently running the watchdog, it
is possible that we will fail to notice the request and ignore it until
the next time the request occurs.
This can cause problems with pf/vf mailbox communication and other
service event tasks. To avoid this, introduce a FM10K_SERVICE_REQUEST
bit. When we successfully schedule (and set the _SCHED bit) the service
task, we will clear this bit. However, if we are unable to currently
schedule the service event, we just set the new SERVICE_REQUEST bit.
Finally, after the service event completes, we will re-schedule if the
request bit has been set.
This should ensure that we do not miss any service event schedules,
since we will re-schedule it once the currently running task finishes.
This means that for each request, we will always schedule the service
task to run at least once in full after the request came in.
This will avoid timing issues that can occur with the service event
scheduling. We do pay a cost in re-running many tasks, but all the
service event tasks use either flags to avoid duplicate work, or are
tolerant of being run multiple times.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:40 +08:00
|
|
|
!test_and_set_bit(__FM10K_SERVICE_SCHED, interface->state)) {
|
|
|
|
clear_bit(__FM10K_SERVICE_REQUEST, interface->state);
|
2015-04-04 04:27:05 +08:00
|
|
|
queue_work(fm10k_workqueue, &interface->service_task);
|
fm10k: allow service task to reschedule itself
If some code path executes fm10k_service_event_schedule(), it is
guaranteed that we only queue the service task once, since we use
__FM10K_SERVICE_SCHED flag. Unfortunately this has a side effect that if
a service request occurs while we are currently running the watchdog, it
is possible that we will fail to notice the request and ignore it until
the next time the request occurs.
This can cause problems with pf/vf mailbox communication and other
service event tasks. To avoid this, introduce a FM10K_SERVICE_REQUEST
bit. When we successfully schedule (and set the _SCHED bit) the service
task, we will clear this bit. However, if we are unable to currently
schedule the service event, we just set the new SERVICE_REQUEST bit.
Finally, after the service event completes, we will re-schedule if the
request bit has been set.
This should ensure that we do not miss any service event schedules,
since we will re-schedule it once the currently running task finishes.
This means that for each request, we will always schedule the service
task to run at least once in full after the request came in.
This will avoid timing issues that can occur with the service event
scheduling. We do pay a cost in re-running many tasks, but all the
service event tasks use either flags to avoid duplicate work, or are
tolerant of being run multiple times.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:40 +08:00
|
|
|
} else {
|
|
|
|
set_bit(__FM10K_SERVICE_REQUEST, interface->state);
|
|
|
|
}
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void fm10k_service_event_complete(struct fm10k_intfc *interface)
|
|
|
|
{
|
2017-01-13 07:59:39 +08:00
|
|
|
WARN_ON(!test_bit(__FM10K_SERVICE_SCHED, interface->state));
|
2014-09-21 07:49:25 +08:00
|
|
|
|
|
|
|
/* flush memory to make sure state is correct before next watchog */
|
|
|
|
smp_mb__before_atomic();
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_SERVICE_SCHED, interface->state);
|
fm10k: allow service task to reschedule itself
If some code path executes fm10k_service_event_schedule(), it is
guaranteed that we only queue the service task once, since we use
__FM10K_SERVICE_SCHED flag. Unfortunately this has a side effect that if
a service request occurs while we are currently running the watchdog, it
is possible that we will fail to notice the request and ignore it until
the next time the request occurs.
This can cause problems with pf/vf mailbox communication and other
service event tasks. To avoid this, introduce a FM10K_SERVICE_REQUEST
bit. When we successfully schedule (and set the _SCHED bit) the service
task, we will clear this bit. However, if we are unable to currently
schedule the service event, we just set the new SERVICE_REQUEST bit.
Finally, after the service event completes, we will re-schedule if the
request bit has been set.
This should ensure that we do not miss any service event schedules,
since we will re-schedule it once the currently running task finishes.
This means that for each request, we will always schedule the service
task to run at least once in full after the request came in.
This will avoid timing issues that can occur with the service event
scheduling. We do pay a cost in re-running many tasks, but all the
service event tasks use either flags to avoid duplicate work, or are
tolerant of being run multiple times.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:40 +08:00
|
|
|
|
|
|
|
/* If a service event was requested since we started, immediately
|
|
|
|
* re-schedule now. This ensures we don't drop a request until the
|
|
|
|
* next timer event.
|
|
|
|
*/
|
|
|
|
if (test_bit(__FM10K_SERVICE_REQUEST, interface->state))
|
|
|
|
fm10k_service_event_schedule(interface);
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
2017-07-11 04:23:13 +08:00
|
|
|
static void fm10k_stop_service_event(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
set_bit(__FM10K_SERVICE_DISABLE, interface->state);
|
|
|
|
cancel_work_sync(&interface->service_task);
|
|
|
|
|
|
|
|
/* It's possible that cancel_work_sync stopped the service task from
|
|
|
|
* running before it could actually start. In this case the
|
|
|
|
* __FM10K_SERVICE_SCHED bit will never be cleared. Since we know that
|
|
|
|
* the service task cannot be running at this point, we need to clear
|
|
|
|
* the scheduled bit, as otherwise the service task may never be
|
|
|
|
* restarted.
|
|
|
|
*/
|
|
|
|
clear_bit(__FM10K_SERVICE_SCHED, interface->state);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void fm10k_start_service_event(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
|
|
|
|
fm10k_service_event_schedule(interface);
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
/**
|
|
|
|
* fm10k_service_timer - Timer Call-back
|
2018-01-17 03:20:51 +08:00
|
|
|
* @t: pointer to timer data
|
2014-09-21 07:49:25 +08:00
|
|
|
**/
|
2017-10-17 08:29:35 +08:00
|
|
|
static void fm10k_service_timer(struct timer_list *t)
|
2014-09-21 07:49:25 +08:00
|
|
|
{
|
2017-10-17 08:29:35 +08:00
|
|
|
struct fm10k_intfc *interface = from_timer(interface, t,
|
|
|
|
service_timer);
|
2014-09-21 07:49:25 +08:00
|
|
|
|
|
|
|
/* Reset the timer */
|
|
|
|
mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
|
|
|
|
|
|
|
|
fm10k_service_event_schedule(interface);
|
|
|
|
}
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/**
|
|
|
|
* fm10k_prepare_for_reset - Prepare the driver and device for a pending reset
|
|
|
|
* @interface: fm10k private data structure
|
|
|
|
*
|
|
|
|
* This function prepares for a device reset by shutting as much down as we
|
|
|
|
* can. It does nothing and returns false if __FM10K_RESETTING was already set
|
|
|
|
* prior to calling this function. It returns true if it actually did work.
|
|
|
|
*/
|
|
|
|
static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface)
|
2014-09-21 07:49:25 +08:00
|
|
|
{
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
|
|
|
|
WARN_ON(in_interrupt());
|
|
|
|
|
|
|
|
/* put off any impending NetWatchDogTimeout */
|
2016-05-03 22:33:13 +08:00
|
|
|
netif_trans_update(netdev);
|
2014-09-21 07:49:25 +08:00
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* Nothing to do if a reset is already in progress */
|
|
|
|
if (test_and_set_bit(__FM10K_RESETTING, interface->state))
|
|
|
|
return false;
|
2014-09-21 07:49:25 +08:00
|
|
|
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
/* As the MAC/VLAN task will be accessing registers it must not be
|
|
|
|
* running while we reset. Although the task will not be scheduled
|
|
|
|
* once we start resetting it may already be running
|
|
|
|
*/
|
|
|
|
fm10k_stop_macvlan_task(interface);
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
rtnl_lock();
|
|
|
|
|
2014-09-21 07:52:09 +08:00
|
|
|
fm10k_iov_suspend(interface->pdev);
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
if (netif_running(netdev))
|
|
|
|
fm10k_close(netdev);
|
|
|
|
|
|
|
|
fm10k_mbx_free_irq(interface);
|
|
|
|
|
2015-10-17 01:56:59 +08:00
|
|
|
/* free interrupts */
|
|
|
|
fm10k_clear_queueing_scheme(interface);
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
/* delay any future reset requests */
|
|
|
|
interface->last_reset = jiffies + (10 * HZ);
|
|
|
|
|
2016-06-08 07:08:52 +08:00
|
|
|
rtnl_unlock();
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
|
|
|
|
return true;
|
2016-06-08 07:08:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int fm10k_handle_reset(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int err;
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
WARN_ON(!test_bit(__FM10K_RESETTING, interface->state));
|
|
|
|
|
2016-06-08 07:08:52 +08:00
|
|
|
rtnl_lock();
|
|
|
|
|
2016-06-08 07:08:57 +08:00
|
|
|
pci_set_master(interface->pdev);
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
/* reset and initialize the hardware so it is in a known state */
|
2015-10-17 01:56:58 +08:00
|
|
|
err = hw->mac.ops.reset_hw(hw);
|
|
|
|
if (err) {
|
|
|
|
dev_err(&interface->pdev->dev, "reset_hw failed: %d\n", err);
|
|
|
|
goto reinit_err;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = hw->mac.ops.init_hw(hw);
|
|
|
|
if (err) {
|
2014-09-21 07:49:25 +08:00
|
|
|
dev_err(&interface->pdev->dev, "init_hw failed: %d\n", err);
|
2015-10-17 01:56:58 +08:00
|
|
|
goto reinit_err;
|
|
|
|
}
|
2014-09-21 07:49:25 +08:00
|
|
|
|
2015-10-17 01:56:59 +08:00
|
|
|
err = fm10k_init_queueing_scheme(interface);
|
|
|
|
if (err) {
|
2015-10-29 08:19:56 +08:00
|
|
|
dev_err(&interface->pdev->dev,
|
|
|
|
"init_queueing_scheme failed: %d\n", err);
|
2015-10-17 01:56:59 +08:00
|
|
|
goto reinit_err;
|
|
|
|
}
|
|
|
|
|
2016-06-08 07:08:52 +08:00
|
|
|
/* re-associate interrupts */
|
2015-11-11 01:40:30 +08:00
|
|
|
err = fm10k_mbx_request_irq(interface);
|
|
|
|
if (err)
|
|
|
|
goto err_mbx_irq;
|
|
|
|
|
|
|
|
err = fm10k_hw_ready(interface);
|
|
|
|
if (err)
|
|
|
|
goto err_open;
|
2014-09-21 07:49:25 +08:00
|
|
|
|
2015-06-16 06:00:56 +08:00
|
|
|
/* update hardware address for VFs if perm_addr has changed */
|
|
|
|
if (hw->mac.type == fm10k_mac_vf) {
|
|
|
|
if (is_valid_ether_addr(hw->mac.perm_addr)) {
|
|
|
|
ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
|
|
|
|
ether_addr_copy(netdev->perm_addr, hw->mac.perm_addr);
|
|
|
|
ether_addr_copy(netdev->dev_addr, hw->mac.perm_addr);
|
|
|
|
netdev->addr_assign_type &= ~NET_ADDR_RANDOM;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hw->mac.vlan_override)
|
|
|
|
netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
|
|
|
|
else
|
|
|
|
netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
|
|
|
|
}
|
|
|
|
|
2015-11-11 01:40:30 +08:00
|
|
|
err = netif_running(netdev) ? fm10k_open(netdev) : 0;
|
|
|
|
if (err)
|
|
|
|
goto err_open;
|
2014-09-21 07:49:25 +08:00
|
|
|
|
2014-09-21 07:52:09 +08:00
|
|
|
fm10k_iov_resume(interface->pdev);
|
|
|
|
|
2015-11-11 01:40:30 +08:00
|
|
|
rtnl_unlock();
|
|
|
|
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
fm10k_resume_macvlan_task(interface);
|
|
|
|
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_RESETTING, interface->state);
|
2015-11-11 01:40:30 +08:00
|
|
|
|
2016-06-08 07:08:52 +08:00
|
|
|
return err;
|
2015-11-11 01:40:30 +08:00
|
|
|
err_open:
|
|
|
|
fm10k_mbx_free_irq(interface);
|
|
|
|
err_mbx_irq:
|
|
|
|
fm10k_clear_queueing_scheme(interface);
|
2015-10-17 01:56:58 +08:00
|
|
|
reinit_err:
|
2015-11-11 01:40:30 +08:00
|
|
|
netif_device_detach(netdev);
|
2015-10-17 01:56:58 +08:00
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
rtnl_unlock();
|
|
|
|
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_RESETTING, interface->state);
|
2016-06-08 07:08:52 +08:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-07-11 04:23:12 +08:00
|
|
|
static void fm10k_detach_subtask(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
u32 __iomem *hw_addr;
|
|
|
|
u32 value;
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
int err;
|
2017-07-11 04:23:12 +08:00
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* do nothing if netdev is still present or hw_addr is set */
|
2017-07-11 04:23:12 +08:00
|
|
|
if (netif_device_present(netdev) || interface->hw.hw_addr)
|
|
|
|
return;
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* We've lost the PCIe register space, and can no longer access the
|
|
|
|
* device. Shut everything except the detach subtask down and prepare
|
|
|
|
* to reset the device in case we recover. If we actually prepare for
|
|
|
|
* reset, indicate that we're detached.
|
|
|
|
*/
|
|
|
|
if (fm10k_prepare_for_reset(interface))
|
|
|
|
set_bit(__FM10K_RESET_DETACHED, interface->state);
|
|
|
|
|
2017-07-11 04:23:12 +08:00
|
|
|
/* check the real address space to see if we've recovered */
|
|
|
|
hw_addr = READ_ONCE(interface->uc_addr);
|
|
|
|
value = readl(hw_addr);
|
|
|
|
if (~value) {
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* Make sure the reset was initiated because we detached,
|
|
|
|
* otherwise we might race with a different reset flow.
|
|
|
|
*/
|
|
|
|
if (!test_and_clear_bit(__FM10K_RESET_DETACHED,
|
|
|
|
interface->state))
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* Restore the hardware address */
|
2017-07-11 04:23:12 +08:00
|
|
|
interface->hw.hw_addr = interface->uc_addr;
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
|
|
|
|
/* PCIe link has been restored, and the device is active
|
|
|
|
* again. Restore everything and reset the device.
|
|
|
|
*/
|
|
|
|
err = fm10k_handle_reset(interface);
|
|
|
|
if (err) {
|
|
|
|
netdev_err(netdev, "Unable to reset device: %d\n", err);
|
|
|
|
interface->hw.hw_addr = NULL;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Re-attach the netdev */
|
2017-07-11 04:23:12 +08:00
|
|
|
netif_device_attach(netdev);
|
|
|
|
netdev_warn(netdev, "PCIe link restored, device now attached\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shut down as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
static void fm10k_reset_subtask(struct fm10k_intfc *interface)
|
2016-06-08 07:08:52 +08:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED,
|
|
|
|
interface->flags))
|
2014-09-21 07:49:25 +08:00
|
|
|
return;
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* If another thread has already prepared to reset the device, we
|
|
|
|
* should not attempt to handle a reset here, since we'd race with
|
|
|
|
* that thread. This may happen if we suspend the device or if the
|
|
|
|
* PCIe link is lost. In this case, we'll just ignore the RESET
|
|
|
|
* request, as it will (eventually) be taken care of when the thread
|
|
|
|
* which actually started the reset is finished.
|
|
|
|
*/
|
|
|
|
if (!fm10k_prepare_for_reset(interface))
|
|
|
|
return;
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
netdev_err(interface->netdev, "Reset interface\n");
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
err = fm10k_handle_reset(interface);
|
|
|
|
if (err)
|
|
|
|
dev_err(&interface->pdev->dev,
|
|
|
|
"fm10k_handle_reset failed: %d\n", err);
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_configure_swpri_map - Configure Receive SWPRI to PC mapping
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* Configure the SWPRI to PC mapping for the port.
|
|
|
|
**/
|
|
|
|
static void fm10k_configure_swpri_map(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* clear flag indicating update is needed */
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
clear_bit(FM10K_FLAG_SWPRI_CONFIG, interface->flags);
|
2014-09-21 07:49:25 +08:00
|
|
|
|
|
|
|
/* these registers are only available on the PF */
|
|
|
|
if (hw->mac.type != fm10k_mac_pf)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* configure SWPRI to PC map */
|
|
|
|
for (i = 0; i < FM10K_SWPRI_MAX; i++)
|
|
|
|
fm10k_write_reg(hw, FM10K_SWPRI_MAP(i),
|
|
|
|
netdev_get_prio_tc_map(netdev, i));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_watchdog_update_host_state - Update the link status based on host.
|
|
|
|
* @interface: board private structure
|
|
|
|
**/
|
|
|
|
static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
s32 err;
|
|
|
|
|
2017-01-13 07:59:39 +08:00
|
|
|
if (test_bit(__FM10K_LINK_DOWN, interface->state)) {
|
2014-09-21 07:49:25 +08:00
|
|
|
interface->host_ready = false;
|
|
|
|
if (time_is_after_jiffies(interface->link_down_event))
|
|
|
|
return;
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_LINK_DOWN, interface->state);
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
if (test_bit(FM10K_FLAG_SWPRI_CONFIG, interface->flags)) {
|
2014-09-21 07:49:25 +08:00
|
|
|
if (rtnl_trylock()) {
|
|
|
|
fm10k_configure_swpri_map(interface);
|
|
|
|
rtnl_unlock();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* lock the mailbox for transmit and receive */
|
|
|
|
fm10k_mbx_lock(interface);
|
|
|
|
|
|
|
|
err = hw->mac.ops.get_host_state(hw, &interface->host_ready);
|
|
|
|
if (err && time_is_before_jiffies(interface->last_reset))
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
|
2014-09-21 07:49:25 +08:00
|
|
|
|
|
|
|
/* free the lock */
|
|
|
|
fm10k_mbx_unlock(interface);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_mbx_subtask - Process upstream and downstream mailboxes
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* This function will process both the upstream and downstream mailboxes.
|
|
|
|
**/
|
|
|
|
static void fm10k_mbx_subtask(struct fm10k_intfc *interface)
|
|
|
|
{
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* If we're resetting, bail out */
|
|
|
|
if (test_bit(__FM10K_RESETTING, interface->state))
|
|
|
|
return;
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
/* process upstream mailbox and update device state */
|
|
|
|
fm10k_watchdog_update_host_state(interface);
|
2014-09-21 07:52:09 +08:00
|
|
|
|
|
|
|
/* process downstream mailboxes */
|
|
|
|
fm10k_iov_mbx(interface);
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_watchdog_host_is_ready - Update netdev status based on host ready
|
|
|
|
* @interface: board private structure
|
|
|
|
**/
|
|
|
|
static void fm10k_watchdog_host_is_ready(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
|
|
|
|
/* only continue if link state is currently down */
|
|
|
|
if (netif_carrier_ok(netdev))
|
|
|
|
return;
|
|
|
|
|
|
|
|
netif_info(interface, drv, netdev, "NIC Link is up\n");
|
|
|
|
|
|
|
|
netif_carrier_on(netdev);
|
|
|
|
netif_tx_wake_all_queues(netdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_watchdog_host_not_ready - Update netdev status based on host not ready
|
|
|
|
* @interface: board private structure
|
|
|
|
**/
|
|
|
|
static void fm10k_watchdog_host_not_ready(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
|
|
|
|
/* only continue if link state is currently up */
|
|
|
|
if (!netif_carrier_ok(netdev))
|
|
|
|
return;
|
|
|
|
|
|
|
|
netif_info(interface, drv, netdev, "NIC Link is down\n");
|
|
|
|
|
|
|
|
netif_carrier_off(netdev);
|
|
|
|
netif_tx_stop_all_queues(netdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_update_stats - Update the board statistics counters.
|
|
|
|
* @interface: board private structure
|
|
|
|
**/
|
|
|
|
void fm10k_update_stats(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device_stats *net_stats = &interface->netdev->stats;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
2015-07-02 08:38:36 +08:00
|
|
|
u64 hw_csum_tx_good = 0, hw_csum_rx_good = 0, rx_length_errors = 0;
|
|
|
|
u64 rx_switch_errors = 0, rx_drops = 0, rx_pp_errors = 0;
|
|
|
|
u64 rx_link_errors = 0;
|
2014-09-21 07:49:25 +08:00
|
|
|
u64 rx_errors = 0, rx_csum_errors = 0, tx_csum_errors = 0;
|
|
|
|
u64 restart_queue = 0, tx_busy = 0, alloc_failed = 0;
|
|
|
|
u64 rx_bytes_nic = 0, rx_pkts_nic = 0, rx_drops_nic = 0;
|
|
|
|
u64 tx_bytes_nic = 0, tx_pkts_nic = 0;
|
|
|
|
u64 bytes, pkts;
|
|
|
|
int i;
|
|
|
|
|
2016-06-08 07:08:45 +08:00
|
|
|
/* ensure only one thread updates stats at a time */
|
2017-01-13 07:59:39 +08:00
|
|
|
if (test_and_set_bit(__FM10K_UPDATING_STATS, interface->state))
|
2016-06-08 07:08:45 +08:00
|
|
|
return;
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
/* do not allow stats update via service task for next second */
|
|
|
|
interface->next_stats_update = jiffies + HZ;
|
|
|
|
|
|
|
|
/* gather some stats to the interface struct that are per queue */
|
|
|
|
for (bytes = 0, pkts = 0, i = 0; i < interface->num_tx_queues; i++) {
|
2016-06-04 06:42:12 +08:00
|
|
|
struct fm10k_ring *tx_ring = READ_ONCE(interface->tx_ring[i]);
|
|
|
|
|
|
|
|
if (!tx_ring)
|
|
|
|
continue;
|
2014-09-21 07:49:25 +08:00
|
|
|
|
|
|
|
restart_queue += tx_ring->tx_stats.restart_queue;
|
|
|
|
tx_busy += tx_ring->tx_stats.tx_busy;
|
|
|
|
tx_csum_errors += tx_ring->tx_stats.csum_err;
|
|
|
|
bytes += tx_ring->stats.bytes;
|
|
|
|
pkts += tx_ring->stats.packets;
|
2015-07-02 08:38:36 +08:00
|
|
|
hw_csum_tx_good += tx_ring->tx_stats.csum_good;
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
interface->restart_queue = restart_queue;
|
|
|
|
interface->tx_busy = tx_busy;
|
|
|
|
net_stats->tx_bytes = bytes;
|
|
|
|
net_stats->tx_packets = pkts;
|
|
|
|
interface->tx_csum_errors = tx_csum_errors;
|
2015-07-02 08:38:36 +08:00
|
|
|
interface->hw_csum_tx_good = hw_csum_tx_good;
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
/* gather some stats to the interface struct that are per queue */
|
|
|
|
for (bytes = 0, pkts = 0, i = 0; i < interface->num_rx_queues; i++) {
|
2016-06-04 06:42:12 +08:00
|
|
|
struct fm10k_ring *rx_ring = READ_ONCE(interface->rx_ring[i]);
|
|
|
|
|
|
|
|
if (!rx_ring)
|
|
|
|
continue;
|
2014-09-21 07:49:25 +08:00
|
|
|
|
|
|
|
bytes += rx_ring->stats.bytes;
|
|
|
|
pkts += rx_ring->stats.packets;
|
|
|
|
alloc_failed += rx_ring->rx_stats.alloc_failed;
|
|
|
|
rx_csum_errors += rx_ring->rx_stats.csum_err;
|
|
|
|
rx_errors += rx_ring->rx_stats.errors;
|
2015-07-02 08:38:36 +08:00
|
|
|
hw_csum_rx_good += rx_ring->rx_stats.csum_good;
|
|
|
|
rx_switch_errors += rx_ring->rx_stats.switch_errors;
|
|
|
|
rx_drops += rx_ring->rx_stats.drops;
|
|
|
|
rx_pp_errors += rx_ring->rx_stats.pp_errors;
|
|
|
|
rx_link_errors += rx_ring->rx_stats.link_errors;
|
|
|
|
rx_length_errors += rx_ring->rx_stats.length_errors;
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
net_stats->rx_bytes = bytes;
|
|
|
|
net_stats->rx_packets = pkts;
|
|
|
|
interface->alloc_failed = alloc_failed;
|
|
|
|
interface->rx_csum_errors = rx_csum_errors;
|
2015-07-02 08:38:36 +08:00
|
|
|
interface->hw_csum_rx_good = hw_csum_rx_good;
|
|
|
|
interface->rx_switch_errors = rx_switch_errors;
|
|
|
|
interface->rx_drops = rx_drops;
|
|
|
|
interface->rx_pp_errors = rx_pp_errors;
|
|
|
|
interface->rx_link_errors = rx_link_errors;
|
|
|
|
interface->rx_length_errors = rx_length_errors;
|
2014-09-21 07:49:25 +08:00
|
|
|
|
|
|
|
hw->mac.ops.update_hw_stats(hw, &interface->stats);
|
|
|
|
|
2015-04-04 04:26:59 +08:00
|
|
|
for (i = 0; i < hw->mac.max_queues; i++) {
|
2014-09-21 07:49:25 +08:00
|
|
|
struct fm10k_hw_stats_q *q = &interface->stats.q[i];
|
|
|
|
|
|
|
|
tx_bytes_nic += q->tx_bytes.count;
|
|
|
|
tx_pkts_nic += q->tx_packets.count;
|
|
|
|
rx_bytes_nic += q->rx_bytes.count;
|
|
|
|
rx_pkts_nic += q->rx_packets.count;
|
|
|
|
rx_drops_nic += q->rx_drops.count;
|
|
|
|
}
|
|
|
|
|
|
|
|
interface->tx_bytes_nic = tx_bytes_nic;
|
|
|
|
interface->tx_packets_nic = tx_pkts_nic;
|
|
|
|
interface->rx_bytes_nic = rx_bytes_nic;
|
|
|
|
interface->rx_packets_nic = rx_pkts_nic;
|
|
|
|
interface->rx_drops_nic = rx_drops_nic;
|
|
|
|
|
|
|
|
/* Fill out the OS statistics structure */
|
2015-04-04 04:26:52 +08:00
|
|
|
net_stats->rx_errors = rx_errors;
|
2014-09-21 07:49:25 +08:00
|
|
|
net_stats->rx_dropped = interface->stats.nodesc_drop.count;
|
2016-06-08 07:08:45 +08:00
|
|
|
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_UPDATING_STATS, interface->state);
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_watchdog_flush_tx - flush queues on host not ready
|
2018-01-17 03:20:51 +08:00
|
|
|
* @interface: pointer to the device interface structure
|
2014-09-21 07:49:25 +08:00
|
|
|
**/
|
|
|
|
static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
int some_tx_pending = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* nothing to do if carrier is up */
|
|
|
|
if (netif_carrier_ok(interface->netdev))
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (i = 0; i < interface->num_tx_queues; i++) {
|
|
|
|
struct fm10k_ring *tx_ring = interface->tx_ring[i];
|
|
|
|
|
|
|
|
if (tx_ring->next_to_use != tx_ring->next_to_clean) {
|
|
|
|
some_tx_pending = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We've lost link, so the controller stops DMA, but we've got
|
|
|
|
* queued Tx work that's never going to get done, so reset
|
|
|
|
* controller to flush Tx.
|
|
|
|
*/
|
|
|
|
if (some_tx_pending)
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
|
2014-09-21 07:49:25 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_watchdog_subtask - check and bring link up
|
2018-01-17 03:20:51 +08:00
|
|
|
* @interface: pointer to the device interface structure
|
2014-09-21 07:49:25 +08:00
|
|
|
**/
|
|
|
|
static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
/* if interface is down do nothing */
|
2017-01-13 07:59:39 +08:00
|
|
|
if (test_bit(__FM10K_DOWN, interface->state) ||
|
|
|
|
test_bit(__FM10K_RESETTING, interface->state))
|
2014-09-21 07:49:25 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (interface->host_ready)
|
|
|
|
fm10k_watchdog_host_is_ready(interface);
|
|
|
|
else
|
|
|
|
fm10k_watchdog_host_not_ready(interface);
|
|
|
|
|
|
|
|
/* update stats only once every second */
|
|
|
|
if (time_is_before_jiffies(interface->next_stats_update))
|
|
|
|
fm10k_update_stats(interface);
|
|
|
|
|
|
|
|
/* flush any uncompleted work */
|
|
|
|
fm10k_watchdog_flush_tx(interface);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_check_hang_subtask - check for hung queues and dropped interrupts
|
2018-01-17 03:20:51 +08:00
|
|
|
* @interface: pointer to the device interface structure
|
2014-09-21 07:49:25 +08:00
|
|
|
*
|
|
|
|
* This function serves two purposes. First it strobes the interrupt lines
|
|
|
|
* in order to make certain interrupts are occurring. Secondly it sets the
|
|
|
|
* bits needed to check for TX hangs. As a result we should immediately
|
|
|
|
* determine if a hang has occurred.
|
|
|
|
*/
|
|
|
|
static void fm10k_check_hang_subtask(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* If we're down or resetting, just bail */
|
2017-01-13 07:59:39 +08:00
|
|
|
if (test_bit(__FM10K_DOWN, interface->state) ||
|
|
|
|
test_bit(__FM10K_RESETTING, interface->state))
|
2014-09-21 07:49:25 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
/* rate limit tx hang checks to only once every 2 seconds */
|
|
|
|
if (time_is_after_eq_jiffies(interface->next_tx_hang_check))
|
|
|
|
return;
|
|
|
|
interface->next_tx_hang_check = jiffies + (2 * HZ);
|
|
|
|
|
|
|
|
if (netif_carrier_ok(interface->netdev)) {
|
|
|
|
/* Force detection of hung controller */
|
|
|
|
for (i = 0; i < interface->num_tx_queues; i++)
|
|
|
|
set_check_for_tx_hang(interface->tx_ring[i]);
|
|
|
|
|
|
|
|
/* Rearm all in-use q_vectors for immediate firing */
|
|
|
|
for (i = 0; i < interface->num_q_vectors; i++) {
|
|
|
|
struct fm10k_q_vector *qv = interface->q_vector[i];
|
|
|
|
|
|
|
|
if (!qv->tx.count && !qv->rx.count)
|
|
|
|
continue;
|
|
|
|
writel(FM10K_ITR_ENABLE | FM10K_ITR_PENDING2, qv->itr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_service_task - manages and runs subtasks
|
|
|
|
* @work: pointer to work_struct containing our data
|
|
|
|
**/
|
|
|
|
static void fm10k_service_task(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct fm10k_intfc *interface;
|
|
|
|
|
|
|
|
interface = container_of(work, struct fm10k_intfc, service_task);
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* Check whether we're detached first */
|
|
|
|
fm10k_detach_subtask(interface);
|
|
|
|
|
2015-06-16 06:00:52 +08:00
|
|
|
/* tasks run even when interface is down */
|
2014-09-21 07:49:25 +08:00
|
|
|
fm10k_mbx_subtask(interface);
|
|
|
|
fm10k_reset_subtask(interface);
|
|
|
|
|
|
|
|
/* tasks only run when interface is up */
|
|
|
|
fm10k_watchdog_subtask(interface);
|
|
|
|
fm10k_check_hang_subtask(interface);
|
|
|
|
|
|
|
|
/* release lock on service events to allow scheduling next event */
|
|
|
|
fm10k_service_event_complete(interface);
|
|
|
|
}
|
|
|
|
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
/**
|
|
|
|
* fm10k_macvlan_task - send queued MAC/VLAN requests to switch manager
|
|
|
|
* @work: pointer to work_struct containing our data
|
|
|
|
*
|
|
|
|
* This work item handles sending MAC/VLAN updates to the switch manager. When
|
|
|
|
* the interface is up, it will attempt to queue mailbox messages to the
|
|
|
|
* switch manager requesting updates for MAC/VLAN pairs. If the Tx fifo of the
|
|
|
|
* mailbox is full, it will reschedule itself to try again in a short while.
|
|
|
|
* This ensures that the driver does not overload the switch mailbox with too
|
|
|
|
* many simultaneous requests, causing an unnecessary reset.
|
|
|
|
**/
|
|
|
|
static void fm10k_macvlan_task(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct fm10k_macvlan_request *item;
|
|
|
|
struct fm10k_intfc *interface;
|
|
|
|
struct delayed_work *dwork;
|
|
|
|
struct list_head *requests;
|
|
|
|
struct fm10k_hw *hw;
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
dwork = to_delayed_work(work);
|
|
|
|
interface = container_of(dwork, struct fm10k_intfc, macvlan_task);
|
|
|
|
hw = &interface->hw;
|
|
|
|
requests = &interface->macvlan_requests;
|
|
|
|
|
|
|
|
do {
|
|
|
|
/* Pop the first item off the list */
|
|
|
|
spin_lock_irqsave(&interface->macvlan_lock, flags);
|
|
|
|
item = list_first_entry_or_null(requests,
|
|
|
|
struct fm10k_macvlan_request,
|
|
|
|
list);
|
|
|
|
if (item)
|
|
|
|
list_del_init(&item->list);
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(&interface->macvlan_lock, flags);
|
|
|
|
|
|
|
|
/* We have no more items to process */
|
|
|
|
if (!item)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
fm10k_mbx_lock(interface);
|
|
|
|
|
|
|
|
/* Check that we have plenty of space to send the message. We
|
|
|
|
* want to ensure that the mailbox stays low enough to avoid a
|
|
|
|
* change in the host state, otherwise we may see spurious
|
|
|
|
* link up / link down notifications.
|
|
|
|
*/
|
|
|
|
if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU + 5)) {
|
|
|
|
hw->mbx.ops.process(hw, &hw->mbx);
|
|
|
|
set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
|
|
|
|
fm10k_mbx_unlock(interface);
|
|
|
|
|
|
|
|
/* Put the request back on the list */
|
|
|
|
spin_lock_irqsave(&interface->macvlan_lock, flags);
|
|
|
|
list_add(&item->list, requests);
|
|
|
|
spin_unlock_irqrestore(&interface->macvlan_lock, flags);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (item->type) {
|
|
|
|
case FM10K_MC_MAC_REQUEST:
|
|
|
|
hw->mac.ops.update_mc_addr(hw,
|
|
|
|
item->mac.glort,
|
|
|
|
item->mac.addr,
|
|
|
|
item->mac.vid,
|
|
|
|
item->set);
|
|
|
|
break;
|
|
|
|
case FM10K_UC_MAC_REQUEST:
|
|
|
|
hw->mac.ops.update_uc_addr(hw,
|
|
|
|
item->mac.glort,
|
|
|
|
item->mac.addr,
|
|
|
|
item->mac.vid,
|
|
|
|
item->set,
|
|
|
|
0);
|
|
|
|
break;
|
|
|
|
case FM10K_VLAN_REQUEST:
|
|
|
|
hw->mac.ops.update_vlan(hw,
|
|
|
|
item->vlan.vid,
|
|
|
|
item->vlan.vsi,
|
|
|
|
item->set);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
fm10k_mbx_unlock(interface);
|
|
|
|
|
|
|
|
/* Free the item now that we've sent the update */
|
|
|
|
kfree(item);
|
|
|
|
} while (true);
|
|
|
|
|
|
|
|
done:
|
|
|
|
WARN_ON(!test_bit(__FM10K_MACVLAN_SCHED, interface->state));
|
|
|
|
|
|
|
|
/* flush memory to make sure state is correct */
|
|
|
|
smp_mb__before_atomic();
|
|
|
|
clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
|
|
|
|
|
|
|
|
/* If a MAC/VLAN request was scheduled since we started, we should
|
|
|
|
* re-schedule. However, there is no reason to re-schedule if there is
|
|
|
|
* no work to do.
|
|
|
|
*/
|
|
|
|
if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
|
|
|
|
fm10k_macvlan_schedule(interface);
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:49:43 +08:00
|
|
|
/**
|
|
|
|
* fm10k_configure_tx_ring - Configure Tx ring after Reset
|
|
|
|
* @interface: board private structure
|
|
|
|
* @ring: structure containing ring specific data
|
|
|
|
*
|
|
|
|
* Configure the Tx descriptor ring after a reset.
|
|
|
|
**/
|
|
|
|
static void fm10k_configure_tx_ring(struct fm10k_intfc *interface,
|
|
|
|
struct fm10k_ring *ring)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
u64 tdba = ring->dma;
|
|
|
|
u32 size = ring->count * sizeof(struct fm10k_tx_desc);
|
|
|
|
u32 txint = FM10K_INT_MAP_DISABLE;
|
2015-12-23 05:43:49 +08:00
|
|
|
u32 txdctl = BIT(FM10K_TXDCTL_MAX_TIME_SHIFT) | FM10K_TXDCTL_ENABLE;
|
2014-09-21 07:49:43 +08:00
|
|
|
u8 reg_idx = ring->reg_idx;
|
|
|
|
|
|
|
|
/* disable queue to avoid issues while updating state */
|
|
|
|
fm10k_write_reg(hw, FM10K_TXDCTL(reg_idx), 0);
|
|
|
|
fm10k_write_flush(hw);
|
|
|
|
|
|
|
|
/* possible poll here to verify ring resources have been cleaned */
|
|
|
|
|
|
|
|
/* set location and size for descriptor ring */
|
|
|
|
fm10k_write_reg(hw, FM10K_TDBAL(reg_idx), tdba & DMA_BIT_MASK(32));
|
|
|
|
fm10k_write_reg(hw, FM10K_TDBAH(reg_idx), tdba >> 32);
|
|
|
|
fm10k_write_reg(hw, FM10K_TDLEN(reg_idx), size);
|
|
|
|
|
|
|
|
/* reset head and tail pointers */
|
|
|
|
fm10k_write_reg(hw, FM10K_TDH(reg_idx), 0);
|
|
|
|
fm10k_write_reg(hw, FM10K_TDT(reg_idx), 0);
|
|
|
|
|
|
|
|
/* store tail pointer */
|
|
|
|
ring->tail = &interface->uc_addr[FM10K_TDT(reg_idx)];
|
|
|
|
|
2015-10-17 01:57:10 +08:00
|
|
|
/* reset ntu and ntc to place SW in sync with hardware */
|
2014-09-21 07:49:43 +08:00
|
|
|
ring->next_to_clean = 0;
|
|
|
|
ring->next_to_use = 0;
|
|
|
|
|
|
|
|
/* Map interrupt */
|
|
|
|
if (ring->q_vector) {
|
|
|
|
txint = ring->q_vector->v_idx + NON_Q_VECTORS(hw);
|
|
|
|
txint |= FM10K_INT_MAP_TIMER0;
|
|
|
|
}
|
|
|
|
|
|
|
|
fm10k_write_reg(hw, FM10K_TXINT(reg_idx), txint);
|
|
|
|
|
|
|
|
/* enable use of FTAG bit in Tx descriptor, register is RO for VF */
|
|
|
|
fm10k_write_reg(hw, FM10K_PFVTCTL(reg_idx),
|
|
|
|
FM10K_PFVTCTL_FTAG_DESC_ENABLE);
|
|
|
|
|
2015-10-30 04:43:40 +08:00
|
|
|
/* Initialize XPS */
|
2017-01-13 07:59:39 +08:00
|
|
|
if (!test_and_set_bit(__FM10K_TX_XPS_INIT_DONE, ring->state) &&
|
2015-10-30 04:43:40 +08:00
|
|
|
ring->q_vector)
|
|
|
|
netif_set_xps_queue(ring->netdev,
|
|
|
|
&ring->q_vector->affinity_mask,
|
|
|
|
ring->queue_index);
|
|
|
|
|
2014-09-21 07:49:43 +08:00
|
|
|
/* enable queue */
|
|
|
|
fm10k_write_reg(hw, FM10K_TXDCTL(reg_idx), txdctl);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_enable_tx_ring - Verify Tx ring is enabled after configuration
|
|
|
|
* @interface: board private structure
|
|
|
|
* @ring: structure containing ring specific data
|
|
|
|
*
|
|
|
|
* Verify the Tx descriptor ring is ready for transmit.
|
|
|
|
**/
|
|
|
|
static void fm10k_enable_tx_ring(struct fm10k_intfc *interface,
|
|
|
|
struct fm10k_ring *ring)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int wait_loop = 10;
|
|
|
|
u32 txdctl;
|
|
|
|
u8 reg_idx = ring->reg_idx;
|
|
|
|
|
|
|
|
/* if we are already enabled just exit */
|
|
|
|
if (fm10k_read_reg(hw, FM10K_TXDCTL(reg_idx)) & FM10K_TXDCTL_ENABLE)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* poll to verify queue is enabled */
|
|
|
|
do {
|
|
|
|
usleep_range(1000, 2000);
|
|
|
|
txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(reg_idx));
|
|
|
|
} while (!(txdctl & FM10K_TXDCTL_ENABLE) && --wait_loop);
|
|
|
|
if (!wait_loop)
|
|
|
|
netif_err(interface, drv, interface->netdev,
|
|
|
|
"Could not enable Tx Queue %d\n", reg_idx);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_configure_tx - Configure Transmit Unit after Reset
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* Configure the Tx unit of the MAC after a reset.
|
|
|
|
**/
|
|
|
|
static void fm10k_configure_tx(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Setup the HW Tx Head and Tail descriptor pointers */
|
|
|
|
for (i = 0; i < interface->num_tx_queues; i++)
|
|
|
|
fm10k_configure_tx_ring(interface, interface->tx_ring[i]);
|
|
|
|
|
|
|
|
/* poll here to verify that Tx rings are now enabled */
|
|
|
|
for (i = 0; i < interface->num_tx_queues; i++)
|
|
|
|
fm10k_enable_tx_ring(interface, interface->tx_ring[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_configure_rx_ring - Configure Rx ring after Reset
|
|
|
|
* @interface: board private structure
|
|
|
|
* @ring: structure containing ring specific data
|
|
|
|
*
|
|
|
|
* Configure the Rx descriptor ring after a reset.
|
|
|
|
**/
|
|
|
|
static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
|
|
|
|
struct fm10k_ring *ring)
|
|
|
|
{
|
|
|
|
u64 rdba = ring->dma;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
u32 size = ring->count * sizeof(union fm10k_rx_desc);
|
2016-08-04 06:05:27 +08:00
|
|
|
u32 rxqctl, rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
|
2014-09-21 07:49:43 +08:00
|
|
|
u32 srrctl = FM10K_SRRCTL_BUFFER_CHAINING_EN;
|
|
|
|
u32 rxint = FM10K_INT_MAP_DISABLE;
|
|
|
|
u8 rx_pause = interface->rx_pause;
|
|
|
|
u8 reg_idx = ring->reg_idx;
|
|
|
|
|
|
|
|
/* disable queue to avoid issues while updating state */
|
2016-08-04 06:05:27 +08:00
|
|
|
rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
|
|
|
|
rxqctl &= ~FM10K_RXQCTL_ENABLE;
|
2017-01-13 07:59:42 +08:00
|
|
|
fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
|
2014-09-21 07:49:43 +08:00
|
|
|
fm10k_write_flush(hw);
|
|
|
|
|
|
|
|
/* possible poll here to verify ring resources have been cleaned */
|
|
|
|
|
|
|
|
/* set location and size for descriptor ring */
|
|
|
|
fm10k_write_reg(hw, FM10K_RDBAL(reg_idx), rdba & DMA_BIT_MASK(32));
|
|
|
|
fm10k_write_reg(hw, FM10K_RDBAH(reg_idx), rdba >> 32);
|
|
|
|
fm10k_write_reg(hw, FM10K_RDLEN(reg_idx), size);
|
|
|
|
|
|
|
|
/* reset head and tail pointers */
|
|
|
|
fm10k_write_reg(hw, FM10K_RDH(reg_idx), 0);
|
|
|
|
fm10k_write_reg(hw, FM10K_RDT(reg_idx), 0);
|
|
|
|
|
|
|
|
/* store tail pointer */
|
|
|
|
ring->tail = &interface->uc_addr[FM10K_RDT(reg_idx)];
|
|
|
|
|
2015-10-17 01:57:10 +08:00
|
|
|
/* reset ntu and ntc to place SW in sync with hardware */
|
2014-09-21 07:49:43 +08:00
|
|
|
ring->next_to_clean = 0;
|
|
|
|
ring->next_to_use = 0;
|
|
|
|
ring->next_to_alloc = 0;
|
|
|
|
|
|
|
|
/* Configure the Rx buffer size for one buff without split */
|
|
|
|
srrctl |= FM10K_RX_BUFSZ >> FM10K_SRRCTL_BSIZEPKT_SHIFT;
|
|
|
|
|
2015-01-31 10:23:05 +08:00
|
|
|
/* Configure the Rx ring to suppress loopback packets */
|
2014-09-21 07:49:43 +08:00
|
|
|
srrctl |= FM10K_SRRCTL_LOOPBACK_SUPPRESS;
|
|
|
|
fm10k_write_reg(hw, FM10K_SRRCTL(reg_idx), srrctl);
|
|
|
|
|
|
|
|
/* Enable drop on empty */
|
2014-09-21 07:53:08 +08:00
|
|
|
#ifdef CONFIG_DCB
|
2014-09-21 07:49:43 +08:00
|
|
|
if (interface->pfc_en)
|
|
|
|
rx_pause = interface->pfc_en;
|
|
|
|
#endif
|
2015-12-23 05:43:49 +08:00
|
|
|
if (!(rx_pause & BIT(ring->qos_pc)))
|
2014-09-21 07:49:43 +08:00
|
|
|
rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;
|
|
|
|
|
|
|
|
fm10k_write_reg(hw, FM10K_RXDCTL(reg_idx), rxdctl);
|
|
|
|
|
|
|
|
/* assign default VLAN to queue */
|
|
|
|
ring->vid = hw->mac.default_vid;
|
|
|
|
|
2015-11-03 04:10:22 +08:00
|
|
|
/* if we have an active VLAN, disable default VLAN ID */
|
2015-06-25 04:34:46 +08:00
|
|
|
if (test_bit(hw->mac.default_vid, interface->active_vlans))
|
|
|
|
ring->vid |= FM10K_VLAN_CLEAR;
|
|
|
|
|
2014-09-21 07:49:43 +08:00
|
|
|
/* Map interrupt */
|
|
|
|
if (ring->q_vector) {
|
|
|
|
rxint = ring->q_vector->v_idx + NON_Q_VECTORS(hw);
|
|
|
|
rxint |= FM10K_INT_MAP_TIMER1;
|
|
|
|
}
|
|
|
|
|
|
|
|
fm10k_write_reg(hw, FM10K_RXINT(reg_idx), rxint);
|
|
|
|
|
|
|
|
/* enable queue */
|
2016-08-04 06:05:27 +08:00
|
|
|
rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
|
|
|
|
rxqctl |= FM10K_RXQCTL_ENABLE;
|
2014-09-21 07:49:43 +08:00
|
|
|
fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
|
2014-09-21 07:50:03 +08:00
|
|
|
|
|
|
|
/* place buffers on ring for receive data */
|
|
|
|
fm10k_alloc_rx_buffers(ring, fm10k_desc_unused(ring));
|
2014-09-21 07:49:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_update_rx_drop_en - Configures the drop enable bits for Rx rings
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* Configure the drop enable bits for the Rx rings.
|
|
|
|
**/
|
|
|
|
void fm10k_update_rx_drop_en(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
u8 rx_pause = interface->rx_pause;
|
|
|
|
int i;
|
|
|
|
|
2014-09-21 07:53:08 +08:00
|
|
|
#ifdef CONFIG_DCB
|
2014-09-21 07:49:43 +08:00
|
|
|
if (interface->pfc_en)
|
|
|
|
rx_pause = interface->pfc_en;
|
|
|
|
|
|
|
|
#endif
|
|
|
|
for (i = 0; i < interface->num_rx_queues; i++) {
|
|
|
|
struct fm10k_ring *ring = interface->rx_ring[i];
|
|
|
|
u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
|
|
|
|
u8 reg_idx = ring->reg_idx;
|
|
|
|
|
2015-12-23 05:43:49 +08:00
|
|
|
if (!(rx_pause & BIT(ring->qos_pc)))
|
2014-09-21 07:49:43 +08:00
|
|
|
rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;
|
|
|
|
|
|
|
|
fm10k_write_reg(hw, FM10K_RXDCTL(reg_idx), rxdctl);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_configure_dglort - Configure Receive DGLORT after reset
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* Configure the DGLORT description and RSS tables.
|
|
|
|
**/
|
|
|
|
static void fm10k_configure_dglort(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_dglort_cfg dglort = { 0 };
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int i;
|
|
|
|
u32 mrqc;
|
|
|
|
|
|
|
|
/* Fill out hash function seeds */
|
|
|
|
for (i = 0; i < FM10K_RSSRK_SIZE; i++)
|
|
|
|
fm10k_write_reg(hw, FM10K_RSSRK(0, i), interface->rssrk[i]);
|
|
|
|
|
|
|
|
/* Write RETA table to hardware */
|
|
|
|
for (i = 0; i < FM10K_RETA_SIZE; i++)
|
|
|
|
fm10k_write_reg(hw, FM10K_RETA(0, i), interface->reta[i]);
|
|
|
|
|
|
|
|
/* Generate RSS hash based on packet types, TCP/UDP
|
|
|
|
* port numbers and/or IPv4/v6 src and dst addresses
|
|
|
|
*/
|
|
|
|
mrqc = FM10K_MRQC_IPV4 |
|
|
|
|
FM10K_MRQC_TCP_IPV4 |
|
|
|
|
FM10K_MRQC_IPV6 |
|
|
|
|
FM10K_MRQC_TCP_IPV6;
|
|
|
|
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
if (test_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP, interface->flags))
|
2014-09-21 07:49:43 +08:00
|
|
|
mrqc |= FM10K_MRQC_UDP_IPV4;
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
if (test_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP, interface->flags))
|
2014-09-21 07:49:43 +08:00
|
|
|
mrqc |= FM10K_MRQC_UDP_IPV6;
|
|
|
|
|
|
|
|
fm10k_write_reg(hw, FM10K_MRQC(0), mrqc);
|
|
|
|
|
|
|
|
/* configure default DGLORT mapping for RSS/DCB */
|
|
|
|
dglort.inner_rss = 1;
|
|
|
|
dglort.rss_l = fls(interface->ring_feature[RING_F_RSS].mask);
|
|
|
|
dglort.pc_l = fls(interface->ring_feature[RING_F_QOS].mask);
|
|
|
|
hw->mac.ops.configure_dglort_map(hw, &dglort);
|
|
|
|
|
|
|
|
/* assign GLORT per queue for queue mapped testing */
|
|
|
|
if (interface->glort_count > 64) {
|
|
|
|
memset(&dglort, 0, sizeof(dglort));
|
|
|
|
dglort.inner_rss = 1;
|
|
|
|
dglort.glort = interface->glort + 64;
|
|
|
|
dglort.idx = fm10k_dglort_pf_queue;
|
|
|
|
dglort.queue_l = fls(interface->num_rx_queues - 1);
|
|
|
|
hw->mac.ops.configure_dglort_map(hw, &dglort);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* assign glort value for RSS/DCB specific to this interface */
|
|
|
|
memset(&dglort, 0, sizeof(dglort));
|
|
|
|
dglort.inner_rss = 1;
|
|
|
|
dglort.glort = interface->glort;
|
|
|
|
dglort.rss_l = fls(interface->ring_feature[RING_F_RSS].mask);
|
|
|
|
dglort.pc_l = fls(interface->ring_feature[RING_F_QOS].mask);
|
|
|
|
/* configure DGLORT mapping for RSS/DCB */
|
|
|
|
dglort.idx = fm10k_dglort_pf_rss;
|
2014-09-21 07:51:15 +08:00
|
|
|
if (interface->l2_accel)
|
|
|
|
dglort.shared_l = fls(interface->l2_accel->size);
|
2014-09-21 07:49:43 +08:00
|
|
|
hw->mac.ops.configure_dglort_map(hw, &dglort);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_configure_rx - Configure Receive Unit after Reset
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* Configure the Rx unit of the MAC after a reset.
|
|
|
|
**/
|
|
|
|
static void fm10k_configure_rx(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
/* Configure SWPRI to PC map */
|
|
|
|
fm10k_configure_swpri_map(interface);
|
|
|
|
|
|
|
|
/* Configure RSS and DGLORT map */
|
|
|
|
fm10k_configure_dglort(interface);
|
|
|
|
|
|
|
|
/* Setup the HW Rx Head and Tail descriptor pointers */
|
|
|
|
for (i = 0; i < interface->num_rx_queues; i++)
|
|
|
|
fm10k_configure_rx_ring(interface, interface->rx_ring[i]);
|
|
|
|
|
|
|
|
/* possible poll here to verify that Rx rings are now enabled */
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
static void fm10k_napi_enable_all(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_q_vector *q_vector;
|
|
|
|
int q_idx;
|
|
|
|
|
|
|
|
for (q_idx = 0; q_idx < interface->num_q_vectors; q_idx++) {
|
|
|
|
q_vector = interface->q_vector[q_idx];
|
|
|
|
napi_enable(&q_vector->napi);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-04 04:26:56 +08:00
|
|
|
/* MSI-X handler for a queue vector: schedule NAPI when the vector has at
 * least one Rx or Tx ring attached.  Always reports the IRQ as handled.
 */
static irqreturn_t fm10k_msix_clean_rings(int __always_unused irq, void *data)
{
	struct fm10k_q_vector *qv = data;

	/* a vector with no rings has nothing to poll */
	if (!qv->rx.count && !qv->tx.count)
		return IRQ_HANDLED;

	napi_schedule_irqoff(&qv->napi);

	return IRQ_HANDLED;
}
|
|
|
|
|
2015-04-04 04:26:56 +08:00
|
|
|
/* MSI-X handler for the VF mailbox vector: re-arm the interrupt, process
 * the mailbox if its lock is free, and kick the service task.
 */
static irqreturn_t fm10k_msix_mbx_vf(int __always_unused irq, void *data)
{
	struct fm10k_intfc *interface = data;
	struct fm10k_hw *hw = &interface->hw;
	struct fm10k_mbx_info *mbx = &hw->mbx;
	u32 itr = (FM10K_MBX_INT_DELAY >> hw->mac.itr_scale) |
		  FM10K_ITR_ENABLE;

	/* re-enable mailbox interrupt and indicate 20us delay */
	fm10k_write_reg(hw, FM10K_VFITR(FM10K_MBX_VECTOR), itr);

	/* service upstream mailbox; skipped when the lock is already held */
	if (fm10k_mbx_trylock(interface)) {
		mbx->ops.process(hw, mbx);
		fm10k_mbx_unlock(interface);
	}

	/* flag the host state for re-evaluation and schedule the service task */
	hw->mac.get_host_state = true;
	fm10k_service_event_schedule(interface);

	return IRQ_HANDLED;
}
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
#define FM10K_ERR_MSG(type) case (type): error = #type; break
|
2015-06-25 04:34:48 +08:00
|
|
|
static void fm10k_handle_fault(struct fm10k_intfc *interface, int type,
|
2015-10-29 08:19:40 +08:00
|
|
|
struct fm10k_fault *fault)
|
2014-09-21 07:48:51 +08:00
|
|
|
{
|
|
|
|
struct pci_dev *pdev = interface->pdev;
|
2015-06-25 04:34:48 +08:00
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
struct fm10k_iov_data *iov_data = interface->iov_data;
|
2014-09-21 07:48:51 +08:00
|
|
|
char *error;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case FM10K_PCA_FAULT:
|
|
|
|
switch (fault->type) {
|
|
|
|
default:
|
|
|
|
error = "Unknown PCA error";
|
|
|
|
break;
|
|
|
|
FM10K_ERR_MSG(PCA_NO_FAULT);
|
|
|
|
FM10K_ERR_MSG(PCA_UNMAPPED_ADDR);
|
|
|
|
FM10K_ERR_MSG(PCA_BAD_QACCESS_PF);
|
|
|
|
FM10K_ERR_MSG(PCA_BAD_QACCESS_VF);
|
|
|
|
FM10K_ERR_MSG(PCA_MALICIOUS_REQ);
|
|
|
|
FM10K_ERR_MSG(PCA_POISONED_TLP);
|
|
|
|
FM10K_ERR_MSG(PCA_TLP_ABORT);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case FM10K_THI_FAULT:
|
|
|
|
switch (fault->type) {
|
|
|
|
default:
|
|
|
|
error = "Unknown THI error";
|
|
|
|
break;
|
|
|
|
FM10K_ERR_MSG(THI_NO_FAULT);
|
|
|
|
FM10K_ERR_MSG(THI_MAL_DIS_Q_FAULT);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case FM10K_FUM_FAULT:
|
|
|
|
switch (fault->type) {
|
|
|
|
default:
|
|
|
|
error = "Unknown FUM error";
|
|
|
|
break;
|
|
|
|
FM10K_ERR_MSG(FUM_NO_FAULT);
|
|
|
|
FM10K_ERR_MSG(FUM_UNMAPPED_ADDR);
|
|
|
|
FM10K_ERR_MSG(FUM_BAD_VF_QACCESS);
|
|
|
|
FM10K_ERR_MSG(FUM_ADD_DECODE_ERR);
|
|
|
|
FM10K_ERR_MSG(FUM_RO_ERROR);
|
|
|
|
FM10K_ERR_MSG(FUM_QPRC_CRC_ERROR);
|
|
|
|
FM10K_ERR_MSG(FUM_CSR_TIMEOUT);
|
|
|
|
FM10K_ERR_MSG(FUM_INVALID_TYPE);
|
|
|
|
FM10K_ERR_MSG(FUM_INVALID_LENGTH);
|
|
|
|
FM10K_ERR_MSG(FUM_INVALID_BE);
|
|
|
|
FM10K_ERR_MSG(FUM_INVALID_ALIGN);
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error = "Undocumented fault";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
dev_warn(&pdev->dev,
|
|
|
|
"%s Address: 0x%llx SpecInfo: 0x%x Func: %02x.%0x\n",
|
|
|
|
error, fault->address, fault->specinfo,
|
|
|
|
PCI_SLOT(fault->func), PCI_FUNC(fault->func));
|
2015-06-25 04:34:48 +08:00
|
|
|
|
|
|
|
/* For VF faults, clear out the respective LPORT, reset the queue
|
|
|
|
* resources, and then reconnect to the mailbox. This allows the
|
|
|
|
* VF in question to resume behavior. For transient faults that are
|
|
|
|
* the result of non-malicious behavior this will log the fault and
|
|
|
|
* allow the VF to resume functionality. Obviously for malicious VFs
|
|
|
|
* they will be able to attempt malicious behavior again. In this
|
|
|
|
* case, the system administrator will need to step in and manually
|
|
|
|
* remove or disable the VF in question.
|
|
|
|
*/
|
|
|
|
if (fault->func && iov_data) {
|
|
|
|
int vf = fault->func - 1;
|
|
|
|
struct fm10k_vf_info *vf_info = &iov_data->vf_info[vf];
|
|
|
|
|
|
|
|
hw->iov.ops.reset_lport(hw, vf_info);
|
|
|
|
hw->iov.ops.reset_resources(hw, vf_info);
|
|
|
|
|
|
|
|
/* reset_lport disables the VF, so re-enable it */
|
|
|
|
hw->iov.ops.set_lport(hw, vf_info, vf,
|
|
|
|
FM10K_VF_FLAG_MULTI_CAPABLE);
|
|
|
|
|
|
|
|
/* reset_resources will disconnect from the mbx */
|
|
|
|
vf_info->mbx.ops.connect(hw, &vf_info->mbx);
|
|
|
|
}
|
2014-09-21 07:48:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Walk the fault bits of @eicr from the lowest bit upward.  Each set bit
 * selects one fault source, starting at FM10K_PCA_FAULT and advancing by
 * FM10K_FAULT_SIZE per bit; its record is fetched and handed to
 * fm10k_handle_fault().
 */
static void fm10k_report_fault(struct fm10k_intfc *interface, u32 eicr)
{
	struct fm10k_hw *hw = &interface->hw;
	struct fm10k_fault fault = { 0 };
	int type = FM10K_PCA_FAULT;
	int err;

	eicr &= FM10K_EICR_FAULT_MASK;

	while (eicr) {
		/* only check if there is an error reported */
		if (eicr & 0x1) {
			/* retrieve fault info */
			err = hw->mac.ops.get_fault(hw, type, &fault);
			if (err)
				dev_err(&interface->pdev->dev,
					"error reading fault\n");
			else
				fm10k_handle_fault(interface, type, &fault);
		}

		/* advance to the next fault source */
		eicr >>= 1;
		type += FM10K_FAULT_SIZE;
	}
}
|
|
|
|
|
|
|
|
/* On a MAXHOLDTIME event, scan the eight MAXHOLDQ words from queue 255
 * down to queue 0.  Each flagged PF queue is counted and has its RXDCTL
 * rewritten without the drop-on-empty bit; flagged queues at or above
 * FM10K_MAX_QUEUES_PF are only counted as VF overruns.
 */
static void fm10k_reset_drop_on_empty(struct fm10k_intfc *interface, u32 eicr)
{
	const u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
	struct fm10k_hw *hw = &interface->hw;
	u32 pending;
	int q = 255;

	if (!(eicr & FM10K_EICR_MAXHOLDTIME))
		return;

	/* a non-zero word is written straight back
	 * NOTE(review): presumably write-1-to-clear - confirm with datasheet
	 */
	pending = fm10k_read_reg(hw, FM10K_MAXHOLDQ(7));
	if (pending)
		fm10k_write_reg(hw, FM10K_MAXHOLDQ(7), pending);

	/* invariant: bit 31 of pending is the flag for queue q */
	for (;;) {
		if (pending & BIT(31)) {
			if (q >= FM10K_MAX_QUEUES_PF) {
				interface->rx_overrun_vf++;
			} else {
				interface->rx_overrun_pf++;
				fm10k_write_reg(hw, FM10K_RXDCTL(q), rxdctl);
			}
		}

		/* move the next lower queue's flag into bit 31 */
		pending <<= 1;

		/* no flags left in this word, jump to its lowest queue */
		if (!pending)
			q &= ~(32 - 1);

		if (!q)
			break;

		/* stay in the current word unless q was its lowest queue */
		if (q-- % 32)
			continue;

		/* crossed a word boundary, fetch the next lower word */
		pending = fm10k_read_reg(hw, FM10K_MAXHOLDQ(q / 32));
		if (pending)
			fm10k_write_reg(hw, FM10K_MAXHOLDQ(q / 32), pending);
	}
}
|
|
|
|
|
2015-04-04 04:26:56 +08:00
|
|
|
/* MSI-X handler for the PF mailbox vector.  Acknowledges the mailbox and
 * switch-state causes, reports faults, resets overrun queues, processes
 * the mailbox when its lock is free, reacts to a switch-not-ready event,
 * schedules the service task and finally re-arms the interrupt.
 */
static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data)
{
	struct fm10k_intfc *interface = data;
	struct fm10k_hw *hw = &interface->hw;
	struct fm10k_mbx_info *mbx = &hw->mbx;
	s32 err = 0;
	u32 eicr;
	u32 itr;

	/* unmask any set bits related to this interrupt */
	eicr = fm10k_read_reg(hw, FM10K_EICR);
	fm10k_write_reg(hw, FM10K_EICR,
			eicr & (FM10K_EICR_MAILBOX |
				FM10K_EICR_SWITCHREADY |
				FM10K_EICR_SWITCHNOTREADY));

	/* report any faults found to the message log */
	fm10k_report_fault(interface, eicr);

	/* reset any queues disabled due to receiver overrun */
	fm10k_reset_drop_on_empty(interface, eicr);

	/* service mailboxes; err stays 0 when the lock is already held */
	if (fm10k_mbx_trylock(interface)) {
		err = mbx->ops.process(hw, mbx);

		/* handle VFLRE events */
		fm10k_iov_event(interface);

		fm10k_mbx_unlock(interface);
	}

	/* mailbox processing asked for a reset, record the request */
	if (err == FM10K_ERR_RESET_REQUESTED)
		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);

	/* if switch toggled state we should reset GLORTs */
	if (eicr & FM10K_EICR_SWITCHNOTREADY) {
		/* force link down for at least 4 seconds */
		interface->link_down_event = jiffies + (4 * HZ);
		set_bit(__FM10K_LINK_DOWN, interface->state);

		/* reset dglort_map back to no config */
		hw->mac.dglort_map = FM10K_DGLORTMAP_NONE;
	}

	/* we should validate host state after interrupt event */
	hw->mac.get_host_state = true;

	/* validate host state, and handle VF mailboxes in the service task */
	fm10k_service_event_schedule(interface);

	/* re-enable mailbox interrupt and indicate 20us delay */
	itr = (FM10K_MBX_INT_DELAY >> hw->mac.itr_scale) | FM10K_ITR_ENABLE;
	fm10k_write_reg(hw, FM10K_ITR(FM10K_MBX_VECTOR), itr);

	return IRQ_HANDLED;
}
|
|
|
|
|
|
|
|
void fm10k_mbx_free_irq(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
2016-02-05 02:47:54 +08:00
|
|
|
struct msix_entry *entry;
|
2014-09-21 07:48:51 +08:00
|
|
|
int itr_reg;
|
|
|
|
|
2015-10-28 07:59:18 +08:00
|
|
|
/* no mailbox IRQ to free if MSI-X is not enabled */
|
|
|
|
if (!interface->msix_entries)
|
|
|
|
return;
|
|
|
|
|
2016-02-05 02:47:54 +08:00
|
|
|
entry = &interface->msix_entries[FM10K_MBX_VECTOR];
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* disconnect the mailbox */
|
|
|
|
hw->mbx.ops.disconnect(hw, &hw->mbx);
|
|
|
|
|
|
|
|
/* disable Mailbox cause */
|
|
|
|
if (hw->mac.type == fm10k_mac_pf) {
|
|
|
|
fm10k_write_reg(hw, FM10K_EIMR,
|
|
|
|
FM10K_EIMR_DISABLE(PCA_FAULT) |
|
|
|
|
FM10K_EIMR_DISABLE(FUM_FAULT) |
|
|
|
|
FM10K_EIMR_DISABLE(MAILBOX) |
|
|
|
|
FM10K_EIMR_DISABLE(SWITCHREADY) |
|
|
|
|
FM10K_EIMR_DISABLE(SWITCHNOTREADY) |
|
|
|
|
FM10K_EIMR_DISABLE(SRAMERROR) |
|
|
|
|
FM10K_EIMR_DISABLE(VFLR) |
|
|
|
|
FM10K_EIMR_DISABLE(MAXHOLDTIME));
|
|
|
|
itr_reg = FM10K_ITR(FM10K_MBX_VECTOR);
|
2014-09-21 07:51:40 +08:00
|
|
|
} else {
|
|
|
|
itr_reg = FM10K_VFITR(FM10K_MBX_VECTOR);
|
2014-09-21 07:48:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
fm10k_write_reg(hw, itr_reg, FM10K_ITR_MASK_SET);
|
|
|
|
|
|
|
|
free_irq(entry->vector, interface);
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:51:40 +08:00
|
|
|
/* VF mailbox handler for MAC/VLAN messages.  Runs the common handler and
 * then requests a reset when the permanent MAC differs from the current
 * one, or when the VLAN override or default VLAN changed across the call.
 * Returns the common handler's error, or 0.
 */
static s32 fm10k_mbx_mac_addr(struct fm10k_hw *hw, u32 **results,
			      struct fm10k_mbx_info *mbx)
{
	/* snapshot VLAN settings so a change made by the handler is visible */
	const bool prev_override = hw->mac.vlan_override;
	const u16 prev_vid = hw->mac.default_vid;
	struct fm10k_intfc *interface;
	bool mac_changed, vlan_changed;
	s32 err;

	err = fm10k_msg_mac_vlan_vf(hw, results, mbx);
	if (err)
		return err;

	interface = container_of(hw, struct fm10k_intfc, hw);

	/* MAC was changed so we need reset */
	mac_changed = is_valid_ether_addr(hw->mac.perm_addr) &&
		      !ether_addr_equal(hw->mac.perm_addr, hw->mac.addr);
	if (mac_changed)
		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);

	/* VLAN override was changed, or default VLAN changed */
	vlan_changed = prev_override != hw->mac.vlan_override ||
		       prev_vid != hw->mac.default_vid;
	if (vlan_changed)
		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);

	return 0;
}
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* generic error handler for mailbox issues */
|
|
|
|
static s32 fm10k_mbx_error(struct fm10k_hw *hw, u32 **results,
|
2015-04-04 04:26:56 +08:00
|
|
|
struct fm10k_mbx_info __always_unused *mbx)
|
2014-09-21 07:48:51 +08:00
|
|
|
{
|
|
|
|
struct fm10k_intfc *interface;
|
|
|
|
struct pci_dev *pdev;
|
|
|
|
|
|
|
|
interface = container_of(hw, struct fm10k_intfc, hw);
|
|
|
|
pdev = interface->pdev;
|
|
|
|
|
|
|
|
dev_err(&pdev->dev, "Unknown message ID %u\n",
|
|
|
|
**results & FM10K_TLV_ID_MASK);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:51:40 +08:00
|
|
|
static const struct fm10k_msg_data vf_mbx_data[] = {
|
|
|
|
FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
|
|
|
|
FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_mbx_mac_addr),
|
|
|
|
FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
|
|
|
|
FM10K_TLV_MSG_ERROR_HANDLER(fm10k_mbx_error),
|
|
|
|
};
|
|
|
|
|
|
|
|
static int fm10k_mbx_request_irq_vf(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct msix_entry *entry = &interface->msix_entries[FM10K_MBX_VECTOR];
|
|
|
|
struct net_device *dev = interface->netdev;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* Use timer0 for interrupt moderation on the mailbox */
|
2015-12-23 05:43:44 +08:00
|
|
|
u32 itr = entry->entry | FM10K_INT_MAP_TIMER0;
|
2014-09-21 07:51:40 +08:00
|
|
|
|
|
|
|
/* register mailbox handlers */
|
|
|
|
err = hw->mbx.ops.register_handlers(&hw->mbx, vf_mbx_data);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* request the IRQ */
|
|
|
|
err = request_irq(entry->vector, fm10k_msix_mbx_vf, 0,
|
|
|
|
dev->name, interface);
|
|
|
|
if (err) {
|
|
|
|
netif_err(interface, probe, dev,
|
|
|
|
"request_irq for msix_mbx failed: %d\n", err);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* map all of the interrupt sources */
|
|
|
|
fm10k_write_reg(hw, FM10K_VFINT_MAP, itr);
|
|
|
|
|
|
|
|
/* enable interrupt */
|
|
|
|
fm10k_write_reg(hw, FM10K_VFITR(entry->entry), FM10K_ITR_ENABLE);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* PF mailbox handler for logical port map messages (see pf_mbx_data).
 *
 * When fm10k_msg_err_pf() succeeds and leaves a non-zero hw->swapi.status,
 * the request is treated as failed: link is forced down for two seconds,
 * dglort_map is reset, the service task is scheduled and a warning is logged
 * once per run of consecutive failures.  Otherwise the message is handed to
 * fm10k_msg_lport_map_pf() and a reset is requested if dglort_map changed.
 */
static s32 fm10k_lport_map(struct fm10k_hw *hw, u32 **results,
			   struct fm10k_mbx_info *mbx)
{
	struct fm10k_intfc *intfc = container_of(hw, struct fm10k_intfc, hw);
	const u32 prev_dglort_map = hw->mac.dglort_map;
	struct device *dev;
	s32 ret;

	ret = fm10k_msg_err_pf(hw, results, mbx);
	if (ret || !hw->swapi.status) {
		/* no failure status was reported, so parse the port map */
		ret = fm10k_msg_lport_map_pf(hw, results, mbx);
		if (ret)
			return ret;

		intfc->lport_map_failed = false;

		/* a changed port map only takes effect after a reset */
		if (prev_dglort_map != hw->mac.dglort_map)
			set_bit(FM10K_FLAG_RESET_REQUESTED, intfc->flags);

		return 0;
	}

	/* keep the link down for a reasonable delay */
	intfc->link_down_event = jiffies + (2 * HZ);
	set_bit(__FM10K_LINK_DOWN, intfc->state);

	/* drop back to the "no config" port map */
	hw->mac.dglort_map = FM10K_DGLORTMAP_NONE;

	fm10k_service_event_schedule(intfc);

	/* only warn on the first failure to avoid flooding the kernel log */
	if (intfc->lport_map_failed)
		return 0;

	intfc->lport_map_failed = true;

	dev = &intfc->pdev->dev;
	if (hw->swapi.status == FM10K_MSG_ERR_PEP_NOT_SCHEDULED)
		dev_warn(dev,
			 "cannot obtain link because the host interface is configured for a PCIe host interface bandwidth of zero\n");
	dev_warn(dev, "request logical port map failed: %d\n",
		 hw->swapi.status);

	return 0;
}
|
|
|
|
|
|
|
|
static s32 fm10k_update_pvid(struct fm10k_hw *hw, u32 **results,
|
2015-04-04 04:26:56 +08:00
|
|
|
struct fm10k_mbx_info __always_unused *mbx)
|
2014-09-21 07:48:51 +08:00
|
|
|
{
|
|
|
|
struct fm10k_intfc *interface;
|
|
|
|
u16 glort, pvid;
|
|
|
|
u32 pvid_update;
|
|
|
|
s32 err;
|
|
|
|
|
|
|
|
err = fm10k_tlv_attr_get_u32(results[FM10K_PF_ATTR_ID_UPDATE_PVID],
|
|
|
|
&pvid_update);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* extract values from the pvid update */
|
|
|
|
glort = FM10K_MSG_HDR_FIELD_GET(pvid_update, UPDATE_PVID_GLORT);
|
|
|
|
pvid = FM10K_MSG_HDR_FIELD_GET(pvid_update, UPDATE_PVID_PVID);
|
|
|
|
|
|
|
|
/* if glort is not valid return error */
|
|
|
|
if (!fm10k_glort_valid_pf(hw, glort))
|
|
|
|
return FM10K_ERR_PARAM;
|
|
|
|
|
2015-11-03 04:10:22 +08:00
|
|
|
/* verify VLAN ID is valid */
|
2014-09-21 07:48:51 +08:00
|
|
|
if (pvid >= FM10K_VLAN_TABLE_VID_MAX)
|
|
|
|
return FM10K_ERR_PARAM;
|
|
|
|
|
|
|
|
interface = container_of(hw, struct fm10k_intfc, hw);
|
|
|
|
|
2014-09-21 07:52:09 +08:00
|
|
|
/* check to see if this belongs to one of the VFs */
|
|
|
|
err = fm10k_iov_update_pvid(interface, glort, pvid);
|
|
|
|
if (!err)
|
|
|
|
return 0;
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* we need to reset if default VLAN was just updated */
|
|
|
|
if (pvid != hw->mac.default_vid)
|
fm10k: use a BITMAP for flags to avoid race conditions
Replace bitwise operators and #defines with a BITMAP and enumeration
values. This is similar to how we handle the "state" values as well.
This has two distinct advantages over the old method. First, we ensure
correctness of operations which are currently problematic due to race
conditions. Suppose that two kernel threads are running, such as the
watchdog and an ethtool ioctl, and both modify flags. We'll say that the
watchdog is CPU A, and the ethtool ioctl is CPU B.
CPU A sets FLAG_1, which can be seen as
CPU A read FLAGS
CPU A write FLAGS | FLAG_1
CPU B sets FLAG_2, which can be seen as
CPU B read FLAGS
CPU A write FLAGS | FLAG_2
However, "|=" and "&=" operators are not actually atomic. So this could
be ordered like the following:
CPU A read FLAGS -> variable
CPU B read FLAGS -> variable
CPU A write FLAGS (variable | FLAG_1)
CPU B write FLAGS (variable | FLAG_2)
Notice how the 2nd write from CPU B could actually undo the write from
CPU A because it isn't guaranteed that the |= operation is atomic.
In practice the race windows for most flag writes is incredibly narrow
so it is not easy to isolate issues. However, the more flags we have,
the more likely they will cause problems. Additionally, if such
a problem were to arise, it would be incredibly difficult to track down.
Second, there is an additional advantage beyond code correctness. We can
now automatically size the BITMAP if more flags were added, so that we
do not need to remember that flags is u32 and thus if we added too many
flags we would over-run the variable. This is not a likely occurrence
for fm10k driver, but this patch can serve as an example for other
drivers which have many more flags.
This particular change does have a bit of trouble converting some of the
idioms previously used with the #defines for flags. Specifically, when
converting FM10K_FLAG_RSS_FIELD_IPV[46]_UDP flags. This whole operation
was actually quite problematic, because we actually stored flags
separately. This could more easily show the problem of the above
re-ordering issue.
This is really difficult to test whether atomics make a difference in
practical scenarios, but you can ensure that basic functionality remains
the same. This patch has a lot of code coverage, but most of it is
relatively simple.
While we are modifying these files, update their copyright year.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-01-13 07:59:38 +08:00
|
|
|
set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
|
2014-09-21 07:48:51 +08:00
|
|
|
|
|
|
|
hw->mac.default_vid = pvid;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct fm10k_msg_data pf_mbx_data[] = {
|
|
|
|
FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
|
|
|
|
FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
|
|
|
|
FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_lport_map),
|
|
|
|
FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
|
|
|
|
FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
|
|
|
|
FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_update_pvid),
|
|
|
|
FM10K_TLV_MSG_ERROR_HANDLER(fm10k_mbx_error),
|
|
|
|
};
|
|
|
|
|
|
|
|
static int fm10k_mbx_request_irq_pf(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct msix_entry *entry = &interface->msix_entries[FM10K_MBX_VECTOR];
|
|
|
|
struct net_device *dev = interface->netdev;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* Use timer0 for interrupt moderation on the mailbox */
|
2015-12-23 05:43:44 +08:00
|
|
|
u32 mbx_itr = entry->entry | FM10K_INT_MAP_TIMER0;
|
|
|
|
u32 other_itr = entry->entry | FM10K_INT_MAP_IMMEDIATE;
|
2014-09-21 07:48:51 +08:00
|
|
|
|
|
|
|
/* register mailbox handlers */
|
|
|
|
err = hw->mbx.ops.register_handlers(&hw->mbx, pf_mbx_data);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* request the IRQ */
|
|
|
|
err = request_irq(entry->vector, fm10k_msix_mbx_pf, 0,
|
|
|
|
dev->name, interface);
|
|
|
|
if (err) {
|
|
|
|
netif_err(interface, probe, dev,
|
|
|
|
"request_irq for msix_mbx failed: %d\n", err);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Enable interrupts w/ no moderation for "other" interrupts */
|
2015-10-27 07:32:04 +08:00
|
|
|
fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), other_itr);
|
|
|
|
fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), other_itr);
|
|
|
|
fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_sram), other_itr);
|
|
|
|
fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_max_hold_time), other_itr);
|
|
|
|
fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_vflr), other_itr);
|
2014-09-21 07:48:51 +08:00
|
|
|
|
|
|
|
/* Enable interrupts w/ moderation for mailbox */
|
2015-10-27 07:32:04 +08:00
|
|
|
fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_mailbox), mbx_itr);
|
2014-09-21 07:48:51 +08:00
|
|
|
|
|
|
|
/* Enable individual interrupt causes */
|
|
|
|
fm10k_write_reg(hw, FM10K_EIMR, FM10K_EIMR_ENABLE(PCA_FAULT) |
|
|
|
|
FM10K_EIMR_ENABLE(FUM_FAULT) |
|
|
|
|
FM10K_EIMR_ENABLE(MAILBOX) |
|
|
|
|
FM10K_EIMR_ENABLE(SWITCHREADY) |
|
|
|
|
FM10K_EIMR_ENABLE(SWITCHNOTREADY) |
|
|
|
|
FM10K_EIMR_ENABLE(SRAMERROR) |
|
|
|
|
FM10K_EIMR_ENABLE(VFLR) |
|
|
|
|
FM10K_EIMR_ENABLE(MAXHOLDTIME));
|
|
|
|
|
|
|
|
/* enable interrupt */
|
|
|
|
fm10k_write_reg(hw, FM10K_ITR(entry->entry), FM10K_ITR_ENABLE);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int fm10k_mbx_request_irq(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* enable Mailbox cause */
|
2014-09-21 07:51:40 +08:00
|
|
|
if (hw->mac.type == fm10k_mac_pf)
|
|
|
|
err = fm10k_mbx_request_irq_pf(interface);
|
|
|
|
else
|
|
|
|
err = fm10k_mbx_request_irq_vf(interface);
|
2015-10-28 07:59:18 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
2014-09-21 07:48:51 +08:00
|
|
|
|
|
|
|
/* connect mailbox */
|
2015-10-28 07:59:18 +08:00
|
|
|
err = hw->mbx.ops.connect(hw, &hw->mbx);
|
|
|
|
|
|
|
|
/* if the mailbox failed to connect, then free IRQ */
|
|
|
|
if (err)
|
|
|
|
fm10k_mbx_free_irq(interface);
|
2014-09-21 07:48:51 +08:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_qv_free_irq - release interrupts associated with queue vectors
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* Release all interrupts associated with this interface
|
|
|
|
**/
|
|
|
|
void fm10k_qv_free_irq(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
int vector = interface->num_q_vectors;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
struct msix_entry *entry;
|
|
|
|
|
|
|
|
entry = &interface->msix_entries[NON_Q_VECTORS(hw) + vector];
|
|
|
|
|
|
|
|
while (vector) {
|
|
|
|
struct fm10k_q_vector *q_vector;
|
|
|
|
|
|
|
|
vector--;
|
|
|
|
entry--;
|
|
|
|
q_vector = interface->q_vector[vector];
|
|
|
|
|
|
|
|
if (!q_vector->tx.count && !q_vector->rx.count)
|
|
|
|
continue;
|
|
|
|
|
2015-10-30 04:43:40 +08:00
|
|
|
/* clear the affinity_mask in the IRQ descriptor */
|
|
|
|
irq_set_affinity_hint(entry->vector, NULL);
|
2014-09-21 07:48:51 +08:00
|
|
|
|
2015-10-30 04:43:40 +08:00
|
|
|
/* disable interrupts */
|
2014-09-21 07:48:51 +08:00
|
|
|
writel(FM10K_ITR_MASK_SET, q_vector->itr);
|
|
|
|
|
|
|
|
free_irq(entry->vector, q_vector);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_qv_request_irq - initialize interrupts for queue vectors
|
|
|
|
* @interface: board private structure
|
|
|
|
*
|
|
|
|
* Attempts to configure interrupts using the best available
|
|
|
|
* capabilities of the hardware and kernel.
|
|
|
|
**/
|
|
|
|
int fm10k_qv_request_irq(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device *dev = interface->netdev;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
struct msix_entry *entry;
|
2017-07-11 04:23:06 +08:00
|
|
|
unsigned int ri = 0, ti = 0;
|
2014-09-21 07:48:51 +08:00
|
|
|
int vector, err;
|
|
|
|
|
|
|
|
entry = &interface->msix_entries[NON_Q_VECTORS(hw)];
|
|
|
|
|
|
|
|
for (vector = 0; vector < interface->num_q_vectors; vector++) {
|
|
|
|
struct fm10k_q_vector *q_vector = interface->q_vector[vector];
|
|
|
|
|
|
|
|
/* name the vector */
|
|
|
|
if (q_vector->tx.count && q_vector->rx.count) {
|
2017-07-11 04:23:06 +08:00
|
|
|
snprintf(q_vector->name, sizeof(q_vector->name),
|
|
|
|
"%s-TxRx-%u", dev->name, ri++);
|
2014-09-21 07:48:51 +08:00
|
|
|
ti++;
|
|
|
|
} else if (q_vector->rx.count) {
|
2017-07-11 04:23:06 +08:00
|
|
|
snprintf(q_vector->name, sizeof(q_vector->name),
|
|
|
|
"%s-rx-%u", dev->name, ri++);
|
2014-09-21 07:48:51 +08:00
|
|
|
} else if (q_vector->tx.count) {
|
2017-07-11 04:23:06 +08:00
|
|
|
snprintf(q_vector->name, sizeof(q_vector->name),
|
|
|
|
"%s-tx-%u", dev->name, ti++);
|
2014-09-21 07:48:51 +08:00
|
|
|
} else {
|
|
|
|
/* skip this unused q_vector */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Assign ITR register to q_vector */
|
2014-09-21 07:51:40 +08:00
|
|
|
q_vector->itr = (hw->mac.type == fm10k_mac_pf) ?
|
|
|
|
&interface->uc_addr[FM10K_ITR(entry->entry)] :
|
|
|
|
&interface->uc_addr[FM10K_VFITR(entry->entry)];
|
2014-09-21 07:48:51 +08:00
|
|
|
|
|
|
|
/* request the IRQ */
|
|
|
|
err = request_irq(entry->vector, &fm10k_msix_clean_rings, 0,
|
|
|
|
q_vector->name, q_vector);
|
|
|
|
if (err) {
|
|
|
|
netif_err(interface, probe, dev,
|
|
|
|
"request_irq failed for MSIX interrupt Error: %d\n",
|
|
|
|
err);
|
|
|
|
goto err_out;
|
|
|
|
}
|
|
|
|
|
2015-10-30 04:43:40 +08:00
|
|
|
/* assign the mask for this irq */
|
|
|
|
irq_set_affinity_hint(entry->vector, &q_vector->affinity_mask);
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* Enable q_vector */
|
|
|
|
writel(FM10K_ITR_ENABLE, q_vector->itr);
|
|
|
|
|
|
|
|
entry++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_out:
|
|
|
|
/* wind through the ring freeing all entries and vectors */
|
|
|
|
while (vector) {
|
|
|
|
struct fm10k_q_vector *q_vector;
|
|
|
|
|
|
|
|
entry--;
|
|
|
|
vector--;
|
|
|
|
q_vector = interface->q_vector[vector];
|
|
|
|
|
|
|
|
if (!q_vector->tx.count && !q_vector->rx.count)
|
|
|
|
continue;
|
|
|
|
|
2015-10-30 04:43:40 +08:00
|
|
|
/* clear the affinity_mask in the IRQ descriptor */
|
|
|
|
irq_set_affinity_hint(entry->vector, NULL);
|
2014-09-21 07:48:51 +08:00
|
|
|
|
2015-10-30 04:43:40 +08:00
|
|
|
/* disable interrupts */
|
2014-09-21 07:48:51 +08:00
|
|
|
writel(FM10K_ITR_MASK_SET, q_vector->itr);
|
|
|
|
|
|
|
|
free_irq(entry->vector, q_vector);
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:48:29 +08:00
|
|
|
void fm10k_up(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
|
|
|
|
/* Enable Tx/Rx DMA */
|
|
|
|
hw->mac.ops.start_hw(hw);
|
|
|
|
|
2014-09-21 07:49:43 +08:00
|
|
|
/* configure Tx descriptor rings */
|
|
|
|
fm10k_configure_tx(interface);
|
|
|
|
|
|
|
|
/* configure Rx descriptor rings */
|
|
|
|
fm10k_configure_rx(interface);
|
|
|
|
|
2014-09-21 07:48:29 +08:00
|
|
|
/* configure interrupts */
|
|
|
|
hw->mac.ops.update_int_moderator(hw);
|
|
|
|
|
2016-06-08 07:08:45 +08:00
|
|
|
/* enable statistics capture again */
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_UPDATING_STATS, interface->state);
|
2016-06-08 07:08:45 +08:00
|
|
|
|
2014-09-21 07:48:29 +08:00
|
|
|
/* clear down bit to indicate we are ready to go */
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_DOWN, interface->state);
|
2014-09-21 07:48:29 +08:00
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* enable polling cleanups */
|
|
|
|
fm10k_napi_enable_all(interface);
|
|
|
|
|
2014-09-21 07:48:29 +08:00
|
|
|
/* re-establish Rx filters */
|
|
|
|
fm10k_restore_rx_state(interface);
|
|
|
|
|
|
|
|
/* enable transmits */
|
|
|
|
netif_tx_start_all_queues(interface->netdev);
|
2014-09-21 07:49:25 +08:00
|
|
|
|
2015-04-04 04:27:09 +08:00
|
|
|
/* kick off the service timer now */
|
2015-12-09 07:51:11 +08:00
|
|
|
hw->mac.get_host_state = true;
|
2014-09-21 07:49:25 +08:00
|
|
|
mod_timer(&interface->service_timer, jiffies);
|
2014-09-21 07:48:29 +08:00
|
|
|
}
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
static void fm10k_napi_disable_all(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_q_vector *q_vector;
|
|
|
|
int q_idx;
|
|
|
|
|
|
|
|
for (q_idx = 0; q_idx < interface->num_q_vectors; q_idx++) {
|
|
|
|
q_vector = interface->q_vector[q_idx];
|
|
|
|
napi_disable(&q_vector->napi);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:48:29 +08:00
|
|
|
void fm10k_down(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
2016-06-08 07:08:51 +08:00
|
|
|
int err, i = 0, count = 0;
|
2014-09-21 07:48:29 +08:00
|
|
|
|
|
|
|
/* signal that we are down to the interrupt handler and service task */
|
2017-01-13 07:59:39 +08:00
|
|
|
if (test_and_set_bit(__FM10K_DOWN, interface->state))
|
2016-06-04 06:42:11 +08:00
|
|
|
return;
|
2014-09-21 07:48:29 +08:00
|
|
|
|
|
|
|
/* call carrier off first to avoid false dev_watchdog timeouts */
|
|
|
|
netif_carrier_off(netdev);
|
|
|
|
|
|
|
|
/* disable transmits */
|
|
|
|
netif_tx_stop_all_queues(netdev);
|
|
|
|
netif_tx_disable(netdev);
|
|
|
|
|
|
|
|
/* reset Rx filters */
|
|
|
|
fm10k_reset_rx_state(interface);
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* disable polling routines */
|
|
|
|
fm10k_napi_disable_all(interface);
|
|
|
|
|
2014-09-21 07:49:25 +08:00
|
|
|
/* capture stats one last time before stopping interface */
|
|
|
|
fm10k_update_stats(interface);
|
|
|
|
|
2016-06-08 07:08:45 +08:00
|
|
|
/* prevent updating statistics while we're down */
|
2017-01-13 07:59:39 +08:00
|
|
|
while (test_and_set_bit(__FM10K_UPDATING_STATS, interface->state))
|
2016-06-08 07:08:45 +08:00
|
|
|
usleep_range(1000, 2000);
|
|
|
|
|
2016-06-08 07:08:51 +08:00
|
|
|
/* skip waiting for TX DMA if we lost PCIe link */
|
|
|
|
if (FM10K_REMOVED(hw->hw_addr))
|
|
|
|
goto skip_tx_dma_drain;
|
|
|
|
|
|
|
|
/* In some rare circumstances it can take a while for Tx queues to
|
|
|
|
* quiesce and be fully disabled. Attempt to .stop_hw() first, and
|
|
|
|
* then if we get ERR_REQUESTS_PENDING, go ahead and wait in a loop
|
|
|
|
* until the Tx queues have emptied, or until a number of retries. If
|
|
|
|
* we fail to clear within the retry loop, we will issue a warning
|
|
|
|
* indicating that Tx DMA is probably hung. Note this means we call
|
|
|
|
* .stop_hw() twice but this shouldn't cause any problems.
|
|
|
|
*/
|
|
|
|
err = hw->mac.ops.stop_hw(hw);
|
|
|
|
if (err != FM10K_ERR_REQUESTS_PENDING)
|
|
|
|
goto skip_tx_dma_drain;
|
|
|
|
|
|
|
|
#define TX_DMA_DRAIN_RETRIES 25
|
|
|
|
for (count = 0; count < TX_DMA_DRAIN_RETRIES; count++) {
|
|
|
|
usleep_range(10000, 20000);
|
|
|
|
|
|
|
|
/* start checking at the last ring to have pending Tx */
|
|
|
|
for (; i < interface->num_tx_queues; i++)
|
2016-06-10 05:56:05 +08:00
|
|
|
if (fm10k_get_tx_pending(interface->tx_ring[i], false))
|
2016-06-08 07:08:51 +08:00
|
|
|
break;
|
|
|
|
|
|
|
|
/* if all the queues are drained, we can break now */
|
|
|
|
if (i == interface->num_tx_queues)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (count >= TX_DMA_DRAIN_RETRIES)
|
|
|
|
dev_err(&interface->pdev->dev,
|
|
|
|
"Tx queues failed to drain after %d tries. Tx DMA is probably hung.\n",
|
|
|
|
count);
|
|
|
|
skip_tx_dma_drain:
|
2014-09-21 07:48:29 +08:00
|
|
|
/* Disable DMA engine for Tx/Rx */
|
2016-02-05 02:47:57 +08:00
|
|
|
err = hw->mac.ops.stop_hw(hw);
|
2016-06-08 07:08:50 +08:00
|
|
|
if (err == FM10K_ERR_REQUESTS_PENDING)
|
2016-06-08 07:08:51 +08:00
|
|
|
dev_err(&interface->pdev->dev,
|
|
|
|
"due to pending requests hw was not shut down gracefully\n");
|
2016-06-08 07:08:50 +08:00
|
|
|
else if (err)
|
2016-02-05 02:47:57 +08:00
|
|
|
dev_err(&interface->pdev->dev, "stop_hw failed: %d\n", err);
|
2014-09-21 07:49:43 +08:00
|
|
|
|
|
|
|
/* free any buffers still on the rings */
|
|
|
|
fm10k_clean_all_tx_rings(interface);
|
2015-06-04 07:31:02 +08:00
|
|
|
fm10k_clean_all_rx_rings(interface);
|
2014-09-21 07:48:29 +08:00
|
|
|
}
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
/**
|
|
|
|
* fm10k_sw_init - Initialize general software structures
|
|
|
|
* @interface: host interface private structure to initialize
|
2018-01-17 03:20:51 +08:00
|
|
|
* @ent: PCI device ID entry
|
2014-09-21 07:48:10 +08:00
|
|
|
*
|
|
|
|
* fm10k_sw_init initializes the interface private data structure.
|
|
|
|
* Fields are initialized based on PCI device information and
|
|
|
|
* OS network device settings (MTU size).
|
|
|
|
**/
|
|
|
|
static int fm10k_sw_init(struct fm10k_intfc *interface,
|
|
|
|
const struct pci_device_id *ent)
|
|
|
|
{
|
|
|
|
const struct fm10k_info *fi = fm10k_info_tbl[ent->driver_data];
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
struct pci_dev *pdev = interface->pdev;
|
|
|
|
struct net_device *netdev = interface->netdev;
|
2014-11-16 22:23:12 +08:00
|
|
|
u32 rss_key[FM10K_RSSRK_SIZE];
|
2014-09-21 07:48:10 +08:00
|
|
|
unsigned int rss;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/* initialize back pointer */
|
|
|
|
hw->back = interface;
|
|
|
|
hw->hw_addr = interface->uc_addr;
|
|
|
|
|
|
|
|
/* PCI config space info */
|
|
|
|
hw->vendor_id = pdev->vendor;
|
|
|
|
hw->device_id = pdev->device;
|
|
|
|
hw->revision_id = pdev->revision;
|
|
|
|
hw->subsystem_vendor_id = pdev->subsystem_vendor;
|
|
|
|
hw->subsystem_device_id = pdev->subsystem_device;
|
|
|
|
|
|
|
|
/* Setup hw api */
|
|
|
|
memcpy(&hw->mac.ops, fi->mac_ops, sizeof(hw->mac.ops));
|
|
|
|
hw->mac.type = fi->mac;
|
|
|
|
|
2014-09-21 07:52:09 +08:00
|
|
|
/* Setup IOV handlers */
|
|
|
|
if (fi->iov_ops)
|
|
|
|
memcpy(&hw->iov.ops, fi->iov_ops, sizeof(hw->iov.ops));
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
/* Set common capability flags and settings */
|
|
|
|
rss = min_t(int, FM10K_MAX_RSS_INDICES, num_online_cpus());
|
|
|
|
interface->ring_feature[RING_F_RSS].limit = rss;
|
|
|
|
fi->get_invariants(hw);
|
|
|
|
|
|
|
|
/* pick up the PCIe bus settings for reporting later */
|
|
|
|
if (hw->mac.ops.get_bus_info)
|
|
|
|
hw->mac.ops.get_bus_info(hw);
|
|
|
|
|
|
|
|
/* limit the usable DMA range */
|
|
|
|
if (hw->mac.ops.set_dma_mask)
|
|
|
|
hw->mac.ops.set_dma_mask(hw, dma_get_mask(&pdev->dev));
|
|
|
|
|
|
|
|
/* update netdev with DMA restrictions */
|
|
|
|
if (dma_get_mask(&pdev->dev) > DMA_BIT_MASK(32)) {
|
|
|
|
netdev->features |= NETIF_F_HIGHDMA;
|
|
|
|
netdev->vlan_features |= NETIF_F_HIGHDMA;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* reset and initialize the hardware so it is in a known state */
|
2015-10-17 01:56:58 +08:00
|
|
|
err = hw->mac.ops.reset_hw(hw);
|
|
|
|
if (err) {
|
|
|
|
dev_err(&pdev->dev, "reset_hw failed: %d\n", err);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = hw->mac.ops.init_hw(hw);
|
2014-09-21 07:48:10 +08:00
|
|
|
if (err) {
|
|
|
|
dev_err(&pdev->dev, "init_hw failed: %d\n", err);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* initialize hardware statistics */
|
|
|
|
hw->mac.ops.update_hw_stats(hw, &interface->stats);
|
|
|
|
|
2014-09-21 07:52:09 +08:00
|
|
|
/* Set upper limit on IOV VFs that can be allocated */
|
|
|
|
pci_sriov_set_totalvfs(pdev, hw->iov.total_vfs);
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
/* Start with random Ethernet address */
|
|
|
|
eth_random_addr(hw->mac.addr);
|
|
|
|
|
|
|
|
/* Initialize MAC address from hardware */
|
|
|
|
err = hw->mac.ops.read_mac_addr(hw);
|
|
|
|
if (err) {
|
|
|
|
dev_warn(&pdev->dev,
|
|
|
|
"Failed to obtain MAC address defaulting to random\n");
|
|
|
|
/* tag address assignment as random */
|
|
|
|
netdev->addr_assign_type |= NET_ADDR_RANDOM;
|
|
|
|
}
|
|
|
|
|
2015-12-29 10:00:30 +08:00
|
|
|
ether_addr_copy(netdev->dev_addr, hw->mac.addr);
|
|
|
|
ether_addr_copy(netdev->perm_addr, hw->mac.addr);
|
2014-09-21 07:48:10 +08:00
|
|
|
|
|
|
|
if (!is_valid_ether_addr(netdev->perm_addr)) {
|
|
|
|
dev_err(&pdev->dev, "Invalid MAC Address\n");
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:53:08 +08:00
|
|
|
/* initialize DCBNL interface */
|
|
|
|
fm10k_dcbnl_set_ops(netdev);
|
|
|
|
|
2014-09-21 07:49:03 +08:00
|
|
|
/* set default ring sizes */
|
|
|
|
interface->tx_ring_count = FM10K_DEFAULT_TXD;
|
|
|
|
interface->rx_ring_count = FM10K_DEFAULT_RXD;
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* set default interrupt moderation */
|
2015-10-17 01:57:08 +08:00
|
|
|
interface->tx_itr = FM10K_TX_ITR_DEFAULT;
|
|
|
|
interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT;
|
2014-09-21 07:48:51 +08:00
|
|
|
|
2016-06-24 04:54:03 +08:00
|
|
|
/* initialize udp port lists */
|
2014-09-21 07:48:10 +08:00
|
|
|
INIT_LIST_HEAD(&interface->vxlan_port);
|
2016-06-24 04:54:03 +08:00
|
|
|
INIT_LIST_HEAD(&interface->geneve_port);
|
2014-09-21 07:48:10 +08:00
|
|
|
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
/* Initialize the MAC/VLAN queue */
|
|
|
|
INIT_LIST_HEAD(&interface->macvlan_requests);
|
|
|
|
|
2014-11-16 22:23:12 +08:00
|
|
|
netdev_rss_key_fill(rss_key, sizeof(rss_key));
|
|
|
|
memcpy(interface->rssrk, rss_key, sizeof(rss_key));
|
2014-09-21 07:48:10 +08:00
|
|
|
|
2017-07-11 04:23:15 +08:00
|
|
|
/* Initialize the mailbox lock */
|
|
|
|
spin_lock_init(&interface->mbx_lock);
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
spin_lock_init(&interface->macvlan_lock);
|
2017-07-11 04:23:15 +08:00
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
/* Start off interface as being down */
|
2017-01-13 07:59:39 +08:00
|
|
|
set_bit(__FM10K_DOWN, interface->state);
|
|
|
|
set_bit(__FM10K_UPDATING_STATS, interface->state);
|
2014-09-21 07:48:10 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:46:05 +08:00
|
|
|
/**
|
|
|
|
* fm10k_probe - Device Initialization Routine
|
|
|
|
* @pdev: PCI device information struct
|
|
|
|
* @ent: entry in fm10k_pci_tbl
|
|
|
|
*
|
|
|
|
* Returns 0 on success, negative on failure
|
|
|
|
*
|
|
|
|
* fm10k_probe initializes an interface identified by a pci_dev structure.
|
|
|
|
* The OS initialization, configuring of the interface private structure,
|
|
|
|
* and a hardware reset occur.
|
|
|
|
**/
|
2015-10-29 08:19:40 +08:00
|
|
|
static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
2014-09-21 07:46:05 +08:00
|
|
|
{
|
2014-09-21 07:48:10 +08:00
|
|
|
struct net_device *netdev;
|
|
|
|
struct fm10k_intfc *interface;
|
2014-09-21 07:46:05 +08:00
|
|
|
int err;
|
|
|
|
|
2016-06-24 04:31:01 +08:00
|
|
|
if (pdev->error_state != pci_channel_io_normal) {
|
|
|
|
dev_err(&pdev->dev,
|
|
|
|
"PCI device still in an error state. Unable to load...\n");
|
|
|
|
return -EIO;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:46:05 +08:00
|
|
|
err = pci_enable_device_mem(pdev);
|
2016-06-24 04:31:00 +08:00
|
|
|
if (err) {
|
|
|
|
dev_err(&pdev->dev,
|
|
|
|
"PCI enable device failed: %d\n", err);
|
2014-09-21 07:46:05 +08:00
|
|
|
return err;
|
2016-06-24 04:31:00 +08:00
|
|
|
}
|
2014-09-21 07:46:05 +08:00
|
|
|
|
2015-06-17 04:41:43 +08:00
|
|
|
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
|
|
|
|
if (err)
|
2014-09-21 07:46:05 +08:00
|
|
|
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
|
2015-06-17 04:41:43 +08:00
|
|
|
if (err) {
|
|
|
|
dev_err(&pdev->dev,
|
|
|
|
"DMA configuration failed: %d\n", err);
|
|
|
|
goto err_dma;
|
2014-09-21 07:46:05 +08:00
|
|
|
}
|
|
|
|
|
2016-06-07 15:44:05 +08:00
|
|
|
err = pci_request_mem_regions(pdev, fm10k_driver_name);
|
2014-09-21 07:46:05 +08:00
|
|
|
if (err) {
|
|
|
|
dev_err(&pdev->dev,
|
2015-06-17 04:40:32 +08:00
|
|
|
"pci_request_selected_regions failed: %d\n", err);
|
2014-09-21 07:46:05 +08:00
|
|
|
goto err_pci_reg;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:50:27 +08:00
|
|
|
pci_enable_pcie_error_reporting(pdev);
|
|
|
|
|
2014-09-21 07:46:05 +08:00
|
|
|
pci_set_master(pdev);
|
|
|
|
pci_save_state(pdev);
|
|
|
|
|
2015-10-17 01:56:56 +08:00
|
|
|
netdev = fm10k_alloc_netdev(fm10k_info_tbl[ent->driver_data]);
|
2014-09-21 07:48:10 +08:00
|
|
|
if (!netdev) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto err_alloc_netdev;
|
|
|
|
}
|
|
|
|
|
|
|
|
SET_NETDEV_DEV(netdev, &pdev->dev);
|
|
|
|
|
|
|
|
interface = netdev_priv(netdev);
|
|
|
|
pci_set_drvdata(pdev, interface);
|
|
|
|
|
|
|
|
interface->netdev = netdev;
|
|
|
|
interface->pdev = pdev;
|
|
|
|
|
|
|
|
interface->uc_addr = ioremap(pci_resource_start(pdev, 0),
|
|
|
|
FM10K_UC_ADDR_SIZE);
|
|
|
|
if (!interface->uc_addr) {
|
|
|
|
err = -EIO;
|
|
|
|
goto err_ioremap;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = fm10k_sw_init(interface, ent);
|
|
|
|
if (err)
|
|
|
|
goto err_sw_init;
|
|
|
|
|
2014-09-21 07:53:23 +08:00
|
|
|
/* enable debugfs support */
|
|
|
|
fm10k_dbg_intfc_init(interface);
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
err = fm10k_init_queueing_scheme(interface);
|
|
|
|
if (err)
|
|
|
|
goto err_sw_init;
|
|
|
|
|
2016-02-05 02:47:55 +08:00
|
|
|
/* the mbx interrupt might attempt to schedule the service task, so we
|
|
|
|
* must ensure it is disabled since we haven't yet requested the timer
|
|
|
|
* or work item.
|
|
|
|
*/
|
2017-01-13 07:59:39 +08:00
|
|
|
set_bit(__FM10K_SERVICE_DISABLE, interface->state);
|
2016-02-05 02:47:55 +08:00
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
err = fm10k_mbx_request_irq(interface);
|
|
|
|
if (err)
|
|
|
|
goto err_mbx_interrupt;
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
/* final check of hardware state before registering the interface */
|
|
|
|
err = fm10k_hw_ready(interface);
|
|
|
|
if (err)
|
|
|
|
goto err_register;
|
|
|
|
|
|
|
|
err = register_netdev(netdev);
|
|
|
|
if (err)
|
|
|
|
goto err_register;
|
|
|
|
|
|
|
|
/* carrier off reporting is important to ethtool even BEFORE open */
|
|
|
|
netif_carrier_off(netdev);
|
|
|
|
|
|
|
|
/* stop all the transmit queues from transmitting until link is up */
|
|
|
|
netif_tx_stop_all_queues(netdev);
|
|
|
|
|
2016-02-05 02:47:55 +08:00
|
|
|
/* Initialize service timer and service task late in order to avoid
|
|
|
|
* cleanup issues.
|
|
|
|
*/
|
2017-10-17 08:29:35 +08:00
|
|
|
timer_setup(&interface->service_timer, fm10k_service_timer, 0);
|
2016-02-05 02:47:55 +08:00
|
|
|
INIT_WORK(&interface->service_task, fm10k_service_task);
|
|
|
|
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
/* Setup the MAC/VLAN queue */
|
|
|
|
INIT_DELAYED_WORK(&interface->macvlan_task, fm10k_macvlan_task);
|
|
|
|
|
2016-02-05 02:47:55 +08:00
|
|
|
/* kick off service timer now, even when interface is down */
|
|
|
|
mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
/* print warning for non-optimal configurations */
|
2018-03-31 03:22:44 +08:00
|
|
|
pcie_print_link_status(interface->pdev);
|
2014-09-21 07:48:10 +08:00
|
|
|
|
2015-06-19 10:41:10 +08:00
|
|
|
/* report MAC address for logging */
|
|
|
|
dev_info(&pdev->dev, "%pM\n", netdev->dev_addr);
|
|
|
|
|
2014-09-21 07:52:09 +08:00
|
|
|
/* enable SR-IOV after registering netdev to enforce PF/VF ordering */
|
|
|
|
fm10k_iov_configure(pdev, 0);
|
|
|
|
|
2017-07-11 04:23:08 +08:00
|
|
|
/* clear the service task disable bit and kick off service task */
|
2017-01-13 07:59:39 +08:00
|
|
|
clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
|
2017-07-11 04:23:08 +08:00
|
|
|
fm10k_service_event_schedule(interface);
|
2014-09-21 07:49:25 +08:00
|
|
|
|
2014-09-21 07:46:05 +08:00
|
|
|
return 0;
|
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
err_register:
|
2014-09-21 07:48:51 +08:00
|
|
|
fm10k_mbx_free_irq(interface);
|
|
|
|
err_mbx_interrupt:
|
|
|
|
fm10k_clear_queueing_scheme(interface);
|
2014-09-21 07:48:10 +08:00
|
|
|
err_sw_init:
|
2014-09-21 07:54:07 +08:00
|
|
|
if (interface->sw_addr)
|
|
|
|
iounmap(interface->sw_addr);
|
2014-09-21 07:48:10 +08:00
|
|
|
iounmap(interface->uc_addr);
|
|
|
|
err_ioremap:
|
|
|
|
free_netdev(netdev);
|
|
|
|
err_alloc_netdev:
|
2016-06-07 15:44:05 +08:00
|
|
|
pci_release_mem_regions(pdev);
|
2014-09-21 07:46:05 +08:00
|
|
|
err_pci_reg:
|
|
|
|
err_dma:
|
|
|
|
pci_disable_device(pdev);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_remove - Device Removal Routine
|
|
|
|
* @pdev: PCI device information struct
|
|
|
|
*
|
|
|
|
* fm10k_remove is called by the PCI subsystem to alert the driver
|
|
|
|
* that it should release a PCI device. The could be caused by a
|
|
|
|
* Hot-Plug event, or because the driver is going to be removed from
|
|
|
|
* memory.
|
|
|
|
**/
|
|
|
|
static void fm10k_remove(struct pci_dev *pdev)
|
|
|
|
{
|
2014-09-21 07:48:10 +08:00
|
|
|
struct fm10k_intfc *interface = pci_get_drvdata(pdev);
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
|
2015-04-04 04:27:09 +08:00
|
|
|
del_timer_sync(&interface->service_timer);
|
|
|
|
|
2017-07-11 04:23:13 +08:00
|
|
|
fm10k_stop_service_event(interface);
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
fm10k_stop_macvlan_task(interface);
|
|
|
|
|
|
|
|
/* Remove all pending MAC/VLAN requests */
|
|
|
|
fm10k_clear_macvlan_queue(interface, interface->glort, true);
|
2014-09-21 07:49:25 +08:00
|
|
|
|
2014-09-21 07:48:10 +08:00
|
|
|
/* free netdev, this may bounce the interrupts due to setup_tc */
|
|
|
|
if (netdev->reg_state == NETREG_REGISTERED)
|
|
|
|
unregister_netdev(netdev);
|
|
|
|
|
2014-09-21 07:52:09 +08:00
|
|
|
/* release VFs */
|
|
|
|
fm10k_iov_disable(pdev);
|
|
|
|
|
2014-09-21 07:48:51 +08:00
|
|
|
/* disable mailbox interrupt */
|
|
|
|
fm10k_mbx_free_irq(interface);
|
|
|
|
|
|
|
|
/* free interrupts */
|
|
|
|
fm10k_clear_queueing_scheme(interface);
|
|
|
|
|
2014-09-21 07:53:23 +08:00
|
|
|
/* remove any debugfs interfaces */
|
|
|
|
fm10k_dbg_intfc_exit(interface);
|
|
|
|
|
2014-09-21 07:54:07 +08:00
|
|
|
if (interface->sw_addr)
|
|
|
|
iounmap(interface->sw_addr);
|
2014-09-21 07:48:10 +08:00
|
|
|
iounmap(interface->uc_addr);
|
|
|
|
|
|
|
|
free_netdev(netdev);
|
|
|
|
|
2016-06-07 15:44:05 +08:00
|
|
|
pci_release_mem_regions(pdev);
|
2014-09-21 07:46:05 +08:00
|
|
|
|
2014-09-21 07:50:27 +08:00
|
|
|
pci_disable_pcie_error_reporting(pdev);
|
|
|
|
|
2014-09-21 07:46:05 +08:00
|
|
|
pci_disable_device(pdev);
|
|
|
|
}
|
|
|
|
|
2016-06-08 07:08:53 +08:00
|
|
|
static void fm10k_prepare_suspend(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
/* the watchdog task reads from registers, which might appear like
|
|
|
|
* a surprise remove if the PCIe device is disabled while we're
|
|
|
|
* stopped. We stop the watchdog task until after we resume software
|
|
|
|
* activity.
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
*
|
|
|
|
* Note that the MAC/VLAN task will be stopped as part of preparing
|
|
|
|
* for reset so we don't need to handle it here.
|
2016-06-08 07:08:53 +08:00
|
|
|
*/
|
2017-07-11 04:23:13 +08:00
|
|
|
fm10k_stop_service_event(interface);
|
2016-06-08 07:08:53 +08:00
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
if (fm10k_prepare_for_reset(interface))
|
|
|
|
set_bit(__FM10K_RESET_SUSPENDED, interface->state);
|
2016-06-08 07:08:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int fm10k_handle_resume(struct fm10k_intfc *interface)
|
|
|
|
{
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
|
|
|
int err;
|
|
|
|
|
fm10k: prepare_for_reset() when we lose PCIe Link
If we lose PCIe link, such as when an unannounced PFLR event occurs, or
when a device is surprise removed, we currently detach the device and
close the netdev. This unfortunately leaves a lot of things still
active, such as the msix_mbx_pf IRQ, and Tx/Rx resources.
This can cause problems because the register reads will return
potentially invalid values which may result in unknown driver behavior.
Begin the process of resetting using fm10k_prepare_for_reset(), much in
the same way as the suspend and resume cycle does. This will attempt to
shutdown as much as possible, in order to prevent possible issues.
A naive implementation for this has issues, because there are now
multiple flows calling the reset logic and setting a reset bit. This
would cause problems, because the "re-attach" routine might call
fm10k_handle_reset() prior to the reset actually finishing. Instead,
we'll add state bits to indicate which flow actually initiated the
reset.
For the general reset flow, we'll assume that if someone else is
resetting that we do not need to handle it at all, so it does not need
its own state bit. For the suspend case, we will simply issue a warning
indicating that we are attempting to recover from this case when
resuming.
For the detached subtask, we'll simply refuse to re-attach until we've
actually initiated a reset as part of that flow.
Finally, we'll stop attempting to manage the mailbox subtask when we're
detached, since there's nothing we can do if we don't have a PCIe
address.
Overall this produces a much cleaner shutdown and recovery cycle for
a PCIe surprise remove event.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:14 +08:00
|
|
|
/* Even if we didn't properly prepare for reset in
|
|
|
|
* fm10k_prepare_suspend, we'll attempt to resume anyways.
|
|
|
|
*/
|
|
|
|
if (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state))
|
|
|
|
dev_warn(&interface->pdev->dev,
|
|
|
|
"Device was shut down as part of suspend... Attempting to recover\n");
|
|
|
|
|
2016-06-08 07:08:53 +08:00
|
|
|
/* reset statistics starting values */
|
|
|
|
hw->mac.ops.rebind_hw_stats(hw, &interface->stats);
|
|
|
|
|
|
|
|
err = fm10k_handle_reset(interface);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
|
|
|
/* assume host is not ready, to prevent race with watchdog in case we
|
|
|
|
* actually don't have connection to the switch
|
|
|
|
*/
|
|
|
|
interface->host_ready = false;
|
|
|
|
fm10k_watchdog_host_not_ready(interface);
|
|
|
|
|
2016-06-08 07:09:00 +08:00
|
|
|
/* force link to stay down for a second to prevent link flutter */
|
|
|
|
interface->link_down_event = jiffies + (HZ);
|
2017-01-13 07:59:39 +08:00
|
|
|
set_bit(__FM10K_LINK_DOWN, interface->state);
|
2016-06-08 07:09:00 +08:00
|
|
|
|
2017-07-11 04:23:13 +08:00
|
|
|
/* restart the service task */
|
|
|
|
fm10k_start_service_event(interface);
|
2016-06-08 07:08:53 +08:00
|
|
|
|
fm10k: introduce a message queue for MAC/VLAN messages
Under some circumstances, when dealing with a large number of MAC
address or VLAN updates at once, the fm10k driver, particularly the VFs
can overload the mailbox with too many messages at once.
This results in a mailbox timeout, which causes the driver to initiate
a reset. During the reset, we re-send all the same messages that
originally caused the timeout. This results in a cycle of resets each
triggering a future reset.
To fix or avoid this, we introduce a workqueue item which monitors
a queue of MAC and VLAN requests. These requests are queued to the end
of the list, and we process as a FIFO periodically.
Initially we only handle requests for the netdev, but we do handle
unicast MAC addresses, multicast MAC addresses, and update VLAN
requests.
A future patch will add support to use this queue for handling MAC
update requests from the VF<->PF mailbox.
The MAC/VLAN work item will keep checking to make sure that each request
does not overflow the mailbox and cause a timeout. If it might, then the
work item will reschedule itself a short time later. This avoids any
reset cycle, since we never send the message if the mailbox is not
ready.
As an alternative, we tried increasing the mailbox message FIFO, but
this just delays the problem and results in needless memory waste on the
system. Our new message queue is dynamically allocated so only uses as
much memory as it needs. Additionally, it need not be contiguous like
the Tx and Rx FIFOs.
Note that this patch chose to only create a queue for MAC and VLAN
messages, since these are the only messages sent in a large enough
volume to cause the reset loop. Other messages are very unlikely to
overflow the mailbox Tx FIFO so easily.
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
2017-07-11 04:23:17 +08:00
|
|
|
/* Restart the MAC/VLAN request queue in-case of outstanding events */
|
|
|
|
fm10k_macvlan_schedule(interface);
|
|
|
|
|
2016-06-08 07:08:53 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:50:27 +08:00
|
|
|
/**
|
2017-07-11 04:23:16 +08:00
|
|
|
* fm10k_resume - Generic PM resume hook
|
|
|
|
* @dev: generic device structure
|
2014-09-21 07:50:27 +08:00
|
|
|
*
|
2017-07-11 04:23:16 +08:00
|
|
|
* Generic PM hook used when waking the device from a low power state after
|
|
|
|
* suspend or hibernation. This function does not need to handle lower PCIe
|
|
|
|
* device state as the stack takes care of that for us.
|
2014-09-21 07:50:27 +08:00
|
|
|
**/
|
2018-01-17 23:57:32 +08:00
|
|
|
static int __maybe_unused fm10k_resume(struct device *dev)
|
2014-09-21 07:50:27 +08:00
|
|
|
{
|
2019-07-23 22:15:33 +08:00
|
|
|
struct fm10k_intfc *interface = dev_get_drvdata(dev);
|
2014-09-21 07:50:27 +08:00
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
struct fm10k_hw *hw = &interface->hw;
|
2017-07-11 04:23:16 +08:00
|
|
|
int err;
|
2014-09-21 07:50:27 +08:00
|
|
|
|
|
|
|
/* refresh hw_addr in case it was dropped */
|
|
|
|
hw->hw_addr = interface->uc_addr;
|
|
|
|
|
2016-06-08 07:08:56 +08:00
|
|
|
err = fm10k_handle_resume(interface);
|
2015-11-11 01:40:30 +08:00
|
|
|
if (err)
|
2016-06-08 07:08:56 +08:00
|
|
|
return err;
|
2014-09-21 07:52:09 +08:00
|
|
|
|
2014-09-21 07:50:27 +08:00
|
|
|
netif_device_attach(netdev);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2017-07-11 04:23:16 +08:00
|
|
|
* fm10k_suspend - Generic PM suspend hook
|
|
|
|
* @dev: generic device structure
|
2014-09-21 07:50:27 +08:00
|
|
|
*
|
2017-07-11 04:23:16 +08:00
|
|
|
* Generic PM hook used when setting the device into a low power state for
|
|
|
|
* system suspend or hibernation. This function does not need to handle lower
|
|
|
|
* PCIe device state as the stack takes care of that for us.
|
2014-09-21 07:50:27 +08:00
|
|
|
**/
|
2018-01-17 23:57:32 +08:00
|
|
|
static int __maybe_unused fm10k_suspend(struct device *dev)
|
2014-09-21 07:50:27 +08:00
|
|
|
{
|
2019-07-23 22:15:33 +08:00
|
|
|
struct fm10k_intfc *interface = dev_get_drvdata(dev);
|
2014-09-21 07:50:27 +08:00
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
|
|
|
|
netif_device_detach(netdev);
|
|
|
|
|
2016-06-08 07:08:56 +08:00
|
|
|
fm10k_prepare_suspend(interface);
|
2014-09-21 07:50:27 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_io_error_detected - called when PCI error is detected
|
|
|
|
* @pdev: Pointer to PCI device
|
|
|
|
* @state: The current pci connection state
|
|
|
|
*
|
|
|
|
* This function is called after a PCI bus error affecting
|
|
|
|
* this device has been detected.
|
|
|
|
*/
|
|
|
|
static pci_ers_result_t fm10k_io_error_detected(struct pci_dev *pdev,
|
|
|
|
pci_channel_state_t state)
|
|
|
|
{
|
|
|
|
struct fm10k_intfc *interface = pci_get_drvdata(pdev);
|
|
|
|
struct net_device *netdev = interface->netdev;
|
|
|
|
|
|
|
|
netif_device_detach(netdev);
|
|
|
|
|
|
|
|
if (state == pci_channel_io_perm_failure)
|
|
|
|
return PCI_ERS_RESULT_DISCONNECT;
|
|
|
|
|
2016-06-08 07:08:54 +08:00
|
|
|
fm10k_prepare_suspend(interface);
|
2016-03-12 01:52:32 +08:00
|
|
|
|
2014-09-21 07:50:27 +08:00
|
|
|
/* Request a slot reset. */
|
|
|
|
return PCI_ERS_RESULT_NEED_RESET;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_io_slot_reset - called after the pci bus has been reset.
|
|
|
|
* @pdev: Pointer to PCI device
|
|
|
|
*
|
|
|
|
* Restart the card from scratch, as if from a cold-boot.
|
|
|
|
*/
|
|
|
|
static pci_ers_result_t fm10k_io_slot_reset(struct pci_dev *pdev)
|
|
|
|
{
|
|
|
|
pci_ers_result_t result;
|
|
|
|
|
2016-06-10 03:02:03 +08:00
|
|
|
if (pci_reenable_device(pdev)) {
|
2014-09-21 07:50:27 +08:00
|
|
|
dev_err(&pdev->dev,
|
|
|
|
"Cannot re-enable PCI device after reset.\n");
|
|
|
|
result = PCI_ERS_RESULT_DISCONNECT;
|
|
|
|
} else {
|
|
|
|
pci_set_master(pdev);
|
|
|
|
pci_restore_state(pdev);
|
|
|
|
|
|
|
|
/* After second error pci->state_saved is false, this
|
|
|
|
* resets it so EEH doesn't break.
|
|
|
|
*/
|
|
|
|
pci_save_state(pdev);
|
|
|
|
|
|
|
|
pci_wake_from_d3(pdev, false);
|
|
|
|
|
|
|
|
result = PCI_ERS_RESULT_RECOVERED;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_io_resume - called when traffic can start flowing again.
|
|
|
|
* @pdev: Pointer to PCI device
|
|
|
|
*
|
|
|
|
* This callback is called when the error recovery driver tells us that
|
|
|
|
* its OK to resume normal operation.
|
|
|
|
*/
|
|
|
|
static void fm10k_io_resume(struct pci_dev *pdev)
|
|
|
|
{
|
|
|
|
struct fm10k_intfc *interface = pci_get_drvdata(pdev);
|
|
|
|
struct net_device *netdev = interface->netdev;
|
2016-06-08 07:08:54 +08:00
|
|
|
int err;
|
2014-09-21 07:50:27 +08:00
|
|
|
|
2016-06-08 07:08:54 +08:00
|
|
|
err = fm10k_handle_resume(interface);
|
2014-09-21 07:50:27 +08:00
|
|
|
|
2016-06-08 07:08:54 +08:00
|
|
|
if (err)
|
|
|
|
dev_warn(&pdev->dev,
|
2017-08-12 02:14:37 +08:00
|
|
|
"%s failed: %d\n", __func__, err);
|
2016-06-08 07:08:54 +08:00
|
|
|
else
|
2014-09-21 07:50:27 +08:00
|
|
|
netif_device_attach(netdev);
|
|
|
|
}
|
|
|
|
|
2017-08-12 02:14:37 +08:00
|
|
|
/**
|
|
|
|
* fm10k_io_reset_prepare - called when PCI function is about to be reset
|
|
|
|
* @pdev: Pointer to PCI device
|
|
|
|
*
|
|
|
|
* This callback is called when the PCI function is about to be reset,
|
|
|
|
* allowing the device driver to prepare for it.
|
|
|
|
*/
|
2017-06-01 19:10:38 +08:00
|
|
|
static void fm10k_io_reset_prepare(struct pci_dev *pdev)
|
2016-06-08 07:08:55 +08:00
|
|
|
{
|
2017-06-01 19:10:38 +08:00
|
|
|
/* warn incase we have any active VF devices */
|
|
|
|
if (pci_num_vf(pdev))
|
|
|
|
dev_warn(&pdev->dev,
|
|
|
|
"PCIe FLR may cause issues for any active VF devices\n");
|
|
|
|
fm10k_prepare_suspend(pci_get_drvdata(pdev));
|
|
|
|
}
|
2016-06-08 07:08:55 +08:00
|
|
|
|
2017-08-12 02:14:37 +08:00
|
|
|
/**
|
|
|
|
* fm10k_io_reset_done - called when PCI function has finished resetting
|
|
|
|
* @pdev: Pointer to PCI device
|
|
|
|
*
|
|
|
|
* This callback is called just after the PCI function is reset, such as via
|
|
|
|
* /sys/class/net/<enpX>/device/reset or similar.
|
|
|
|
*/
|
2017-06-01 19:10:38 +08:00
|
|
|
static void fm10k_io_reset_done(struct pci_dev *pdev)
|
|
|
|
{
|
|
|
|
struct fm10k_intfc *interface = pci_get_drvdata(pdev);
|
|
|
|
int err = fm10k_handle_resume(interface);
|
2016-06-08 07:08:55 +08:00
|
|
|
|
|
|
|
if (err) {
|
|
|
|
dev_warn(&pdev->dev,
|
2017-08-12 02:14:37 +08:00
|
|
|
"%s failed: %d\n", __func__, err);
|
2016-06-08 07:08:55 +08:00
|
|
|
netif_device_detach(interface->netdev);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-09-21 07:50:27 +08:00
|
|
|
static const struct pci_error_handlers fm10k_err_handler = {
|
|
|
|
.error_detected = fm10k_io_error_detected,
|
|
|
|
.slot_reset = fm10k_io_slot_reset,
|
|
|
|
.resume = fm10k_io_resume,
|
2017-06-01 19:10:38 +08:00
|
|
|
.reset_prepare = fm10k_io_reset_prepare,
|
|
|
|
.reset_done = fm10k_io_reset_done,
|
2014-09-21 07:50:27 +08:00
|
|
|
};
|
|
|
|
|
2017-07-11 04:23:16 +08:00
|
|
|
/* power management operations built from fm10k_suspend and fm10k_resume */
static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume);
|
|
|
|
|
2014-09-21 07:46:05 +08:00
|
|
|
static struct pci_driver fm10k_driver = {
|
|
|
|
.name = fm10k_driver_name,
|
|
|
|
.id_table = fm10k_pci_tbl,
|
|
|
|
.probe = fm10k_probe,
|
|
|
|
.remove = fm10k_remove,
|
2017-07-11 04:23:16 +08:00
|
|
|
.driver = {
|
|
|
|
.pm = &fm10k_pm_ops,
|
|
|
|
},
|
2014-09-21 07:52:09 +08:00
|
|
|
.sriov_configure = fm10k_iov_configure,
|
2014-09-21 07:50:27 +08:00
|
|
|
.err_handler = &fm10k_err_handler
|
2014-09-21 07:46:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_register_pci_driver - register driver interface
|
|
|
|
*
|
2016-02-11 06:45:51 +08:00
|
|
|
* This function is called on module load in order to register the driver.
|
2014-09-21 07:46:05 +08:00
|
|
|
**/
|
|
|
|
int fm10k_register_pci_driver(void)
|
|
|
|
{
|
|
|
|
return pci_register_driver(&fm10k_driver);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* fm10k_unregister_pci_driver - unregister driver interface
|
|
|
|
*
|
2016-02-11 06:45:51 +08:00
|
|
|
* This function is called on module unload in order to remove the driver.
|
2014-09-21 07:46:05 +08:00
|
|
|
**/
|
|
|
|
void fm10k_unregister_pci_driver(void)
|
|
|
|
{
|
|
|
|
pci_unregister_driver(&fm10k_driver);
|
|
|
|
}
|