From b7ec70be01a87f2c85df3ae11046e74f9b67e323 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Tue, 31 Jul 2012 02:02:43 +0000 Subject: [PATCH 1/3] e1000e: NIC goes up and immediately goes down Found that commit d478eb44 was a bad commit. If the link partner is transmitting codeword (even if NULL codeword), then the RXCW.C bit will be set so check for RXCW.CW is unnecessary. Ref: RH BZ 840642 Reported-by: Fabio Futigami Signed-off-by: Tushar Dave CC: Marcelo Ricardo Leitner CC: stable [2.6.38+] Tested-by: Aaron Brown Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/e1000e/82571.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 0b3bade957fd..2a4ded2fd6e5 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -1601,10 +1601,8 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw) * auto-negotiation in the TXCW register and disable * forced link in the Device Control register in an * attempt to auto-negotiate with our link partner. - * If the partner code word is null, stop forcing - * and restart auto negotiation. */ - if ((rxcw & E1000_RXCW_C) || !(rxcw & E1000_RXCW_CW)) { + if (rxcw & E1000_RXCW_C) { /* Enable autoneg, and unforce link up */ ew32(TXCW, mac->txcw); ew32(CTRL, (ctrl & ~E1000_CTRL_SLU)); From eca90f550494171f54f8a700caee65ec16455a5b Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Wed, 1 Aug 2012 02:11:15 +0000 Subject: [PATCH 2/3] e1000e: 82571 Tx Data Corruption during Tx hang recovery A bus trace shows that while executing e1000e_down, TCTL is cleared except for the PSP bit. This occurs while in the middle of fetching a TSO packet since the Tx packet buffer is full at that point. Before the device is reset, the e1000_watchdog_task starts to run from the middle (it was apparently pre-empted earlier, although that is not in the trace) and sets TCTL.EN. At that point, 82571 transmits the corrupted packet, apparently because TCTL.MULR was cleared in the middle of fetching a packet, which is forbidden. Driver should just clear TCTL.EN in e1000_reset_hw_82571 instead of clearing the entire register, so as not to change any settings in the middle of fetching a packet. Signed-off-by: Tushar Dave Tested-by: Aaron Brown Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/e1000e/82571.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 2a4ded2fd6e5..080c89093feb 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -999,7 +999,7 @@ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) **/ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) { - u32 ctrl, ctrl_ext, eecd; + u32 ctrl, ctrl_ext, eecd, tctl; s32 ret_val; /* @@ -1014,7 +1014,9 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ew32(IMC, 0xffffffff); ew32(RCTL, 0); - ew32(TCTL, E1000_TCTL_PSP); + tctl = er32(TCTL); + tctl &= ~E1000_TCTL_EN; + ew32(TCTL, tctl); e1e_flush(); usleep_range(10000, 20000); From 119b0e0351bcdb7833f368781e6241ff283b49e5 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 7 Aug 2012 00:45:57 -0700 Subject: [PATCH 3/3] igb: add delay to allow igb loopback test to succeed on 8086:10c9 Some 8086:10c9 NICs have a problem completing the ethtool loopback test. The result looks like this: ethtool -t eth1 The test result is FAIL The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 13 Link test (on/offline) 0 A bisect clearly points to commit a95a07445ee97a2fef65befafbadcc30ca1bd145. However that seems to only trigger the bug. While adding some printk the problem disappeared, so this might be a timing issue. After some trial and error I discovered that adding a small delay just before igb_write_phy_reg() in igb_integrated_phy_loopback() allows the loopback test to succeed. I was unable to figure out the root cause so far but I expect it to be somewhere in the following executing path igb_integrated_phy_loopback ->igb_write_phy_reg_igp ->igb_write_phy_reg_mdic ->igb_acquire_phy_82575 ->igb_acquire_swfw_sync_82575 The problem could only be observed on 8086:10c9 NICs so far and not all of them show the behaviour. I did not restrict the workaround to this type of NIC as it should do no harm to other igb NICs. With the patch below the loopback test succeeded 500 times in a row using a NIC that would otherwise fail. Signed-off-by: Stefan Assmann Tested-by: Aaron Brown Signed-off-by: Peter P Waskiewicz Jr --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 90550f5e3dd9..70591117051b 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1498,6 +1498,9 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) break; } + /* add small delay to avoid loopback test failure */ + msleep(50); + /* force 1000, set loopback */ igb_write_phy_reg(hw, PHY_CONTROL, 0x4140);