From 9446f3efc53512e5ad9e0966539021a2a41fe5a0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Jan 2012 10:32:01 +0100 Subject: [PATCH 01/67] mac80211: fix debugfs key->station symlink Since stations moved into a virtual interface subdirectory, this link has been broken. Fix it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/debugfs_key.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 38e6101190d9..59edcd95a58d 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -225,9 +225,9 @@ KEY_OPS(key); key, &key_##name##_ops); void ieee80211_debugfs_key_add(struct ieee80211_key *key) - { +{ static int keycount; - char buf[50]; + char buf[100]; struct sta_info *sta; if (!key->local->debugfs.keys) @@ -244,7 +244,8 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) sta = key->sta; if (sta) { - sprintf(buf, "../../stations/%pM", sta->sta.addr); + sprintf(buf, "../../netdev:%s/stations/%pM", + sta->sdata->name, sta->sta.addr); key->debugfs.stalink = debugfs_create_symlink("station", key->debugfs.dir, buf); } From f96b08a7e6f69c0f0a576554df3df5b1b519c479 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 17 Jan 2012 12:38:50 +0100 Subject: [PATCH 02/67] brcmsmac: fix tx queue flush infinite loop This patch workaround live deadlock problem caused by infinite loop in brcms_c_wait_for_tx_completion(). I do not consider the patch as the proper fix, which should fix the real reason of tx queue flush failure, but patch helps with system lockup. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=42576 Reported-and-tested-by: Patrick Cc: stable@vger.kernel.org # 3.2+ Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmsmac/main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c index f7ed34034f88..f6affc6fd12a 100644 --- a/drivers/net/wireless/brcm80211/brcmsmac/main.c +++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c @@ -7981,13 +7981,21 @@ int brcms_c_get_curband(struct brcms_c_info *wlc) void brcms_c_wait_for_tx_completion(struct brcms_c_info *wlc, bool drop) { + int timeout = 20; + /* flush packet queue when requested */ if (drop) brcmu_pktq_flush(&wlc->pkt_queue->q, false, NULL, NULL); /* wait for queue and DMA fifos to run dry */ - while (!pktq_empty(&wlc->pkt_queue->q) || brcms_txpktpendtot(wlc) > 0) + while (!pktq_empty(&wlc->pkt_queue->q) || brcms_txpktpendtot(wlc) > 0) { brcms_msleep(wlc->wl, 1); + + if (--timeout == 0) + break; + } + + WARN_ON_ONCE(timeout == 0); } void brcms_c_set_beacon_listen_interval(struct brcms_c_info *wlc, u8 interval) From 65e8b0ccb6cf176f8eddb1b05534be46580da9dd Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Tue, 17 Jan 2012 18:17:46 -0800 Subject: [PATCH 03/67] mac80211: Use the right headroom size for mesh mgmt frames Use local->tx_headroom instad of local->hw.extra_tx_headroom. local->tx_headroom is the max of hw.extra_tx_headroom required by the driver and the headroom required by mac80211 for status reporting. On drivers where hw.extra_tx_headroom is smaller than what mac80211 requires (e.g. ath5k), we would not reserve sufficient buffer space to report tx status. Also, don't reserve local->tx_headroom + local->hw.extra_tx_headroom. Reported-by: Simon Morgenthaler Reported-by: Kai Scharwies Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- net/mac80211/mesh_hwmp.c | 8 ++++---- net/mac80211/mesh_plink.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 73abb7524b2c..54df1b2bafd2 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -119,12 +119,12 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + sizeof(mgmt->u.action.u.mesh_action); - skb = dev_alloc_skb(local->hw.extra_tx_headroom + + skb = dev_alloc_skb(local->tx_headroom + hdr_len + 2 + 37); /* max HWMP IE */ if (!skb) return -1; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -250,12 +250,12 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, if (time_before(jiffies, ifmsh->next_perr)) return -EAGAIN; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + + skb = dev_alloc_skb(local->tx_headroom + hdr_len + 2 + 15 /* PERR IE */); if (!skb) return -1; - skb_reserve(skb, local->tx_headroom + local->hw.extra_tx_headroom); + skb_reserve(skb, local->tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 41ef1b476442..a17251730b9e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -172,7 +172,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + sizeof(mgmt->u.action.u.self_prot); - skb = dev_alloc_skb(local->hw.extra_tx_headroom + + skb = dev_alloc_skb(local->tx_headroom + hdr_len + 2 + /* capability info */ 2 + /* AID */ @@ -186,7 +186,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, sdata->u.mesh.ie_len); if (!skb) return -1; - skb_reserve(skb, local->hw.extra_tx_headroom); + skb_reserve(skb, local->tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, hdr_len); memset(mgmt, 0, hdr_len); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | From bc4934bc61d0a11fd62c5187ff83645628f8be8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Jan 2012 14:10:25 +0100 Subject: [PATCH 04/67] mac80211: fix work removal on deauth request When deauth is requested while an auth or assoc work item is in progress, we currently delete it without regard for any state it might need to clean up. Fix it by cleaning up for those items. In the case Pontus found, the problem manifested itself as such: authenticate with 00:23:69:aa:dd:7b (try 1) authenticated failed to insert Dummy STA entry for the AP (error -17) deauthenticating from 00:23:69:aa:dd:7b by local choice (reason=2) It could also happen differently if the driver uses the tx_sync callback. We can't just call the ->done() method of the work items because that will lock up due to the locking in cfg80211. This fix isn't very clean, but that seems acceptable since I have patches pending to remove this code completely. Cc: stable@vger.kernel.org Reported-by: Pontus Fuchs Tested-by: Pontus Fuchs Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ecb4c84c1bb3..295be92f7c77 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2750,7 +2750,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_work *wk; u8 bssid[ETH_ALEN]; bool assoc_bss = false; @@ -2763,30 +2762,47 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, assoc_bss = true; } else { bool not_auth_yet = false; + struct ieee80211_work *tmp, *wk = NULL; mutex_unlock(&ifmgd->mtx); mutex_lock(&local->mtx); - list_for_each_entry(wk, &local->work_list, list) { - if (wk->sdata != sdata) + list_for_each_entry(tmp, &local->work_list, list) { + if (tmp->sdata != sdata) continue; - if (wk->type != IEEE80211_WORK_DIRECT_PROBE && - wk->type != IEEE80211_WORK_AUTH && - wk->type != IEEE80211_WORK_ASSOC && - wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) + if (tmp->type != IEEE80211_WORK_DIRECT_PROBE && + tmp->type != IEEE80211_WORK_AUTH && + tmp->type != IEEE80211_WORK_ASSOC && + tmp->type != IEEE80211_WORK_ASSOC_BEACON_WAIT) continue; - if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) + if (memcmp(req->bss->bssid, tmp->filter_ta, ETH_ALEN)) continue; - not_auth_yet = wk->type == IEEE80211_WORK_DIRECT_PROBE; - list_del_rcu(&wk->list); - free_work(wk); + not_auth_yet = tmp->type == IEEE80211_WORK_DIRECT_PROBE; + list_del_rcu(&tmp->list); + synchronize_rcu(); + wk = tmp; break; } mutex_unlock(&local->mtx); + if (wk && wk->type == IEEE80211_WORK_ASSOC) { + /* clean up dummy sta & TX sync */ + sta_info_destroy_addr(wk->sdata, wk->filter_ta); + if (wk->assoc.synced) + drv_finish_tx_sync(local, wk->sdata, + wk->filter_ta, + IEEE80211_TX_SYNC_ASSOC); + } else if (wk && wk->type == IEEE80211_WORK_AUTH) { + if (wk->probe_auth.synced) + drv_finish_tx_sync(local, wk->sdata, + wk->filter_ta, + IEEE80211_TX_SYNC_AUTH); + } + kfree(wk); + /* * If somebody requests authentication and we haven't * sent out an auth frame yet there's no need to send From 4f3d09de38d234ce7ffba5ec5a7e6704f983d375 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 11 Jan 2012 15:50:15 -0500 Subject: [PATCH 05/67] b43: add option to avoid duplicating device support with brcmsmac Signed-off-by: John W. Linville --- drivers/net/wireless/b43/Kconfig | 6 ++++++ drivers/net/wireless/b43/main.c | 2 ++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index b97a40ed5fff..3876c7ea54f4 100644 --- a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -31,6 +31,12 @@ config B43_BCMA depends on B43 && BCMA default y +config B43_BCMA_EXTRA + bool "Hardware support that overlaps with the brcmsmac driver" + depends on B43_BCMA + default n if BRCMSMAC || BRCMSMAC_MODULE + default y + config B43_SSB bool depends on B43 && SSB diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index b91f28ef1032..23ffb1b9a86f 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -116,8 +116,10 @@ MODULE_PARM_DESC(pio, "Use PIO accesses by default: 0=DMA, 1=PIO"); #ifdef CONFIG_B43_BCMA static const struct bcma_device_id b43_bcma_tbl[] = { BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x11, BCMA_ANY_CLASS), +#ifdef CONFIG_B43_BCMA_EXTRA BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x17, BCMA_ANY_CLASS), BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x18, BCMA_ANY_CLASS), +#endif BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_80211, 0x1D, BCMA_ANY_CLASS), BCMA_CORETABLE_END }; From d00a9dd21bdf7908b70866794c8313ee8a5abd5c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2012 07:21:42 +0000 Subject: [PATCH 06/67] net: bpf_jit: fix divide by 0 generation Several problems fixed in this patch : 1) Target of the conditional jump in case a divide by 0 is performed by a bpf is wrong. 2) Must 'generate' the full function prologue/epilogue at pass=0, or else we can stop too early in pass=1 if the proglen doesnt change. (if the increase of prologue/epilogue equals decrease of all instructions length because some jumps are converted to near jumps) 3) Change the wrong length detection at the end of code generation to issue a more explicit message, no need for a full stack trace. Reported-by: Phil Oester Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 7b65f752c5f8..7c1b765ecc59 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -151,17 +151,18 @@ void bpf_jit_compile(struct sk_filter *fp) cleanup_addr = proglen; /* epilogue address */ for (pass = 0; pass < 10; pass++) { + u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen; /* no prologue/epilogue for trivial filters (RET something) */ proglen = 0; prog = temp; - if (seen) { + if (seen_or_pass0) { EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */ EMIT4(0x48, 0x83, 0xec, 96); /* subq $96,%rsp */ /* note : must save %rbx in case bpf_error is hit */ - if (seen & (SEEN_XREG | SEEN_DATAREF)) + if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF)) EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */ - if (seen & SEEN_XREG) + if (seen_or_pass0 & SEEN_XREG) CLEAR_X(); /* make sure we dont leek kernel memory */ /* @@ -170,7 +171,7 @@ void bpf_jit_compile(struct sk_filter *fp) * r9 = skb->len - skb->data_len * r8 = skb->data */ - if (seen & SEEN_DATAREF) { + if (seen_or_pass0 & SEEN_DATAREF) { if (offsetof(struct sk_buff, len) <= 127) /* mov off8(%rdi),%r9d */ EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len)); @@ -260,9 +261,14 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ALU_DIV_X: /* A /= X; */ seen |= SEEN_XREG; EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ - if (pc_ret0 != -1) - EMIT_COND_JMP(X86_JE, addrs[pc_ret0] - (addrs[i] - 4)); - else { + if (pc_ret0 > 0) { + /* addrs[pc_ret0 - 1] is start address of target + * (addrs[i] - 4) is the address following this jmp + * ("xor %edx,%edx; div %ebx" being 4 bytes long) + */ + EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - + (addrs[i] - 4)); + } else { EMIT_COND_JMP(X86_JNE, 2 + 5); CLEAR_A(); EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */ @@ -335,12 +341,12 @@ void bpf_jit_compile(struct sk_filter *fp) } /* fallinto */ case BPF_S_RET_A: - if (seen) { + if (seen_or_pass0) { if (i != flen - 1) { EMIT_JMP(cleanup_addr - addrs[i]); break; } - if (seen & SEEN_XREG) + if (seen_or_pass0 & SEEN_XREG) EMIT4(0x48, 0x8b, 0x5d, 0xf8); /* mov -8(%rbp),%rbx */ EMIT1(0xc9); /* leaveq */ } @@ -483,8 +489,9 @@ common_load: seen |= SEEN_DATAREF; goto common_load; case BPF_S_LDX_B_MSH: if ((int)K < 0) { - if (pc_ret0 != -1) { - EMIT_JMP(addrs[pc_ret0] - addrs[i]); + if (pc_ret0 > 0) { + /* addrs[pc_ret0 - 1] is the start address */ + EMIT_JMP(addrs[pc_ret0 - 1] - addrs[i]); break; } CLEAR_A(); @@ -599,13 +606,14 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; * use it to give the cleanup instruction(s) addr */ cleanup_addr = proglen - 1; /* ret */ - if (seen) + if (seen_or_pass0) cleanup_addr -= 1; /* leaveq */ - if (seen & SEEN_XREG) + if (seen_or_pass0 & SEEN_XREG) cleanup_addr -= 4; /* mov -8(%rbp),%rbx */ if (image) { - WARN_ON(proglen != oldproglen); + if (proglen != oldproglen) + pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n", proglen, oldproglen); break; } if (proglen == oldproglen) { From 93d3e3678f23109363cd6f99f2944d2cda616b23 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 17 Jan 2012 22:54:55 +0000 Subject: [PATCH 07/67] mlx4_en: set number of rx rings used by RSS using ethtool Value must be a power of 2 due to HW limitation. Driver supports only 'equal' mode in ethtool and can't be set by using weights. Signed-off-by: Amir Vadai Reviewed-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_ethtool.c | 93 +++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_main.c | 6 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 8 +- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 2 +- 4 files changed, 102 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 7dbc6a230779..53c66869aecd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -479,6 +479,95 @@ static void mlx4_en_get_ringparam(struct net_device *dev, param->tx_pending = priv->tx_ring[0].size; } +static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return priv->rx_ring_num; +} + +static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_rss_map *rss_map = &priv->rss_map; + int rss_rings; + size_t n = priv->rx_ring_num; + int err = 0; + + rss_rings = priv->prof->rss_rings ?: priv->rx_ring_num; + + while (n--) { + ring_index[n] = rss_map->qps[n % rss_rings].qpn - + rss_map->base_qpn; + } + + return err; +} + +static int mlx4_en_set_rxfh_indir(struct net_device *dev, + const u32 *ring_index) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int port_up = 0; + int err = 0; + int i; + int rss_rings = 0; + + /* Calculate RSS table size and make sure flows are spread evenly + * between rings + */ + for (i = 0; i < priv->rx_ring_num; i++) { + if (i > 0 && !ring_index[i] && !rss_rings) + rss_rings = i; + + if (ring_index[i] != (i % (rss_rings ?: priv->rx_ring_num))) + return -EINVAL; + } + + if (!rss_rings) + rss_rings = priv->rx_ring_num; + + /* RSS table size must be an order of 2 */ + if (!is_power_of_2(rss_rings)) + return -EINVAL; + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev); + } + + priv->prof->rss_rings = rss_rings; + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) + en_err(priv, "Failed starting port\n"); + } + + mutex_unlock(&mdev->state_lock); + return err; +} + +static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = priv->rx_ring_num; + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + const struct ethtool_ops mlx4_en_ethtool_ops = { .get_drvinfo = mlx4_en_get_drvinfo, .get_settings = mlx4_en_get_settings, @@ -498,6 +587,10 @@ const struct ethtool_ops mlx4_en_ethtool_ops = { .set_pauseparam = mlx4_en_set_pauseparam, .get_ringparam = mlx4_en_get_ringparam, .set_ringparam = mlx4_en_set_ringparam, + .get_rxnfc = mlx4_en_get_rxnfc, + .get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size, + .get_rxfh_indir = mlx4_en_get_rxfh_indir, + .set_rxfh_indir = mlx4_en_set_rxfh_indir, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index a06096fcc0b8..2097a7d3c5b8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -62,10 +62,6 @@ static const char mlx4_en_version[] = * Device scope module parameters */ - -/* Enable RSS TCP traffic */ -MLX4_EN_PARM_INT(tcp_rss, 1, - "Enable RSS for incomming TCP traffic or disabled (0)"); /* Enable RSS UDP traffic */ MLX4_EN_PARM_INT(udp_rss, 1, "Enable RSS for incomming UDP traffic or disabled (0)"); @@ -104,7 +100,6 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) struct mlx4_en_profile *params = &mdev->profile; int i; - params->tcp_rss = tcp_rss; params->udp_rss = udp_rss; if (params->udp_rss && !(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) { @@ -120,6 +115,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; params->prof[i].tx_ring_num = MLX4_EN_NUM_TX_RINGS + (!!pfcrx) * MLX4_EN_NUM_PPP_RINGS; + params->prof[i].rss_rings = 0; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index e8d6ad2dce0a..971d4b6b8dfe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -853,6 +853,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) struct mlx4_en_rss_map *rss_map = &priv->rss_map; struct mlx4_qp_context context; struct mlx4_rss_context *rss_context; + int rss_rings; void *ptr; u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 | MLX4_RSS_TCP_IPV6); @@ -893,10 +894,15 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv) mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn, priv->rx_ring[0].cqn, &context); + if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num) + rss_rings = priv->rx_ring_num; + else + rss_rings = priv->prof->rss_rings; + ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH; rss_context = ptr; - rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 | + rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 | (rss_map->base_qpn)); rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn); if (priv->mdev->profile.udp_rss) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index f2a8e65f5f88..f4d189a1290e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -325,11 +325,11 @@ struct mlx4_en_port_profile { u8 rx_ppp; u8 tx_pause; u8 tx_ppp; + int rss_rings; }; struct mlx4_en_profile { int rss_xor; - int tcp_rss; int udp_rss; u8 rss_mask; u32 active_ports; From 3a4adef5c0adbbd0d898578e13ba1adbbdecbbd5 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 18 Jan 2012 04:23:55 +0000 Subject: [PATCH 08/67] enic: This patch adds pci id 0x71 for SRIOV VF's This patch adds pci id 0x71 for sriov VF's. Signed-off-by: Roopa Prabhu Signed-off-by: Christian Benvenuti Signed-off-by: Sujith Sankar Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic.h | 2 +- drivers/net/ethernet/cisco/enic/enic_main.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index fe0c29acdbe6..ee93a2087fe6 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -32,7 +32,7 @@ #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" -#define DRV_VERSION "2.1.1.28" +#define DRV_VERSION "2.1.1.31" #define DRV_COPYRIGHT "Copyright 2008-2011 Cisco Systems, Inc" #define ENIC_BARS_MAX 6 diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 2fd9db4b1be5..a634f58976af 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -57,11 +57,13 @@ #define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ #define PCI_DEVICE_ID_CISCO_VIC_ENET_DYN 0x0044 /* enet dynamic vnic */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ /* Supported devices */ static DEFINE_PCI_DEVICE_TABLE(enic_id_table) = { { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) }, { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_DYN) }, + { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) }, { 0, } /* end of table */ }; @@ -132,6 +134,11 @@ int enic_sriov_enabled(struct enic *enic) return (enic->priv_flags & ENIC_SRIOV_ENABLED) ? 1 : 0; } +static int enic_is_sriov_vf(struct enic *enic) +{ + return enic->pdev->device == PCI_DEVICE_ID_CISCO_VIC_ENET_VF; +} + int enic_is_valid_vf(struct enic *enic, int vf) { #ifdef CONFIG_PCI_IOV From 7335903cba523fc2fd801a178a0e4fcf8ccae756 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 18 Jan 2012 04:24:02 +0000 Subject: [PATCH 09/67] enic: Add sriov vf device id checks in port profile code This patch adds checks for sriov vf's in enic port profile handling code. sriov vf's are same as dynamic vnics but with a different device id. Signed-off-by: Roopa Prabhu Signed-off-by: Christian Benvenuti Signed-off-by: Sujith Sankar Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index a634f58976af..9f37d92fc76d 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -444,7 +444,7 @@ static void enic_mtu_check(struct enic *enic) if (mtu && mtu != enic->port_mtu) { enic->port_mtu = mtu; - if (enic_is_dynamic(enic)) { + if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) { mtu = max_t(int, ENIC_MIN_MTU, min_t(int, ENIC_MAX_MTU, mtu)); if (mtu != netdev->mtu) @@ -856,7 +856,7 @@ static int enic_set_mac_addr(struct net_device *netdev, char *addr) { struct enic *enic = netdev_priv(netdev); - if (enic_is_dynamic(enic)) { + if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) { if (!is_valid_ether_addr(addr) && !is_zero_ether_addr(addr)) return -EADDRNOTAVAIL; } else { @@ -1615,7 +1615,7 @@ static int enic_open(struct net_device *netdev) for (i = 0; i < enic->rq_count; i++) vnic_rq_enable(&enic->rq[i]); - if (!enic_is_dynamic(enic)) + if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_add_station_addr(enic); enic_set_rx_mode(netdev); @@ -1666,7 +1666,7 @@ static int enic_stop(struct net_device *netdev) netif_carrier_off(netdev); netif_tx_disable(netdev); - if (!enic_is_dynamic(enic)) + if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); for (i = 0; i < enic->wq_count; i++) { @@ -1703,7 +1703,7 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu) if (new_mtu < ENIC_MIN_MTU || new_mtu > ENIC_MAX_MTU) return -EINVAL; - if (enic_is_dynamic(enic)) + if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) return -EOPNOTSUPP; if (running) @@ -2433,7 +2433,7 @@ static int __devinit enic_probe(struct pci_dev *pdev, * called later by an upper layer. */ - if (!enic_is_dynamic(enic)) { + if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) { err = vnic_dev_init(enic->vdev, 0); if (err) { dev_err(dev, "vNIC dev init failed, aborting\n"); @@ -2467,8 +2467,7 @@ static int __devinit enic_probe(struct pci_dev *pdev, (void)enic_change_mtu(netdev, enic->port_mtu); #ifdef CONFIG_PCI_IOV - if (enic_is_dynamic(enic) && pdev->is_virtfn && - is_zero_ether_addr(enic->mac_addr)) + if (enic_is_sriov_vf(enic) && is_zero_ether_addr(enic->mac_addr)) random_ether_addr(enic->mac_addr); #endif @@ -2481,7 +2480,7 @@ static int __devinit enic_probe(struct pci_dev *pdev, enic->tx_coalesce_usecs = enic->config.intr_timer_usec; enic->rx_coalesce_usecs = enic->tx_coalesce_usecs; - if (enic_is_dynamic(enic)) + if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic)) netdev->netdev_ops = &enic_netdev_dynamic_ops; else netdev->netdev_ops = &enic_netdev_ops; From ca2b721de25b9893310fd695a38b495d14c70ff3 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 18 Jan 2012 04:24:07 +0000 Subject: [PATCH 10/67] enic: rearrange some of the port profile code This patch rearranges some of the port profile code in enic_probe. It moves out some lines of port profile related code currently inside CONFIG_PCI_IOV. This is only done to move all port profile related code together so that it can help isolate the port profile handling code under a separate #ifdef in our internal build scripts. Signed-off-by: Roopa Prabhu Signed-off-by: Christian Benvenuti Signed-off-by: Sujith Sankar Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 9f37d92fc76d..03b0577bd619 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2270,10 +2270,10 @@ static int __devinit enic_probe(struct pci_dev *pdev, int using_dac = 0; unsigned int i; int err; - int num_pps = 1; #ifdef CONFIG_PCI_IOV int pos = 0; #endif + int num_pps; /* Allocate net device structure and initialize. Private * instance data is initialized to zero. @@ -2380,17 +2380,17 @@ static int __devinit enic_probe(struct pci_dev *pdev, goto err_out_vnic_unregister; } enic->priv_flags |= ENIC_SRIOV_ENABLED; - num_pps = enic->num_vfs; } } - #endif + + num_pps = enic->num_vfs ? enic->num_vfs : 1; /* Allocate structure for port profiles */ enic->pp = kcalloc(num_pps, sizeof(*enic->pp), GFP_KERNEL); if (!enic->pp) { pr_err("port profile alloc failed, aborting\n"); err = -ENOMEM; - goto err_out_disable_sriov; + goto err_out_disable_sriov_pp; } /* Issue device open to get device in known state @@ -2399,7 +2399,7 @@ static int __devinit enic_probe(struct pci_dev *pdev, err = enic_dev_open(enic); if (err) { dev_err(dev, "vNIC dev open failed, aborting\n"); - goto err_out_free_pp; + goto err_out_disable_sriov; } /* Setup devcmd lock @@ -2522,9 +2522,9 @@ static int __devinit enic_probe(struct pci_dev *pdev, enic_dev_deinit(enic); err_out_dev_close: vnic_dev_close(enic->vdev); -err_out_free_pp: - kfree(enic->pp); err_out_disable_sriov: + kfree(enic->pp); +err_out_disable_sriov_pp: #ifdef CONFIG_PCI_IOV if (enic_sriov_enabled(enic)) { pci_disable_sriov(pdev); From 35d87e3325e91ec6bc446bf71f50eccc9a7b3b8f Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Wed, 18 Jan 2012 04:24:12 +0000 Subject: [PATCH 11/67] enic: fix location of vnic dev unregister in enic_probe cleanup code The vnic_dev_unregister is erroneously under CONFIG_PCI_IOV. This patch moves it out of CONFIG_PCI_IOV Signed-off-by: Roopa Prabhu Signed-off-by: Christian Benvenuti Signed-off-by: Sujith Sankar Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 03b0577bd619..045c468ac8ef 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2531,8 +2531,8 @@ static int __devinit enic_probe(struct pci_dev *pdev, enic->priv_flags &= ~ENIC_SRIOV_ENABLED; } err_out_vnic_unregister: - vnic_dev_unregister(enic->vdev); #endif + vnic_dev_unregister(enic->vdev); err_out_iounmap: enic_iounmap(enic); err_out_release_regions: From 013d97e9da1877f1334aa8ff3a19921ebbfe99b5 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Mon, 16 Jan 2012 10:40:10 +0000 Subject: [PATCH 12/67] net: race condition in ipv6 forwarding and disable_ipv6 parameters There is a race condition in addrconf_sysctl_forward() and addrconf_sysctl_disable(). These functions change idev->cnf.forwarding (resp. idev->cnf.disable_ipv6) and then try to grab the rtnl lock before performing any actions. If that fails they restore the original value and restart the syscall. This creates race conditions if ipv6 code tries to access these parameters, or if multiple instances try to do the same operation. As an example of the former, if __ipv6_ifa_notify() finds a 0 in idev->cnf.forwarding when invoked by addrconf_ifdown() it may not free anycast addresses, ultimately resulting in the net_device not being freed. This patch reads the user parameters into a temporary location and only writes the actual parameters when the rtnl lock is acquired. Tested in 2.6.38.8. Signed-off-by: Francesco Ruggeri Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 61 +++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a225d5ee3c2f..c02280a4d126 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -502,29 +502,31 @@ static void addrconf_forward_change(struct net *net, __s32 newf) rcu_read_unlock(); } -static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) +static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) { struct net *net; + int old; + + if (!rtnl_trylock()) + return restart_syscall(); net = (struct net *)table->extra2; - if (p == &net->ipv6.devconf_dflt->forwarding) - return 0; + old = *p; + *p = newf; - if (!rtnl_trylock()) { - /* Restore the original values before restarting */ - *p = old; - return restart_syscall(); + if (p == &net->ipv6.devconf_dflt->forwarding) { + rtnl_unlock(); + return 0; } if (p == &net->ipv6.devconf_all->forwarding) { - __s32 newf = net->ipv6.devconf_all->forwarding; net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); - } else if ((!*p) ^ (!old)) + } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); rtnl_unlock(); - if (*p) + if (newf) rt6_purge_dflt_routers(net); return 1; } @@ -4260,9 +4262,17 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; + ctl_table lctl; int ret; - ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + /* + * ctl->data points to idev->cnf.forwarding, we should + * not modify it until we get the rtnl lock. + */ + lctl = *ctl; + lctl.data = &val; + + ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); if (write) ret = addrconf_fixup_forwarding(ctl, valp, val); @@ -4300,26 +4310,27 @@ static void addrconf_disable_change(struct net *net, __s32 newf) rcu_read_unlock(); } -static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old) +static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) { struct net *net; + int old; + + if (!rtnl_trylock()) + return restart_syscall(); net = (struct net *)table->extra2; + old = *p; + *p = newf; - if (p == &net->ipv6.devconf_dflt->disable_ipv6) + if (p == &net->ipv6.devconf_dflt->disable_ipv6) { + rtnl_unlock(); return 0; - - if (!rtnl_trylock()) { - /* Restore the original values before restarting */ - *p = old; - return restart_syscall(); } if (p == &net->ipv6.devconf_all->disable_ipv6) { - __s32 newf = net->ipv6.devconf_all->disable_ipv6; net->ipv6.devconf_dflt->disable_ipv6 = newf; addrconf_disable_change(net, newf); - } else if ((!*p) ^ (!old)) + } else if ((!newf) ^ (!old)) dev_disable_change((struct inet6_dev *)table->extra1); rtnl_unlock(); @@ -4333,9 +4344,17 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; + ctl_table lctl; int ret; - ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + /* + * ctl->data points to idev->cnf.disable_ipv6, we should + * not modify it until we get the rtnl lock. + */ + lctl = *ctl; + lctl.data = &val; + + ret = proc_dointvec(&lctl, write, buffer, lenp, ppos); if (write) ret = addrconf_disable_ipv6(ctl, valp, val); From 2b53d07891630dead46d65c8f896955fd3ae0302 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 16 Jan 2012 09:50:19 +0000 Subject: [PATCH 13/67] pch_gbe: Do not abort probe on bad MAC If the MAC is invalid or not implemented, do not abort the probe. Issue a warning and prevent bringing the interface up until a MAC is set manually (via ifconfig $IFACE hw ether $MAC). Tested on two platforms, one with a valid MAC, the other without a MAC. The real MAC is used if present, the interface fails to come up until the MAC is set on the other. They successfully get an IP over DHCP and pass a simple ping and login over ssh test. This is meant to allow the Inforce SYS940X development board: http://www.inforcecomputing.com/SYS940X_ECX.html (and others suffering from a missing MAC) to work with the mainline kernel. Without this patch, the probe will fail and the interface will not be created, preventing the user from configuring the MAC manually. This does not make any attempt to address a missing or invalid MAC for the pch_phub driver. Signed-off-by: Darren Hart CC: Arjan van de Ven CC: Alan Cox CC: Tomoya MORINAGA CC: Jeff Kirsher CC: "David S. Miller" CC: Paul Gortmaker CC: Jon Mason CC: netdev@vger.kernel.org CC: Mark Brown CC: David Laight CC: Joe Perches Signed-off-by: David S. Miller --- .../ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index 964e9c0948bc..3ead111111e1 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -1745,6 +1745,12 @@ int pch_gbe_up(struct pch_gbe_adapter *adapter) struct pch_gbe_rx_ring *rx_ring = adapter->rx_ring; int err; + /* Ensure we have a valid MAC */ + if (!is_valid_ether_addr(adapter->hw.mac.addr)) { + pr_err("Error: Invalid MAC address\n"); + return -EINVAL; + } + /* hardware has been reset, we need to reload some things */ pch_gbe_set_multi(netdev); @@ -2468,9 +2474,14 @@ static int pch_gbe_probe(struct pci_dev *pdev, memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); if (!is_valid_ether_addr(netdev->dev_addr)) { - dev_err(&pdev->dev, "Invalid MAC Address\n"); - ret = -EIO; - goto err_free_adapter; + /* + * If the MAC is invalid (or just missing), display a warning + * but do not abort setting up the device. pch_gbe_up will + * prevent the interface from being brought up until a valid MAC + * is set. + */ + dev_err(&pdev->dev, "Invalid MAC address, " + "interface disabled.\n"); } setup_timer(&adapter->watchdog_timer, pch_gbe_watchdog, (unsigned long)adapter); From b924551bed09f61b64f21bffe241afc5526b091a Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 18 Jan 2012 12:24:54 +0000 Subject: [PATCH 14/67] bonding: fix enslaving in alb mode when link down bond_alb_init_slave() is called from bond_enslave() and sets the slave's MAC address. This is done differently for TLB and ALB modes. bond->alb_info.rlb_enabled is used to discriminate between the two modes but this flag may be uninitialized if the slave is being enslaved prior to calling bond_open() -> bond_alb_initialize() on the master. It turns out all the callers of alb_set_slave_mac_addr() pass bond->alb_info.rlb_enabled as the hw parameter. This patch cleans up the unnecessary parameter of alb_set_slave_mac_addr() and makes the function decide based on the bonding mode instead, which fixes the above problem. Reported-by: Narendra K Signed-off-by: Jiri Bohac Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 342626f4bc46..f820b26b9db3 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -909,16 +909,12 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) } } -/* hw is a boolean parameter that determines whether we should try and - * set the hw address of the device as well as the hw address of the - * net_device - */ -static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[], int hw) +static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[]) { struct net_device *dev = slave->dev; struct sockaddr s_addr; - if (!hw) { + if (slave->bond->params.mode == BOND_MODE_TLB) { memcpy(dev->dev_addr, addr, dev->addr_len); return 0; } @@ -948,8 +944,8 @@ static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct u8 tmp_mac_addr[ETH_ALEN]; memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN); - alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr, bond->alb_info.rlb_enabled); - alb_set_slave_mac_addr(slave2, tmp_mac_addr, bond->alb_info.rlb_enabled); + alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr); + alb_set_slave_mac_addr(slave2, tmp_mac_addr); } @@ -1096,8 +1092,7 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav /* Try setting slave mac to bond address and fall-through to code handling that situation below... */ - alb_set_slave_mac_addr(slave, bond->dev->dev_addr, - bond->alb_info.rlb_enabled); + alb_set_slave_mac_addr(slave, bond->dev->dev_addr); } /* The slave's address is equal to the address of the bond. @@ -1133,8 +1128,7 @@ static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slav } if (free_mac_slave) { - alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr, - bond->alb_info.rlb_enabled); + alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr); pr_warning("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n", bond->dev->name, slave->dev->name, @@ -1491,8 +1485,7 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) { int res; - res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr, - bond->alb_info.rlb_enabled); + res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr); if (res) { return res; } @@ -1643,8 +1636,7 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave alb_swap_mac_addr(bond, swap_slave, new_slave); } else { /* set the new_slave to the bond mac address */ - alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr, - bond->alb_info.rlb_enabled); + alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); } if (swap_slave) { @@ -1704,8 +1696,7 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave); alb_fasten_mac_swap(bond, swap_slave, bond->curr_active_slave); } else { - alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr, - bond->alb_info.rlb_enabled); + alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr); read_lock(&bond->lock); alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); From 17f1bbca1baf2f652fa9c102ec239d6abded94c1 Mon Sep 17 00:00:00 2001 From: Thomas Faber Date: Wed, 18 Jan 2012 13:45:44 +0000 Subject: [PATCH 15/67] net: ftgmac100/ftmac100: add missing interrupt.h include Fixes compilation failure of these modules due to missing irqreturn_t type for the ft(g)mac100_interrupt definition. Signed-off-by: Thomas Faber Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 1 + drivers/net/ethernet/faraday/ftmac100.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index fb5579a3b19d..47f85c337cf7 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index a127cb2476c7..bb336a0959c9 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include From c50b52a0c7ec8522983f5021c0b0952b4d678adf Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Jan 2012 22:13:26 +0000 Subject: [PATCH 16/67] igb: make local functions static Sparse caught two functions that were only being used in one file. Signed-off-by: Stephen Hemminger Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/e1000_mac.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 73aac082c44d..36450d35f621 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -151,7 +151,7 @@ void igb_clear_vfta_i350(struct e1000_hw *hw) * Writes value at the given offset in the register array which stores * the VLAN filter table. **/ -void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) +static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) { int i; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 01e5e89ef959..92fd642b2dfe 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4003,8 +4003,8 @@ static void igb_set_itr(struct igb_q_vector *q_vector) } } -void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, - u32 type_tucmd, u32 mss_l4len_idx) +static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, + u32 type_tucmd, u32 mss_l4len_idx) { struct e1000_adv_tx_context_desc *context_desc; u16 i = tx_ring->next_to_use; @@ -5623,7 +5623,7 @@ static irqreturn_t igb_intr(int irq, void *data) return IRQ_HANDLED; } -void igb_ring_irq_enable(struct igb_q_vector *q_vector) +static void igb_ring_irq_enable(struct igb_q_vector *q_vector) { struct igb_adapter *adapter = q_vector->adapter; struct e1000_hw *hw = &adapter->hw; From 6e861326b1d78bb439c0724864a6ca83ec23d289 Mon Sep 17 00:00:00 2001 From: Carolyn Wyborny Date: Wed, 18 Jan 2012 22:13:27 +0000 Subject: [PATCH 17/67] igb: Update Copyright on all Intel copyrighted files. Signed-off-by: Carolyn Wyborny Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igb/Makefile | 2 +- drivers/net/ethernet/intel/igb/e1000_82575.c | 2 +- drivers/net/ethernet/intel/igb/e1000_82575.h | 2 +- drivers/net/ethernet/intel/igb/e1000_defines.h | 2 +- drivers/net/ethernet/intel/igb/e1000_hw.h | 2 +- drivers/net/ethernet/intel/igb/e1000_mac.c | 2 +- drivers/net/ethernet/intel/igb/e1000_mac.h | 2 +- drivers/net/ethernet/intel/igb/e1000_mbx.c | 2 +- drivers/net/ethernet/intel/igb/e1000_mbx.h | 2 +- drivers/net/ethernet/intel/igb/e1000_nvm.c | 2 +- drivers/net/ethernet/intel/igb/e1000_nvm.h | 2 +- drivers/net/ethernet/intel/igb/e1000_phy.c | 2 +- drivers/net/ethernet/intel/igb/e1000_phy.h | 2 +- drivers/net/ethernet/intel/igb/e1000_regs.h | 2 +- drivers/net/ethernet/intel/igb/igb.h | 2 +- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- 17 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile index c6e4621b6262..6565c463185c 100644 --- a/drivers/net/ethernet/intel/igb/Makefile +++ b/drivers/net/ethernet/intel/igb/Makefile @@ -1,7 +1,7 @@ ################################################################################ # # Intel 82575 PCI-Express Ethernet Linux driver -# Copyright(c) 1999 - 2011 Intel Corporation. +# Copyright(c) 1999 - 2012 Intel Corporation. # # This program is free software; you can redistribute it and/or modify it # under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index b8e20f037d0a..08bdc33715ee 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 08a757eb6608..b927d79ab536 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index f5fc5725ea94..aed217449f0d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h index 4519a1367170..f67cbd3fa307 100644 --- a/drivers/net/ethernet/intel/igb/e1000_hw.h +++ b/drivers/net/ethernet/intel/igb/e1000_hw.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c index 36450d35f621..f57338afd71f 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.c +++ b/drivers/net/ethernet/intel/igb/e1000_mac.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h index e45996b4ea34..cbddc4e51e30 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mac.h +++ b/drivers/net/ethernet/intel/igb/e1000_mac.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c index 469d95eaa154..5988b8958baf 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.c +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h index eddb0f83dcea..dbcfa3d5caec 100644 --- a/drivers/net/ethernet/intel/igb/e1000_mbx.h +++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index 40407124e722..fa2c6ba62139 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h index a2a7ca9fa733..825b0228cac0 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.h +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2011 Intel Corporation. + Copyright(c) 2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index b17d7c20f817..789de5b83aad 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index 8510797b9d81..4c32ac66ff39 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 0a860bc1198e..ccdf36d503fd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 3d12e67eebb4..8e33bdd33eea 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 7998bf4d5946..aa399a8a8f0d 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 92fd642b2dfe..e91d73c8aa4e 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2011 Intel Corporation. + Copyright(c) 2007-2012 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -68,7 +68,7 @@ char igb_driver_name[] = "igb"; char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; -static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation."; +static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation."; static const struct e1000_info *igb_info_tbl[] = { [board_82575] = &e1000_82575_info, From b2f0f6bb874df645766930e0f023a208d93c12cd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Jan 2012 22:13:28 +0000 Subject: [PATCH 18/67] igbvf: remove unneeded cast The cast and comment are unnecessary in the current upstream kernel. Signed-off-by: Stephen Hemminger Tested-by: Garrett, RobertX E Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igbvf/ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index 7b600a1f6366..2dba53446064 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -468,6 +468,5 @@ static const struct ethtool_ops igbvf_ethtool_ops = { void igbvf_set_ethtool_ops(struct net_device *netdev) { - /* have to "undeclare" const on this struct to remove warnings */ - SET_ETHTOOL_OPS(netdev, (struct ethtool_ops *)&igbvf_ethtool_ops); + SET_ETHTOOL_OPS(netdev, &igbvf_ethtool_ops); } From 56b68960ce996db7947440179cc153652a504e9a Mon Sep 17 00:00:00 2001 From: Mitch A Williams Date: Wed, 18 Jan 2012 22:13:29 +0000 Subject: [PATCH 19/67] igbvf: Remove unnecessary irq disable/enable This irq disable/enable pair used to wrap access to the driver's vlgrp struct, which is no longer present. So, then, this could also so no longer be present. Signed-off-by: Mitch Williams Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/igbvf/netdev.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index fd3da3076c2f..a4b20c865759 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1194,11 +1194,6 @@ static int igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - igbvf_irq_disable(adapter); - - if (!test_bit(__IGBVF_DOWN, &adapter->state)) - igbvf_irq_enable(adapter); - if (hw->mac.ops.set_vfta(hw, vid, false)) { dev_err(&adapter->pdev->dev, "Failed to remove vlan id %d\n", vid); From 795be954dcc4a72329c8591aafe5eadb03b264af Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 18 Jan 2012 22:13:30 +0000 Subject: [PATCH 20/67] ixgbe: Fix register defines to correctly handle complex expressions This patch is meant to address possible issues with the IXGBE register defines generating incorrect values when given a complex expression for the register offset. Signed-off-by: Alexander Duyck Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 802bfa0f62cc..775602ef90e5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -161,19 +161,19 @@ /* Receive DMA Registers */ #define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \ - (0x0D000 + ((_i - 64) * 0x40))) + (0x0D000 + (((_i) - 64) * 0x40))) #define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \ - (0x0D004 + ((_i - 64) * 0x40))) + (0x0D004 + (((_i) - 64) * 0x40))) #define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \ - (0x0D008 + ((_i - 64) * 0x40))) + (0x0D008 + (((_i) - 64) * 0x40))) #define IXGBE_RDH(_i) (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \ - (0x0D010 + ((_i - 64) * 0x40))) + (0x0D010 + (((_i) - 64) * 0x40))) #define IXGBE_RDT(_i) (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \ - (0x0D018 + ((_i - 64) * 0x40))) + (0x0D018 + (((_i) - 64) * 0x40))) #define IXGBE_RXDCTL(_i) (((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \ - (0x0D028 + ((_i - 64) * 0x40))) + (0x0D028 + (((_i) - 64) * 0x40))) #define IXGBE_RSCCTL(_i) (((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \ - (0x0D02C + ((_i - 64) * 0x40))) + (0x0D02C + (((_i) - 64) * 0x40))) #define IXGBE_RSCDBU 0x03028 #define IXGBE_RDDCC 0x02F20 #define IXGBE_RXMEMWRAP 0x03190 @@ -186,7 +186,7 @@ */ #define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \ (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \ - (0x0D014 + ((_i - 64) * 0x40)))) + (0x0D014 + (((_i) - 64) * 0x40)))) /* * Rx DCA Control Register: * 00-15 : 0x02200 + n*4 @@ -195,7 +195,7 @@ */ #define IXGBE_DCA_RXCTRL(_i) (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \ (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \ - (0x0D00C + ((_i - 64) * 0x40)))) + (0x0D00C + (((_i) - 64) * 0x40)))) #define IXGBE_RDRXCTL 0x02F00 #define IXGBE_RXPBSIZE(_i) (0x03C00 + ((_i) * 4)) /* 8 of these 0x03C00 - 0x03C1C */ @@ -344,9 +344,9 @@ #define IXGBE_WUPL 0x05900 #define IXGBE_WUPM 0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */ -#define IXGBE_FHFT(_n) (0x09000 + (_n * 0x100)) /* Flex host filter table */ -#define IXGBE_FHFT_EXT(_n) (0x09800 + (_n * 0x100)) /* Ext Flexible Host - * Filter Table */ +#define IXGBE_FHFT(_n) (0x09000 + ((_n) * 0x100)) /* Flex host filter table */ +#define IXGBE_FHFT_EXT(_n) (0x09800 + ((_n) * 0x100)) /* Ext Flexible Host + * Filter Table */ #define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4 #define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2 @@ -1485,7 +1485,7 @@ enum { #define IXGBE_LED_BLINK_BASE 0x00000080 #define IXGBE_LED_MODE_MASK_BASE 0x0000000F #define IXGBE_LED_OFFSET(_base, _i) (_base << (8 * (_i))) -#define IXGBE_LED_MODE_SHIFT(_i) (8*(_i)) +#define IXGBE_LED_MODE_SHIFT(_i) (8 * (_i)) #define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i) #define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i) #define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i) @@ -2068,9 +2068,9 @@ enum { /* SR-IOV specific macros */ #define IXGBE_MBVFICR_INDEX(vf_number) (vf_number >> 4) -#define IXGBE_MBVFICR(_i) (0x00710 + (_i * 4)) -#define IXGBE_VFLRE(_i) (((_i & 1) ? 0x001C0 : 0x00600)) -#define IXGBE_VFLREC(_i) (0x00700 + (_i * 4)) +#define IXGBE_MBVFICR(_i) (0x00710 + ((_i) * 4)) +#define IXGBE_VFLRE(_i) ((((_i) & 1) ? 0x001C0 : 0x00600)) +#define IXGBE_VFLREC(_i) (0x00700 + ((_i) * 4)) enum ixgbe_fdir_pballoc_type { IXGBE_FDIR_PBALLOC_NONE = 0, From 375b27cf5db963e2bc2a34dc5643d0d7ceca1ee2 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 18 Jan 2012 22:13:31 +0000 Subject: [PATCH 21/67] ixgbevf: Prevent possible race condition by checking for message The mailbox interrupt routine might cause a race condition sometimes and cause a message to be missed. Signed-off-by: Greg Rose Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 891162d1610c..1cad3b6d2c30 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -917,31 +917,34 @@ static irqreturn_t ixgbevf_msix_mbx(int irq, void *data) struct ixgbe_hw *hw = &adapter->hw; u32 eicr; u32 msg; + bool got_ack = false; eicr = IXGBE_READ_REG(hw, IXGBE_VTEICS); IXGBE_WRITE_REG(hw, IXGBE_VTEICR, eicr); - if (!hw->mbx.ops.check_for_ack(hw)) { - /* - * checking for the ack clears the PFACK bit. Place - * it back in the v2p_mailbox cache so that anyone - * polling for an ack will not miss it. Also - * avoid the read below because the code to read - * the mailbox will also clear the ack bit. This was - * causing lost acks. Just cache the bit and exit - * the IRQ handler. - */ - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFACK; + if (!hw->mbx.ops.check_for_ack(hw)) + got_ack = true; + + if (!hw->mbx.ops.check_for_msg(hw)) { + hw->mbx.ops.read(hw, &msg, 1); + + if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + 1)); + + if (msg & IXGBE_VT_MSGTYPE_NACK) + pr_warn("Last Request of type %2.2x to PF Nacked\n", + msg & 0xFF); goto out; } - /* Not an ack interrupt, go ahead and read the message */ - hw->mbx.ops.read(hw, &msg, 1); - - if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 1)); - + /* + * checking for the ack clears the PFACK bit. Place + * it back in the v2p_mailbox cache so that anyone + * polling for an ack will not miss it + */ + if (got_ack) + hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFACK; out: return IRQ_HANDLED; } From b47aca135d6df5f676c768368baf3c099f054fcd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Jan 2012 22:13:32 +0000 Subject: [PATCH 22/67] ixgbevf: make ethtool ops and strings const Signed-off-by: Stephen Hemminger Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index dc8e6511c640..c85700318147 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -56,7 +56,8 @@ struct ixgbe_stats { offsetof(struct ixgbevf_adapter, m), \ offsetof(struct ixgbevf_adapter, b), \ offsetof(struct ixgbevf_adapter, r) -static struct ixgbe_stats ixgbe_gstrings_stats[] = { + +static const struct ixgbe_stats ixgbe_gstrings_stats[] = { {"rx_packets", IXGBEVF_STAT(stats.vfgprc, stats.base_vfgprc, stats.saved_reset_vfgprc)}, {"tx_packets", IXGBEVF_STAT(stats.vfgptc, stats.base_vfgptc, @@ -671,7 +672,7 @@ static int ixgbevf_nway_reset(struct net_device *netdev) return 0; } -static struct ethtool_ops ixgbevf_ethtool_ops = { +static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_settings = ixgbevf_get_settings, .get_drvinfo = ixgbevf_get_drvinfo, .get_regs_len = ixgbevf_get_regs_len, From b5417bf8e8952401bca69ded67c30ead484af823 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Jan 2012 22:13:33 +0000 Subject: [PATCH 23/67] ixgbevf: fix sparse warnings Fixes sparse warnings: drivers/net/ethernet/intel/ixgbevf/vf.c:418:21: warning: symbol 'ixgbevf_82599_vf_info' was not declared. Should it be static? drivers/net/ethernet/intel/ixgbevf/vf.c:423:21: warning: symbol 'ixgbevf_X540_vf_info' was not declared. Should it be static? drivers/net/ethernet/intel/ixgbevf/mbx.c:331:29: warning: symbol 'ixgbevf_mbx_ops' was not declared. Should it be static? Signed-off-by: Stephen Hemminger Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 +- drivers/net/ethernet/intel/ixgbevf/mbx.c | 3 ++- drivers/net/ethernet/intel/ixgbevf/vf.c | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index e6c9d1a927a9..74b22ee497e7 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -281,7 +281,7 @@ enum ixgbevf_boards { extern struct ixgbevf_info ixgbevf_82599_vf_info; extern struct ixgbevf_info ixgbevf_X540_vf_info; -extern struct ixgbe_mbx_operations ixgbevf_mbx_ops; +extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops; /* needed by ethtool.c */ extern char ixgbevf_driver_name[]; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c index 930fa83f2568..13532d9ba72d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.c +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c @@ -26,6 +26,7 @@ *******************************************************************************/ #include "mbx.h" +#include "ixgbevf.h" /** * ixgbevf_poll_for_msg - Wait for message notification @@ -328,7 +329,7 @@ static s32 ixgbevf_init_mbx_params_vf(struct ixgbe_hw *hw) return 0; } -struct ixgbe_mbx_operations ixgbevf_mbx_ops = { +const struct ixgbe_mbx_operations ixgbevf_mbx_ops = { .init_params = ixgbevf_init_mbx_params_vf, .read = ixgbevf_read_mbx_vf, .write = ixgbevf_write_mbx_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 21533e300367..5c8c23f8346f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -26,6 +26,7 @@ *******************************************************************************/ #include "vf.h" +#include "ixgbevf.h" /** * ixgbevf_start_hw_vf - Prepare hardware for Tx/Rx From 3d8fe98f8d5cb303d907d8f94ea6dc1f9a1d8b7a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Jan 2012 22:13:34 +0000 Subject: [PATCH 24/67] ixgbevf: make operations tables const The arrays of function pointers should be const to make life harder for rootkits. Signed-off-by: Stephen Hemminger Tested-by: Sibai Li Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 6 +++--- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/vf.c | 6 +++--- drivers/net/ethernet/intel/ixgbevf/vf.h | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 74b22ee497e7..9075c1d61039 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -279,12 +279,12 @@ enum ixgbevf_boards { board_X540_vf, }; -extern struct ixgbevf_info ixgbevf_82599_vf_info; -extern struct ixgbevf_info ixgbevf_X540_vf_info; +extern const struct ixgbevf_info ixgbevf_82599_vf_info; +extern const struct ixgbevf_info ixgbevf_X540_vf_info; extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops; /* needed by ethtool.c */ -extern char ixgbevf_driver_name[]; +extern const char ixgbevf_driver_name[]; extern const char ixgbevf_driver_version[]; extern int ixgbevf_up(struct ixgbevf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 1cad3b6d2c30..bed411bada21 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -53,7 +53,7 @@ #include "ixgbevf.h" -char ixgbevf_driver_name[] = "ixgbevf"; +const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver"; diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 5c8c23f8346f..d0138d7a31a1 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -402,7 +402,7 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, return 0; } -static struct ixgbe_mac_operations ixgbevf_mac_ops = { +static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .init_hw = ixgbevf_init_hw_vf, .reset_hw = ixgbevf_reset_hw_vf, .start_hw = ixgbevf_start_hw_vf, @@ -416,12 +416,12 @@ static struct ixgbe_mac_operations ixgbevf_mac_ops = { .set_vfta = ixgbevf_set_vfta_vf, }; -struct ixgbevf_info ixgbevf_82599_vf_info = { +const struct ixgbevf_info ixgbevf_82599_vf_info = { .mac = ixgbe_mac_82599_vf, .mac_ops = &ixgbevf_mac_ops, }; -struct ixgbevf_info ixgbevf_X540_vf_info = { +const struct ixgbevf_info ixgbevf_X540_vf_info = { .mac = ixgbe_mac_X540_vf, .mac_ops = &ixgbevf_mac_ops, }; diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 10306b492ee6..d556619a9212 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -167,7 +167,7 @@ struct ixgbevf_hw_stats { struct ixgbevf_info { enum ixgbe_mac_type mac; - struct ixgbe_mac_operations *mac_ops; + const struct ixgbe_mac_operations *mac_ops; }; #endif /* __IXGBE_VF_H__ */ From b67f231ded332461dd31123c4f659c4681223fb1 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 19 Jan 2012 22:25:36 +0000 Subject: [PATCH 25/67] enic: fix compile when CONFIG_PCI_IOV is not enabled reverting back change that access enic->num_vfs outside CONFIG_PCI_IOV Reported-by: Randy Dunlap Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 045c468ac8ef..ab3f67f980d8 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -2273,7 +2273,7 @@ static int __devinit enic_probe(struct pci_dev *pdev, #ifdef CONFIG_PCI_IOV int pos = 0; #endif - int num_pps; + int num_pps = 1; /* Allocate net device structure and initialize. Private * instance data is initialized to zero. @@ -2380,11 +2380,11 @@ static int __devinit enic_probe(struct pci_dev *pdev, goto err_out_vnic_unregister; } enic->priv_flags |= ENIC_SRIOV_ENABLED; + num_pps = enic->num_vfs; } } #endif - num_pps = enic->num_vfs ? enic->num_vfs : 1; /* Allocate structure for port profiles */ enic->pp = kcalloc(num_pps, sizeof(*enic->pp), GFP_KERNEL); if (!enic->pp) { From fc16dcd8c2e1e9bc91ed765957e1f2bbf334253e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 18 Jan 2012 17:47:58 +0000 Subject: [PATCH 26/67] tcp: fix undo after RTO for BIC This patch fixes BIC so that cwnd reductions made during RTOs can be undone (just as they already can be undone when using the default/Reno behavior). When undoing cwnd reductions, BIC-derived congestion control modules were restoring the cwnd from last_max_cwnd. There were two problems with using last_max_cwnd to restore a cwnd during undo: (a) last_max_cwnd was set to 0 on state transitions into TCP_CA_Loss (by calling the module's reset() functions), so cwnd reductions from RTOs could not be undone. (b) when fast_covergence is enabled (which it is by default) last_max_cwnd does not actually hold the value of snd_cwnd before the loss; instead, it holds a scaled-down version of snd_cwnd. This patch makes the following changes: (1) upon undo, revert snd_cwnd to ca->loss_cwnd, which is already, as the existing comment notes, the "congestion window at last loss" (2) stop forgetting ca->loss_cwnd on TCP_CA_Loss events (3) use ca->last_max_cwnd to check if we're in slow start Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_bic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 6187eb4d1dcf..f45e1c242440 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -63,7 +63,6 @@ static inline void bictcp_reset(struct bictcp *ca) { ca->cnt = 0; ca->last_max_cwnd = 0; - ca->loss_cwnd = 0; ca->last_cwnd = 0; ca->last_time = 0; ca->epoch_start = 0; @@ -72,7 +71,11 @@ static inline void bictcp_reset(struct bictcp *ca) static void bictcp_init(struct sock *sk) { - bictcp_reset(inet_csk_ca(sk)); + struct bictcp *ca = inet_csk_ca(sk); + + bictcp_reset(ca); + ca->loss_cwnd = 0; + if (initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } @@ -127,7 +130,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) } /* if in slow start or link utilization is very low */ - if (ca->loss_cwnd == 0) { + if (ca->last_max_cwnd == 0) { if (ca->cnt > 20) /* increase cwnd 5% per RTT */ ca->cnt = 20; } @@ -185,7 +188,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); const struct bictcp *ca = inet_csk_ca(sk); - return max(tp->snd_cwnd, ca->last_max_cwnd); + return max(tp->snd_cwnd, ca->loss_cwnd); } static void bictcp_state(struct sock *sk, u8 new_state) From 5a45f0086a2dcf50db7e6a0bf5be933880f85127 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Wed, 18 Jan 2012 17:47:59 +0000 Subject: [PATCH 27/67] tcp: fix undo after RTO for CUBIC This patch fixes CUBIC so that cwnd reductions made during RTOs can be undone (just as they already can be undone when using the default/Reno behavior). When undoing cwnd reductions, BIC-derived congestion control modules were restoring the cwnd from last_max_cwnd. There were two problems with using last_max_cwnd to restore a cwnd during undo: (a) last_max_cwnd was set to 0 on state transitions into TCP_CA_Loss (by calling the module's reset() functions), so cwnd reductions from RTOs could not be undone. (b) when fast_covergence is enabled (which it is by default) last_max_cwnd does not actually hold the value of snd_cwnd before the loss; instead, it holds a scaled-down version of snd_cwnd. This patch makes the following changes: (1) upon undo, revert snd_cwnd to ca->loss_cwnd, which is already, as the existing comment notes, the "congestion window at last loss" (2) stop forgetting ca->loss_cwnd on TCP_CA_Loss events (3) use ca->last_max_cwnd to check if we're in slow start Signed-off-by: Neal Cardwell Acked-by: Stephen Hemminger Acked-by: Sangtae Ha Signed-off-by: David S. Miller --- net/ipv4/tcp_cubic.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index f376b05cca81..a9077f441cb2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -107,7 +107,6 @@ static inline void bictcp_reset(struct bictcp *ca) { ca->cnt = 0; ca->last_max_cwnd = 0; - ca->loss_cwnd = 0; ca->last_cwnd = 0; ca->last_time = 0; ca->bic_origin_point = 0; @@ -142,7 +141,10 @@ static inline void bictcp_hystart_reset(struct sock *sk) static void bictcp_init(struct sock *sk) { - bictcp_reset(inet_csk_ca(sk)); + struct bictcp *ca = inet_csk_ca(sk); + + bictcp_reset(ca); + ca->loss_cwnd = 0; if (hystart) bictcp_hystart_reset(sk); @@ -275,7 +277,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * The initial growth of cubic function may be too conservative * when the available bandwidth is still unknown. */ - if (ca->loss_cwnd == 0 && ca->cnt > 20) + if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ /* TCP Friendly */ @@ -342,7 +344,7 @@ static u32 bictcp_undo_cwnd(struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); - return max(tcp_sk(sk)->snd_cwnd, ca->last_max_cwnd); + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); } static void bictcp_state(struct sock *sk, u8 new_state) From b477ba628a283ba93e631d66907f91df80e82267 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 19 Jan 2012 09:42:37 +0000 Subject: [PATCH 28/67] mlx4_en: clear all eth statistics when port goes up Bug fix: Not all stats fields were cleared. Signed-off-by: Eugenia Emantayev Reviewed-by: Yevgeny Petriln Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_netdev.c | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 72fa807b69ce..be3f4156aaab 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -807,12 +807,37 @@ static void mlx4_en_restart(struct work_struct *work) mutex_unlock(&mdev->state_lock); } +static void mlx4_en_clear_stats(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + int i; + + if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) + en_dbg(HW, priv, "Failed dumping statistics\n"); + + memset(&priv->stats, 0, sizeof(priv->stats)); + memset(&priv->pstats, 0, sizeof(priv->pstats)); + memset(&priv->pkstats, 0, sizeof(priv->pkstats)); + memset(&priv->port_stats, 0, sizeof(priv->port_stats)); + + for (i = 0; i < priv->tx_ring_num; i++) { + priv->tx_ring[i].bytes = 0; + priv->tx_ring[i].packets = 0; + priv->tx_ring[i].tx_csum = 0; + } + for (i = 0; i < priv->rx_ring_num; i++) { + priv->rx_ring[i].bytes = 0; + priv->rx_ring[i].packets = 0; + priv->rx_ring[i].csum_ok = 0; + priv->rx_ring[i].csum_none = 0; + } +} static int mlx4_en_open(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int i; int err = 0; mutex_lock(&mdev->state_lock); @@ -823,21 +848,8 @@ static int mlx4_en_open(struct net_device *dev) goto out; } - /* Reset HW statistics and performance counters */ - if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) - en_dbg(HW, priv, "Failed dumping statistics\n"); - - memset(&priv->stats, 0, sizeof(priv->stats)); - memset(&priv->pstats, 0, sizeof(priv->pstats)); - - for (i = 0; i < priv->tx_ring_num; i++) { - priv->tx_ring[i].bytes = 0; - priv->tx_ring[i].packets = 0; - } - for (i = 0; i < priv->rx_ring_num; i++) { - priv->rx_ring[i].bytes = 0; - priv->rx_ring[i].packets = 0; - } + /* Reset HW statistics and SW counters */ + mlx4_en_clear_stats(dev); err = mlx4_en_start_port(dev); if (err) From 35fb9afbdeef9d5859d9a878d0372907baf119e1 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 19 Jan 2012 09:44:37 +0000 Subject: [PATCH 29/67] mlx4: VF is not allowed to perform dump stats In multifunction mode - DUMP_STATS command is not executed for VFs. Signed-off-by: Eugenia Emantayev Reviewed-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/port.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 88b52e547524..1a551d69ddcb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -898,6 +898,8 @@ int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + if (slave != dev->caps.function) + return 0; return mlx4_common_dump_eth_stats(dev, slave, vhcr->in_modifier, outbox); } From 93ece0c1a7ace88f10411dbb5643d2aa2fe00ebf Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 19 Jan 2012 09:45:05 +0000 Subject: [PATCH 30/67] mlx4_en: eth statistics modification In native mode display all available staticstics. In SRIOV mode on VF display only SW counters statistics, in SRIOV mode on hypervisor display SW counters and errors (got from FW) statistics. Signed-off-by: Eugenia Emantayev Reviewed-by: Yevgeny Petrilin Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_ethtool.c | 66 ++++++++++++++----- .../net/ethernet/mellanox/mlx4/en_netdev.c | 2 + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + drivers/net/ethernet/mellanox/mlx4/port.c | 21 ++++++ include/linux/mlx4/device.h | 1 + 5 files changed, 75 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 53c66869aecd..70346fd7f9c4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -183,10 +183,11 @@ static int mlx4_en_set_wol(struct net_device *netdev, static int mlx4_en_get_sset_count(struct net_device *dev, int sset) { struct mlx4_en_priv *priv = netdev_priv(dev); + int bit_count = hweight64(priv->stats_bitmap); switch (sset) { case ETH_SS_STATS: - return NUM_ALL_STATS + + return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) + (priv->tx_ring_num + priv->rx_ring_num) * 2; case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags @@ -201,14 +202,34 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); int index = 0; - int i; + int i, j = 0; spin_lock_bh(&priv->stats_lock); - for (i = 0; i < NUM_MAIN_STATS; i++) - data[index++] = ((unsigned long *) &priv->stats)[i]; - for (i = 0; i < NUM_PORT_STATS; i++) - data[index++] = ((unsigned long *) &priv->port_stats)[i]; + if (!(priv->stats_bitmap)) { + for (i = 0; i < NUM_MAIN_STATS; i++) + data[index++] = + ((unsigned long *) &priv->stats)[i]; + for (i = 0; i < NUM_PORT_STATS; i++) + data[index++] = + ((unsigned long *) &priv->port_stats)[i]; + for (i = 0; i < NUM_PKT_STATS; i++) + data[index++] = + ((unsigned long *) &priv->pkstats)[i]; + } else { + for (i = 0; i < NUM_MAIN_STATS; i++) { + if ((priv->stats_bitmap >> j) & 1) + data[index++] = + ((unsigned long *) &priv->stats)[i]; + j++; + } + for (i = 0; i < NUM_PORT_STATS; i++) { + if ((priv->stats_bitmap >> j) & 1) + data[index++] = + ((unsigned long *) &priv->port_stats)[i]; + j++; + } + } for (i = 0; i < priv->tx_ring_num; i++) { data[index++] = priv->tx_ring[i].packets; data[index++] = priv->tx_ring[i].bytes; @@ -217,8 +238,6 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, data[index++] = priv->rx_ring[i].packets; data[index++] = priv->rx_ring[i].bytes; } - for (i = 0; i < NUM_PKT_STATS; i++) - data[index++] = ((unsigned long *) &priv->pkstats)[i]; spin_unlock_bh(&priv->stats_lock); } @@ -247,11 +266,29 @@ static void mlx4_en_get_strings(struct net_device *dev, case ETH_SS_STATS: /* Add main counters */ - for (i = 0; i < NUM_MAIN_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[i]); - for (i = 0; i< NUM_PORT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, - main_strings[i + NUM_MAIN_STATS]); + if (!priv->stats_bitmap) { + for (i = 0; i < NUM_MAIN_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[i]); + for (i = 0; i < NUM_PORT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[i + + NUM_MAIN_STATS]); + for (i = 0; i < NUM_PKT_STATS; i++) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[i + + NUM_MAIN_STATS + + NUM_PORT_STATS]); + } else + for (i = 0; i < NUM_MAIN_STATS + NUM_PORT_STATS; i++) { + if ((priv->stats_bitmap >> i) & 1) { + strcpy(data + + (index++) * ETH_GSTRING_LEN, + main_strings[i]); + } + if (!(priv->stats_bitmap >> i)) + break; + } for (i = 0; i < priv->tx_ring_num; i++) { sprintf(data + (index++) * ETH_GSTRING_LEN, "tx%d_packets", i); @@ -264,9 +301,6 @@ static void mlx4_en_get_strings(struct net_device *dev, sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); } - for (i = 0; i< NUM_PKT_STATS; i++) - strcpy(data + (index++) * ETH_GSTRING_LEN, - main_strings[i + NUM_MAIN_STATS + NUM_PORT_STATS]); break; } } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index be3f4156aaab..467ae5824875 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -702,6 +702,8 @@ int mlx4_en_start_port(struct net_device *dev) /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->mcast_task); + mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap); + priv->port_up = true; netif_tx_start_all_queues(dev); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index f4d189a1290e..35f08840813c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -476,6 +476,7 @@ struct mlx4_en_priv { struct mlx4_en_perf_stats pstats; struct mlx4_en_pkt_stats pkstats; struct mlx4_en_port_stats port_stats; + u64 stats_bitmap; char *mc_addrs; int mc_addrs_cnt; struct mlx4_en_stat_out_mbox hw_stats; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 1a551d69ddcb..f44ae555bf43 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -44,6 +44,11 @@ #define MLX4_VLAN_VALID (1u << 31) #define MLX4_VLAN_MASK 0xfff +#define MLX4_STATS_TRAFFIC_COUNTERS_MASK 0xfULL +#define MLX4_STATS_TRAFFIC_DROPS_MASK 0xc0ULL +#define MLX4_STATS_ERROR_COUNTERS_MASK 0x1ffc30ULL +#define MLX4_STATS_PORT_COUNTERS_MASK 0x1fe00000ULL + void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table) { int i; @@ -903,3 +908,19 @@ int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, return mlx4_common_dump_eth_stats(dev, slave, vhcr->in_modifier, outbox); } + +void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap) +{ + if (!mlx4_is_mfunc(dev)) { + *stats_bitmap = 0; + return; + } + + *stats_bitmap = (MLX4_STATS_TRAFFIC_COUNTERS_MASK | + MLX4_STATS_TRAFFIC_DROPS_MASK | + MLX4_STATS_PORT_COUNTERS_MASK); + + if (mlx4_is_master(dev)) + *stats_bitmap |= MLX4_STATS_ERROR_COUNTERS_MASK; +} +EXPORT_SYMBOL(mlx4_set_stats_bitmap); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 5c4fe8e5bfe5..aea61905499b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -621,6 +621,7 @@ void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac); int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac); int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn); void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn); +void mlx4_set_stats_bitmap(struct mlx4_dev *dev, u64 *stats_bitmap); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); From eb41049f2f5eee1525fe1c699e6b1f03c8c51532 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 19 Jan 2012 09:45:19 +0000 Subject: [PATCH 31/67] mlx4_core: removed function index from vf. The Virtual Functions should not be aware their function number. Signed-off-by: Marcel Apfelbaum Reviewed-by: Jack Morgenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cq.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/eq.c | 4 +-- drivers/net/ethernet/mellanox/mlx4/fw.c | 7 ---- drivers/net/ethernet/mellanox/mlx4/fw.h | 1 - drivers/net/ethernet/mellanox/mlx4/main.c | 1 - drivers/net/ethernet/mellanox/mlx4/mr.c | 2 +- drivers/net/ethernet/mellanox/mlx4/pd.c | 3 +- drivers/net/ethernet/mellanox/mlx4/qp.c | 2 +- .../ethernet/mellanox/mlx4/resource_tracker.c | 35 ------------------- drivers/net/ethernet/mellanox/mlx4/srq.c | 2 +- 10 files changed, 8 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c index 475f9d6af955..7e64033d7de3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/cq.c @@ -96,7 +96,7 @@ void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type) static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int cq_num) { - return mlx4_cmd(dev, mailbox->dma | dev->caps.function, cq_num, 0, + return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } @@ -111,7 +111,7 @@ static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int cq_num) { - return mlx4_cmd_box(dev, dev->caps.function, mailbox ? mailbox->dma : 0, + return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num, mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 1e9b55eb7217..e163de62604b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -546,7 +546,7 @@ static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap, static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int eq_num) { - return mlx4_cmd(dev, mailbox->dma | dev->caps.function, eq_num, 0, + return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } @@ -554,7 +554,7 @@ static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int eq_num) { - return mlx4_cmd_box(dev, dev->caps.function, mailbox->dma, eq_num, + return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index a424a19280cc..8a21e10952ea 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -158,7 +158,6 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS_OFFSET 0x0 #define QUERY_FUNC_CAP_NUM_PORTS_OFFSET 0x1 -#define QUERY_FUNC_CAP_FUNCTION_OFFSET 0x3 #define QUERY_FUNC_CAP_PF_BHVR_OFFSET 0x4 #define QUERY_FUNC_CAP_QP_QUOTA_OFFSET 0x10 #define QUERY_FUNC_CAP_CQ_QUOTA_OFFSET 0x14 @@ -182,9 +181,6 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, field = 1 << 7; /* enable only ethernet interface */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS_OFFSET); - field = slave; - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FUNCTION_OFFSET); - field = dev->caps.num_ports; MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); @@ -249,9 +245,6 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) goto out; } - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FUNCTION_OFFSET); - func_cap->function = field; - MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); func_cap->num_ports = field; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 119e0cc9fab3..e1a5fa56bcbc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -119,7 +119,6 @@ struct mlx4_dev_cap { }; struct mlx4_func_cap { - u8 function; u8 num_ports; u8 flags; u32 pf_context_behaviour; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 6bb62c580e2d..91c8bc953f3f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -471,7 +471,6 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENOSYS; } - dev->caps.function = func_cap.function; dev->caps.num_ports = func_cap.num_ports; dev->caps.num_qps = func_cap.qp_quota; dev->caps.num_srqs = func_cap.srq_quota; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 01df5567e16e..8deeef98280c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -291,7 +291,7 @@ static u32 key_to_hw_index(u32 key) static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int mpt_index) { - return mlx4_cmd(dev, mailbox->dma | dev->caps.function , mpt_index, + return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 5c9a54df17ab..db4746d0dca7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -52,8 +52,7 @@ int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn) *pdn = mlx4_bitmap_alloc(&priv->pd_bitmap); if (*pdn == -1) return -ENOMEM; - if (mlx4_is_mfunc(dev)) - *pdn |= (dev->caps.function + 1) << NOT_MASKED_PD_BITS; + return 0; } EXPORT_SYMBOL_GPL(mlx4_pd_alloc); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 6b03ac8b9002..738f950a1ce5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -162,7 +162,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn = cpu_to_be32(qp->qpn); - ret = mlx4_cmd(dev, mailbox->dma | dev->caps.function, + ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31), new_state == MLX4_QP_STATE_RST ? 2 : 0, op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index ed20751a057d..a30cf197a358 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1561,11 +1561,6 @@ static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt) return be32_to_cpu(mpt->mtt_sz); } -static int mr_get_pdn(struct mlx4_mpt_entry *mpt) -{ - return be32_to_cpu(mpt->pd_flags) & 0xffffff; -} - static int qp_get_mtt_addr(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8; @@ -1602,16 +1597,6 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc) return total_pages; } -static int qp_get_pdn(struct mlx4_qp_context *qpc) -{ - return be32_to_cpu(qpc->pd) & 0xffffff; -} - -static int pdn2slave(int pdn) -{ - return (pdn >> NOT_MASKED_PD_BITS) - 1; -} - static int check_mtt_range(struct mlx4_dev *dev, int slave, int start, int size, struct res_mtt *mtt) { @@ -1656,11 +1641,6 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, mpt->mtt = mtt; } - if (pdn2slave(mr_get_pdn(inbox->buf)) != slave) { - err = -EPERM; - goto ex_put; - } - err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put; @@ -1792,11 +1772,6 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, if (err) goto ex_put_mtt; - if (pdn2slave(qp_get_pdn(qpc)) != slave) { - err = -EPERM; - goto ex_put_mtt; - } - err = get_res(dev, slave, rcqn, RES_CQ, &rcq); if (err) goto ex_put_mtt; @@ -2289,11 +2264,6 @@ int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave, return err; } -static int srq_get_pdn(struct mlx4_srq_context *srqc) -{ - return be32_to_cpu(srqc->pd) & 0xffffff; -} - static int srq_get_mtt_size(struct mlx4_srq_context *srqc) { int log_srq_size = (be32_to_cpu(srqc->state_logsize_srqn) >> 24) & 0xf; @@ -2333,11 +2303,6 @@ int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave, if (err) goto ex_put_mtt; - if (pdn2slave(srq_get_pdn(srqc)) != slave) { - err = -EPERM; - goto ex_put_mtt; - } - err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_mtt; diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 2823fffc6383..feda6c00829f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -67,7 +67,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox, int srq_num) { - return mlx4_cmd(dev, mailbox->dma | dev->caps.function, srq_num, 0, + return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } From 9fd7a1e147bd41fb999a58da8a8826b9f3415384 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 19 Jan 2012 09:45:31 +0000 Subject: [PATCH 32/67] mlx4_core: Fix mtt profile issue Num mtts from profile is really the number of mtt segments. Thus, in make profile, to get the proper number of MTT entries, must multiply num_mtts by mtts per segment. Signed-off-by: Marcel Apfelbaum Reviewed-by: Jack Morgenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- drivers/net/ethernet/mellanox/mlx4/profile.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 91c8bc953f3f..678558b502fc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -108,7 +108,7 @@ static struct mlx4_profile default_profile = { .num_cq = 1 << 16, .num_mcg = 1 << 13, .num_mpt = 1 << 19, - .num_mtt = 1 << 20, + .num_mtt = 1 << 20, /* It is really num mtt segements */ }; static int log_num_mac = 7; diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c index 66f91ca7a7c6..1129677daa62 100644 --- a/drivers/net/ethernet/mellanox/mlx4/profile.c +++ b/drivers/net/ethernet/mellanox/mlx4/profile.c @@ -110,7 +110,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_EQ].num = min_t(unsigned, dev_cap->max_eqs, MAX_MSIX); profile[MLX4_RES_DMPT].num = request->num_mpt; profile[MLX4_RES_CMPT].num = MLX4_NUM_CMPTS; - profile[MLX4_RES_MTT].num = request->num_mtt; + profile[MLX4_RES_MTT].num = request->num_mtt * (1 << log_mtts_per_seg); profile[MLX4_RES_MCG].num = request->num_mcg; for (i = 0; i < MLX4_RES_NUM; ++i) { From 803143fbda719106017309d551d082e4b4e7c8c3 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Thu, 19 Jan 2012 09:45:46 +0000 Subject: [PATCH 33/67] mlx4_core: map async events to arbitrary slave eqs Slave async events were mapped to single eq. This patch fixes this issue, so the slaves can map the async events to any eq. Signed-off-by: Marcel Apfelbaum Reviewed-by: Jack Morgenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 ++++++++- drivers/net/ethernet/mellanox/mlx4/eq.c | 17 +++++++---------- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 8 +++++--- .../ethernet/mellanox/mlx4/resource_tracker.c | 4 ++-- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 978f593094c0..405e6ac3faf6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1247,6 +1247,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, u32 reply; u32 slave_status = 0; u8 is_going_down = 0; + int i; slave_state[slave].comm_toggle ^= 1; reply = (u32) slave_state[slave].comm_toggle << 31; @@ -1258,6 +1259,10 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (cmd == MLX4_COMM_CMD_RESET) { mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; + for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { + slave_state[slave].event_eq[i].eqn = -1; + slave_state[slave].event_eq[i].token = 0; + } /*check if we are in the middle of FLR process, if so return "retry" status to the slave*/ if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) { @@ -1452,7 +1457,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_state *s_state; - int i, err, port; + int i, j, err, port; priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, &priv->mfunc.vhcr_dma, @@ -1485,6 +1490,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) for (i = 0; i < dev->num_slaves; ++i) { s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; + for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) + s_state->event_eq[j].eqn = -1; __raw_writel((__force u32) 0, &priv->mfunc.comm[i].slave_write); __raw_writel((__force u32) 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index e163de62604b..55d7bd4e210a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -513,25 +513,22 @@ int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave, { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq_info *event_eq = - &priv->mfunc.master.slave_state[slave].event_eq; + priv->mfunc.master.slave_state[slave].event_eq; u32 in_modifier = vhcr->in_modifier; u32 eqn = in_modifier & 0x1FF; u64 in_param = vhcr->in_param; int err = 0; + int i; if (slave == dev->caps.function) err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn, 0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); - if (!err) { - if (in_modifier >> 31) { - /* unmap */ - event_eq->event_type &= ~in_param; - } else { - event_eq->eqn = eqn; - event_eq->event_type = in_param; - } - } + if (!err) + for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) + if (in_param & (1LL << i)) + event_eq[i].eqn = in_modifier >> 31 ? -1 : eqn; + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index a80121a2b519..c92269f8c057 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -388,9 +388,8 @@ struct mlx4_slave_eqe { }; struct mlx4_slave_event_eq_info { - u32 eqn; + int eqn; u16 token; - u64 event_type; }; struct mlx4_profile { @@ -449,6 +448,8 @@ struct mlx4_steer_index { struct list_head duplicates; }; +#define MLX4_EVENT_TYPES_NUM 64 + struct mlx4_slave_state { u8 comm_toggle; u8 last_cmd; @@ -461,7 +462,8 @@ struct mlx4_slave_state { struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES]; struct list_head mcast_filters[MLX4_MAX_PORTS + 1]; struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1]; - struct mlx4_slave_event_eq_info event_eq; + /* event type to eq number lookup */ + struct mlx4_slave_event_eq_info event_eq[MLX4_EVENT_TYPES_NUM]; u16 eq_pi; u16 eq_ci; spinlock_t lock; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index a30cf197a358..dcd819bfb2f0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2023,10 +2023,10 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe) if (!priv->mfunc.master.slave_state) return -EINVAL; - event_eq = &priv->mfunc.master.slave_state[slave].event_eq; + event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type]; /* Create the event only if the slave is registered */ - if ((event_eq->event_type & (1 << eqe->type)) == 0) + if (event_eq->eqn < 0) return 0; mutex_lock(&priv->mfunc.master.gen_eqe_mutex[slave]); From a42b4799c683723e8c464de4026af085b2ebd5fa Mon Sep 17 00:00:00 2001 From: Vijay Subramanian Date: Thu, 19 Jan 2012 10:20:59 +0000 Subject: [PATCH 34/67] netem: Fix off-by-one bug in reordering With netem reordering, a gap of N is supposed to reorder every Nth packet with given reorder probability. However, the code currently skips N packets and reorders every (N+1)th packet. Signed-off-by: Vijay Subramanian Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index e7e1d0b57b3d..2776012132ea 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -419,7 +419,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) cb = netem_skb_cb(skb); if (q->gap == 0 || /* not doing reordering */ - q->counter < q->gap || /* inside last reordering gap */ + q->counter < q->gap - 1 || /* inside last reordering gap */ q->reorder < get_crandom(&q->reorder_cor)) { psched_time_t now; psched_tdiff_t delay; From 19f9ad789396fe5b4fe0302344b3acfa07cb6772 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 19 Jan 2012 14:35:25 +0000 Subject: [PATCH 35/67] skge: don't assert carrier until link is up Skge device would assert carrier (link up) as soon as network device open was called, rather than waiting until PHY has detected link. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 18a87a57fc0a..f580f0535bb4 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2576,6 +2576,7 @@ static int skge_up(struct net_device *dev) } /* Initialize MAC */ + netif_carrier_off(dev); spin_lock_bh(&hw->phy_lock); if (is_genesis(hw)) genesis_mac_init(hw, port); From d0249e44432aa0ffcf710b64449b8eaa3722547e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 19 Jan 2012 14:37:18 +0000 Subject: [PATCH 36/67] skge: check for PCI dma mapping errors Driver should check for mapping errors. Machines with limited DMA maps may return an error when a PCI map is requested (not an issue on standard x86). Also use upper/lower 32 bits macros for clarity. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 71 +++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index f580f0535bb4..299c33bd5345 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -931,17 +931,20 @@ static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u32 base) } /* Allocate and setup a new buffer for receiving */ -static void skge_rx_setup(struct skge_port *skge, struct skge_element *e, - struct sk_buff *skb, unsigned int bufsize) +static int skge_rx_setup(struct pci_dev *pdev, + struct skge_element *e, + struct sk_buff *skb, unsigned int bufsize) { struct skge_rx_desc *rd = e->desc; - u64 map; + dma_addr_t map; - map = pci_map_single(skge->hw->pdev, skb->data, bufsize, + map = pci_map_single(pdev, skb->data, bufsize, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(pdev, map)) + goto mapping_error; - rd->dma_lo = map; - rd->dma_hi = map >> 32; + rd->dma_lo = lower_32_bits(map); + rd->dma_hi = upper_32_bits(map); e->skb = skb; rd->csum1_start = ETH_HLEN; rd->csum2_start = ETH_HLEN; @@ -953,6 +956,13 @@ static void skge_rx_setup(struct skge_port *skge, struct skge_element *e, rd->control = BMU_OWN | BMU_STF | BMU_IRQ_EOF | BMU_TCP_CHECK | bufsize; dma_unmap_addr_set(e, mapaddr, map); dma_unmap_len_set(e, maplen, bufsize); + return 0; + +mapping_error: + if (net_ratelimit()) + dev_warn(&pdev->dev, "%s: rx mapping error\n", + skb->dev->name); + return -EIO; } /* Resume receiving using existing skb, @@ -1014,7 +1024,11 @@ static int skge_rx_fill(struct net_device *dev) return -ENOMEM; skb_reserve(skb, NET_IP_ALIGN); - skge_rx_setup(skge, e, skb, skge->rx_buf_size); + if (skge_rx_setup(skge->hw->pdev, e, skb, skge->rx_buf_size)) { + kfree_skb(skb); + return -ENOMEM; + } + } while ((e = e->next) != ring->start); ring->to_clean = ring->start; @@ -2729,7 +2743,7 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, struct skge_tx_desc *td; int i; u32 control, len; - u64 map; + dma_addr_t map; if (skb_padto(skb, ETH_ZLEN)) return NETDEV_TX_OK; @@ -2743,11 +2757,14 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, e->skb = skb; len = skb_headlen(skb); map = pci_map_single(hw->pdev, skb->data, len, PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(hw->pdev, map)) + goto mapping_error; + dma_unmap_addr_set(e, mapaddr, map); dma_unmap_len_set(e, maplen, len); - td->dma_lo = map; - td->dma_hi = map >> 32; + td->dma_lo = lower_32_bits(map); + td->dma_hi = upper_32_bits(map); if (skb->ip_summed == CHECKSUM_PARTIAL) { const int offset = skb_checksum_start_offset(skb); @@ -2778,14 +2795,16 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, map = skb_frag_dma_map(&hw->pdev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); + if (dma_mapping_error(&hw->pdev->dev, map)) + goto mapping_unwind; e = e->next; e->skb = skb; tf = e->desc; BUG_ON(tf->control & BMU_OWN); - tf->dma_lo = map; - tf->dma_hi = (u64) map >> 32; + tf->dma_lo = lower_32_bits(map); + tf->dma_hi = upper_32_bits(map); dma_unmap_addr_set(e, mapaddr, map); dma_unmap_len_set(e, maplen, skb_frag_size(frag)); @@ -2813,6 +2832,28 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, } return NETDEV_TX_OK; + +mapping_unwind: + /* unroll any pages that were already mapped. */ + if (e != skge->tx_ring.to_use) { + struct skge_element *u; + + for (u = skge->tx_ring.to_use->next; u != e; u = u->next) + pci_unmap_page(hw->pdev, dma_unmap_addr(u, mapaddr), + dma_unmap_len(u, maplen), + PCI_DMA_TODEVICE); + e = skge->tx_ring.to_use; + } + /* undo the mapping for the skb header */ + pci_unmap_single(hw->pdev, dma_unmap_addr(e, mapaddr), + dma_unmap_len(e, maplen), + PCI_DMA_TODEVICE); +mapping_error: + /* mapping error causes error message and packet to be discarded. */ + if (net_ratelimit()) + dev_warn(&hw->pdev->dev, "%s: tx mapping error\n", dev->name); + dev_kfree_skb(skb); + return NETDEV_TX_OK; } @@ -3060,13 +3101,17 @@ static struct sk_buff *skge_rx_get(struct net_device *dev, if (!nskb) goto resubmit; + if (unlikely(skge_rx_setup(skge->hw->pdev, e, nskb, skge->rx_buf_size))) { + dev_kfree_skb(nskb); + goto resubmit; + } + pci_unmap_single(skge->hw->pdev, dma_unmap_addr(e, mapaddr), dma_unmap_len(e, maplen), PCI_DMA_FROMDEVICE); skb = e->skb; prefetch(skb->data); - skge_rx_setup(skge, e, nskb, skge->rx_buf_size); } skb_put(skb, len); From 974c12360dfe6ab01201fe9e708e7755c413f8b6 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 19 Jan 2012 14:42:21 +0000 Subject: [PATCH 37/67] tcp: detect loss above high_seq in recovery Correctly implement a loss detection heuristic: New sequences (above high_seq) sent during the fast recovery are deemed lost when higher sequences are SACKed. Current code does not catch these losses, because tcp_mark_head_lost() does not check packets beyond high_seq. The fix is straight-forward by checking packets until the highest sacked packet. In addition, all the FLAG_DATA_LOST logic are in-effective and redundant and can be removed. Update the loss heuristic comments. The algorithm above is documented as heuristic B, but it is redundant too because heuristic A already covers B. Note that this change only marks some forward-retransmitted packets LOST. It does NOT forbid TCP performing further CWR on new losses. A potential follow-up patch under preparation is to perform another CWR on "new" losses such as 1) sequence above high_seq is lost (by resetting high_seq to snd_nxt) 2) retransmission is lost. Signed-off-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/snmp.h | 1 - net/ipv4/proc.c | 1 - net/ipv4/tcp_input.c | 41 +++++++++++++++-------------------------- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git a/include/linux/snmp.h b/include/linux/snmp.h index e16557a357e5..c1241c428179 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -192,7 +192,6 @@ enum LINUX_MIB_TCPPARTIALUNDO, /* TCPPartialUndo */ LINUX_MIB_TCPDSACKUNDO, /* TCPDSACKUndo */ LINUX_MIB_TCPLOSSUNDO, /* TCPLossUndo */ - LINUX_MIB_TCPLOSS, /* TCPLoss */ LINUX_MIB_TCPLOSTRETRANSMIT, /* TCPLostRetransmit */ LINUX_MIB_TCPRENOFAILURES, /* TCPRenoFailures */ LINUX_MIB_TCPSACKFAILURES, /* TCPSackFailures */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3569d8ecaeac..6afc807ee2ad 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -216,7 +216,6 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPPartialUndo", LINUX_MIB_TCPPARTIALUNDO), SNMP_MIB_ITEM("TCPDSACKUndo", LINUX_MIB_TCPDSACKUNDO), SNMP_MIB_ITEM("TCPLossUndo", LINUX_MIB_TCPLOSSUNDO), - SNMP_MIB_ITEM("TCPLoss", LINUX_MIB_TCPLOSS), SNMP_MIB_ITEM("TCPLostRetransmit", LINUX_MIB_TCPLOSTRETRANSMIT), SNMP_MIB_ITEM("TCPRenoFailures", LINUX_MIB_TCPRENOFAILURES), SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2877c3e09587..976034f82320 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -105,7 +105,6 @@ int sysctl_tcp_abc __read_mostly; #define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ -#define FLAG_DATA_LOST 0x80 /* SACK detected data lossage. */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ @@ -1040,13 +1039,11 @@ static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, * These 6 states form finite state machine, controlled by the following events: * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue()) * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue()) - * 3. Loss detection event of one of three flavors: + * 3. Loss detection event of two flavors: * A. Scoreboard estimator decided the packet is lost. * A'. Reno "three dupacks" marks head of queue lost. - * A''. Its FACK modfication, head until snd.fack is lost. - * B. SACK arrives sacking data transmitted after never retransmitted - * hole was sent out. - * C. SACK arrives sacking SND.NXT at the moment, when the + * A''. Its FACK modification, head until snd.fack is lost. + * B. SACK arrives sacking SND.NXT at the moment, when the * segment was retransmitted. * 4. D-SACK added new rule: D-SACK changes any tag to S. * @@ -1153,7 +1150,7 @@ static int tcp_is_sackblock_valid(struct tcp_sock *tp, int is_dsack, } /* Check for lost retransmit. This superb idea is borrowed from "ratehalving". - * Event "C". Later note: FACK people cheated me again 8), we have to account + * Event "B". Later note: FACK people cheated me again 8), we have to account * for reordering! Ugly, but should help. * * Search retransmitted skbs from write_queue that were sent when snd_nxt was @@ -1844,10 +1841,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, if (found_dup_sack && ((i + 1) == first_sack_index)) next_dup = &sp[i + 1]; - /* Event "B" in the comment above. */ - if (after(end_seq, tp->high_seq)) - state.flag |= FLAG_DATA_LOST; - /* Skip too early cached blocks */ while (tcp_sack_cache_ok(tp, cache) && !before(start_seq, cache->end_seq)) @@ -2515,8 +2508,11 @@ static void tcp_timeout_skbs(struct sock *sk) tcp_verify_left_out(tp); } -/* Mark head of queue up as lost. With RFC3517 SACK, the packets is - * is against sacked "cnt", otherwise it's against facked "cnt" +/* Detect loss in event "A" above by marking head of queue up as lost. + * For FACK or non-SACK(Reno) senders, the first "packets" number of segments + * are considered lost. For RFC3517 SACK, a segment is considered lost if it + * has at least tp->reordering SACKed seqments above it; "packets" refers to + * the maximum SACKed segments to pass before reaching this limit. */ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) { @@ -2525,6 +2521,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) int cnt, oldcnt; int err; unsigned int mss; + /* Use SACK to deduce losses of new sequences sent during recovery */ + const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; WARN_ON(packets > tp->packets_out); if (tp->lost_skb_hint) { @@ -2546,7 +2544,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; - if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) + if (after(TCP_SKB_CB(skb)->end_seq, loss_high)) break; oldcnt = cnt; @@ -3033,19 +3031,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (tcp_check_sack_reneging(sk, flag)) return; - /* C. Process data loss notification, provided it is valid. */ - if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) && - before(tp->snd_una, tp->high_seq) && - icsk->icsk_ca_state != TCP_CA_Open && - tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS); - } - - /* D. Check consistency of the current state. */ + /* C. Check consistency of the current state. */ tcp_verify_left_out(tp); - /* E. Check state exit conditions. State can be terminated + /* D. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (icsk->icsk_ca_state == TCP_CA_Open) { WARN_ON(tp->retrans_out != 0); @@ -3077,7 +3066,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, } } - /* F. Process state. */ + /* E. Process state. */ switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (!(flag & FLAG_SND_UNA_ADVANCED)) { From bf0813bd282f6ded0a5efca3db238287f7a3dbe8 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 19 Jan 2012 15:40:06 +0000 Subject: [PATCH 38/67] pktgen: Fix unsigned function that is returning negative vals Every call to num_args() immediately checks the return value for less than zero, as it will return -EFAULT for a failed get_user() call. So it makes no sense for the function to be declared as an unsigned long. Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/core/pktgen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 65f80c7b1656..4d8ce93cd503 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -767,8 +767,8 @@ static int count_trail_chars(const char __user * user_buffer, return i; } -static unsigned long num_arg(const char __user * user_buffer, - unsigned long maxlen, unsigned long *num) +static long num_arg(const char __user *user_buffer, unsigned long maxlen, + unsigned long *num) { int i; *num = 0; From df505eb804d5221c3164ebecd1286cb7fc7f49ba Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Thu, 19 Jan 2012 20:34:04 +0000 Subject: [PATCH 39/67] be2net: create RSS rings even in multi-channel configs Currently RSS rings are not created in a multi-channel config. RSS rings can be created on one (out of four) interfaces per port in a multi-channel config. Doing this insulates the driver from a FW bug wherin multi-channel config is wrongly reported even when not enabled. This also helps performance in a multi-channel config, as one interface per port gets RSS rings. Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a6bcdb5cd2be..e703d64434f8 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1786,8 +1786,7 @@ static void be_rx_queues_destroy(struct be_adapter *adapter) static u32 be_num_rxqs_want(struct be_adapter *adapter) { if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) && - !sriov_enabled(adapter) && be_physfn(adapter) && - !be_is_mc(adapter)) { + !sriov_enabled(adapter) && be_physfn(adapter)) { return 1 + MAX_RSS_QS; /* one default non-RSS queue */ } else { dev_warn(&adapter->pdev->dev, From 8a622e71f58ec9f092fc99eacae0e6cf14f6e742 Mon Sep 17 00:00:00 2001 From: shawnlu Date: Fri, 20 Jan 2012 12:22:04 +0000 Subject: [PATCH 40/67] tcp: md5: using remote adress for md5 lookup in rst packet md5 key is added in socket through remote address. remote address should be used in finding md5 key when sending out reset packet. Signed-off-by: shawnlu Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1eb4ad57670e..337ba4cca052 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -631,7 +631,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); #ifdef CONFIG_TCP_MD5SIG - key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; + key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL; if (key) { rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 906c7ca43542..3edd05ae4388 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1083,7 +1083,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_TCP_MD5SIG if (sk) - key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr); + key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr); #endif if (th->ack) From af58f1d62853cd883c1fa26556fb470f05878f21 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 21 Jan 2012 09:03:04 +0000 Subject: [PATCH 41/67] kernel-doc: fix new warning in net/phy/mdio_bus.c Fix new kernel-doc warning: Warning(drivers/net/phy/mdio_bus.c:49): No description found for parameter 'size' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 88cc5db9affd..8985cc62cf41 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -38,12 +38,11 @@ /** * mdiobus_alloc_size - allocate a mii_bus structure + * @size: extra amount of memory to allocate for private storage. + * If non-zero, then bus->priv is points to that memory. * * Description: called by a bus driver to allocate an mii_bus * structure to fill in. - * - * 'size' is an an extra amount of memory to allocate for private storage. - * If non-zero, then bus->priv is points to that memory. */ struct mii_bus *mdiobus_alloc_size(size_t size) { From 1a3bc369ba547c11ca8b3ed079d7584f27499e70 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 21 Jan 2012 09:03:10 +0000 Subject: [PATCH 42/67] kernel-doc: fix new warning in net/sock.h Fix new kernel-doc warning: Warning(include/net/sock.h:372): No description found for parameter 'sk_cgrp_prioidx' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/net/sock.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/sock.h b/include/net/sock.h index bb972d254dff..97fc0ad47da0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -226,6 +226,7 @@ struct cg_proto; * @sk_ack_backlog: current listen backlog * @sk_max_ack_backlog: listen backlog set in listen() * @sk_priority: %SO_PRIORITY setting + * @sk_cgrp_prioidx: socket group's priority map index * @sk_type: socket type (%SOCK_STREAM, etc) * @sk_protocol: which protocol this socket belongs in this network family * @sk_peer_pid: &struct pid for this socket's peer From 376be5ff8a6a36efadd131860cf26841f366d44c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 20 Jan 2012 04:57:14 +0000 Subject: [PATCH 43/67] net: fix socket memcg build with !CONFIG_NET There is still a build bug with the sock memcg code, that triggers with !CONFIG_NET, that survived my series of randconfig builds. Signed-off-by: Glauber Costa Reported-by: Randy Dunlap CC: Hiroyouki Kamezawa Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- mm/memcontrol.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 97fc0ad47da0..0e7a9b05f92b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -922,7 +922,7 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) extern struct jump_label_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3dbff4dcde35..c3688dfd9a5f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -379,7 +379,7 @@ static void mem_cgroup_put(struct mem_cgroup *memcg); static bool mem_cgroup_is_root(struct mem_cgroup *memcg); void sock_update_memcg(struct sock *sk) { - if (static_branch(&memcg_socket_limit_enabled)) { + if (mem_cgroup_sockets_enabled) { struct mem_cgroup *memcg; BUG_ON(!sk->sk_prot->proto_cgroup); @@ -411,7 +411,7 @@ EXPORT_SYMBOL(sock_update_memcg); void sock_release_memcg(struct sock *sk) { - if (static_branch(&memcg_socket_limit_enabled) && sk->sk_cgrp) { + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) { struct mem_cgroup *memcg; WARN_ON(!sk->sk_cgrp->memcg); memcg = sk->sk_cgrp->memcg; From 8cfd14ad1eb52e44cb1fe7b47a68126e45e04026 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 20 Jan 2012 04:57:15 +0000 Subject: [PATCH 44/67] cgroup: make sure memcg margin is 0 when over limit For the memcg sock code, we'll need to register allocations that are temporarily over limit. Let's make sure that margin is 0 in this case. I am keeping this as a separate patch, so that if any weirdness interaction appears in the future, we can now exactly what caused it. Suggested by Johannes Weiner Signed-off-by: Glauber Costa CC: KAMEZAWA Hiroyuki CC: Johannes Weiner CC: Michal Hocko CC: Tejun Heo CC: Li Zefan Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/res_counter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index c9d625ca659e..d06d014afda6 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -142,7 +142,10 @@ static inline unsigned long long res_counter_margin(struct res_counter *cnt) unsigned long flags; spin_lock_irqsave(&cnt->lock, flags); - margin = cnt->limit - cnt->usage; + if (cnt->limit > cnt->usage) + margin = cnt->limit - cnt->usage; + else + margin = 0; spin_unlock_irqrestore(&cnt->lock, flags); return margin; } From 0e90b31f4ba77027a7c21cbfc66404df0851ca21 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Fri, 20 Jan 2012 04:57:16 +0000 Subject: [PATCH 45/67] net: introduce res_counter_charge_nofail() for socket allocations There is a case in __sk_mem_schedule(), where an allocation is beyond the maximum, but yet we are allowed to proceed. It happens under the following condition: sk->sk_wmem_queued + size >= sk->sk_sndbuf The network code won't revert the allocation in this case, meaning that at some point later it'll try to do it. Since this is never communicated to the underlying res_counter code, there is an inbalance in res_counter uncharge operation. I see two ways of fixing this: 1) storing the information about those allocations somewhere in memcg, and then deducting from that first, before we start draining the res_counter, 2) providing a slightly different allocation function for the res_counter, that matches the original behavior of the network code more closely. I decided to go for #2 here, believing it to be more elegant, since #1 would require us to do basically that, but in a more obscure way. Signed-off-by: Glauber Costa Cc: KAMEZAWA Hiroyuki Cc: Johannes Weiner Cc: Michal Hocko CC: Tejun Heo CC: Li Zefan CC: Laurent Chavey Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/res_counter.h | 6 ++++++ include/net/sock.h | 10 ++++------ kernel/res_counter.c | 25 +++++++++++++++++++++++++ net/core/sock.c | 4 ++-- 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index d06d014afda6..da81af086eaf 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -109,12 +109,18 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); * * returns 0 on success and <0 if the counter->usage will exceed the * counter->limit _locked call expects the counter->lock to be taken + * + * charge_nofail works the same, except that it charges the resource + * counter unconditionally, and returns < 0 if the after the current + * charge we are over limit. */ int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, unsigned long val, struct res_counter **limit_fail_at); +int __must_check res_counter_charge_nofail(struct res_counter *counter, + unsigned long val, struct res_counter **limit_fail_at); /* * uncharge - tell that some portion of the resource is released diff --git a/include/net/sock.h b/include/net/sock.h index 0e7a9b05f92b..4c69ac165e6b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1008,9 +1008,8 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot, struct res_counter *fail; int ret; - ret = res_counter_charge(prot->memory_allocated, - amt << PAGE_SHIFT, &fail); - + ret = res_counter_charge_nofail(prot->memory_allocated, + amt << PAGE_SHIFT, &fail); if (ret < 0) *parent_status = OVER_LIMIT; } @@ -1054,12 +1053,11 @@ sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status) } static inline void -sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status) +sk_memory_allocated_sub(struct sock *sk, int amt) { struct proto *prot = sk->sk_prot; - if (mem_cgroup_sockets_enabled && sk->sk_cgrp && - parent_status != OVER_LIMIT) /* Otherwise was uncharged already */ + if (mem_cgroup_sockets_enabled && sk->sk_cgrp) memcg_memory_allocated_sub(sk->sk_cgrp, amt); atomic_long_sub(amt, prot->memory_allocated); diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 6d269cce7aa1..d508363858b3 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -66,6 +66,31 @@ int res_counter_charge(struct res_counter *counter, unsigned long val, return ret; } +int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, + struct res_counter **limit_fail_at) +{ + int ret, r; + unsigned long flags; + struct res_counter *c; + + r = ret = 0; + *limit_fail_at = NULL; + local_irq_save(flags); + for (c = counter; c != NULL; c = c->parent) { + spin_lock(&c->lock); + r = res_counter_charge_locked(c, val); + if (r) + c->usage += val; + spin_unlock(&c->lock); + if (r < 0 && ret == 0) { + *limit_fail_at = c; + ret = r; + } + } + local_irq_restore(flags); + + return ret; +} void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) { if (WARN_ON(counter->usage < val)) diff --git a/net/core/sock.c b/net/core/sock.c index 5c5af9988f94..3e81fd2e3c75 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1827,7 +1827,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) /* Alas. Undo changes. */ sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM; - sk_memory_allocated_sub(sk, amt, parent_status); + sk_memory_allocated_sub(sk, amt); return 0; } @@ -1840,7 +1840,7 @@ EXPORT_SYMBOL(__sk_mem_schedule); void __sk_mem_reclaim(struct sock *sk) { sk_memory_allocated_sub(sk, - sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0); + sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT); sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1; if (sk_under_memory_pressure(sk) && From b1cc16b8e643096adb92bbcb76c6c4c564141c40 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 Jan 2012 14:45:14 -0500 Subject: [PATCH 46/67] bluetooth: hci: Fix type of "enable_hs" to bool. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: net/bluetooth/hci_core.c: In function ‘__check_enable_hs’: net/bluetooth/hci_core.c:2587:1: warning: return from incompatible pointer type [enabled by default] Signed-off-by: David S. Miller --- include/net/bluetooth/hci.h | 2 +- net/bluetooth/hci_core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5b2fed5eebf2..00596e816b4d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1388,6 +1388,6 @@ struct hci_inquiry_req { }; #define IREQ_CACHE_FLUSH 0x0001 -extern int enable_hs; +extern bool enable_hs; #endif /* __HCI_H */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 845da3ee56a0..9de93714213a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -55,7 +55,7 @@ #define AUTO_OFF_TIMEOUT 2000 -int enable_hs; +bool enable_hs; static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); From f80400a26a2e8bff541de12834a1134358bb6642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 22 Jan 2012 00:20:40 +0000 Subject: [PATCH 47/67] ethtool: allow ETHTOOL_GSSET_INFO for users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow ETHTOOL_GSSET_INFO ethtool ioctl() for unprivileged users. ETHTOOL_GSTRINGS is already allowed, but is unusable without this one. Signed-off-by: Michał Mirosław Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 921aa2b4b415..369b41894527 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1311,6 +1311,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCSUM: case ETHTOOL_GTXCSUM: case ETHTOOL_GSG: + case ETHTOOL_GSSET_INFO: case ETHTOOL_GSTRINGS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: From 4ec7ac1203bcf21f5e3d977c9818b1a56c9ef40d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jan 2012 05:38:59 +0000 Subject: [PATCH 48/67] macvlan: fix a possible use after free Commit bc416d9768 (macvlan: handle fragmented multicast frames) added a possible use after free in macvlan_handle_frame(), since ip_check_defrag() uses pskb_may_pull() : skb header can be reallocated. Signed-off-by: Eric Dumazet Cc: Ben Greear Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index f2f820c4b40a..9ea99217f116 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -173,6 +173,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb) skb = ip_check_defrag(skb, IP_DEFRAG_MACVLAN); if (!skb) return RX_HANDLER_CONSUMED; + eth = eth_hdr(skb); src = macvlan_hash_lookup(port, eth->h_source); if (!src) /* frame comes from an external address */ From 460a25cdaef1a2b6b8e14e371d868aa91b0e72e8 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 23 Jan 2012 07:31:51 +0000 Subject: [PATCH 49/67] bnx2x: credit-leakage fixup on vlan_mac_del_all Upon insertion of elements into the execution queue, it is validated that there are enough credits to support additional vlan-macs, and the credits are consumed. However, when removing a pending command in `bnx2x_vland_mac_del_all' the consumed credits are not released, which might cause leakage and eventually the inability to add new vlan-macs in certain scenarios. Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 44 ++++++++++++++++++- .../net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 11 +++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 5ac616093f9f..cb6339c35571 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -50,6 +50,7 @@ static inline void bnx2x_exe_queue_init(struct bnx2x *bp, int exe_len, union bnx2x_qable_obj *owner, exe_q_validate validate, + exe_q_remove remove, exe_q_optimize optimize, exe_q_execute exec, exe_q_get get) @@ -66,6 +67,7 @@ static inline void bnx2x_exe_queue_init(struct bnx2x *bp, /* Owner specific callbacks */ o->validate = validate; + o->remove = remove; o->optimize = optimize; o->execute = exec; o->get = get; @@ -1340,6 +1342,35 @@ static int bnx2x_validate_vlan_mac(struct bnx2x *bp, } } +static int bnx2x_remove_vlan_mac(struct bnx2x *bp, + union bnx2x_qable_obj *qo, + struct bnx2x_exeq_elem *elem) +{ + int rc = 0; + + /* If consumption wasn't required, nothing to do */ + if (test_bit(BNX2X_DONT_CONSUME_CAM_CREDIT, + &elem->cmd_data.vlan_mac.vlan_mac_flags)) + return 0; + + switch (elem->cmd_data.vlan_mac.cmd) { + case BNX2X_VLAN_MAC_ADD: + case BNX2X_VLAN_MAC_MOVE: + rc = qo->vlan_mac.put_credit(&qo->vlan_mac); + break; + case BNX2X_VLAN_MAC_DEL: + rc = qo->vlan_mac.get_credit(&qo->vlan_mac); + break; + default: + return -EINVAL; + } + + if (rc != true) + return -EINVAL; + + return 0; +} + /** * bnx2x_wait_vlan_mac - passivly wait for 5 seconds until all work completes. * @@ -1801,8 +1832,14 @@ static int bnx2x_vlan_mac_del_all(struct bnx2x *bp, list_for_each_entry_safe(exeq_pos, exeq_pos_n, &exeq->exe_queue, link) { if (exeq_pos->cmd_data.vlan_mac.vlan_mac_flags == - *vlan_mac_flags) + *vlan_mac_flags) { + rc = exeq->remove(bp, exeq->owner, exeq_pos); + if (rc) { + BNX2X_ERR("Failed to remove command\n"); + return rc; + } list_del(&exeq_pos->link); + } } spin_unlock_bh(&exeq->lock); @@ -1908,6 +1945,7 @@ void bnx2x_init_mac_obj(struct bnx2x *bp, bnx2x_exe_queue_init(bp, &mac_obj->exe_queue, 1, qable_obj, bnx2x_validate_vlan_mac, + bnx2x_remove_vlan_mac, bnx2x_optimize_vlan_mac, bnx2x_execute_vlan_mac, bnx2x_exeq_get_mac); @@ -1924,6 +1962,7 @@ void bnx2x_init_mac_obj(struct bnx2x *bp, bnx2x_exe_queue_init(bp, &mac_obj->exe_queue, CLASSIFY_RULES_COUNT, qable_obj, bnx2x_validate_vlan_mac, + bnx2x_remove_vlan_mac, bnx2x_optimize_vlan_mac, bnx2x_execute_vlan_mac, bnx2x_exeq_get_mac); @@ -1963,6 +2002,7 @@ void bnx2x_init_vlan_obj(struct bnx2x *bp, bnx2x_exe_queue_init(bp, &vlan_obj->exe_queue, CLASSIFY_RULES_COUNT, qable_obj, bnx2x_validate_vlan_mac, + bnx2x_remove_vlan_mac, bnx2x_optimize_vlan_mac, bnx2x_execute_vlan_mac, bnx2x_exeq_get_vlan); @@ -2009,6 +2049,7 @@ void bnx2x_init_vlan_mac_obj(struct bnx2x *bp, bnx2x_exe_queue_init(bp, &vlan_mac_obj->exe_queue, 1, qable_obj, bnx2x_validate_vlan_mac, + bnx2x_remove_vlan_mac, bnx2x_optimize_vlan_mac, bnx2x_execute_vlan_mac, bnx2x_exeq_get_vlan_mac); @@ -2025,6 +2066,7 @@ void bnx2x_init_vlan_mac_obj(struct bnx2x *bp, &vlan_mac_obj->exe_queue, CLASSIFY_RULES_COUNT, qable_obj, bnx2x_validate_vlan_mac, + bnx2x_remove_vlan_mac, bnx2x_optimize_vlan_mac, bnx2x_execute_vlan_mac, bnx2x_exeq_get_vlan_mac); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index 992308ff82e8..66da39f0c84a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -161,6 +161,10 @@ typedef int (*exe_q_validate)(struct bnx2x *bp, union bnx2x_qable_obj *o, struct bnx2x_exeq_elem *elem); +typedef int (*exe_q_remove)(struct bnx2x *bp, + union bnx2x_qable_obj *o, + struct bnx2x_exeq_elem *elem); + /** * @return positive is entry was optimized, 0 - if not, negative * in case of an error. @@ -203,11 +207,18 @@ struct bnx2x_exe_queue_obj { */ exe_q_validate validate; + /** + * Called before removing pending commands, cleaning allocated + * resources (e.g., credits from validate) + */ + exe_q_remove remove; /** * This will try to cancel the current pending commands list * considering the new command. * + * Returns the number of optimized commands or a negative error code + * * Must run under exe_queue->lock */ exe_q_optimize optimize; From d5e836329bd836d24b168004827532426cad2f39 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 23 Jan 2012 07:31:52 +0000 Subject: [PATCH 50/67] bnx2x: fixed ethtool statistics for MF modes Previosuly, in MF modes `ethtool -S' lacked some of the statistics which appeared in non-MF modes. This has been fixed. Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- .../ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 108 +++++++----------- 1 file changed, 42 insertions(+), 66 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index f99c6e312a5d..7d32e0059966 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -2121,18 +2121,16 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset) case ETH_SS_STATS: if (is_multi(bp)) { num_stats = bnx2x_num_stat_queues(bp) * - BNX2X_NUM_Q_STATS; - if (!IS_MF_MODE_STAT(bp)) - num_stats += BNX2X_NUM_STATS; - } else { - if (IS_MF_MODE_STAT(bp)) { - num_stats = 0; - for (i = 0; i < BNX2X_NUM_STATS; i++) - if (IS_FUNC_STAT(i)) - num_stats++; - } else - num_stats = BNX2X_NUM_STATS; - } + BNX2X_NUM_Q_STATS; + } else + num_stats = 0; + if (IS_MF_MODE_STAT(bp)) { + for (i = 0; i < BNX2X_NUM_STATS; i++) + if (IS_FUNC_STAT(i)) + num_stats++; + } else + num_stats += BNX2X_NUM_STATS; + return num_stats; case ETH_SS_TEST: @@ -2151,8 +2149,8 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf) switch (stringset) { case ETH_SS_STATS: + k = 0; if (is_multi(bp)) { - k = 0; for_each_eth_queue(bp, i) { memset(queue_name, 0, sizeof(queue_name)); sprintf(queue_name, "%d", i); @@ -2163,20 +2161,17 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf) queue_name); k += BNX2X_NUM_Q_STATS; } - if (IS_MF_MODE_STAT(bp)) - break; - for (j = 0; j < BNX2X_NUM_STATS; j++) - strcpy(buf + (k + j)*ETH_GSTRING_LEN, - bnx2x_stats_arr[j].string); - } else { - for (i = 0, j = 0; i < BNX2X_NUM_STATS; i++) { - if (IS_MF_MODE_STAT(bp) && IS_PORT_STAT(i)) - continue; - strcpy(buf + j*ETH_GSTRING_LEN, - bnx2x_stats_arr[i].string); - j++; - } } + + + for (i = 0, j = 0; i < BNX2X_NUM_STATS; i++) { + if (IS_MF_MODE_STAT(bp) && IS_PORT_STAT(i)) + continue; + strcpy(buf + (k + j)*ETH_GSTRING_LEN, + bnx2x_stats_arr[i].string); + j++; + } + break; case ETH_SS_TEST: @@ -2190,10 +2185,9 @@ static void bnx2x_get_ethtool_stats(struct net_device *dev, { struct bnx2x *bp = netdev_priv(dev); u32 *hw_stats, *offset; - int i, j, k; + int i, j, k = 0; if (is_multi(bp)) { - k = 0; for_each_eth_queue(bp, i) { hw_stats = (u32 *)&bp->fp[i].eth_q_stats; for (j = 0; j < BNX2X_NUM_Q_STATS; j++) { @@ -2214,46 +2208,28 @@ static void bnx2x_get_ethtool_stats(struct net_device *dev, } k += BNX2X_NUM_Q_STATS; } - if (IS_MF_MODE_STAT(bp)) - return; - hw_stats = (u32 *)&bp->eth_stats; - for (j = 0; j < BNX2X_NUM_STATS; j++) { - if (bnx2x_stats_arr[j].size == 0) { - /* skip this counter */ - buf[k + j] = 0; - continue; - } - offset = (hw_stats + bnx2x_stats_arr[j].offset); - if (bnx2x_stats_arr[j].size == 4) { - /* 4-byte counter */ - buf[k + j] = (u64) *offset; - continue; - } - /* 8-byte counter */ - buf[k + j] = HILO_U64(*offset, *(offset + 1)); - } - } else { - hw_stats = (u32 *)&bp->eth_stats; - for (i = 0, j = 0; i < BNX2X_NUM_STATS; i++) { - if (IS_MF_MODE_STAT(bp) && IS_PORT_STAT(i)) - continue; - if (bnx2x_stats_arr[i].size == 0) { - /* skip this counter */ - buf[j] = 0; - j++; - continue; - } - offset = (hw_stats + bnx2x_stats_arr[i].offset); - if (bnx2x_stats_arr[i].size == 4) { - /* 4-byte counter */ - buf[j] = (u64) *offset; - j++; - continue; - } - /* 8-byte counter */ - buf[j] = HILO_U64(*offset, *(offset + 1)); + } + + hw_stats = (u32 *)&bp->eth_stats; + for (i = 0, j = 0; i < BNX2X_NUM_STATS; i++) { + if (IS_MF_MODE_STAT(bp) && IS_PORT_STAT(i)) + continue; + if (bnx2x_stats_arr[i].size == 0) { + /* skip this counter */ + buf[k + j] = 0; j++; + continue; } + offset = (hw_stats + bnx2x_stats_arr[i].offset); + if (bnx2x_stats_arr[i].size == 4) { + /* 4-byte counter */ + buf[k + j] = (u64) *offset; + j++; + continue; + } + /* 8-byte counter */ + buf[k + j] = HILO_U64(*offset, *(offset + 1)); + j++; } } From b0700b1e6b9556aa99bb9bf6ad6d830ae38344c7 Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Mon, 23 Jan 2012 07:31:53 +0000 Subject: [PATCH 51/67] bnx2x: fix Big-Endianess in ethtool -t Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 7d32e0059966..31a8b38ab15e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1738,7 +1738,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) struct bnx2x_fp_txdata *txdata = &fp_tx->txdata[0]; u16 tx_start_idx, tx_idx; u16 rx_start_idx, rx_idx; - u16 pkt_prod, bd_prod, rx_comp_cons; + u16 pkt_prod, bd_prod; struct sw_tx_bd *tx_buf; struct eth_tx_start_bd *tx_start_bd; struct eth_tx_parse_bd_e1x *pbd_e1x = NULL; @@ -1873,8 +1873,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) if (rx_idx != rx_start_idx + num_pkts) goto test_loopback_exit; - rx_comp_cons = le16_to_cpu(fp_rx->rx_comp_cons); - cqe = &fp_rx->rx_comp_ring[RCQ_BD(rx_comp_cons)]; + cqe = &fp_rx->rx_comp_ring[RCQ_BD(fp_rx->rx_comp_cons)]; cqe_fp_flags = cqe->fast_path_cqe.type_error_flags; cqe_fp_type = cqe_fp_flags & ETH_FAST_PATH_RX_CQE_TYPE; if (!CQE_TYPE_FAST(cqe_fp_type) || (cqe_fp_flags & ETH_RX_ERROR_FALGS)) From 1fdf155158886514c82e5401ab7b1264beb375bf Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Mon, 23 Jan 2012 07:31:54 +0000 Subject: [PATCH 52/67] bnx2x: allow user to change ring size in ISCSI SD mode Signed-off-by: Dmitry Kravkov Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 2b731b253598..03f3935fd8c2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3117,7 +3117,7 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) int rx_ring_size = 0; #ifdef BCM_CNIC - if (IS_MF_ISCSI_SD(bp)) { + if (!bp->rx_ring_size && IS_MF_ISCSI_SD(bp)) { rx_ring_size = MIN_RX_SIZE_NONTPA; bp->rx_ring_size = rx_ring_size; } else From 65087cfee50595185f6bbf3d78272eeb34186d2b Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Mon, 23 Jan 2012 07:31:55 +0000 Subject: [PATCH 53/67] bnx2x: handle CHIP_REVISION during init_one The macro `CHIP_IS_E1x' requires `bp' to be initialized. As `bp' is not yet initialized during this phase of `bnx2x_init_dev', it accessed uninitialized fields in the struct. Signed-off-by: Ariel Elior Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index ffeaaa95ed96..f4c2fe52ab1c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10536,6 +10536,9 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, { struct bnx2x *bp; int rc; + bool chip_is_e1x = (board_type == BCM57710 || + board_type == BCM57711 || + board_type == BCM57711E); SET_NETDEV_DEV(dev, &pdev->dev); bp = netdev_priv(dev); @@ -10624,7 +10627,7 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, REG_WR(bp, PXP2_REG_PGL_ADDR_90_F0, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_94_F0, 0); - if (CHIP_IS_E1x(bp)) { + if (chip_is_e1x) { REG_WR(bp, PXP2_REG_PGL_ADDR_88_F1, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_8C_F1, 0); REG_WR(bp, PXP2_REG_PGL_ADDR_90_F1, 0); @@ -10635,9 +10638,7 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, * Enable internal target-read (in case we are probed after PF FLR). * Must be done prior to any BAR read access. Only for 57712 and up */ - if (board_type != BCM57710 && - board_type != BCM57711 && - board_type != BCM57711E) + if (!chip_is_e1x) REG_WR(bp, PGLUE_B_REG_INTERNAL_PFID_ENABLE_TARGET_READ, 1); /* Reset the load counter */ From 44151acb9f13563e40d40d14c3e5c11ce21b59e1 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 23 Jan 2012 07:31:56 +0000 Subject: [PATCH 54/67] bnx2x: fix compilation error with SOE in fw_dump Signed-off-by: Yuval Mintz Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index f4c2fe52ab1c..1e3f978ee6da 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -941,7 +941,7 @@ void bnx2x_panic_dump(struct bnx2x *bp) struct sw_rx_bd *sw_bd = &fp->rx_buf_ring[j]; BNX2X_ERR("fp%d: rx_bd[%x]=[%x:%x] sw_bd=[%p]\n", - i, j, rx_bd[1], rx_bd[0], sw_bd->skb); + i, j, rx_bd[1], rx_bd[0], sw_bd->data); } start = RX_SGE(fp->rx_sge_prod); From 302476c99863fe6d08eed6145e37322892ab7f55 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Mon, 23 Jan 2012 01:16:35 +0000 Subject: [PATCH 55/67] mv643xx_eth: Add Rx Discard and Rx Overrun statistics These statistics helped me a lot while searching who is losing packets in my setup. I added these stats to MIB group since they are very similar, but just in other registers. I have tested this patch on 88F6281 SoC. Signed-off-by: Paulius Zaleckas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mv643xx_eth.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 9c049d2cb97d..9edecfa1f0f4 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -136,6 +136,8 @@ static char mv643xx_eth_driver_version[] = "1.4"; #define INT_MASK 0x0068 #define INT_MASK_EXT 0x006c #define TX_FIFO_URGENT_THRESHOLD 0x0074 +#define RX_DISCARD_FRAME_CNT 0x0084 +#define RX_OVERRUN_FRAME_CNT 0x0088 #define TXQ_FIX_PRIO_CONF_MOVED 0x00dc #define TX_BW_RATE_MOVED 0x00e0 #define TX_BW_MTU_MOVED 0x00e8 @@ -334,6 +336,9 @@ struct mib_counters { u32 bad_crc_event; u32 collision; u32 late_collision; + /* Non MIB hardware counters */ + u32 rx_discard; + u32 rx_overrun; }; struct lro_counters { @@ -1225,6 +1230,10 @@ static void mib_counters_clear(struct mv643xx_eth_private *mp) for (i = 0; i < 0x80; i += 4) mib_read(mp, i); + + /* Clear non MIB hw counters also */ + rdlp(mp, RX_DISCARD_FRAME_CNT); + rdlp(mp, RX_OVERRUN_FRAME_CNT); } static void mib_counters_update(struct mv643xx_eth_private *mp) @@ -1262,6 +1271,9 @@ static void mib_counters_update(struct mv643xx_eth_private *mp) p->bad_crc_event += mib_read(mp, 0x74); p->collision += mib_read(mp, 0x78); p->late_collision += mib_read(mp, 0x7c); + /* Non MIB hardware counters */ + p->rx_discard += rdlp(mp, RX_DISCARD_FRAME_CNT); + p->rx_overrun += rdlp(mp, RX_OVERRUN_FRAME_CNT); spin_unlock_bh(&mp->mib_counters_lock); mod_timer(&mp->mib_counters_timer, jiffies + 30 * HZ); @@ -1413,6 +1425,8 @@ static const struct mv643xx_eth_stats mv643xx_eth_stats[] = { MIBSTAT(bad_crc_event), MIBSTAT(collision), MIBSTAT(late_collision), + MIBSTAT(rx_discard), + MIBSTAT(rx_overrun), LROSTAT(lro_aggregated), LROSTAT(lro_flushed), LROSTAT(lro_no_desc), From da057fb7d272c7e7609465a54bcac8ec8072ead5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Sun, 22 Jan 2012 09:40:40 +0000 Subject: [PATCH 56/67] skge: add byte queue limit support This also changes the cleanup logic slightly to aggregate completed notifications for multiple packets. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 37 +++++++++++++++++++---------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 299c33bd5345..edb9bda55d55 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2817,6 +2817,8 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, td->control = BMU_OWN | BMU_SW | BMU_STF | control | len; wmb(); + netdev_sent_queue(dev, skb->len); + skge_write8(hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_START); netif_printk(skge, tx_queued, KERN_DEBUG, skge->netdev, @@ -2858,11 +2860,9 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, /* Free resources associated with this reing element */ -static void skge_tx_free(struct skge_port *skge, struct skge_element *e, - u32 control) +static inline void skge_tx_unmap(struct pci_dev *pdev, struct skge_element *e, + u32 control) { - struct pci_dev *pdev = skge->hw->pdev; - /* skb header vs. fragment */ if (control & BMU_STF) pci_unmap_single(pdev, dma_unmap_addr(e, mapaddr), @@ -2872,13 +2872,6 @@ static void skge_tx_free(struct skge_port *skge, struct skge_element *e, pci_unmap_page(pdev, dma_unmap_addr(e, mapaddr), dma_unmap_len(e, maplen), PCI_DMA_TODEVICE); - - if (control & BMU_EOF) { - netif_printk(skge, tx_done, KERN_DEBUG, skge->netdev, - "tx done slot %td\n", e - skge->tx_ring.start); - - dev_kfree_skb(e->skb); - } } /* Free all buffers in transmit ring */ @@ -2889,10 +2882,15 @@ static void skge_tx_clean(struct net_device *dev) for (e = skge->tx_ring.to_clean; e != skge->tx_ring.to_use; e = e->next) { struct skge_tx_desc *td = e->desc; - skge_tx_free(skge, e, td->control); + + skge_tx_unmap(skge->hw->pdev, e, td->control); + + if (td->control & BMU_EOF) + dev_kfree_skb(e->skb); td->control = 0; } + netdev_reset_queue(dev); skge->tx_ring.to_clean = e; } @@ -3157,6 +3155,7 @@ static void skge_tx_done(struct net_device *dev) struct skge_port *skge = netdev_priv(dev); struct skge_ring *ring = &skge->tx_ring; struct skge_element *e; + unsigned int bytes_compl = 0, pkts_compl = 0; skge_write8(skge->hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F); @@ -3166,8 +3165,20 @@ static void skge_tx_done(struct net_device *dev) if (control & BMU_OWN) break; - skge_tx_free(skge, e, control); + skge_tx_unmap(skge->hw->pdev, e, control); + + if (control & BMU_EOF) { + netif_printk(skge, tx_done, KERN_DEBUG, skge->netdev, + "tx done slot %td\n", + e - skge->tx_ring.start); + + pkts_compl++; + bytes_compl += e->skb->len; + + dev_kfree_skb(e->skb); + } } + netdev_completed_queue(dev, pkts_compl, bytes_compl); skge->tx_ring.to_clean = e; /* Can run lockless until we need to synchronize to restart queue. */ From a5a1195559f2e20bd975f58e50f53ebe84d5cca6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 23 Jan 2012 01:22:09 +0000 Subject: [PATCH 57/67] tg3: fix ipv6 header length computation tg3_start_xmit() makes the wrong assumption for TSOV6 that skb->head doesnt include any payload data. if (skb_is_gso_v6(skb)) hdr_len = skb_headlen(skb) - ETH_HLEN; This is not true anymore after commit f07d960df3 (tcp: avoid frag allocation for small frames) We should instead use : skb_transport_offset(skb) + tcp_hdrlen(skb) Its also true for IPv4 Signed-off-by: Eric Dumazet CC: Matt Carlson CC: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index d529af99157d..a1f2e0fed78b 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6667,14 +6667,9 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) iph = ip_hdr(skb); tcp_opt_len = tcp_optlen(skb); - if (skb_is_gso_v6(skb)) { - hdr_len = skb_headlen(skb) - ETH_HLEN; - } else { - u32 ip_tcp_len; - - ip_tcp_len = ip_hdrlen(skb) + sizeof(struct tcphdr); - hdr_len = ip_tcp_len + tcp_opt_len; + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb) - ETH_HLEN; + if (!skb_is_gso_v6(skb)) { iph->check = 0; iph->tot_len = htons(mss + hdr_len); } From edd664bbba53f771d4a6d4559ed6e1ff48b46406 Mon Sep 17 00:00:00 2001 From: Chris Healy Date: Sun, 22 Jan 2012 21:20:54 +0000 Subject: [PATCH 58/67] dsa: Add reporting of silicon revision for Marvell 88E6123/88E6161/88E6165 switches. Add reporting of silicon revision during the probe function for Marvell 88E6123/88E6161/88E6165 switches. Signed-off-by: Chris Healy Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6123_61_65.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c index c0a458fc698f..6f23c9521f0e 100644 --- a/drivers/net/dsa/mv88e6123_61_65.c +++ b/drivers/net/dsa/mv88e6123_61_65.c @@ -20,12 +20,25 @@ static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { - ret &= 0xfff0; - if (ret == 0x1210) + if (ret == 0x1212) + return "Marvell 88E6123 (A1)"; + if (ret == 0x1213) + return "Marvell 88E6123 (A2)"; + if ((ret & 0xfff0) == 0x1210) return "Marvell 88E6123"; - if (ret == 0x1610) + + if (ret == 0x1612) + return "Marvell 88E6161 (A1)"; + if (ret == 0x1613) + return "Marvell 88E6161 (A2)"; + if ((ret & 0xfff0) == 0x1610) return "Marvell 88E6161"; - if (ret == 0x1650) + + if (ret == 0x1652) + return "Marvell 88E6165 (A1)"; + if (ret == 0x1653) + return "Marvell 88e6165 (A2)"; + if ((ret & 0xfff0) == 0x1650) return "Marvell 88E6165"; } From 90b9a5454fd2e626aa1614fe9ece6b63a0dc37af Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Mon, 23 Jan 2012 23:26:48 +0000 Subject: [PATCH 59/67] stmmac: fix phy naming inconsistency After commit "db8857b stmmac: use an unique MDIO bus name" my device stopped being probed because two different names were being used in different places. This fixes the inconsistency. Signed-off-by: Alessandro Rubini Acked-by: Giancarlo Asnaghi Cc: Giuseppe Cavallaro Cc: Florian Fainelli Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index da4a1042523a..73195329aa46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -154,7 +154,7 @@ int stmmac_mdio_register(struct net_device *ndev) else irqlist = priv->mii_irq; - new_bus->name = "STMMAC MII Bus"; + new_bus->name = "stmmac"; new_bus->read = &stmmac_mdio_read; new_bus->write = &stmmac_mdio_write; new_bus->reset = &stmmac_mdio_reset; From 56ac11cf2f21366ad48b356f7a0d1af8cff3588e Mon Sep 17 00:00:00 2001 From: Radu Iliescu Date: Thu, 19 Jan 2012 03:57:57 +0000 Subject: [PATCH 60/67] llc: Fix race condition in llc_ui_recvmsg There is a race on sk_receive_queue between llc_ui_recvmsg and sock_queue_rcv_skb. Our current solution is to protect skb_eat in llc_ui_recvmsg with the queue spinlock. Signed-off-by: Radu Iliescu Signed-off-by: David S. Miller --- net/llc/af_llc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index a18e6c3d36e3..b9bef2c75026 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -713,6 +713,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb = NULL; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + unsigned long cpu_flags; size_t copied = 0; u32 peek_seq = 0; u32 *seq; @@ -838,7 +839,9 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, goto copy_uaddr; if (!(flags & MSG_PEEK)) { + spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); sk_eat_skb(sk, skb, 0); + spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); *seq = 0; } @@ -859,7 +862,9 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, llc_cmsg_rcv(msg, skb); if (!(flags & MSG_PEEK)) { + spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); sk_eat_skb(sk, skb, 0); + spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); *seq = 0; } From 5437f4b2576f1763a27bc4c0e7f7c280220ba1aa Mon Sep 17 00:00:00 2001 From: Alessandro Rubini Date: Mon, 23 Jan 2012 23:08:56 +0000 Subject: [PATCH 61/67] stmmac: added PCI identifiers STM has a device ID within its own VENDOR space, and it is being used in the STA2X11 I/O Hub. Signed-off-by: Alessandro Rubini Acked-by: Giancarlo Asnaghi Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 54a819a36487..c796de9eed72 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -170,9 +170,9 @@ static int stmmac_pci_resume(struct pci_dev *pdev) #define STMMAC_DEVICE_ID 0x1108 static DEFINE_PCI_DEVICE_TABLE(stmmac_id_table) = { - { - PCI_DEVICE(STMMAC_VENDOR_ID, STMMAC_DEVICE_ID)}, { - } + {PCI_DEVICE(STMMAC_VENDOR_ID, STMMAC_DEVICE_ID)}, + {PCI_DEVICE(PCI_VENDOR_ID_STMICRO, PCI_DEVICE_ID_STMICRO_MAC)}, + {} }; MODULE_DEVICE_TABLE(pci, stmmac_id_table); From 2bbba277a554431a426e81f37d5c50ab6216c07d Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 24 Jan 2012 10:41:40 +0000 Subject: [PATCH 62/67] drivers/net: dsa/mv88e6xxx.c files need linux/module.h An implicit instance of module.h leaked back into existence and was masking the fact that these drivers weren't calling out the include for itself. Fix the drivers before we remove the implicit include path via net/netprio_cgroup.h file. Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6060.c | 1 + drivers/net/dsa/mv88e6123_61_65.c | 1 + drivers/net/dsa/mv88e6131.c | 1 + drivers/net/dsa/mv88e6xxx.c | 1 + 4 files changed, 4 insertions(+) diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index 7fc4e81d4d43..325391d19bad 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c index 6f23c9521f0e..c17c75b9f531 100644 --- a/drivers/net/dsa/mv88e6123_61_65.c +++ b/drivers/net/dsa/mv88e6123_61_65.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c index e0eb68243834..55888b06d8b4 100644 --- a/drivers/net/dsa/mv88e6131.c +++ b/drivers/net/dsa/mv88e6131.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 5467c040824a..a2c62c2f30ee 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include From c11bf1c8baff170fa478adc04964da519d160e62 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 24 Jan 2012 10:21:28 +0000 Subject: [PATCH 63/67] net/hyperv: fix possible memory leak in do_set_multicast() do_set_multicast() may not free the memory malloc in netvsc_set_multicast_list(). Signed-off-by: Wei Yongjun Signed-off-by: Haiyang Zhang Signed-off-by: K. Y. Srinivasan Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 462d05f05e84..1a1ca6cfc74a 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -68,11 +68,11 @@ static void do_set_multicast(struct work_struct *w) nvdev = hv_get_drvdata(ndevctx->device_ctx); if (nvdev == NULL) - return; + goto out; rdev = nvdev->extension; if (rdev == NULL) - return; + goto out; if (net->flags & IFF_PROMISC) rndis_filter_set_packet_filter(rdev, @@ -83,6 +83,7 @@ static void do_set_multicast(struct work_struct *w) NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_DIRECTED); +out: kfree(w); } From b82b9183d4f18f9b8c4bb31f223eb6c79b734eb0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 24 Jan 2012 05:16:00 +0000 Subject: [PATCH 64/67] team: send only changed options/ports via netlink This patch changes event message behaviour to send only updated records instead of whole list. This fixes bug on which userspace receives non-actual data in case multiple events occur in row. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/team/team.c | 136 ++++++++++++++++++++++++++-------------- include/linux/if_team.h | 10 +++ 2 files changed, 100 insertions(+), 46 deletions(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index ed2a862b835d..6b678f38e5ce 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -92,9 +92,9 @@ struct team_option *__team_find_option(struct team *team, const char *opt_name) return NULL; } -int team_options_register(struct team *team, - const struct team_option *option, - size_t option_count) +int __team_options_register(struct team *team, + const struct team_option *option, + size_t option_count) { int i; struct team_option **dst_opts; @@ -116,8 +116,11 @@ int team_options_register(struct team *team, } } - for (i = 0; i < option_count; i++) + for (i = 0; i < option_count; i++) { + dst_opts[i]->changed = true; + dst_opts[i]->removed = false; list_add_tail(&dst_opts[i]->list, &team->option_list); + } kfree(dst_opts); return 0; @@ -130,10 +133,22 @@ int team_options_register(struct team *team, return err; } -EXPORT_SYMBOL(team_options_register); +static void __team_options_mark_removed(struct team *team, + const struct team_option *option, + size_t option_count) +{ + int i; -static void __team_options_change_check(struct team *team, - struct team_option *changed_option); + for (i = 0; i < option_count; i++, option++) { + struct team_option *del_opt; + + del_opt = __team_find_option(team, option->name); + if (del_opt) { + del_opt->changed = true; + del_opt->removed = true; + } + } +} static void __team_options_unregister(struct team *team, const struct team_option *option, @@ -152,12 +167,29 @@ static void __team_options_unregister(struct team *team, } } +static void __team_options_change_check(struct team *team); + +int team_options_register(struct team *team, + const struct team_option *option, + size_t option_count) +{ + int err; + + err = __team_options_register(team, option, option_count); + if (err) + return err; + __team_options_change_check(team); + return 0; +} +EXPORT_SYMBOL(team_options_register); + void team_options_unregister(struct team *team, const struct team_option *option, size_t option_count) { + __team_options_mark_removed(team, option, option_count); + __team_options_change_check(team); __team_options_unregister(team, option, option_count); - __team_options_change_check(team, NULL); } EXPORT_SYMBOL(team_options_unregister); @@ -176,7 +208,8 @@ static int team_option_set(struct team *team, struct team_option *option, if (err) return err; - __team_options_change_check(team, option); + option->changed = true; + __team_options_change_check(team); return err; } @@ -653,6 +686,7 @@ static int team_port_del(struct team *team, struct net_device *port_dev) return -ENOENT; } + port->removed = true; __team_port_change_check(port, false); team_port_list_del_port(team, port); team_adjust_ops(team); @@ -1200,10 +1234,9 @@ static int team_nl_send_generic(struct genl_info *info, struct team *team, return err; } -static int team_nl_fill_options_get_changed(struct sk_buff *skb, - u32 pid, u32 seq, int flags, - struct team *team, - struct team_option *changed_option) +static int team_nl_fill_options_get(struct sk_buff *skb, + u32 pid, u32 seq, int flags, + struct team *team, bool fillall) { struct nlattr *option_list; void *hdr; @@ -1223,12 +1256,19 @@ static int team_nl_fill_options_get_changed(struct sk_buff *skb, struct nlattr *option_item; long arg; + /* Include only changed options if fill all mode is not on */ + if (!fillall && !option->changed) + continue; option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION); if (!option_item) goto nla_put_failure; NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_NAME, option->name); - if (option == changed_option) + if (option->changed) { NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_CHANGED); + option->changed = false; + } + if (option->removed) + NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_REMOVED); switch (option->type) { case TEAM_OPTION_TYPE_U32: NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32); @@ -1255,13 +1295,13 @@ static int team_nl_fill_options_get_changed(struct sk_buff *skb, return -EMSGSIZE; } -static int team_nl_fill_options_get(struct sk_buff *skb, - struct genl_info *info, int flags, - struct team *team) +static int team_nl_fill_options_get_all(struct sk_buff *skb, + struct genl_info *info, int flags, + struct team *team) { - return team_nl_fill_options_get_changed(skb, info->snd_pid, - info->snd_seq, NLM_F_ACK, - team, NULL); + return team_nl_fill_options_get(skb, info->snd_pid, + info->snd_seq, NLM_F_ACK, + team, true); } static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) @@ -1273,7 +1313,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) if (!team) return -EINVAL; - err = team_nl_send_generic(info, team, team_nl_fill_options_get); + err = team_nl_send_generic(info, team, team_nl_fill_options_get_all); team_nl_team_put(team); @@ -1365,10 +1405,10 @@ static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info) return err; } -static int team_nl_fill_port_list_get_changed(struct sk_buff *skb, - u32 pid, u32 seq, int flags, - struct team *team, - struct team_port *changed_port) +static int team_nl_fill_port_list_get(struct sk_buff *skb, + u32 pid, u32 seq, int flags, + struct team *team, + bool fillall) { struct nlattr *port_list; void *hdr; @@ -1387,12 +1427,19 @@ static int team_nl_fill_port_list_get_changed(struct sk_buff *skb, list_for_each_entry(port, &team->port_list, list) { struct nlattr *port_item; + /* Include only changed ports if fill all mode is not on */ + if (!fillall && !port->changed) + continue; port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT); if (!port_item) goto nla_put_failure; NLA_PUT_U32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex); - if (port == changed_port) + if (port->changed) { NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_CHANGED); + port->changed = false; + } + if (port->removed) + NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_REMOVED); if (port->linkup) NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_LINKUP); NLA_PUT_U32(skb, TEAM_ATTR_PORT_SPEED, port->speed); @@ -1408,13 +1455,13 @@ static int team_nl_fill_port_list_get_changed(struct sk_buff *skb, return -EMSGSIZE; } -static int team_nl_fill_port_list_get(struct sk_buff *skb, - struct genl_info *info, int flags, - struct team *team) +static int team_nl_fill_port_list_get_all(struct sk_buff *skb, + struct genl_info *info, int flags, + struct team *team) { - return team_nl_fill_port_list_get_changed(skb, info->snd_pid, - info->snd_seq, NLM_F_ACK, - team, NULL); + return team_nl_fill_port_list_get(skb, info->snd_pid, + info->snd_seq, NLM_F_ACK, + team, true); } static int team_nl_cmd_port_list_get(struct sk_buff *skb, @@ -1427,7 +1474,7 @@ static int team_nl_cmd_port_list_get(struct sk_buff *skb, if (!team) return -EINVAL; - err = team_nl_send_generic(info, team, team_nl_fill_port_list_get); + err = team_nl_send_generic(info, team, team_nl_fill_port_list_get_all); team_nl_team_put(team); @@ -1464,8 +1511,7 @@ static struct genl_multicast_group team_change_event_mcgrp = { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, }; -static int team_nl_send_event_options_get(struct team *team, - struct team_option *changed_option) +static int team_nl_send_event_options_get(struct team *team) { struct sk_buff *skb; int err; @@ -1475,8 +1521,7 @@ static int team_nl_send_event_options_get(struct team *team, if (!skb) return -ENOMEM; - err = team_nl_fill_options_get_changed(skb, 0, 0, 0, team, - changed_option); + err = team_nl_fill_options_get(skb, 0, 0, 0, team, false); if (err < 0) goto err_fill; @@ -1489,18 +1534,17 @@ static int team_nl_send_event_options_get(struct team *team, return err; } -static int team_nl_send_event_port_list_get(struct team_port *port) +static int team_nl_send_event_port_list_get(struct team *team) { struct sk_buff *skb; int err; - struct net *net = dev_net(port->team->dev); + struct net *net = dev_net(team->dev); skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - err = team_nl_fill_port_list_get_changed(skb, 0, 0, 0, - port->team, port); + err = team_nl_fill_port_list_get(skb, 0, 0, 0, team, false); if (err < 0) goto err_fill; @@ -1544,12 +1588,11 @@ static void team_nl_fini(void) * Change checkers ******************/ -static void __team_options_change_check(struct team *team, - struct team_option *changed_option) +static void __team_options_change_check(struct team *team) { int err; - err = team_nl_send_event_options_get(team, changed_option); + err = team_nl_send_event_options_get(team); if (err) netdev_warn(team->dev, "Failed to send options change via netlink\n"); } @@ -1559,9 +1602,10 @@ static void __team_port_change_check(struct team_port *port, bool linkup) { int err; - if (port->linkup == linkup) + if (!port->removed && port->linkup == linkup) return; + port->changed = true; port->linkup = linkup; if (linkup) { struct ethtool_cmd ecmd; @@ -1577,7 +1621,7 @@ static void __team_port_change_check(struct team_port *port, bool linkup) port->duplex = 0; send_event: - err = team_nl_send_event_port_list_get(port); + err = team_nl_send_event_port_list_get(port->team); if (err) netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink\n", port->dev->name); diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 828181fbad5d..58404b0c5010 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -46,6 +46,10 @@ struct team_port { u32 speed; u8 duplex; + /* Custom gennetlink interface related flags */ + bool changed; + bool removed; + struct rcu_head rcu; }; @@ -72,6 +76,10 @@ struct team_option { enum team_option_type type; int (*getter)(struct team *team, void *arg); int (*setter)(struct team *team, void *arg); + + /* Custom gennetlink interface related flags */ + bool changed; + bool removed; }; struct team_mode { @@ -207,6 +215,7 @@ enum { TEAM_ATTR_OPTION_CHANGED, /* flag */ TEAM_ATTR_OPTION_TYPE, /* u8 */ TEAM_ATTR_OPTION_DATA, /* dynamic */ + TEAM_ATTR_OPTION_REMOVED, /* flag */ __TEAM_ATTR_OPTION_MAX, TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1, @@ -227,6 +236,7 @@ enum { TEAM_ATTR_PORT_LINKUP, /* flag */ TEAM_ATTR_PORT_SPEED, /* u32 */ TEAM_ATTR_PORT_DUPLEX, /* u8 */ + TEAM_ATTR_PORT_REMOVED, /* flag */ __TEAM_ATTR_PORT_MAX, TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1, From c452ed70771cea3af73d21a5914989137fbd28b8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 24 Jan 2012 16:03:33 -0500 Subject: [PATCH 65/67] net: flow_dissector.c missing include linux/export.h The file net/core/flow_dissector.c seems to be missing including linux/export.h. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0985b9b14b80..a225089df5b6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1,4 +1,5 @@ #include +#include #include #include #include From 36a1211970193ce215de50ed1e4e1272bc814df1 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 24 Jan 2012 11:33:19 +0000 Subject: [PATCH 66/67] netprio_cgroup.h: dont include module.h from other includes A considerable effort was invested in wiping out module.h from being present in all the other standard includes. This one leaked back in, but once again isn't strictly necessary, so remove it. Signed-off-by: Paul Gortmaker Signed-off-by: David S. Miller --- include/net/netprio_cgroup.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h index e503b87c4c1b..7b2d43139c8e 100644 --- a/include/net/netprio_cgroup.h +++ b/include/net/netprio_cgroup.h @@ -13,7 +13,6 @@ #ifndef _NETPRIO_CGROUP_H #define _NETPRIO_CGROUP_H -#include #include #include #include From efc3dbc37412c027e363736b4f4c74ee5e8ecffc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 24 Jan 2012 17:03:44 -0500 Subject: [PATCH 67/67] rds: Make rds_sock_lock BH rather than IRQ safe. rds_sock_info() triggers locking warnings because we try to perform a local_bh_enable() (via sock_i_ino()) while hardware interrupts are disabled (via taking rds_sock_lock). There is no reason for rds_sock_lock to be a hardware IRQ disabling lock, none of these access paths run in hardware interrupt context. Therefore making it a BH disabling lock is safe and sufficient to fix this bug. Reported-by: Kumar Sanghvi Reported-by: Josh Boyer Signed-off-by: David S. Miller --- net/rds/af_rds.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index bb6ad81b671d..424ff622ab5f 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -68,7 +68,6 @@ static int rds_release(struct socket *sock) { struct sock *sk = sock->sk; struct rds_sock *rs; - unsigned long flags; if (!sk) goto out; @@ -94,10 +93,10 @@ static int rds_release(struct socket *sock) rds_rdma_drop_keys(rs); rds_notify_queue_get(rs, NULL); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_del_init(&rs->rs_item); rds_sock_count--; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); rds_trans_put(rs->rs_transport); @@ -409,7 +408,6 @@ static const struct proto_ops rds_proto_ops = { static int __rds_create(struct socket *sock, struct sock *sk, int protocol) { - unsigned long flags; struct rds_sock *rs; sock_init_data(sock, sk); @@ -426,10 +424,10 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_add_tail(&rs->rs_item, &rds_sock_list); rds_sock_count++; - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); return 0; } @@ -471,12 +469,11 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, { struct rds_sock *rs; struct rds_incoming *inc; - unsigned long flags; unsigned int total = 0; len /= sizeof(struct rds_info_message); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { read_lock(&rs->rs_recv_lock); @@ -492,7 +489,7 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, read_unlock(&rs->rs_recv_lock); } - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); lens->nr = total; lens->each = sizeof(struct rds_info_message); @@ -504,11 +501,10 @@ static void rds_sock_info(struct socket *sock, unsigned int len, { struct rds_info_socket sinfo; struct rds_sock *rs; - unsigned long flags; len /= sizeof(struct rds_info_socket); - spin_lock_irqsave(&rds_sock_lock, flags); + spin_lock_bh(&rds_sock_lock); if (len < rds_sock_count) goto out; @@ -529,7 +525,7 @@ static void rds_sock_info(struct socket *sock, unsigned int len, lens->nr = rds_sock_count; lens->each = sizeof(struct rds_info_socket); - spin_unlock_irqrestore(&rds_sock_lock, flags); + spin_unlock_bh(&rds_sock_lock); } static void rds_exit(void)