From 5596732fa8c14139018ecda8356eabbfb599d830 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 7 Apr 2014 08:08:52 +0200 Subject: [PATCH 001/116] xfrm: Fix crash with ipv6 IPsec tunnel and NAT. The ipv6 xfrm output path is not aware that packets can be rerouted by NAT to not use IPsec. We crash in this case because we expect to have a xfrm state at the dst_entry. This crash happens if the ipv6 layer does IPsec and NAT or if we have an interfamily IPsec tunnel with ipv4 NAT. We fix this by checking for a NAT rerouted packet in each address family and dst_output() to the new destination in this case. Reported-by: Martin Pelikan Tested-by: Martin Pelikan Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_output.c | 34 +++++++++++++++++++--------------- net/ipv6/xfrm6_output.c | 22 +++++++++++++--------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index baa0f63731fd..f3800bf79d86 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -62,10 +62,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED; - - skb->protocol = htons(ETH_P_IP); + IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; return x->outer_mode->output2(x, skb); } @@ -73,27 +70,34 @@ EXPORT_SYMBOL(xfrm4_prepare_output); int xfrm4_output_finish(struct sk_buff *skb) { -#ifdef CONFIG_NETFILTER - if (!skb_dst(skb)->xfrm) { - IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); - } + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + skb->protocol = htons(ETH_P_IP); +#ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; #endif - skb->protocol = htons(ETH_P_IP); return xfrm_output(skb); } +static int __xfrm4_output(struct sk_buff *skb) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + +#ifdef CONFIG_NETFILTER + if (!x) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif + + return x->outer_mode->afinfo->output_finish(skb); +} + int xfrm4_output(struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - struct xfrm_state *x = dst->xfrm; - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, - NULL, dst->dev, - x->outer_mode->afinfo->output_finish, + NULL, skb_dst(skb)->dev, __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e37706..ba624331451c 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -114,12 +114,6 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif - - skb->protocol = htons(ETH_P_IPV6); skb->local_df = 1; return x->outer_mode->output2(x, skb); @@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output); int xfrm6_output_finish(struct sk_buff *skb) { + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + skb->protocol = htons(ETH_P_IPV6); + #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - skb->protocol = htons(ETH_P_IPV6); return xfrm_output(skb); } @@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int mtu; +#ifdef CONFIG_NETFILTER + if (!x) { + IP6CB(skb)->flags |= IP6SKB_REROUTED; + return dst_output(skb); + } +#endif + if (skb->protocol == htons(ETH_P_IPV6)) mtu = ip6_skb_dst_mtu(skb); else @@ -165,6 +168,7 @@ static int __xfrm6_output(struct sk_buff *skb) int xfrm6_output(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, __xfrm6_output); + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + NULL, skb_dst(skb)->dev, __xfrm6_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } From ef67f18dece707eb2fb1ef25ccca47062842dc4a Mon Sep 17 00:00:00 2001 From: Alexander Bondar Date: Sun, 30 Mar 2014 10:47:08 +0300 Subject: [PATCH 002/116] iwlwifi: mvm: several fixes in scan The firmware doesn't handle properly the fragmented scan. Stop using it. While at it change max_out_time and suspend_time units from usec to TUs as expected by firmware API. Signed-off-by: Alexander Bondar Signed-off-by: Emmanuel Grumbach --- .../net/wireless/iwlwifi/mvm/fw-api-scan.h | 8 +-- drivers/net/wireless/iwlwifi/mvm/scan.c | 53 +++++-------------- 2 files changed, 16 insertions(+), 45 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h index 9426905de6b2..d73a89ecd78a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h +++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h @@ -183,9 +183,9 @@ enum iwl_scan_type { * this number of packets were received (typically 1) * @passive2active: is auto switching from passive to active during scan allowed * @rxchain_sel_flags: RXON_RX_CHAIN_* - * @max_out_time: in usecs, max out of serving channel time + * @max_out_time: in TUs, max out of serving channel time * @suspend_time: how long to pause scan when returning to service channel: - * bits 0-19: beacon interal in usecs (suspend before executing) + * bits 0-19: beacon interal in TUs (suspend before executing) * bits 20-23: reserved * bits 24-31: number of beacons (suspend between channels) * @rxon_flags: RXON_FLG_* @@ -383,8 +383,8 @@ enum scan_framework_client { * @quiet_plcp_th: quiet channel num of packets threshold * @good_CRC_th: passive to active promotion threshold * @rx_chain: RXON rx chain. - * @max_out_time: max uSec to be out of assoceated channel - * @suspend_time: pause scan this long when returning to service channel + * @max_out_time: max TUs to be out of assoceated channel + * @suspend_time: pause scan this TUs when returning to service channel * @flags: RXON flags * @filter_flags: RXONfilter * @tx_cmd: tx command for active scan; for 2GHz and for 5GHz. diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index c91dc8498852..cba88a379fc8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -277,51 +277,22 @@ static void iwl_mvm_scan_calc_params(struct iwl_mvm *mvm, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_scan_condition_iterator, &global_bound); - /* - * Under low latency traffic passive scan is fragmented meaning - * that dwell on a particular channel will be fragmented. Each fragment - * dwell time is 20ms and fragments period is 105ms. Skipping to next - * channel will be delayed by the same period - 105ms. So suspend_time - * parameter describing both fragments and channels skipping periods is - * set to 105ms. This value is chosen so that overall passive scan - * duration will not be too long. Max_out_time in this case is set to - * 70ms, so for active scanning operating channel will be left for 70ms - * while for passive still for 20ms (fragment dwell). - */ - if (global_bound) { - if (!iwl_mvm_low_latency(mvm)) { - params->suspend_time = ieee80211_tu_to_usec(100); - params->max_out_time = ieee80211_tu_to_usec(600); - } else { - params->suspend_time = ieee80211_tu_to_usec(105); - /* P2P doesn't support fragmented passive scan, so - * configure max_out_time to be at least longest dwell - * time for passive scan. - */ - if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p) { - params->max_out_time = ieee80211_tu_to_usec(70); - params->passive_fragmented = true; - } else { - u32 passive_dwell; - /* - * Use band G so that passive channel dwell time - * will be assigned with maximum value. - */ - band = IEEE80211_BAND_2GHZ; - passive_dwell = iwl_mvm_get_passive_dwell(band); - params->max_out_time = - ieee80211_tu_to_usec(passive_dwell); - } - } + if (!global_bound) + goto not_bound; + + params->suspend_time = 100; + params->max_out_time = 600; + + if (iwl_mvm_low_latency(mvm)) { + params->suspend_time = 250; + params->max_out_time = 250; } +not_bound: + for (band = IEEE80211_BAND_2GHZ; band < IEEE80211_NUM_BANDS; band++) { - if (params->passive_fragmented) - params->dwell[band].passive = 20; - else - params->dwell[band].passive = - iwl_mvm_get_passive_dwell(band); + params->dwell[band].passive = iwl_mvm_get_passive_dwell(band); params->dwell[band].active = iwl_mvm_get_active_dwell(band, n_ssids); } From a32452366b7250c42e96a18ffc3ad8db9e0ca3c2 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 16 Apr 2014 09:01:03 +0200 Subject: [PATCH 003/116] vti4: Don't count header length twice. We currently count the size of LL_MAX_HEADER and struct iphdr twice for vti4 devices, this leads to a wrong device mtu. The size of LL_MAX_HEADER and struct iphdr is already counted in ip_tunnel_bind_dev(), so don't do it again in vti_tunnel_init(). Fixes: b9959fd3 ("vti: switch to new ip tunnel code") Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 687ddef4e574..cd62596e9a87 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -349,7 +349,6 @@ static int vti_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &iph->daddr, 4); dev->type = ARPHRD_TUNNEL; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); dev->mtu = ETH_DATA_LEN; dev->flags = IFF_NOARP; dev->iflink = 0; From 062f1d6de01df545ca0df366c6133f0fa164bff6 Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Tue, 22 Apr 2014 18:19:25 +0800 Subject: [PATCH 004/116] mac80211: avoid handling of SMPS for mesh The patch "mac80211: implement SMPS for AP" has caused kernel oops at mesh STA if the peer mesh STA operates in sleep mode and then becomes active mode. It can be easily reproduced by setting the following commands at peer mesh STA: iw mesh0 station set aa:bb:cc:dd:ee:ff mesh_power_mode deep iw mesh0 station set aa:bb:cc:dd:ee:ff mesh_power_mode active Kernel oops will happen at mesh STA aa:bb:cc:dd:ee:ff. Fix this by avoiding SMPS for mesh mode. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 137a192e64bc..847d92f6bef6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1148,7 +1148,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); /* This station just woke up and isn't aware of our SMPS state */ - if (!ieee80211_smps_is_restrictive(sta->known_smps_mode, + if (!ieee80211_vif_is_mesh(&sdata->vif) && + !ieee80211_smps_is_restrictive(sta->known_smps_mode, sdata->smps_mode) && sta->known_smps_mode != sdata->bss->req_smps && sta_info_tx_streams(sta) != 1) { From 311ab8e1c28659e16a6fe14a68929c3ced4a5757 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Wed, 16 Apr 2014 08:32:41 -0400 Subject: [PATCH 005/116] mac80211: fixup radiotap tx flags for RTS/CTS When using RTS/CTS, the CTS-to-Self bit in radiotap TX flags is getting set instead of the RTS bit. Set the correct one. Reported-by: Larry Maxwell Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/status.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 00ba90b02ab2..60cb7a665976 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -314,10 +314,9 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, !is_multicast_ether_addr(hdr->addr1)) txflags |= IEEE80211_RADIOTAP_F_TX_FAIL; - if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || - (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) txflags |= IEEE80211_RADIOTAP_F_TX_CTS; - else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) + if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) txflags |= IEEE80211_RADIOTAP_F_TX_RTS; put_unaligned_le16(txflags, pos); From 61622cc6f29034d0479f7ac16f3d48f1eeabf3a1 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 29 Apr 2014 07:50:44 +0200 Subject: [PATCH 006/116] xfrm4: Properly handle unsupported protocols We don't catch the case if an unsupported protocol is submitted to the xfrm4 protocol handlers, this can lead to NULL pointer dereferences. Fix this by adding the appropriate checks. Fixes: 3328715e ("xfrm4: Add IPsec protocol multiplexer") Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_protocol.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c index 7f7b243e8139..a2ce0101eaac 100644 --- a/net/ipv4/xfrm4_protocol.c +++ b/net/ipv4/xfrm4_protocol.c @@ -50,8 +50,12 @@ int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm4_protocol *handler; + struct xfrm4_protocol __rcu **head = proto_handlers(protocol); - for_each_protocol_rcu(*proto_handlers(protocol), handler) + if (!head) + return 0; + + for_each_protocol_rcu(*head, handler) if ((ret = handler->cb_handler(skb, err)) <= 0) return ret; @@ -64,15 +68,20 @@ int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, { int ret; struct xfrm4_protocol *handler; + struct xfrm4_protocol __rcu **head = proto_handlers(nexthdr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); - for_each_protocol_rcu(*proto_handlers(nexthdr), handler) + if (!head) + goto out; + + for_each_protocol_rcu(*head, handler) if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL) return ret; +out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); kfree_skb(skb); @@ -208,6 +217,9 @@ int xfrm4_protocol_register(struct xfrm4_protocol *handler, int ret = -EEXIST; int priority = handler->priority; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm4_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), @@ -250,6 +262,9 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, struct xfrm4_protocol *t; int ret = -ENOENT; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm4_protocol_mutex); for (pprev = proto_handlers(protocol); From fe337ac2839521b360f828b3ebd992d597b1ad16 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Apr 2014 21:07:31 +0200 Subject: [PATCH 007/116] netfilter: ctnetlink: don't add null bindings if no nat requested commit 0eba801b64cc8284d9024c7ece30415a2b981a72 tried to fix a race where nat initialisation can happen after ctnetlink-created conntrack has been created. However, it causes the nat module(s) to be loaded needlessly on systems that are not using NAT. Fortunately, we do not have to create null bindings in that case. conntracks injected via ctnetlink always have the CONFIRMED bit set, which prevents addition of the nat extension in nf_nat_ipv4/6_fn(). We only need to make sure that either no nat extension is added or that we've created both src and dst manips. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ccc46fa5edbc..58579634427d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1336,6 +1336,9 @@ ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) #ifdef CONFIG_NF_NAT_NEEDED int ret; + if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) + return 0; + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) From 895162b1101b3ea5db08ca6822ae9672717efec0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 2 May 2014 15:32:16 +0200 Subject: [PATCH 008/116] netfilter: ipv4: defrag: set local_df flag on defragmented skb else we may fail to forward skb even if original fragments do fit outgoing link mtu: 1. remote sends 2k packets in two 1000 byte frags, DF set 2. we want to forward but only see '2k > mtu and DF set' 3. we then send icmp error saying that outgoing link is 1500 But original sender never sent a packet that would not fit the outgoing link. Setting local_df makes outgoing path test size vs. IPCB(skb)->frag_max_size, so we will still send the correct error in case the largest original size did not fit outgoing link mtu. Reported-by: Maxime Bizon Suggested-by: Maxime Bizon Fixes: 5f2d04f1f9 (ipv4: fix path MTU discovery with connection tracking) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_defrag_ipv4.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 12e13bd82b5b..f40f321b41fc 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -22,7 +22,6 @@ #endif #include -/* Returns new sk_buff, or NULL */ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { int err; @@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) err = ip_defrag(skb, user); local_bh_enable(); - if (!err) + if (!err) { ip_send_check(ip_hdr(skb)); + skb->local_df = 1; + } return err; } From ecd15dd7e45f3683fa8142b9f2c015dfaa0c243d Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Sun, 4 May 2014 13:35:37 +0200 Subject: [PATCH 009/116] netfilter: nfnetlink: Fix use after free when it fails to process batch This bug manifests when calling the nft command line tool without nf_tables kernel support. kernel message: [ 44.071555] Netfilter messages via NETLINK v0.30. [ 44.072253] BUG: unable to handle kernel NULL pointer dereference at 0000000000000119 [ 44.072264] IP: [] netlink_getsockbyportid+0xf/0x70 [ 44.072272] PGD 7f2b74067 PUD 7f2b73067 PMD 0 [ 44.072277] Oops: 0000 [#1] SMP [...] [ 44.072369] Call Trace: [ 44.072373] [] netlink_unicast+0x91/0x200 [ 44.072377] [] netlink_ack+0x99/0x110 [ 44.072381] [] nfnetlink_rcv+0x3c1/0x408 [nfnetlink] [ 44.072385] [] netlink_unicast+0xf3/0x200 [ 44.072389] [] netlink_sendmsg+0x2ff/0x740 [ 44.072394] [] ? __mmdrop+0x62/0x90 [ 44.072398] [] sock_sendmsg+0x8b/0xc0 [ 44.072403] [] ? copy_user_enhanced_fast_string+0x5/0x10 [ 44.072406] [] ? move_addr_to_kernel+0x2c/0x50 [ 44.072410] [] ___sys_sendmsg+0x3c3/0x3d0 [ 44.072415] [] ? handle_mm_fault+0xa9a/0xc60 [ 44.072420] [] ? mmap_region+0x166/0x5a0 [ 44.072424] [] ? __do_page_fault+0x1dc/0x510 [ 44.072428] [] ? apparmor_capable+0x1c/0x60 [ 44.072435] [] ? _raw_spin_unlock_bh+0x1a/0x20 [ 44.072439] [] ? release_sock+0x106/0x150 [ 44.072443] [] __sys_sendmsg+0x42/0x80 [ 44.072446] [] SyS_sendmsg+0x12/0x20 [ 44.072450] [] system_call_fastpath+0x1a/0x1f Signed-off-by: Denys Fedoryshchenko Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e009087620e3..23ef77c60fff 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -256,15 +256,15 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, #endif { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(nskb); } } if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - kfree_skb(nskb); - return netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(skb, nlh, -EOPNOTSUPP); + return kfree_skb(skb); } while (skb->len >= nlmsg_total_size(0)) { From f4ebddf9abb1fb0add1ce8e9c0bc414f1c17d81d Mon Sep 17 00:00:00 2001 From: Henning Rogge Date: Thu, 1 May 2014 10:03:46 +0200 Subject: [PATCH 010/116] mac80211: Fix mac80211 station info rx bitrate for IBSS mode Filter out incoming multicast packages before applying their bitrate to the rx bitrate station info field to prevent them from setting the rx bitrate to the basic multicast rate. Signed-off-by: Henning Rogge Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 216c45b949e5..2b608b2b70ec 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1231,7 +1231,8 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { sta->last_rx = jiffies; - if (ieee80211_is_data(hdr->frame_control)) { + if (ieee80211_is_data(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { sta->last_rx_rate_idx = status->rate_idx; sta->last_rx_rate_flag = status->flag; sta->last_rx_rate_vht_flag = status->vht_flag; From c1fbb258846dfc425507a093922d2d001e54c3ea Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 30 Apr 2014 15:58:13 +0300 Subject: [PATCH 011/116] cfg80211: free sme on connection failures cfg80211 is notified about connection failures by __cfg80211_connect_result() call. However, this function currently does not free cfg80211 sme. This results in hanging connection attempts in some cases e.g. when mac80211 authentication attempt is denied, we have this function call: ieee80211_rx_mgmt_auth() -> cfg80211_rx_mlme_mgmt() -> cfg80211_process_auth() -> cfg80211_sme_rx_auth() -> __cfg80211_connect_result() but cfg80211_sme_free() is never get called. Fixes: ceca7b712 ("cfg80211: separate internal SME implementation") Cc: stable@vger.kernel.org (3.10+) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/wireless/sme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index acdcb4a81817..3546a77033de 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -234,7 +234,6 @@ void cfg80211_conn_work(struct work_struct *work) NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); - cfg80211_sme_free(wdev); } wdev_unlock(wdev); } @@ -648,6 +647,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wdev->wiphy, bss); } + cfg80211_sme_free(wdev); return; } From 792e6aa7a15ea0fb16f8687e93caede1ea9118c7 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 30 Apr 2014 16:14:23 +0300 Subject: [PATCH 012/116] cfg80211: add cfg80211_sched_scan_stopped_rtnl Add locked-version for cfg80211_sched_scan_stopped. This is used for some users that might want to call it when rtnl is already locked. Fixes: d43c6b6 ("mac80211: reschedule sched scan after HW restart") Cc: stable@vger.kernel.org (3.14+) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++++++++ net/wireless/scan.c | 12 ++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f3539a15c411..f856e5a746fa 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3668,6 +3668,18 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy); */ void cfg80211_sched_scan_stopped(struct wiphy *wiphy); +/** + * cfg80211_sched_scan_stopped_rtnl - notify that the scheduled scan has stopped + * + * @wiphy: the wiphy on which the scheduled scan stopped + * + * The driver can call this function to inform cfg80211 that the + * scheduled scan had to be stopped, for whatever reason. The driver + * is then called back via the sched_scan_stop operation when done. + * This function should be called with rtnl locked. + */ +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy); + /** * cfg80211_inform_bss_width_frame - inform cfg80211 of a received BSS frame * diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 7d09a712cb1f..88f108edfb58 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -284,14 +284,22 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) } EXPORT_SYMBOL(cfg80211_sched_scan_results); -void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +void cfg80211_sched_scan_stopped_rtnl(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + ASSERT_RTNL(); + trace_cfg80211_sched_scan_stopped(wiphy); - rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); +} +EXPORT_SYMBOL(cfg80211_sched_scan_stopped_rtnl); + +void cfg80211_sched_scan_stopped(struct wiphy *wiphy) +{ + rtnl_lock(); + cfg80211_sched_scan_stopped_rtnl(wiphy); rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); From e669ba2d06c6195662601956454ac959892f0762 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 30 Apr 2014 16:14:24 +0300 Subject: [PATCH 013/116] mac80211: fix nested rtnl locking on ieee80211_reconfig ieee80211_reconfig already holds rtnl, so calling cfg80211_sched_scan_stopped results in deadlock. Use the rtnl-version of this function instead. Fixes: d43c6b6 ("mac80211: reschedule sched scan after HW restart") Cc: stable@vger.kernel.org (3.14+) Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 275c94f995f7..3c365837e910 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1780,7 +1780,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (sched_scan_stopped) - cfg80211_sched_scan_stopped(local->hw.wiphy); + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); /* * If this is for hw restart things are still running. From 7c3d5ab1f35f5475b1a1fbe74143683cfc092d33 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 3 May 2014 03:14:04 +0400 Subject: [PATCH 014/116] ipv4: fix "conntrack zones" support for defrag user check in ip_expire Defrag user check in ip_expire was not updated after adding support for "conntrack zones". This bug manifests as a RFC violation, since the router will send the icmp time exceeeded message when using conntrack zones. Signed-off-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/ipv4/ip_fragment.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index c10a3ce5cbff..ed32313e307c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -232,8 +232,9 @@ static void ip_expire(unsigned long arg) * "Fragment Reassembly Timeout" message, per RFC792. */ if (qp->user == IP_DEFRAG_AF_PACKET || - (qp->user == IP_DEFRAG_CONNTRACK_IN && - skb_rtable(head)->rt_type != RTN_LOCAL)) + ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && + (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) && + (skb_rtable(head)->rt_type != RTN_LOCAL))) goto out_rcu_unlock; From aff09ce303f83bd370772349238482ae422a2341 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 5 May 2014 00:17:48 +0400 Subject: [PATCH 015/116] bridge: superfluous skb->nfct check in br_nf_dev_queue_xmit Currently bridge can silently drop ipv4 fragments. If node have loaded nf_defrag_ipv4 module but have no nf_conntrack_ipv4, br_nf_pre_routing defragments incoming ipv4 fragments but nfct check in br_nf_dev_queue_xmit does not allow re-fragment combined packet back, and therefore it is dropped in br_dev_queue_push_xmit without incrementing of any failcounters It seems the only way to hit the ip_fragment code in the bridge xmit path is to have a fragment list whose reassembled fragments go over the mtu. This only happens if nf_defrag is enabled. Thanks to Florian Westphal for providing feedback to clarify this. Defragmentation ipv4 is required not only in conntracks but at least in TPROXY target and socket match, therefore #ifdef is changed from NF_CONNTRACK_IPV4 to NF_DEFRAG_IPV4 Signed-off-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 80e1b0f60a30..2acf7fa1fec6 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -859,12 +859,12 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV4) +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { int ret; - if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && + if (skb->protocol == htons(ETH_P_IP) && skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && !skb_is_gso(skb)) { if (br_parse_ip_options(skb)) From edb666f07e539d92f63284213d72083ed8ac05ea Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 29 Apr 2014 08:23:03 +0200 Subject: [PATCH 016/116] xfrm6: Properly handle unsupported protocols We don't catch the case if an unsupported protocol is submitted to the xfrm6 protocol handlers, this can lead to NULL pointer dereferences. Fix this by adding the appropriate checks. Fixes: 7e14ea15 ("xfrm6: Add IPsec protocol multiplexer") Signed-off-by: Steffen Klassert --- net/ipv6/xfrm6_protocol.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c index 6ab989c486f7..54d13f8dbbae 100644 --- a/net/ipv6/xfrm6_protocol.c +++ b/net/ipv6/xfrm6_protocol.c @@ -50,6 +50,10 @@ int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) { int ret; struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(protocol); + + if (!head) + return 0; for_each_protocol_rcu(*proto_handlers(protocol), handler) if ((ret = handler->cb_handler(skb, err)) <= 0) @@ -184,10 +188,12 @@ int xfrm6_protocol_register(struct xfrm6_protocol *handler, struct xfrm6_protocol __rcu **pprev; struct xfrm6_protocol *t; bool add_netproto = false; - int ret = -EEXIST; int priority = handler->priority; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm6_protocol_mutex); if (!rcu_dereference_protected(*proto_handlers(protocol), @@ -230,6 +236,9 @@ int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, struct xfrm6_protocol *t; int ret = -ENOENT; + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + mutex_lock(&xfrm6_protocol_mutex); for (pprev = proto_handlers(protocol); From 0fed2bcf17d2a049c079ffd2b7bc4caca4b7f906 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 13 Apr 2014 22:55:27 +0300 Subject: [PATCH 017/116] iwlwifi: mvm: BT Coex - fix validity flags during init The commit below introduced a bug in the validity bits in init. Due to that, all the Coex mechanism stopped sending kills to the BT side. Fix that. Fixes: b9fae2d54c9f ("iwlwifi: mvm: BT Coex add support for Co-running block") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/coex.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/coex.c b/drivers/net/wireless/iwlwifi/mvm/coex.c index fa858d548d13..0489314425cb 100644 --- a/drivers/net/wireless/iwlwifi/mvm/coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/coex.c @@ -611,14 +611,14 @@ int iwl_send_bt_init_conf(struct iwl_mvm *mvm) bt_cmd->flags |= cpu_to_le32(BT_COEX_SYNC2SCO); if (IWL_MVM_BT_COEX_CORUNNING) { - bt_cmd->valid_bit_msk = cpu_to_le32(BT_VALID_CORUN_LUT_20 | - BT_VALID_CORUN_LUT_40); + bt_cmd->valid_bit_msk |= cpu_to_le32(BT_VALID_CORUN_LUT_20 | + BT_VALID_CORUN_LUT_40); bt_cmd->flags |= cpu_to_le32(BT_COEX_CORUNNING); } if (IWL_MVM_BT_COEX_MPLUT) { bt_cmd->flags |= cpu_to_le32(BT_COEX_MPLUT); - bt_cmd->valid_bit_msk = cpu_to_le32(BT_VALID_MULTI_PRIO_LUT); + bt_cmd->valid_bit_msk |= cpu_to_le32(BT_VALID_MULTI_PRIO_LUT); } if (mvm->cfg->bt_shared_single_ant) From 8e96440e8d42da0dab02cae125ea59e2b64c77fe Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 6 May 2014 20:35:10 +0300 Subject: [PATCH 018/116] iwlwifi: mvm: rs - s/CPTCFG/CONFIG My bad - I forgot to update this when sending the patch upstream. Fixes: 87d5e4155c00 ("iwlwifi: mvm: rs: reinit rs if no tx for a long time") Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/rs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/rs.c b/drivers/net/wireless/iwlwifi/mvm/rs.c index 9f52c5b3f0ec..e1c838899363 100644 --- a/drivers/net/wireless/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/iwlwifi/mvm/rs.c @@ -1010,7 +1010,7 @@ static void rs_tx_status(void *mvm_r, struct ieee80211_supported_band *sband, return; } -#ifdef CPTCFG_MAC80211_DEBUGFS +#ifdef CONFIG_MAC80211_DEBUGFS /* Disable last tx check if we are debugging with fixed rate */ if (lq_sta->dbg_fixed_rate) { IWL_DEBUG_RATE(mvm, "Fixed rate. avoid rate scaling\n"); From bd5e4744a6ca64299b57a2682c720d00a475a734 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Thu, 24 Apr 2014 13:15:29 +0300 Subject: [PATCH 019/116] iwlwifi: mvm: do no sched scan while associated Currently the FW doesn't support sched scan while associated, Prevent it. Signed-off-by: David Spinadel Reviewed-by: Johannes Berg Reviewed-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 5 +++++ drivers/net/wireless/iwlwifi/mvm/mvm.h | 3 +++ drivers/net/wireless/iwlwifi/mvm/utils.c | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index f0cebf12c7b8..593f723a74c4 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1807,6 +1807,11 @@ static int iwl_mvm_mac_sched_scan_start(struct ieee80211_hw *hw, mutex_lock(&mvm->mutex); + if (iwl_mvm_is_associated(mvm)) { + ret = -EBUSY; + goto out; + } + switch (mvm->scan_status) { case IWL_MVM_SCAN_OS: IWL_DEBUG_SCAN(mvm, "Stopping previous scan for sched_scan\n"); diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index d564233a65da..84c75a1b267e 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -1003,6 +1003,9 @@ static inline bool iwl_mvm_vif_low_latency(struct iwl_mvm_vif *mvmvif) return mvmvif->low_latency; } +/* Assoc status */ +bool iwl_mvm_is_associated(struct iwl_mvm *mvm); + /* Thermal management and CT-kill */ void iwl_mvm_tt_tx_backoff(struct iwl_mvm *mvm, u32 backoff); void iwl_mvm_tt_handler(struct iwl_mvm *mvm); diff --git a/drivers/net/wireless/iwlwifi/mvm/utils.c b/drivers/net/wireless/iwlwifi/mvm/utils.c index d619851745a1..6fdbef9696d8 100644 --- a/drivers/net/wireless/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/iwlwifi/mvm/utils.c @@ -644,3 +644,22 @@ bool iwl_mvm_low_latency(struct iwl_mvm *mvm) return result; } + +static void iwl_mvm_assoc_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) +{ + bool *assoc = _data; + + if (vif->bss_conf.assoc) + *assoc = true; +} + +bool iwl_mvm_is_associated(struct iwl_mvm *mvm) +{ + bool assoc = false; + + ieee80211_iterate_active_interfaces_atomic( + mvm->hw, IEEE80211_IFACE_ITER_NORMAL, + iwl_mvm_assoc_iter, &assoc); + + return assoc; +} From 4f4178f3bb1f470d7fb863ec531e08e20a0fd51c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Sat, 3 May 2014 16:12:47 +0200 Subject: [PATCH 020/116] net: cdc_mbim: __vlan_find_dev_deep need rcu_read_lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes this warning introduced by commit 5b8f15f78e6f ("net: cdc_mbim: handle IPv6 Neigbor Solicitations"): =============================== [ INFO: suspicious RCU usage. ] 3.15.0-rc3 #213 Tainted: G W O ------------------------------- net/8021q/vlan_core.c:69 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 no locks held by ksoftirqd/0/3. stack backtrace: CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G W O 3.15.0-rc3 #213 Hardware name: LENOVO 2776LEG/2776LEG, BIOS 6EET55WW (3.15 ) 12/19/2011 0000000000000001 ffff880232533bf0 ffffffff813a5ee6 0000000000000006 ffff880232530090 ffff880232533c20 ffffffff81076b94 0000000000000081 0000000000000000 ffff8802085ac000 ffff88007fc8ea00 ffff880232533c50 Call Trace: [] dump_stack+0x4e/0x68 [] lockdep_rcu_suspicious+0xfa/0x103 [] __vlan_find_dev_deep+0x54/0x94 [] cdc_mbim_rx_fixup+0x379/0x66a [cdc_mbim] [] ? _raw_spin_unlock_irqrestore+0x3a/0x49 [] ? trace_hardirqs_on_caller+0x192/0x1a1 [] usbnet_bh+0x59/0x287 [usbnet] [] tasklet_action+0xbb/0xcd [] __do_softirq+0x14c/0x30d [] run_ksoftirqd+0x1f/0x50 [] smpboot_thread_fn+0x172/0x18e [] ? SyS_setgroups+0xdf/0xdf [] kthread+0xb5/0xbd [] ? __wait_for_common+0x13b/0x170 [] ? __kthread_parkme+0x5c/0x5c [] ret_from_fork+0x7c/0xb0 [] ? __kthread_parkme+0x5c/0x5c Fixes: 5b8f15f78e6f ("net: cdc_mbim: handle IPv6 Neigbor Solicitations") Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index c9f3281506af..13f7705fd679 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -204,17 +204,23 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) return; /* need to send the NA on the VLAN dev, if any */ - if (tci) + rcu_read_lock(); + if (tci) { netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q), tci); - else + if (!netdev) { + rcu_read_unlock(); + return; + } + } else { netdev = dev->net; - if (!netdev) - return; + } + dev_hold(netdev); + rcu_read_unlock(); in6_dev = in6_dev_get(netdev); if (!in6_dev) - return; + goto out; is_router = !!in6_dev->cnf.forwarding; in6_dev_put(in6_dev); @@ -224,6 +230,8 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci) true /* solicited */, false /* override */, true /* inc_opt */); +out: + dev_put(netdev); } static bool is_neigh_solicit(u8 *buf, size_t len) From ca6c5d4ad216d5942ae544bbf02503041bd802aa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 4 May 2014 23:24:31 +0200 Subject: [PATCH 021/116] net: ipv4: ip_forward: fix inverted local_df test local_df means 'ignore DF bit if set', so if its set we're allowed to perform ip fragmentation. This wasn't noticed earlier because the output path also drops such skbs (and emits needed icmp error) and because netfilter ip defrag did not set local_df until couple of days ago. Only difference is that DF-packets-larger-than MTU now discarded earlier (f.e. we avoid pointless netfilter postrouting trip). While at it, drop the repeated test ip_exceeds_mtu, checking it once is enough... Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index be8abe73bb9f..c29ae8371e44 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -42,12 +42,12 @@ static bool ip_may_fragment(const struct sk_buff *skb) { return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) || - !skb->local_df; + skb->local_df; } static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) From 418a31561d594a2b636c1e2fa94ecd9e1245abb1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 00:03:34 +0200 Subject: [PATCH 022/116] net: ipv6: send pkttoobig immediately if orig frag size > mtu If conntrack defragments incoming ipv6 frags it stores largest original frag size in ip6cb and sets ->local_df. We must thus first test the largest original frag size vs. mtu, and not vice versa. Without this patch PKTTOOBIG is still generated in ip6_fragment() later in the stack, but 1) IPSTATS_MIB_INTOOBIGERRORS won't increment 2) packet did (needlessly) traverse netfilter postrouting hook. Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 40e7581374f7..31a38bde69ef 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -344,12 +344,16 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { - if (skb->len <= mtu || skb->local_df) + if (skb->len <= mtu) return false; + /* ipv6 conntrack defrag sets max_frag_size + local_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; + if (skb->local_df) + return false; + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) return false; From c7ba65d7b64984ff371cb5630b36af23506c50d5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 15:00:43 +0200 Subject: [PATCH 023/116] net: ip: push gso skb forwarding handling down the stack Doing the segmentation in the forward path has one major drawback: When using virtio, we may process gso udp packets coming from host network stack. In that case, netfilter POSTROUTING will see one packet with udp header followed by multiple ip fragments. Delay the segmentation and do it after POSTROUTING invocation to avoid this. Fixes: fe6cc55f3a9 ("net: ip, ipv6: handle gso skbs in forwarding path") Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 50 ------------------------------------------ net/ipv4/ip_output.c | 51 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 53 deletions(-) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index c29ae8371e44..6f111e48e11c 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -56,53 +56,6 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } -static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb) -{ - unsigned int mtu; - - if (skb->local_df || !skb_is_gso(skb)) - return false; - - mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true); - - /* if seglen > mtu, do software segmentation for IP fragmentation on - * output. DF bit cannot be set since ip_forward would have sent - * icmp error. - */ - return skb_gso_network_seglen(skb) > mtu; -} - -/* called if GSO skb needs to be fragmented on forward */ -static int ip_forward_finish_gso(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - netdev_features_t features; - struct sk_buff *segs; - int ret = 0; - - features = netif_skb_dev_features(skb, dst->dev); - segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); - if (IS_ERR(segs)) { - kfree_skb(skb); - return -ENOMEM; - } - - consume_skb(skb); - - do { - struct sk_buff *nskb = segs->next; - int err; - - segs->next = NULL; - err = dst_output(segs); - - if (err && ret == 0) - ret = err; - segs = nskb; - } while (segs); - - return ret; -} static int ip_forward_finish(struct sk_buff *skb) { @@ -114,9 +67,6 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); - if (ip_gso_exceeds_dst_mtu(skb)) - return ip_forward_finish_gso(skb); - return dst_output(skb); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 1cbeba5edff9..a52f50187b54 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -211,6 +211,48 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } +static int ip_finish_output_gso(struct sk_buff *skb) +{ + netdev_features_t features; + struct sk_buff *segs; + int ret = 0; + + /* common case: locally created skb or seglen is <= mtu */ + if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || + skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) + return ip_finish_output2(skb); + + /* Slowpath - GSO segment length is exceeding the dst MTU. + * + * This can happen in two cases: + * 1) TCP GRO packet, DF bit not set + * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly + * from host network stack. + */ + features = netif_skb_features(skb); + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + if (IS_ERR(segs)) { + kfree_skb(skb); + return -ENOMEM; + } + + consume_skb(skb); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = ip_fragment(segs, ip_finish_output2); + + if (err && ret == 0) + ret = err; + segs = nskb; + } while (segs); + + return ret; +} + static int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) @@ -220,10 +262,13 @@ static int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb)) + if (skb_is_gso(skb)) + return ip_finish_output_gso(skb); + + if (skb->len > ip_skb_dst_mtu(skb)) return ip_fragment(skb, ip_finish_output2); - else - return ip_finish_output2(skb); + + return ip_finish_output2(skb); } int ip_mc_output(struct sock *sk, struct sk_buff *skb) From c1e756bfcbcac838a86a23f3e4501b556a961e3c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 May 2014 15:00:44 +0200 Subject: [PATCH 024/116] Revert "net: core: introduce netif_skb_dev_features" This reverts commit d206940319c41df4299db75ed56142177bb2e5f6, there are no more callers. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +------ net/core/dev.c | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7ed3a3aa6604..20e99efb1ca6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev); -static inline netdev_features_t netif_skb_features(struct sk_buff *skb) -{ - return netif_skb_dev_features(skb, skb->dev); -} +netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { diff --git a/net/core/dev.c b/net/core/dev.c index d2c8a06b3a98..c619b8641337 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault); * 2. No high memory really exists on this machine. */ -static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb) +static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; @@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - const struct net_device *dev, - netdev_features_t features) + netdev_features_t features) { int tmp; if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; - } else if (illegal_highdma(dev, skb)) { + } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } return features; } -netdev_features_t netif_skb_dev_features(struct sk_buff *skb, - const struct net_device *dev) +netdev_features_t netif_skb_features(struct sk_buff *skb) { __be16 protocol = skb->protocol; - netdev_features_t features = dev->features; + netdev_features_t features = skb->dev->features; - if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs) + if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) features &= ~NETIF_F_GSO_MASK; if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } - features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | + features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) @@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb, NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, dev, features); + return harmonize_features(skb, features); } -EXPORT_SYMBOL(netif_skb_dev_features); +EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) From 76a691d0ab71a244f7582a5b0387728befbdb52f Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 5 May 2014 14:51:47 -0400 Subject: [PATCH 025/116] jme: Fix DMA unmap warning The jme driver forgot to check the return status from pci_map_page in its tx path, causing a dma api warning on unmap. Easy fix, just do the check and augment the tx path to tell the stack that the driver is busy so we re-queue the frame. Signed-off-by: Neil Horman CC: Guo-Fu Tseng CC: "David S. Miller" Signed-off-by: David S. Miller --- drivers/net/ethernet/jme.c | 53 +++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index b0c6050479eb..6e664d9038d6 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1988,7 +1988,7 @@ jme_alloc_txdesc(struct jme_adapter *jme, return idx; } -static void +static int jme_fill_tx_map(struct pci_dev *pdev, struct txdesc *txdesc, struct jme_buffer_info *txbi, @@ -2005,6 +2005,9 @@ jme_fill_tx_map(struct pci_dev *pdev, len, PCI_DMA_TODEVICE); + if (unlikely(pci_dma_mapping_error(pdev, dmaaddr))) + return -EINVAL; + pci_dma_sync_single_for_device(pdev, dmaaddr, len, @@ -2021,9 +2024,30 @@ jme_fill_tx_map(struct pci_dev *pdev, txbi->mapping = dmaaddr; txbi->len = len; + return 0; } -static void +static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int endidx) +{ + struct jme_ring *txring = &(jme->txring[0]); + struct jme_buffer_info *txbi = txring->bufinf, *ctxbi; + int mask = jme->tx_ring_mask; + int j; + + for (j = startidx ; j < endidx ; ++j) { + ctxbi = txbi + ((startidx + j + 2) & (mask)); + pci_unmap_page(jme->pdev, + ctxbi->mapping, + ctxbi->len, + PCI_DMA_TODEVICE); + + ctxbi->mapping = 0; + ctxbi->len = 0; + } + +} + +static int jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) { struct jme_ring *txring = &(jme->txring[0]); @@ -2034,25 +2058,37 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) int mask = jme->tx_ring_mask; const struct skb_frag_struct *frag; u32 len; + int ret = 0; for (i = 0 ; i < nr_frags ; ++i) { frag = &skb_shinfo(skb)->frags[i]; ctxdesc = txdesc + ((idx + i + 2) & (mask)); ctxbi = txbi + ((idx + i + 2) & (mask)); - jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, + ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, skb_frag_page(frag), frag->page_offset, skb_frag_size(frag), hidma); + if (ret) { + jme_drop_tx_map(jme, idx, idx+i); + goto out; + } + } len = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len; ctxdesc = txdesc + ((idx + 1) & (mask)); ctxbi = txbi + ((idx + 1) & (mask)); - jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, virt_to_page(skb->data), + ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, virt_to_page(skb->data), offset_in_page(skb->data), len, hidma); + if (ret) + jme_drop_tx_map(jme, idx, idx+i); + +out: + return ret; } + static int jme_tx_tso(struct sk_buff *skb, __le16 *mss, u8 *flags) { @@ -2131,6 +2167,7 @@ jme_fill_tx_desc(struct jme_adapter *jme, struct sk_buff *skb, int idx) struct txdesc *txdesc; struct jme_buffer_info *txbi; u8 flags; + int ret = 0; txdesc = (struct txdesc *)txring->desc + idx; txbi = txring->bufinf + idx; @@ -2155,7 +2192,10 @@ jme_fill_tx_desc(struct jme_adapter *jme, struct sk_buff *skb, int idx) if (jme_tx_tso(skb, &txdesc->desc1.mss, &flags)) jme_tx_csum(jme, skb, &flags); jme_tx_vlan(skb, &txdesc->desc1.vlan, &flags); - jme_map_tx_skb(jme, skb, idx); + ret = jme_map_tx_skb(jme, skb, idx); + if (ret) + return ret; + txdesc->desc1.flags = flags; /* * Set tx buffer info after telling NIC to send @@ -2228,7 +2268,8 @@ jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - jme_fill_tx_desc(jme, skb, idx); + if (jme_fill_tx_desc(jme, skb, idx)) + return NETDEV_TX_BUSY; jwrite32(jme, JME_TXCS, jme->reg_txcs | TXCS_SELECT_QUEUE0 | From 83f7a85f1134c6e914453f5747435415a23d516b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 13 Apr 2014 16:03:11 +0300 Subject: [PATCH 026/116] iwlwifi: pcie: disable interrupts upon PCIe alloc In case RFKILL is in KILL position, the NIC will issue an interrupt straight away. This interrupt won't be sent because it is masked in the hardware. But if our interrupt service routine is called for another reason (SHARED_IRQ), then we'll look at the interrupt cause and service it. This can cause bad things if we are not ready yet. Explicitly clean the interrupt cause register to make sure we won't service anything before we are ready to. Cc: [3.14] Reported-and-tested-by: Alexander Monakov Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index dcfd6d866d09..2365553f1ef7 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -1749,6 +1749,10 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, * PCI Tx retries from interfering with C3 CPU state */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); + trans->dev = &pdev->dev; + trans_pcie->pci_dev = pdev; + iwl_disable_interrupts(trans); + err = pci_enable_msi(pdev); if (err) { dev_err(&pdev->dev, "pci_enable_msi failed(0X%x)\n", err); @@ -1760,8 +1764,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, } } - trans->dev = &pdev->dev; - trans_pcie->pci_dev = pdev; trans->hw_rev = iwl_read32(trans, CSR_HW_REV); trans->hw_id = (pdev->device << 16) + pdev->subsystem_device; snprintf(trans->hw_id_str, sizeof(trans->hw_id_str), @@ -1787,8 +1789,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_pci_disable_msi; } - trans_pcie->inta_mask = CSR_INI_SET_MASK; - if (iwl_pcie_alloc_ict(trans)) goto out_free_cmd_pool; @@ -1800,6 +1800,8 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_free_ict; } + trans_pcie->inta_mask = CSR_INI_SET_MASK; + return trans; out_free_ict: From d5d2bf3eabb34cc8eaf54db37fdc43f04267985a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Tue, 6 May 2014 03:46:48 -0400 Subject: [PATCH 027/116] qlcnic: Fix panic while dumping TX queues on TX timeout o In case of non-multi TX queue mode driver does not initialize "crb_intr_mask" pointer and driver was accessing that un-initialized pointer while dumping TX queue. So dump "crb_intr_mask" only when it is initilaized. Signed-off-by: Manish Chopra Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 0bc914859e38..be789513774c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2943,9 +2943,13 @@ static void qlcnic_dump_tx_rings(struct qlcnic_adapter *adapter) tx_ring->tx_stats.xmit_called, tx_ring->tx_stats.xmit_on, tx_ring->tx_stats.xmit_off); + + if (tx_ring->crb_intr_mask) + netdev_info(netdev, "crb_intr_mask=%d\n", + readl(tx_ring->crb_intr_mask)); + netdev_info(netdev, - "crb_intr_mask=%d, hw_producer=%d, sw_producer=%d sw_consumer=%d, hw_consumer=%d\n", - readl(tx_ring->crb_intr_mask), + "hw_producer=%d, sw_producer=%d sw_consumer=%d, hw_consumer=%d\n", readl(tx_ring->crb_cmd_producer), tx_ring->producer, tx_ring->sw_consumer, le32_to_cpu(*(tx_ring->hw_consumer))); From 84d7ad2c3b8a80888d9a483388ccbd5e5f07438f Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Tue, 6 May 2014 03:46:49 -0400 Subject: [PATCH 028/116] qlcnic: Set real_num_{tx|rx}_queues properly Do not set netdev->real_num_tx_queues directly, let netif_set_real_num_tx_queues() take care of it. Do not overwrite netdev->num_tx_queues everytime when driver changes its Tx ring size through ethtool -L and also notify stack to update number of Rx queues. Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 16 ------ .../net/ethernet/qlogic/qlcnic/qlcnic_main.c | 49 ++++++++++++++++++- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index 7b52a88923ef..f785d01c7d12 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -1719,22 +1719,6 @@ static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring) tx_ring->producer; } -static inline int qlcnic_set_real_num_queues(struct qlcnic_adapter *adapter, - struct net_device *netdev) -{ - int err; - - netdev->num_tx_queues = adapter->drv_tx_rings; - netdev->real_num_tx_queues = adapter->drv_tx_rings; - - err = netif_set_real_num_tx_queues(netdev, adapter->drv_tx_rings); - if (err) - netdev_err(netdev, "failed to set %d Tx queues\n", - adapter->drv_tx_rings); - - return err; -} - struct qlcnic_nic_template { int (*config_bridged_mode) (struct qlcnic_adapter *, u32); int (*config_led) (struct qlcnic_adapter *, u32, u32); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index be789513774c..7e55e88a81bf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2206,6 +2206,31 @@ static void qlcnic_82xx_set_mac_filter_count(struct qlcnic_adapter *adapter) ahw->max_uc_count = count; } +static int qlcnic_set_real_num_queues(struct qlcnic_adapter *adapter, + u8 tx_queues, u8 rx_queues) +{ + struct net_device *netdev = adapter->netdev; + int err = 0; + + if (tx_queues) { + err = netif_set_real_num_tx_queues(netdev, tx_queues); + if (err) { + netdev_err(netdev, "failed to set %d Tx queues\n", + tx_queues); + return err; + } + } + + if (rx_queues) { + err = netif_set_real_num_rx_queues(netdev, rx_queues); + if (err) + netdev_err(netdev, "failed to set %d Rx queues\n", + rx_queues); + } + + return err; +} + int qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, int pci_using_dac) @@ -2269,7 +2294,8 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev, netdev->priv_flags |= IFF_UNICAST_FLT; netdev->irq = adapter->msix_entries[0].vector; - err = qlcnic_set_real_num_queues(adapter, netdev); + err = qlcnic_set_real_num_queues(adapter, adapter->drv_tx_rings, + adapter->drv_sds_rings); if (err) return err; @@ -3982,12 +4008,21 @@ int qlcnic_validate_rings(struct qlcnic_adapter *adapter, __u32 ring_cnt, int qlcnic_setup_rings(struct qlcnic_adapter *adapter) { struct net_device *netdev = adapter->netdev; + u8 tx_rings, rx_rings; int err; if (test_bit(__QLCNIC_RESETTING, &adapter->state)) return -EBUSY; + tx_rings = adapter->drv_tss_rings; + rx_rings = adapter->drv_rss_rings; + netif_device_detach(netdev); + + err = qlcnic_set_real_num_queues(adapter, tx_rings, rx_rings); + if (err) + goto done; + if (netif_running(netdev)) __qlcnic_down(adapter, netdev); @@ -4007,7 +4042,17 @@ int qlcnic_setup_rings(struct qlcnic_adapter *adapter) return err; } - netif_set_real_num_tx_queues(netdev, adapter->drv_tx_rings); + /* Check if we need to update real_num_{tx|rx}_queues because + * qlcnic_setup_intr() may change Tx/Rx rings size + */ + if ((tx_rings != adapter->drv_tx_rings) || + (rx_rings != adapter->drv_sds_rings)) { + err = qlcnic_set_real_num_queues(adapter, + adapter->drv_tx_rings, + adapter->drv_sds_rings); + if (err) + goto done; + } if (qlcnic_83xx_check(adapter)) { qlcnic_83xx_initialize_nic(adapter, 1); From aeefa1ecfc799b0ea2c4979617f14cecd5cccbfd Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Tue, 6 May 2014 18:23:08 +0300 Subject: [PATCH 029/116] ipv4: fib_semantics: increment fib_info_cnt after fib_info allocation Increment fib_info_cnt in fib_create_info() right after successfuly alllocating fib_info structure, overwise fib_metrics allocation failure leads to fib_info_cnt incorrectly decremented in free_fib_info(), called on error path from fib_create_info(). Signed-off-by: Sergey Popovich Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8a043f03c88e..b10cd43a4722 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -821,13 +821,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; + fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); if (!fi->fib_metrics) goto failure; } else fi->fib_metrics = (u32 *) dst_default_metrics; - fib_info_cnt++; fi->fib_net = hold_net(net); fi->fib_protocol = cfg->fc_protocol; From 23a456f05353035d1a2b3f1b9a92707acdc036e0 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 6 May 2014 18:52:16 +0200 Subject: [PATCH 030/116] net: mdio: of_mdiobus_register(): fall back to mdiobus_register() for !CONFIG_OF If CONFIG_OF is not set, make of_mdiobus_register() call mdiobus_register() instead of returning -ENOSYS. This way, we can just call of_mdiobus_register() from all DT-enabled drivers to handle the compat cases. Signed-off-by: Daniel Mack Suggested-by: Florian Fainelli Acked-by: Florian Fainelli Acked-by: Mugunthan V N Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 6fe8464ed767..881a7c3571f4 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -31,7 +31,12 @@ extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np); #else /* CONFIG_OF */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { - return -ENOSYS; + /* + * Fall back to the non-DT function to register a bus. + * This way, we don't have to keep compat bits around in drivers. + */ + + return mdiobus_register(mdio); } static inline struct phy_device *of_phy_find_device(struct device_node *phy_np) From a8951d5814e1373807a94f79f7ccec7041325470 Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Thu, 8 May 2014 16:22:35 +0300 Subject: [PATCH 031/116] netfilter: Fix potential use after free in ip6_route_me_harder() Dst is released one line before we access it again with dst->error. Fixes: 58e35d147128 netfilter: ipv6: propagate routing errors from ip6_route_me_harder() Signed-off-by: Sergey Popovich Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 95f3f1da0d7f..d38e6a8d8b9f 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, }; + int err; dst = ip6_route_output(net, skb->sk, &fl6); - if (dst->error) { + err = dst->error; + if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return dst->error; + return err; } /* Drop old route. */ From c9d8f1a64225dfcc2f721d73a5984a2444920744 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2014 11:02:49 -0700 Subject: [PATCH 032/116] ipv4: move local_port_range out of CONFIG_SYSCTL When CONFIG_SYSCTL is not set, ip_local_port_range should still work, just that no one can change it. Therefore we should move it out of sysctl_inet.c. Also, rename it to ->ip_local_ports instead. Cc: David S. Miller Cc: Francois Romieu Reported-by: Stefan de Konink Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 2 +- net/ipv4/af_inet.c | 28 ++++++++++++++++++++++++++++ net/ipv4/inet_connection_sock.c | 8 ++++---- net/ipv4/ping.c | 4 ++-- net/ipv4/sysctl_net_ipv4.c | 29 +++++++++++------------------ 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 80f500a29498..3d95cd475316 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -66,7 +66,7 @@ struct netns_ipv4 { int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; - struct local_ports sysctl_local_ports; + struct local_ports ip_local_ports; int sysctl_tcp_ecn; int sysctl_ip_no_pmtu_disc; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8c54870db792..cccc8e487c7e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1650,6 +1650,31 @@ static int __init init_ipv4_mibs(void) return register_pernet_subsys(&ipv4_mib_ops); } +static __net_init int inet_init_net(struct net *net) +{ + /* + * Set defaults for local port range + */ + seqlock_init(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = 32768; + net->ipv4.ip_local_ports.range[1] = 61000; + return 0; +} + +static __net_exit void inet_exit_net(struct net *net) +{ +} + +static __net_initdata struct pernet_operations af_inet_ops = { + .init = inet_init_net, + .exit = inet_exit_net, +}; + +static int __init init_inet_pernet_ops(void) +{ + return register_pernet_subsys(&af_inet_ops); +} + static int ipv4_proc_init(void); /* @@ -1794,6 +1819,9 @@ static int __init inet_init(void) if (ip_mr_init()) pr_crit("%s: Cannot init ipv4 mroute\n", __func__); #endif + + if (init_inet_pernet_ops()) + pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); /* * Initialise per-cpu ipv4 mibs */ diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0d1e2cb877ec..a56b8e6e866a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,11 +37,11 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); - *low = net->ipv4.sysctl_local_ports.range[0]; - *high = net->ipv4.sysctl_local_ports.range[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + *low = net->ipv4.ip_local_ports.range[0]; + *high = net->ipv4.ip_local_ports.range[1]; + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8210964a9f19..347bdde9a585 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -240,11 +240,11 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 44eba052b43d..a116c41b05dd 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { - write_seqlock(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = range[0]; - net->ipv4.sysctl_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); + net->ipv4.ip_local_ports.range[0] = range[0]; + net->ipv4.ip_local_ports.range[1] = range[1]; + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -57,7 +57,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos) { struct net *net = - container_of(table->data, struct net, ipv4.sysctl_local_ports.range); + container_of(table->data, struct net, ipv4.ip_local_ports.range); int ret; int range[2]; struct ctl_table tmp = { @@ -90,11 +90,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low container_of(table->data, struct net, ipv4.sysctl_ping_group_range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.sysctl_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.sysctl_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); } /* Update system visible IP port range */ @@ -103,10 +103,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.sysctl_ping_group_range); - write_seqlock(&net->ipv4.sysctl_local_ports.lock); + write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.sysctl_local_ports.lock); + write_sequnlock(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -819,8 +819,8 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ip_local_port_range", - .maxlen = sizeof(init_net.ipv4.sysctl_local_ports.range), - .data = &init_net.ipv4.sysctl_local_ports.range, + .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), + .data = &init_net.ipv4.ip_local_ports.range, .mode = 0644, .proc_handler = ipv4_local_port_range, }, @@ -865,13 +865,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - /* - * Set defaults for local port range - */ - seqlock_init(&net->ipv4.sysctl_local_ports.lock); - net->ipv4.sysctl_local_ports.range[0] = 32768; - net->ipv4.sysctl_local_ports.range[1] = 61000; - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; From ba6b918ab234186d3aa1663e296586a1b526b77a Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 6 May 2014 11:02:50 -0700 Subject: [PATCH 033/116] ping: move ping_group_range out of CONFIG_SYSCTL Similarly, when CONFIG_SYSCTL is not set, ping_group_range should still work, just that no one can change it. Therefore we should move it out of sysctl_net_ipv4.c. And, it should not share the same seqlock with ip_local_port_range. BTW, rename it to ->ping_group_range instead. Cc: David S. Miller Cc: Francois Romieu Reported-by: Stefan de Konink Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 7 ++++++- net/ipv4/af_inet.c | 8 ++++++++ net/ipv4/ping.c | 6 +++--- net/ipv4/sysctl_net_ipv4.c | 13 +++---------- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3d95cd475316..b2704fd0ec80 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -20,6 +20,11 @@ struct local_ports { int range[2]; }; +struct ping_group_range { + seqlock_t lock; + kgid_t range[2]; +}; + struct netns_ipv4 { #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -72,7 +77,7 @@ struct netns_ipv4 { int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; - kgid_t sysctl_ping_group_range[2]; + struct ping_group_range ping_group_range; atomic_t dev_addr_genid; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cccc8e487c7e..6d6dd345bc4d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1658,6 +1658,14 @@ static __net_init int inet_init_net(struct net *net) seqlock_init(&net->ipv4.ip_local_ports.lock); net->ipv4.ip_local_ports.range[0] = 32768; net->ipv4.ip_local_ports.range[1] = 61000; + + seqlock_init(&net->ipv4.ping_group_range.lock); + /* + * Sane defaults - nobody may create ping sockets. + * Boot scripts should set this to distro-specific group. + */ + net->ipv4.ping_group_range.range[0] = make_kgid(&init_user_ns, 1); + net->ipv4.ping_group_range.range[1] = make_kgid(&init_user_ns, 0); return 0; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 347bdde9a585..044a0ddf6a79 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -236,15 +236,15 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, kgid_t *high) { - kgid_t *data = net->ipv4.sysctl_ping_group_range; + kgid_t *data = net->ipv4.ping_group_range.range; unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a116c41b05dd..5cde8f263d40 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -87,7 +87,7 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); @@ -102,7 +102,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig { kgid_t *data = table->data; struct net *net = - container_of(table->data, struct net, ipv4.sysctl_ping_group_range); + container_of(table->data, struct net, ipv4.ping_group_range.range); write_seqlock(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; @@ -805,7 +805,7 @@ static struct ctl_table ipv4_net_table[] = { }, { .procname = "ping_group_range", - .data = &init_net.ipv4.sysctl_ping_group_range, + .data = &init_net.ipv4.ping_group_range.range, .maxlen = sizeof(gid_t)*2, .mode = 0644, .proc_handler = ipv4_ping_group_range, @@ -858,13 +858,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) table[i].data += (void *)net - (void *)&init_net; } - /* - * Sane defaults - nobody may create ping sockets. - * Boot scripts should set this to distro-specific group. - */ - net->ipv4.sysctl_ping_group_range[0] = make_kgid(&init_user_ns, 1); - net->ipv4.sysctl_ping_group_range[1] = make_kgid(&init_user_ns, 0); - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (net->ipv4.ipv4_hdr == NULL) goto err_reg; From 6af55ff52b02d492d45db88df3e461fa51a6f753 Mon Sep 17 00:00:00 2001 From: Darek Marcinkiewicz Date: Tue, 6 May 2014 22:24:50 +0200 Subject: [PATCH 034/116] Driver for Beckhoff CX5020 EtherCAT master module. This driver adds support for EtherCAT master module located on CCAT FPGA found on Beckhoff CX series industrial PCs. The driver exposes EtherCAT master as an ethernet interface. EtherCAT is a fieldbus protocol defined on top of ethernet and Beckhoff CX5020 PCs come with built-in EtherCAT master module located on a FPGA, which in turn is connected to a PCI bus. Signed-off-by: Dariusz Marcinkiewicz Signed-off-by: David S. Miller --- drivers/net/ethernet/Kconfig | 12 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/ec_bhf.c | 706 ++++++++++++++++++++++++++++++++++ 3 files changed, 719 insertions(+) create mode 100644 drivers/net/ethernet/ec_bhf.c diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 39b26fe28d10..d7401017a3f1 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -35,6 +35,18 @@ source "drivers/net/ethernet/calxeda/Kconfig" source "drivers/net/ethernet/chelsio/Kconfig" source "drivers/net/ethernet/cirrus/Kconfig" source "drivers/net/ethernet/cisco/Kconfig" + +config CX_ECAT + tristate "Beckhoff CX5020 EtherCAT master support" + depends on PCI + ---help--- + Driver for EtherCAT master module located on CCAT FPGA + that can be found on Beckhoff CX5020, and possibly other of CX + Beckhoff CX series industrial PCs. + + To compile this driver as a module, choose M here. The module + will be called ec_bhf. + source "drivers/net/ethernet/davicom/Kconfig" config DNET diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 545d0b3b9cb4..35190e36c456 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_NET_CALXEDA_XGMAC) += calxeda/ obj-$(CONFIG_NET_VENDOR_CHELSIO) += chelsio/ obj-$(CONFIG_NET_VENDOR_CIRRUS) += cirrus/ obj-$(CONFIG_NET_VENDOR_CISCO) += cisco/ +obj-$(CONFIG_CX_ECAT) += ec_bhf.o obj-$(CONFIG_DM9000) += davicom/ obj-$(CONFIG_DNET) += dnet.o obj-$(CONFIG_NET_VENDOR_DEC) += dec/ diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c new file mode 100644 index 000000000000..4884205e56ee --- /dev/null +++ b/drivers/net/ethernet/ec_bhf.c @@ -0,0 +1,706 @@ + /* + * drivers/net/ethernet/beckhoff/ec_bhf.c + * + * Copyright (C) 2014 Darek Marcinkiewicz + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +/* This is a driver for EtherCAT master module present on CCAT FPGA. + * Those can be found on Bechhoff CX50xx industrial PCs. + */ + +#if 0 +#define DEBUG +#endif +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define TIMER_INTERVAL_NSEC 20000 + +#define INFO_BLOCK_SIZE 0x10 +#define INFO_BLOCK_TYPE 0x0 +#define INFO_BLOCK_REV 0x2 +#define INFO_BLOCK_BLK_CNT 0x4 +#define INFO_BLOCK_TX_CHAN 0x4 +#define INFO_BLOCK_RX_CHAN 0x5 +#define INFO_BLOCK_OFFSET 0x8 + +#define EC_MII_OFFSET 0x4 +#define EC_FIFO_OFFSET 0x8 +#define EC_MAC_OFFSET 0xc + +#define MAC_FRAME_ERR_CNT 0x0 +#define MAC_RX_ERR_CNT 0x1 +#define MAC_CRC_ERR_CNT 0x2 +#define MAC_LNK_LST_ERR_CNT 0x3 +#define MAC_TX_FRAME_CNT 0x10 +#define MAC_RX_FRAME_CNT 0x14 +#define MAC_TX_FIFO_LVL 0x20 +#define MAC_DROPPED_FRMS 0x28 +#define MAC_CONNECTED_CCAT_FLAG 0x78 + +#define MII_MAC_ADDR 0x8 +#define MII_MAC_FILT_FLAG 0xe +#define MII_LINK_STATUS 0xf + +#define FIFO_TX_REG 0x0 +#define FIFO_TX_RESET 0x8 +#define FIFO_RX_REG 0x10 +#define FIFO_RX_ADDR_VALID (1u << 31) +#define FIFO_RX_RESET 0x18 + +#define DMA_CHAN_OFFSET 0x1000 +#define DMA_CHAN_SIZE 0x8 + +#define DMA_WINDOW_SIZE_MASK 0xfffffffc + +static struct pci_device_id ids[] = { + { PCI_DEVICE(0x15ec, 0x5000), }, + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ids); + +struct rx_header { +#define RXHDR_NEXT_ADDR_MASK 0xffffffu +#define RXHDR_NEXT_VALID (1u << 31) + __le32 next; +#define RXHDR_NEXT_RECV_FLAG 0x1 + __le32 recv; +#define RXHDR_LEN_MASK 0xfffu + __le16 len; + __le16 port; + __le32 reserved; + u8 timestamp[8]; +} __packed; + +#define PKT_PAYLOAD_SIZE 0x7e8 +struct rx_desc { + struct rx_header header; + u8 data[PKT_PAYLOAD_SIZE]; +} __packed; + +struct tx_header { + __le16 len; +#define TX_HDR_PORT_0 0x1 +#define TX_HDR_PORT_1 0x2 + u8 port; + u8 ts_enable; +#define TX_HDR_SENT 0x1 + __le32 sent; + u8 timestamp[8]; +} __packed; + +struct tx_desc { + struct tx_header header; + u8 data[PKT_PAYLOAD_SIZE]; +} __packed; + +#define FIFO_SIZE 64 + +static long polling_frequency = TIMER_INTERVAL_NSEC; + +struct bhf_dma { + u8 *buf; + size_t len; + dma_addr_t buf_phys; + + u8 *alloc; + size_t alloc_len; + dma_addr_t alloc_phys; +}; + +struct ec_bhf_priv { + struct net_device *net_dev; + + struct pci_dev *dev; + + void * __iomem io; + void * __iomem dma_io; + + struct hrtimer hrtimer; + + int tx_dma_chan; + int rx_dma_chan; + void * __iomem ec_io; + void * __iomem fifo_io; + void * __iomem mii_io; + void * __iomem mac_io; + + struct bhf_dma rx_buf; + struct rx_desc *rx_descs; + int rx_dnext; + int rx_dcount; + + struct bhf_dma tx_buf; + struct tx_desc *tx_descs; + int tx_dcount; + int tx_dnext; + + u64 stat_rx_bytes; + u64 stat_tx_bytes; +}; + +#define PRIV_TO_DEV(priv) (&(priv)->dev->dev) + +#define ETHERCAT_MASTER_ID 0x14 + +static void ec_bhf_print_status(struct ec_bhf_priv *priv) +{ + struct device *dev = PRIV_TO_DEV(priv); + + dev_dbg(dev, "Frame error counter: %d\n", + ioread8(priv->mac_io + MAC_FRAME_ERR_CNT)); + dev_dbg(dev, "RX error counter: %d\n", + ioread8(priv->mac_io + MAC_RX_ERR_CNT)); + dev_dbg(dev, "CRC error counter: %d\n", + ioread8(priv->mac_io + MAC_CRC_ERR_CNT)); + dev_dbg(dev, "TX frame counter: %d\n", + ioread32(priv->mac_io + MAC_TX_FRAME_CNT)); + dev_dbg(dev, "RX frame counter: %d\n", + ioread32(priv->mac_io + MAC_RX_FRAME_CNT)); + dev_dbg(dev, "TX fifo level: %d\n", + ioread8(priv->mac_io + MAC_TX_FIFO_LVL)); + dev_dbg(dev, "Dropped frames: %d\n", + ioread8(priv->mac_io + MAC_DROPPED_FRMS)); + dev_dbg(dev, "Connected with CCAT slot: %d\n", + ioread8(priv->mac_io + MAC_CONNECTED_CCAT_FLAG)); + dev_dbg(dev, "Link status: %d\n", + ioread8(priv->mii_io + MII_LINK_STATUS)); +} + +static void ec_bhf_reset(struct ec_bhf_priv *priv) +{ + iowrite8(0, priv->mac_io + MAC_FRAME_ERR_CNT); + iowrite8(0, priv->mac_io + MAC_RX_ERR_CNT); + iowrite8(0, priv->mac_io + MAC_CRC_ERR_CNT); + iowrite8(0, priv->mac_io + MAC_LNK_LST_ERR_CNT); + iowrite32(0, priv->mac_io + MAC_TX_FRAME_CNT); + iowrite32(0, priv->mac_io + MAC_RX_FRAME_CNT); + iowrite8(0, priv->mac_io + MAC_DROPPED_FRMS); + + iowrite8(0, priv->fifo_io + FIFO_TX_RESET); + iowrite8(0, priv->fifo_io + FIFO_RX_RESET); + + iowrite8(0, priv->mac_io + MAC_TX_FIFO_LVL); +} + +static void ec_bhf_send_packet(struct ec_bhf_priv *priv, struct tx_desc *desc) +{ + u32 len = le16_to_cpu(desc->header.len) + sizeof(desc->header); + u32 addr = (u8 *)desc - priv->tx_buf.buf; + + iowrite32((ALIGN(len, 8) << 24) | addr, priv->fifo_io + FIFO_TX_REG); + + dev_dbg(PRIV_TO_DEV(priv), "Done sending packet\n"); +} + +static int ec_bhf_desc_sent(struct tx_desc *desc) +{ + return le32_to_cpu(desc->header.sent) & TX_HDR_SENT; +} + +static void ec_bhf_process_tx(struct ec_bhf_priv *priv) +{ + if (unlikely(netif_queue_stopped(priv->net_dev))) { + /* Make sure that we perceive changes to tx_dnext. */ + smp_rmb(); + + if (ec_bhf_desc_sent(&priv->tx_descs[priv->tx_dnext])) + netif_wake_queue(priv->net_dev); + } +} + +static int ec_bhf_pkt_received(struct rx_desc *desc) +{ + return le32_to_cpu(desc->header.recv) & RXHDR_NEXT_RECV_FLAG; +} + +static void ec_bhf_add_rx_desc(struct ec_bhf_priv *priv, struct rx_desc *desc) +{ + iowrite32(FIFO_RX_ADDR_VALID | ((u8 *)(desc) - priv->rx_buf.buf), + priv->fifo_io + FIFO_RX_REG); +} + +static void ec_bhf_process_rx(struct ec_bhf_priv *priv) +{ + struct rx_desc *desc = &priv->rx_descs[priv->rx_dnext]; + struct device *dev = PRIV_TO_DEV(priv); + + while (ec_bhf_pkt_received(desc)) { + int pkt_size = (le16_to_cpu(desc->header.len) & + RXHDR_LEN_MASK) - sizeof(struct rx_header) - 4; + u8 *data = desc->data; + struct sk_buff *skb; + + skb = netdev_alloc_skb_ip_align(priv->net_dev, pkt_size); + dev_dbg(dev, "Received packet, size: %d\n", pkt_size); + + if (skb) { + memcpy(skb_put(skb, pkt_size), data, pkt_size); + skb->protocol = eth_type_trans(skb, priv->net_dev); + dev_dbg(dev, "Protocol type: %x\n", skb->protocol); + + priv->stat_rx_bytes += pkt_size; + + netif_rx(skb); + } else { + dev_err_ratelimited(dev, + "Couldn't allocate a skb_buff for a packet of size %u\n", + pkt_size); + } + + desc->header.recv = 0; + + ec_bhf_add_rx_desc(priv, desc); + + priv->rx_dnext = (priv->rx_dnext + 1) % priv->rx_dcount; + desc = &priv->rx_descs[priv->rx_dnext]; + } + +} + +static enum hrtimer_restart ec_bhf_timer_fun(struct hrtimer *timer) +{ + struct ec_bhf_priv *priv = container_of(timer, struct ec_bhf_priv, + hrtimer); + ec_bhf_process_rx(priv); + ec_bhf_process_tx(priv); + + if (!netif_running(priv->net_dev)) + return HRTIMER_NORESTART; + + hrtimer_forward_now(timer, ktime_set(0, polling_frequency)); + return HRTIMER_RESTART; +} + +static int ec_bhf_setup_offsets(struct ec_bhf_priv *priv) +{ + struct device *dev = PRIV_TO_DEV(priv); + unsigned block_count, i; + void * __iomem ec_info; + + dev_dbg(dev, "Info block:\n"); + dev_dbg(dev, "Type of function: %x\n", (unsigned)ioread16(priv->io)); + dev_dbg(dev, "Revision of function: %x\n", + (unsigned)ioread16(priv->io + INFO_BLOCK_REV)); + + block_count = ioread8(priv->io + INFO_BLOCK_BLK_CNT); + dev_dbg(dev, "Number of function blocks: %x\n", block_count); + + for (i = 0; i < block_count; i++) { + u16 type = ioread16(priv->io + i * INFO_BLOCK_SIZE + + INFO_BLOCK_TYPE); + if (type == ETHERCAT_MASTER_ID) + break; + } + if (i == block_count) { + dev_err(dev, "EtherCAT master with DMA block not found\n"); + return -ENODEV; + } + dev_dbg(dev, "EtherCAT master with DMA block found at pos: %d\n", i); + + ec_info = priv->io + i * INFO_BLOCK_SIZE; + dev_dbg(dev, "EtherCAT master revision: %d\n", + ioread16(ec_info + INFO_BLOCK_REV)); + + priv->tx_dma_chan = ioread8(ec_info + INFO_BLOCK_TX_CHAN); + dev_dbg(dev, "EtherCAT master tx dma channel: %d\n", + priv->tx_dma_chan); + + priv->rx_dma_chan = ioread8(ec_info + INFO_BLOCK_RX_CHAN); + dev_dbg(dev, "EtherCAT master rx dma channel: %d\n", + priv->rx_dma_chan); + + priv->ec_io = priv->io + ioread32(ec_info + INFO_BLOCK_OFFSET); + priv->mii_io = priv->ec_io + ioread32(priv->ec_io + EC_MII_OFFSET); + priv->fifo_io = priv->ec_io + ioread32(priv->ec_io + EC_FIFO_OFFSET); + priv->mac_io = priv->ec_io + ioread32(priv->ec_io + EC_MAC_OFFSET); + + dev_dbg(dev, + "EtherCAT block addres: %p, fifo address: %p, mii address: %p, mac address: %p\n", + priv->ec_io, priv->fifo_io, priv->mii_io, priv->mac_io); + + return 0; +} + +static netdev_tx_t ec_bhf_start_xmit(struct sk_buff *skb, + struct net_device *net_dev) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + struct tx_desc *desc; + unsigned len; + + dev_dbg(PRIV_TO_DEV(priv), "Starting xmit\n"); + + desc = &priv->tx_descs[priv->tx_dnext]; + + skb_copy_and_csum_dev(skb, desc->data); + len = skb->len; + + memset(&desc->header, 0, sizeof(desc->header)); + desc->header.len = cpu_to_le16(len); + desc->header.port = TX_HDR_PORT_0; + + ec_bhf_send_packet(priv, desc); + + priv->tx_dnext = (priv->tx_dnext + 1) % priv->tx_dcount; + + if (!ec_bhf_desc_sent(&priv->tx_descs[priv->tx_dnext])) { + /* Make sure that update updates to tx_dnext are perceived + * by timer routine. + */ + smp_wmb(); + + netif_stop_queue(net_dev); + + dev_dbg(PRIV_TO_DEV(priv), "Stopping netif queue\n"); + ec_bhf_print_status(priv); + } + + priv->stat_tx_bytes += len; + + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +static int ec_bhf_alloc_dma_mem(struct ec_bhf_priv *priv, + struct bhf_dma *buf, + int channel, + int size) +{ + int offset = channel * DMA_CHAN_SIZE + DMA_CHAN_OFFSET; + struct device *dev = PRIV_TO_DEV(priv); + u32 mask; + + iowrite32(0xffffffff, priv->dma_io + offset); + + mask = ioread32(priv->dma_io + offset); + mask &= DMA_WINDOW_SIZE_MASK; + dev_dbg(dev, "Read mask %x for channel %d\n", mask, channel); + + /* We want to allocate a chunk of memory that is: + * - aligned to the mask we just read + * - is of size 2^mask bytes (at most) + * In order to ensure that we will allocate buffer of + * 2 * 2^mask bytes. + */ + buf->len = min_t(int, ~mask + 1, size); + buf->alloc_len = 2 * buf->len; + + dev_dbg(dev, "Allocating %d bytes for channel %d", + (int)buf->alloc_len, channel); + buf->alloc = dma_alloc_coherent(dev, buf->alloc_len, &buf->alloc_phys, + GFP_KERNEL); + if (buf->alloc == NULL) { + dev_info(dev, "Failed to allocate buffer\n"); + return -ENOMEM; + } + + buf->buf_phys = (buf->alloc_phys + buf->len) & mask; + buf->buf = buf->alloc + (buf->buf_phys - buf->alloc_phys); + + iowrite32(0, priv->dma_io + offset + 4); + iowrite32(buf->buf_phys, priv->dma_io + offset); + dev_dbg(dev, "Buffer: %x and read from dev: %x", + (unsigned)buf->buf_phys, ioread32(priv->dma_io + offset)); + + return 0; +} + +static void ec_bhf_setup_tx_descs(struct ec_bhf_priv *priv) +{ + int i = 0; + + priv->tx_dcount = priv->tx_buf.len / sizeof(struct tx_desc); + priv->tx_descs = (struct tx_desc *) priv->tx_buf.buf; + priv->tx_dnext = 0; + + for (i = 0; i < priv->tx_dcount; i++) + priv->tx_descs[i].header.sent = cpu_to_le32(TX_HDR_SENT); +} + +static void ec_bhf_setup_rx_descs(struct ec_bhf_priv *priv) +{ + int i; + + priv->rx_dcount = priv->rx_buf.len / sizeof(struct rx_desc); + priv->rx_descs = (struct rx_desc *) priv->rx_buf.buf; + priv->rx_dnext = 0; + + for (i = 0; i < priv->rx_dcount; i++) { + struct rx_desc *desc = &priv->rx_descs[i]; + u32 next; + + if (i != priv->rx_dcount - 1) + next = (u8 *)(desc + 1) - priv->rx_buf.buf; + else + next = 0; + next |= RXHDR_NEXT_VALID; + desc->header.next = cpu_to_le32(next); + desc->header.recv = 0; + ec_bhf_add_rx_desc(priv, desc); + } +} + +static int ec_bhf_open(struct net_device *net_dev) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + struct device *dev = PRIV_TO_DEV(priv); + int err = 0; + + dev_info(dev, "Opening device\n"); + + ec_bhf_reset(priv); + + err = ec_bhf_alloc_dma_mem(priv, &priv->rx_buf, priv->rx_dma_chan, + FIFO_SIZE * sizeof(struct rx_desc)); + if (err) { + dev_err(dev, "Failed to allocate rx buffer\n"); + goto out; + } + ec_bhf_setup_rx_descs(priv); + + dev_info(dev, "RX buffer allocated, address: %x\n", + (unsigned)priv->rx_buf.buf_phys); + + err = ec_bhf_alloc_dma_mem(priv, &priv->tx_buf, priv->tx_dma_chan, + FIFO_SIZE * sizeof(struct tx_desc)); + if (err) { + dev_err(dev, "Failed to allocate tx buffer\n"); + goto error_rx_free; + } + dev_dbg(dev, "TX buffer allocated, addres: %x\n", + (unsigned)priv->tx_buf.buf_phys); + + iowrite8(0, priv->mii_io + MII_MAC_FILT_FLAG); + + ec_bhf_setup_tx_descs(priv); + + netif_start_queue(net_dev); + + hrtimer_init(&priv->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + priv->hrtimer.function = ec_bhf_timer_fun; + hrtimer_start(&priv->hrtimer, ktime_set(0, polling_frequency), + HRTIMER_MODE_REL); + + dev_info(PRIV_TO_DEV(priv), "Device open\n"); + + ec_bhf_print_status(priv); + + return 0; + +error_rx_free: + dma_free_coherent(dev, priv->rx_buf.alloc_len, priv->rx_buf.alloc, + priv->rx_buf.alloc_len); +out: + return err; +} + +static int ec_bhf_stop(struct net_device *net_dev) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + struct device *dev = PRIV_TO_DEV(priv); + + hrtimer_cancel(&priv->hrtimer); + + ec_bhf_reset(priv); + + netif_tx_disable(net_dev); + + dma_free_coherent(dev, priv->tx_buf.alloc_len, + priv->tx_buf.alloc, priv->tx_buf.alloc_phys); + dma_free_coherent(dev, priv->rx_buf.alloc_len, + priv->rx_buf.alloc, priv->rx_buf.alloc_phys); + + return 0; +} + +static struct rtnl_link_stats64 * +ec_bhf_get_stats(struct net_device *net_dev, + struct rtnl_link_stats64 *stats) +{ + struct ec_bhf_priv *priv = netdev_priv(net_dev); + + stats->rx_errors = ioread8(priv->mac_io + MAC_RX_ERR_CNT) + + ioread8(priv->mac_io + MAC_CRC_ERR_CNT) + + ioread8(priv->mac_io + MAC_FRAME_ERR_CNT); + stats->rx_packets = ioread32(priv->mac_io + MAC_RX_FRAME_CNT); + stats->tx_packets = ioread32(priv->mac_io + MAC_TX_FRAME_CNT); + stats->rx_dropped = ioread8(priv->mac_io + MAC_DROPPED_FRMS); + + stats->tx_bytes = priv->stat_tx_bytes; + stats->rx_bytes = priv->stat_rx_bytes; + + return stats; +} + +static const struct net_device_ops ec_bhf_netdev_ops = { + .ndo_start_xmit = ec_bhf_start_xmit, + .ndo_open = ec_bhf_open, + .ndo_stop = ec_bhf_stop, + .ndo_get_stats64 = ec_bhf_get_stats, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr +}; + +static int ec_bhf_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct net_device *net_dev; + struct ec_bhf_priv *priv; + void * __iomem dma_io; + void * __iomem io; + int err = 0; + + err = pci_enable_device(dev); + if (err) + return err; + + pci_set_master(dev); + + err = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&dev->dev, + "Required dma mask not supported, failed to initialize device\n"); + err = -EIO; + goto err_disable_dev; + } + + err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(32)); + if (err) { + dev_err(&dev->dev, + "Required dma mask not supported, failed to initialize device\n"); + goto err_disable_dev; + } + + err = pci_request_regions(dev, "ec_bhf"); + if (err) { + dev_err(&dev->dev, "Failed to request pci memory regions\n"); + goto err_disable_dev; + } + + io = pci_iomap(dev, 0, 0); + if (!io) { + dev_err(&dev->dev, "Failed to map pci card memory bar 0"); + err = -EIO; + goto err_release_regions; + } + + dma_io = pci_iomap(dev, 2, 0); + if (!dma_io) { + dev_err(&dev->dev, "Failed to map pci card memory bar 2"); + err = -EIO; + goto err_unmap; + } + + net_dev = alloc_etherdev(sizeof(struct ec_bhf_priv)); + if (net_dev == 0) { + err = -ENOMEM; + goto err_unmap_dma_io; + } + + pci_set_drvdata(dev, net_dev); + SET_NETDEV_DEV(net_dev, &dev->dev); + + net_dev->features = 0; + net_dev->flags |= IFF_NOARP; + + net_dev->netdev_ops = &ec_bhf_netdev_ops; + + priv = netdev_priv(net_dev); + priv->net_dev = net_dev; + priv->io = io; + priv->dma_io = dma_io; + priv->dev = dev; + + err = ec_bhf_setup_offsets(priv); + if (err < 0) + goto err_free_net_dev; + + memcpy_fromio(net_dev->dev_addr, priv->mii_io + MII_MAC_ADDR, 6); + + dev_dbg(&dev->dev, "CX5020 Ethercat master address: %pM\n", + net_dev->dev_addr); + + err = register_netdev(net_dev); + if (err < 0) + goto err_free_net_dev; + + return 0; + +err_free_net_dev: + free_netdev(net_dev); +err_unmap_dma_io: + pci_iounmap(dev, dma_io); +err_unmap: + pci_iounmap(dev, io); +err_release_regions: + pci_release_regions(dev); +err_disable_dev: + pci_clear_master(dev); + pci_disable_device(dev); + + return err; +} + +static void ec_bhf_remove(struct pci_dev *dev) +{ + struct net_device *net_dev = pci_get_drvdata(dev); + struct ec_bhf_priv *priv = netdev_priv(net_dev); + + unregister_netdev(net_dev); + free_netdev(net_dev); + + pci_iounmap(dev, priv->dma_io); + pci_iounmap(dev, priv->io); + pci_release_regions(dev); + pci_clear_master(dev); + pci_disable_device(dev); +} + +static struct pci_driver pci_driver = { + .name = "ec_bhf", + .id_table = ids, + .probe = ec_bhf_probe, + .remove = ec_bhf_remove, +}; + +static int __init ec_bhf_init(void) +{ + return pci_register_driver(&pci_driver); +} + +static void __exit ec_bhf_exit(void) +{ + pci_unregister_driver(&pci_driver); +} + +module_init(ec_bhf_init); +module_exit(ec_bhf_exit); + +module_param(polling_frequency, long, S_IRUGO); +MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns"); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dariusz Marcinkiewicz "); From 4a817aa78f573c6964f16d9aea3d0d10a226ade4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 May 2014 09:56:53 +0200 Subject: [PATCH 035/116] mac80211: allow VHT with peers not capable of 40MHz There are two (related) issues with this. One case, reported by Michal, is related to hostap: it unsets the 20/40 capability bit for stations that associate when it's in 20 MHz mode. The other case, reported by Eyal, is that some APs like Netgear R6300v2 and probably others based on the BCM4360 chipset can be configured for doing VHT at 20Mhz. In this case the beacon has a VHT IE but the HT cap indicates transmitter only support 20Mhz. In both of these cases, we currently avoid VHT and use only HT this means we can't use the highest rates (MCS8), so fixing this leads to throughput improvements. Reported-by: Michal Kazior Reported-by: Eyal Shapira Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index e9e36a256165..9265adfdabfc 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -129,9 +129,12 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; - /* A VHT STA must support 40 MHz */ - if (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) - return; + /* + * A VHT STA must support 40 MHz, but if we verify that here + * then we break a few things - some APs (e.g. Netgear R6300v2 + * and others based on the BCM4360 chipset) will unset this + * capability bit when operating in 20 MHz. + */ vht_cap->vht_supported = true; From f9ac71bfcc5c937ff02765dc316cf5bc01d21d97 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Wed, 26 Feb 2014 14:46:35 +0200 Subject: [PATCH 036/116] mac80211: fix vif name tracing If sdata doesn't have a valid dev (e.g. in case of monitor vif), the vif_name field was initialized with (a length of) some short string, but later was set to a different, potentially larger one. This resulted in out-of-bounds write, which usually appeared as garbage in the trace log. Simply trace sdata->name, as it should always have the correct name for both cases. Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index a0b0aea76525..cec5b60487a4 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -21,10 +21,10 @@ #define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ __field(bool, p2p) \ - __string(vif_name, sdata->dev ? sdata->dev->name : "") + __string(vif_name, sdata->name) #define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ __entry->p2p = sdata->vif.p2p; \ - __assign_str(vif_name, sdata->dev ? sdata->dev->name : sdata->name) + __assign_str(vif_name, sdata->name) #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" From 6b5eeb7f874b689403e52a646e485d0191ab9507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 9 May 2014 14:45:00 +0200 Subject: [PATCH 037/116] net: cdc_mbim: handle unaccelerated VLAN tagged frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver maps 802.1q VLANs to MBIM sessions. The mapping is based on a bogus assumption that all tagged frames will use the acceleration API because we enable NETIF_F_HW_VLAN_CTAG_TX. This fails for e.g. frames tagged in userspace using packet sockets. Such frames will erroneously be considered as untagged and silently dropped based on not being IP. Fix by falling back to looking into the ethernet header for a tag if no accelerated tag was found. Fixes: a82c7ce5bc5b ("net: cdc_ncm: map MBIM IPS SessionID to VLAN ID") Cc: Greg Suarez Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 39 +++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 13f7705fd679..2e025ddcef21 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -120,6 +120,16 @@ static void cdc_mbim_unbind(struct usbnet *dev, struct usb_interface *intf) cdc_ncm_unbind(dev, intf); } +/* verify that the ethernet protocol is IPv4 or IPv6 */ +static bool is_ip_proto(__be16 proto) +{ + switch (proto) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + return true; + } + return false; +} static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { @@ -128,6 +138,7 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb struct cdc_ncm_ctx *ctx = info->ctx; __le32 sign = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); u16 tci = 0; + bool is_ip; u8 *c; if (!ctx) @@ -137,25 +148,32 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb if (skb->len <= ETH_HLEN) goto error; + /* Some applications using e.g. packet sockets will + * bypass the VLAN acceleration and create tagged + * ethernet frames directly. We primarily look for + * the accelerated out-of-band tag, but fall back if + * required + */ + skb_reset_mac_header(skb); + if (vlan_get_tag(skb, &tci) < 0 && skb->len > VLAN_ETH_HLEN && + __vlan_get_tag(skb, &tci) == 0) { + is_ip = is_ip_proto(vlan_eth_hdr(skb)->h_vlan_encapsulated_proto); + skb_pull(skb, VLAN_ETH_HLEN); + } else { + is_ip = is_ip_proto(eth_hdr(skb)->h_proto); + skb_pull(skb, ETH_HLEN); + } + /* mapping VLANs to MBIM sessions: * no tag => IPS session <0> * 1 - 255 => IPS session * 256 - 511 => DSS session * 512 - 4095 => unsupported, drop */ - vlan_get_tag(skb, &tci); - switch (tci & 0x0f00) { case 0x0000: /* VLAN ID 0 - 255 */ - /* verify that datagram is IPv4 or IPv6 */ - skb_reset_mac_header(skb); - switch (eth_hdr(skb)->h_proto) { - case htons(ETH_P_IP): - case htons(ETH_P_IPV6): - break; - default: + if (!is_ip) goto error; - } c = (u8 *)&sign; c[3] = tci; break; @@ -169,7 +187,6 @@ static struct sk_buff *cdc_mbim_tx_fixup(struct usbnet *dev, struct sk_buff *skb "unsupported tci=0x%04x\n", tci); goto error; } - skb_pull(skb, ETH_HLEN); } spin_lock_bh(&ctx->mtx); From 0953f78971040fff09064bb564d9ac0cd1fb4e69 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:20 +0200 Subject: [PATCH 038/116] net: mdio-gpio: fix device-tree binding documentation Fix aliases syntax in device-tree binding example to avoid copy-paste errors (the alias would be dropped silently). Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/mdio-gpio.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt index c79bab025369..8dbcf8295c6c 100644 --- a/Documentation/devicetree/bindings/net/mdio-gpio.txt +++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt @@ -14,7 +14,7 @@ node. Example: aliases { - mdio-gpio0 = <&mdio0>; + mdio-gpio0 = &mdio0; }; mdio0: mdio { From 7f52da56f76f61112a9c1db41975376764828e71 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:21 +0200 Subject: [PATCH 039/116] net: mdio-gpio: warn about missing bus alias id Use a sane default bus id (rather than -ENODEV) and print a warning when the bus alias id is missing. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 9c4defdec67b..5f1a2250018f 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -215,6 +215,10 @@ static int mdio_gpio_probe(struct platform_device *pdev) if (pdev->dev.of_node) { pdata = mdio_gpio_of_get_data(pdev); bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio"); + if (bus_id < 0) { + dev_warn(&pdev->dev, "failed to get alias id\n"); + bus_id = 0; + } } else { pdata = dev_get_platdata(&pdev->dev); bus_id = pdev->id; From 59993f48b38fd46863b23bb1bb1dc3291e7278fb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:22 +0200 Subject: [PATCH 040/116] Revert "net: eth: cpsw: Correctly attach to GPIO bitbang MDIO driver" This reverts commit f8d56d8f892be43a2404356073e16401eb5a42e6 ("net: eth: cpsw: Correctly attach to GPIO bitbang MDIO driver"). Fix potential null-pointer dereference at probe if the mdio-gpio device has not been successfully probed yet. The offending commit is plain wrong for a number of reasons. First of all it accesses internal driver data of an unrelated device. Neither does it check that the data is non-null (which it is in case the device has not been probed yet). Furthermore, the decision on whether to treat any driver data according to the mdio-gpio driver's internals is made based on the node name. But the name is not compared against "mdio" which is the normal name for the node, but rather against "gpio" which the node does not have to be named (and shouldn't be according to the binding documentation). [ If this hack is to be kept out-of-tree it should at least be matching against the compatible property. ] Cc: Stefan Roese Cc: stable # v3.14 Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 36aa109416c4..18d83d8d7f74 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1871,18 +1871,8 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); mdio = of_find_device_by_node(mdio_node); - - if (strncmp(mdio->name, "gpio", 4) == 0) { - /* GPIO bitbang MDIO driver attached */ - struct mii_bus *bus = dev_get_drvdata(&mdio->dev); - - snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, bus->id, phyid); - } else { - /* davinci MDIO driver attached */ - snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, mdio->name, phyid); - } + snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), + PHY_ID_FMT, mdio->name, phyid); mac_addr = of_get_mac_address(slave_node); if (mac_addr) From 6954cc1f238199e971ec905c5cc87120806ac981 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:23 +0200 Subject: [PATCH 041/116] net: cpsw: fix null dereference at probe Fix null-pointer dereference at probe when the mdio platform device is missing (e.g. when it has been disabled in DT). Cc: stable # v3.8 Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 18d83d8d7f74..db9360cd861c 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1871,6 +1871,10 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); mdio = of_find_device_by_node(mdio_node); + if (!mdio) { + pr_err("Missing mdio platform device\n"); + return -EINVAL; + } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); From 60e71ab56b5fbd839aaef4f4af7778d86e0206f0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 8 May 2014 10:09:24 +0200 Subject: [PATCH 042/116] net: cpsw: add missing of_node_put Add missing of_node_put to avoid kref leak. Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index db9360cd861c..c331b7ebc812 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1871,6 +1871,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); phyid = be32_to_cpup(parp+1); mdio = of_find_device_by_node(mdio_node); + of_node_put(mdio_node); if (!mdio) { pr_err("Missing mdio platform device\n"); return -EINVAL; From de682941eef3e5f6d1b653a6c214bc8a288f17c1 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 8 May 2014 12:34:31 +0300 Subject: [PATCH 043/116] bnx2x: Fix UNDI driver unload Commit 91ebb928b "bnx2x: Add support for Multi-Function UNDI" contains a bug which prevent the emptying of the device's Rx buffers before reset. As a result, on new boards it is likely HW will reach some fatal assertion once its interfaces load after UNDI was previously loaded. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index b260913db236..3b0d43154e67 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10051,8 +10051,8 @@ static void bnx2x_prev_unload_close_mac(struct bnx2x *bp, #define BCM_5710_UNDI_FW_MF_MAJOR (0x07) #define BCM_5710_UNDI_FW_MF_MINOR (0x08) #define BCM_5710_UNDI_FW_MF_VERS (0x05) -#define BNX2X_PREV_UNDI_MF_PORT(p) (0x1a150c + ((p) << 4)) -#define BNX2X_PREV_UNDI_MF_FUNC(f) (0x1a184c + ((f) << 4)) +#define BNX2X_PREV_UNDI_MF_PORT(p) (BAR_TSTRORM_INTMEM + 0x150c + ((p) << 4)) +#define BNX2X_PREV_UNDI_MF_FUNC(f) (BAR_TSTRORM_INTMEM + 0x184c + ((f) << 4)) static bool bnx2x_prev_unload_undi_fw_supports_mf(struct bnx2x *bp) { u8 major, minor, version; @@ -10352,6 +10352,7 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) /* Reset should be performed after BRB is emptied */ if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_BRB1) { u32 timer_count = 1000; + bool need_write = true; /* Close the MAC Rx to prevent BRB from filling up */ bnx2x_prev_unload_close_mac(bp, &mac_vals); @@ -10398,7 +10399,10 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp) * cleaning methods - might be redundant but harmless. */ if (bnx2x_prev_unload_undi_fw_supports_mf(bp)) { - bnx2x_prev_unload_undi_mf(bp); + if (need_write) { + bnx2x_prev_unload_undi_mf(bp); + need_write = false; + } } else if (prev_undi) { /* If UNDI resides in memory, * manually increment it From a9de0500083c18589ba2ea4543135c1bea8419ec Mon Sep 17 00:00:00 2001 From: Emil Goode Date: Fri, 9 May 2014 01:07:17 +0200 Subject: [PATCH 044/116] net: cassini: use nested lock annotation In the cas_lock_tx function we acquire multiple locks in a loop and need to use nested lock annotation to prevent lockdep warnings. Reported-by: Meelis Roos Signed-off-by: Emil Goode Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/cassini.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index df8d383acf48..b9ac20f42651 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -246,7 +246,7 @@ static inline void cas_lock_tx(struct cas *cp) int i; for (i = 0; i < N_TX_RINGS; i++) - spin_lock(&cp->tx_lock[i]); + spin_lock_nested(&cp->tx_lock[i], i); } static inline void cas_lock_all(struct cas *cp) From c1e517fbbcdb13f50662af4edc11c3251fe44f86 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:21 +0100 Subject: [PATCH 045/116] batman-adv: fix neigh_ifinfo imbalance The neigh_ifinfo object must be freed if it has been used in batadv_iv_ogm_process_per_outif(). This is a regression introduced by 89652331c00f43574515059ecbf262d26d885717 ("batman-adv: split tq information in neigh_node struct") Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/bat_iv_ogm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index b3bd4ec3fd94..f04224c32005 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -1545,6 +1545,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, if ((orig_neigh_node) && (!is_single_hop_neigh)) batadv_orig_node_free_ref(orig_neigh_node); out: + if (router_ifinfo) + batadv_neigh_ifinfo_free_ref(router_ifinfo); if (router) batadv_neigh_node_free_ref(router); if (router_router) From 000c8dff97311357535d64539e58990526e4de70 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:22 +0100 Subject: [PATCH 046/116] batman-adv: fix neigh reference imbalance When an interface is removed from batman-adv, the orig_ifinfo of a orig_node may be removed without releasing the router first. This will prevent the reference for the neighbor pointed at by the orig_ifinfo->router to be released, and this leak may result in reference leaks for the interface used by this neighbor. Fix that. This is a regression introduced by 7351a4822d42827ba0110677c0cbad88a3d52585 ("batman-adv: split out router from orig_node"). Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index ffd9dfbd9b0e..a43da6918512 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -501,12 +501,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node, static void batadv_orig_ifinfo_free_rcu(struct rcu_head *rcu) { struct batadv_orig_ifinfo *orig_ifinfo; + struct batadv_neigh_node *router; orig_ifinfo = container_of(rcu, struct batadv_orig_ifinfo, rcu); if (orig_ifinfo->if_outgoing != BATADV_IF_DEFAULT) batadv_hardif_free_ref_now(orig_ifinfo->if_outgoing); + /* this is the last reference to this object */ + router = rcu_dereference_protected(orig_ifinfo->router, true); + if (router) + batadv_neigh_node_free_ref_now(router); kfree(orig_ifinfo); } From 7b955a9fc164487d7c51acb9787f6d1b01b35ef6 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:23 +0100 Subject: [PATCH 047/116] batman-adv: always run purge_orig_neighbors The current code will not execute batadv_purge_orig_neighbors() when an orig_ifinfo has already been purged. However we need to run it in any case. Fix that. This is a regression introduced by 7351a4822d42827ba0110677c0cbad88a3d52585 ("batman-adv: split out router from orig_node") Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a43da6918512..8104c3cf7741 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -862,7 +862,7 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, { struct batadv_neigh_node *best_neigh_node; struct batadv_hard_iface *hard_iface; - bool changed; + bool changed_ifinfo, changed_neigh; if (batadv_has_timed_out(orig_node->last_seen, 2 * BATADV_PURGE_TIMEOUT)) { @@ -872,10 +872,10 @@ static bool batadv_purge_orig_node(struct batadv_priv *bat_priv, jiffies_to_msecs(orig_node->last_seen)); return true; } - changed = batadv_purge_orig_ifinfo(bat_priv, orig_node); - changed = changed || batadv_purge_orig_neighbors(bat_priv, orig_node); + changed_ifinfo = batadv_purge_orig_ifinfo(bat_priv, orig_node); + changed_neigh = batadv_purge_orig_neighbors(bat_priv, orig_node); - if (!changed) + if (!changed_ifinfo && !changed_neigh) return false; /* first for NULL ... */ From d088be8042841f024156ee68fecfef7503d660cb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 13:39:21 +0200 Subject: [PATCH 048/116] netfilter: nf_tables: reset rule number counter after jump and goto Otherwise we start incrementing the rule number counter from the previous chain iteration. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 804105391b9a..4368c5850548 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -123,7 +123,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - int rulenum = 0; + int rulenum; /* * Cache cursor to avoid problems in case that the cursor is updated * while traversing the ruleset. @@ -131,6 +131,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor); do_chain: + rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; From 709de13f0c532fe9c468c094aff069a725ed57fe Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 26 Mar 2014 15:46:24 +0100 Subject: [PATCH 049/116] batman-adv: fix removing neigh_ifinfo When an interface is removed separately, all neighbors need to be checked if they have a neigh_ifinfo structure for that particular interface. If that is the case, remove that ifinfo so any references to a hard interface can be freed. This is a regression introduced by 89652331c00f43574515059ecbf262d26d885717 ("batman-adv: split tq information in neigh_node struct") Reported-by: Antonio Quartulli Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8104c3cf7741..1785da37b82c 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -706,6 +706,47 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, return NULL; } +/** + * batadv_purge_neigh_ifinfo - purge obsolete ifinfo entries from neighbor + * @bat_priv: the bat priv with all the soft interface information + * @neigh: orig node which is to be checked + */ +static void +batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, + struct batadv_neigh_node *neigh) +{ + struct batadv_neigh_ifinfo *neigh_ifinfo; + struct batadv_hard_iface *if_outgoing; + struct hlist_node *node_tmp; + + spin_lock_bh(&neigh->ifinfo_lock); + + /* for all ifinfo objects for this neighinator */ + hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, + &neigh->ifinfo_list, list) { + if_outgoing = neigh_ifinfo->if_outgoing; + + /* always keep the default interface */ + if (if_outgoing == BATADV_IF_DEFAULT) + continue; + + /* don't purge if the interface is not (going) down */ + if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && + (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && + (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + continue; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "neighbor/ifinfo purge: neighbor %pM, iface: %s\n", + neigh->addr, if_outgoing->net_dev->name); + + hlist_del_rcu(&neigh_ifinfo->list); + batadv_neigh_ifinfo_free_ref(neigh_ifinfo); + } + + spin_unlock_bh(&neigh->ifinfo_lock); +} + /** * batadv_purge_orig_ifinfo - purge obsolete ifinfo entries from originator * @bat_priv: the bat priv with all the soft interface information @@ -805,6 +846,11 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, hlist_del_rcu(&neigh_node->list); batadv_neigh_node_free_ref(neigh_node); + } else { + /* only necessary if not the whole neighbor is to be + * deleted, but some interface has been removed. + */ + batadv_purge_neigh_ifinfo(bat_priv, neigh_node); } } From 1c4abec0baf25ffb92a28cc99d4231feeaa4d3f3 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 8 May 2014 09:48:10 +0300 Subject: [PATCH 050/116] iwlwifi: mvm: fix setting channel in monitor mode There was a deadlock in monitor mode when we were setting the channel if the channel was not 1. ====================================================== [ INFO: possible circular locking dependency detected ] 3.14.3 #4 Not tainted ------------------------------------------------------- iw/3323 is trying to acquire lock: (&local->chanctx_mtx){+.+.+.}, at: [] ieee80211_vif_release_channel+0x42/0xb0 [mac80211] but task is already holding lock: (&local->iflist_mtx){+.+...}, at: [] ieee80211_set_monitor_channel+0x5a/0x1b0 [mac80211] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&local->iflist_mtx){+.+...}: [] __lock_acquire+0xb3b/0x13b0 [] lock_acquire+0xb0/0x1f0 [] mutex_lock_nested+0x78/0x4f0 [] ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] iwl_mvm_recalc_multicast+0x49/0xa0 [iwlmvm] [] iwl_mvm_configure_filter+0x4e/0x70 [iwlmvm] [] ieee80211_configure_filter+0x153/0x5f0 [mac80211] [] ieee80211_reconfig_filter+0x15/0x20 [mac80211] [snip] -> #1 (&mvm->mutex){+.+.+.}: [] __lock_acquire+0xb3b/0x13b0 [] lock_acquire+0xb0/0x1f0 [] mutex_lock_nested+0x78/0x4f0 [] iwl_mvm_add_chanctx+0x56/0xe0 [iwlmvm] [] ieee80211_new_chanctx+0x13e/0x410 [mac80211] [] ieee80211_vif_use_channel+0x1c3/0x5a0 [mac80211] [] ieee80211_add_virtual_monitor+0x1ab/0x6b0 [mac80211] [] ieee80211_do_open+0xe6a/0x15a0 [mac80211] [] ieee80211_open+0x59/0x60 [mac80211] [snip] -> #0 (&local->chanctx_mtx){+.+.+.}: [] check_prevs_add+0x977/0x980 [] __lock_acquire+0xb3b/0x13b0 [] lock_acquire+0xb0/0x1f0 [] mutex_lock_nested+0x78/0x4f0 [] ieee80211_vif_release_channel+0x42/0xb0 [mac80211] [] ieee80211_set_monitor_channel+0x113/0x1b0 [mac80211] [] cfg80211_set_monitor_channel+0x77/0x2b0 [cfg80211] [] __nl80211_set_channel+0x122/0x140 [cfg80211] [] nl80211_set_wiphy+0x284/0xaf0 [cfg80211] [snip] other info that might help us debug this: Chain exists of: &local->chanctx_mtx --> &mvm->mutex --> &local->iflist_mtx Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&local->iflist_mtx); lock(&mvm->mutex); lock(&local->iflist_mtx); lock(&local->chanctx_mtx); *** DEADLOCK *** This deadlock actually occurs: INFO: task iw:3323 blocked for more than 120 seconds. Not tainted 3.14.3 #4 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. iw D ffff8800c8afcd80 4192 3323 3322 0x00000000 ffff880078fdb7e0 0000000000000046 ffff8800c8afcd80 ffff880078fdbfd8 00000000001d5540 00000000001d5540 ffff8801141b0000 ffff8800c8afcd80 ffff880078ff9e38 ffff880078ff9e38 ffff880078ff9e40 0000000000000246 Call Trace: [] schedule_preempt_disabled+0x31/0x80 [] mutex_lock_nested+0x19d/0x4f0 [] ? ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] ? ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] ? iwl_mvm_power_mac_update_mode+0xc0/0xc0 [iwlmvm] [] ieee80211_iterate_active_interfaces+0x2f/0x60 [mac80211] [] _iwl_mvm_power_update_binding+0x27/0x80 [iwlmvm] [] iwl_mvm_unassign_vif_chanctx+0x81/0xc0 [iwlmvm] [] __ieee80211_vif_release_channel+0xdf/0x470 [mac80211] [] ieee80211_vif_release_channel+0x4a/0xb0 [mac80211] [] ieee80211_set_monitor_channel+0x113/0x1b0 [mac80211] [] cfg80211_set_monitor_channel+0x77/0x2b0 [cfg80211] [] __nl80211_set_channel+0x122/0x140 [cfg80211] [] nl80211_set_wiphy+0x284/0xaf0 [cfg80211] This fixes https://bugzilla.kernel.org/show_bug.cgi?id=75541 Cc: [3.13+] Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 593f723a74c4..4b0b8b6571ee 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1007,7 +1007,7 @@ static void iwl_mvm_mc_iface_iterator(void *_data, u8 *mac, memcpy(cmd->bssid, vif->bss_conf.bssid, ETH_ALEN); len = roundup(sizeof(*cmd) + cmd->count * ETH_ALEN, 4); - ret = iwl_mvm_send_cmd_pdu(mvm, MCAST_FILTER_CMD, CMD_SYNC, len, cmd); + ret = iwl_mvm_send_cmd_pdu(mvm, MCAST_FILTER_CMD, CMD_ASYNC, len, cmd); if (ret) IWL_ERR(mvm, "mcast filter cmd error. ret=%d\n", ret); } @@ -1023,7 +1023,7 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm) if (WARN_ON_ONCE(!mvm->mcast_filter_cmd)) return; - ieee80211_iterate_active_interfaces( + ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_mc_iface_iterator, &iter_data); } From 1a466ae96e9f749d02a73315a3e66375e61a61dd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 8 May 2014 14:54:42 -0700 Subject: [PATCH 051/116] ptp: fix kconfig dependency warnings Fix kconfig warnings: PTP_1588_CLOCK selects NET_PTP_CLASSIFY, which depends on NET, so PTP_1588_CLOCK should also depend on NET. PTP_1588_CLOCK_PCH selects PTP_1588_CLOCK so the former should depend on NET. warning: (IXP4XX_ETH && PTP_1588_CLOCK) selects NET_PTP_CLASSIFY which has unmet direct dependencies (NET) warning: (SFC && TILE_NET && BFIN_MAC_USE_HWSTAMP && TIGON3 && FEC && E1000E && IGB && IXGBE && I40E && MLX4_EN && SXGBE_ETH && STMMAC_ETH && TI_CPTS && PTP_1588_CLOCK_GIANFAR && PTP_1588_CLOCK_IXP46X && DP83640_PHY && PTP_1588_CLOCK_PCH) selects PTP_1588_CLOCK which has unmet direct dependencies (NET) [This warning is caused by the new 'depends on NET' in PTP_1588_CLOCK.] Signed-off-by: Randy Dunlap Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 6963bdf54175..6aea373547f6 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -6,6 +6,7 @@ menu "PTP clock support" config PTP_1588_CLOCK tristate "PTP clock support" + depends on NET select PPS select NET_PTP_CLASSIFY help @@ -74,7 +75,7 @@ config DP83640_PHY config PTP_1588_CLOCK_PCH tristate "Intel PCH EG20T as PTP clock" depends on X86 || COMPILE_TEST - depends on HAS_IOMEM + depends on HAS_IOMEM && NET select PTP_1588_CLOCK help This driver adds support for using the PCH EG20T as a PTP From fd71143645a9958e437c8cf394be2c44a6acb23a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 9 May 2014 23:43:40 +0200 Subject: [PATCH 052/116] vti6: Don't unregister pernet ops twice on init errors If we fail to register one of the xfrm protocol handlers we will unregister the pernet ops twice on the error exit path. This will probably lead to a kernel panic as the double deregistration leads to a double kfree(). Fix this by removing one of the calls to do it only once. Fixes: fa9ad96d49 ("vti6: Update the ipv6 side to use its own...") Signed-off-by: Mathias Krause Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b7c0f827140b..a51100379f4a 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1097,7 +1097,6 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); if (err < 0) { - unregister_pernet_device(&vti6_net_ops); pr_err("%s: can't register vti6 protocol\n", __func__); goto out; @@ -1106,7 +1105,6 @@ static int __init vti6_tunnel_init(void) err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); if (err < 0) { xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti6_net_ops); pr_err("%s: can't register vti6 protocol\n", __func__); goto out; @@ -1116,7 +1114,6 @@ static int __init vti6_tunnel_init(void) if (err < 0) { xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); - unregister_pernet_device(&vti6_net_ops); pr_err("%s: can't register vti6 protocol\n", __func__); goto out; From 6d004d6cc73920299adf4cfe25010b348fc94395 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 12 May 2014 09:09:26 +0200 Subject: [PATCH 053/116] vti: Use the tunnel mark for lookup in the error handlers. We need to use the mark we get from the tunnels o_key to lookup the right vti state in the error handlers. This patch ensures that. Fixes: df3893c1 ("vti: Update the ipv4 side to use it's own receive hook.") Fixes: fa9ad96d ("vti6: Update the ipv6 side to use its own receive hook.") Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 5 ++++- net/ipv6/ip6_vti.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index cd62596e9a87..d3f64d7f355e 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -239,6 +239,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) static int vti4_err(struct sk_buff *skb, u32 info) { __be32 spi; + __u32 mark; struct xfrm_state *x; struct ip_tunnel *tunnel; struct ip_esp_hdr *esph; @@ -254,6 +255,8 @@ static int vti4_err(struct sk_buff *skb, u32 info) if (!tunnel) return -1; + mark = be32_to_cpu(tunnel->parms.o_key); + switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); @@ -281,7 +284,7 @@ static int vti4_err(struct sk_buff *skb, u32 info) return 0; } - x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET); if (!x) return 0; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index a51100379f4a..6cc9f9371cc5 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -511,6 +511,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { __be32 spi; + __u32 mark; struct xfrm_state *x; struct ip6_tnl *t; struct ip_esp_hdr *esph; @@ -524,6 +525,8 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!t) return -1; + mark = be32_to_cpu(t->parms.o_key); + switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data + offset); @@ -545,7 +548,7 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != NDISC_REDIRECT) return 0; - x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET6); if (!x) return 0; From 5467a512216753d54f757314c73dbc60f659f9e6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 18:33:11 +0200 Subject: [PATCH 054/116] netfilter: nf_tables: fix goto action This patch fixes a crash when trying to access the counters and the default chain policy from the non-base chain that we have reached via the goto chain. Fix this by falling back on the original base chain after returning from the custom chain. While fixing this, kill the inline function to account chain statistics to improve source code readability. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 4368c5850548..7d83a49fd8e5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -66,20 +66,6 @@ struct nft_jumpstack { int rulenum; }; -static inline void -nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt, - struct nft_jumpstack *jumpstack, unsigned int stackptr) -{ - struct nft_stats __percpu *stats; - const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this; - - rcu_read_lock_bh(); - stats = rcu_dereference(nft_base_chain(chain)->stats); - __this_cpu_inc(stats->pkts); - __this_cpu_add(stats->bytes, pkt->skb->len); - rcu_read_unlock_bh(); -} - enum nft_trace { NFT_TRACE_RULE, NFT_TRACE_RETURN, @@ -117,12 +103,13 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt, unsigned int nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) { - const struct nft_chain *chain = ops->priv; + const struct nft_chain *chain = ops->priv, *basechain = chain; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_data data[NFT_REG_MAX + 1]; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; + struct nft_stats __percpu *stats; int rulenum; /* * Cache cursor to avoid problems in case that the cursor is updated @@ -209,12 +196,17 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) rulenum = jumpstack[stackptr].rulenum; goto next_rule; } - nft_chain_stats(chain, pkt, jumpstack, stackptr); if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, ++rulenum, NFT_TRACE_POLICY); - return nft_base_chain(chain)->policy; + rcu_read_lock_bh(); + stats = rcu_dereference(nft_base_chain(basechain)->stats); + __this_cpu_inc(stats->pkts); + __this_cpu_add(stats->bytes, pkt->skb->len); + rcu_read_unlock_bh(); + + return nft_base_chain(basechain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain); From 7b9d5ef932297413adcbd8be98fe612b9527a312 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 18:42:57 +0200 Subject: [PATCH 055/116] netfilter: nf_tables: fix tracing of the goto action Add missing code to trace goto actions. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 7d83a49fd8e5..f55fb28264fa 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -171,8 +171,12 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) jumpstack[stackptr].rule = rule; jumpstack[stackptr].rulenum = rulenum; stackptr++; - /* fall through */ + chain = data[NFT_REG_VERDICT].chain; + goto do_chain; case NFT_GOTO: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); + chain = data[NFT_REG_VERDICT].chain; goto do_chain; case NFT_RETURN: From f7e7e39b21c285ad73a62fac0736191b8d830704 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 10 May 2014 18:46:02 +0200 Subject: [PATCH 056/116] netfilter: nf_tables: fix bogus rulenum after goto action After returning from the chain that we just went to with no matchings, we get a bogus rule number in the trace. To fix this, we would need to iterate over the list of remaining rules in the chain to update the rule number counter. Patrick suggested to set this to the maximum value since the default base chain policy is the very last action when the processing the base chain is over. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index f55fb28264fa..be08a96b4f45 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -202,7 +202,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) } if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, basechain, ++rulenum, NFT_TRACE_POLICY); + nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY); rcu_read_lock_bh(); stats = rcu_dereference(nft_base_chain(basechain)->stats); From 7e9bc10db275b22a9db0f976b33b5aeed288da73 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 11 May 2014 17:14:49 +0200 Subject: [PATCH 057/116] netfilter: nf_tables: fix missing return trace at the end of non-base chain Display "return" for implicit rule at the end of a non-base chain, instead of when popping chain from the stack. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index be08a96b4f45..421c36ac5145 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -182,18 +182,16 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) case NFT_RETURN: if (unlikely(pkt->skb->nf_trace)) nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN); - - /* fall through */ + break; case NFT_CONTINUE: + if (unlikely(pkt->skb->nf_trace && !(chain->flags & NFT_BASE_CHAIN))) + nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); break; default: WARN_ON(1); } if (stackptr > 0) { - if (unlikely(pkt->skb->nf_trace)) - nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN); - stackptr--; chain = jumpstack[stackptr].chain; rule = jumpstack[stackptr].rule; From 03e5da151b8e390f2e28c4edf8fbbb6ca6d7a7ed Mon Sep 17 00:00:00 2001 From: Daniel Kim Date: Fri, 9 May 2014 12:37:05 +0200 Subject: [PATCH 058/116] brcmfmac: Fix iovar 'bw_cap' set command failure Fix iovar 'bw_cap' set command failure introduced by commit ff3b0fba6f25555ef59c55d138a467d0f81d82d7 Author: Arend van Spriel Date: Sat Mar 15 12:00:57 2014 +0100 brcmfmac: fallback to mimo_bw_cap for older firmwares This resulted in disabling 20MHz operation in the firmware. Reviewed-by: Arend Van Spriel Signed-off-by: Daniel Kim Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville --- drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index afb3d15e38ff..be1985296bdc 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -4948,7 +4948,7 @@ static int brcmf_enable_bw40_2g(struct brcmf_if *ifp) if (!err) { /* only set 2G bandwidth using bw_cap command */ band_bwcap.band = cpu_to_le32(WLC_BAND_2G); - band_bwcap.bw_cap = cpu_to_le32(WLC_BW_40MHZ_BIT); + band_bwcap.bw_cap = cpu_to_le32(WLC_BW_CAP_40MHZ); err = brcmf_fil_iovar_data_set(ifp, "bw_cap", &band_bwcap, sizeof(band_bwcap)); } else { From bbeb0eadcf9fe74fb2b9b1a6fea82cd538b1e556 Mon Sep 17 00:00:00 2001 From: Peter Christensen Date: Thu, 8 May 2014 11:15:37 +0200 Subject: [PATCH 059/116] macvlan: Don't propagate IFF_ALLMULTI changes on down interfaces. Clearing the IFF_ALLMULTI flag on a down interface could cause an allmulti overflow on the underlying interface. Attempting the set IFF_ALLMULTI on the underlying interface would cause an error and the log message: "allmulti touches root, set allmulti failed." Signed-off-by: Peter Christensen Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index b0e2865a6810..c5fb9cf95c12 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -458,8 +458,10 @@ static void macvlan_change_rx_flags(struct net_device *dev, int change) struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (change & IFF_ALLMULTI) - dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(lowerdev, dev->flags & IFF_ALLMULTI ? 1 : -1); + } } static void macvlan_set_mac_lists(struct net_device *dev) From 1c3639005f48492e5f2d965779efd814e80f8b15 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 9 May 2014 11:11:39 +0200 Subject: [PATCH 060/116] sfc: fix calling of free_irq with already free vector If the sfc driver is in legacy interrupt mode (either explicitly by using interrupt_mode module param or by falling back to it) it will hit a warning at kernel/irq/manage.c because it will try to free an irq which wasn't allocated by it in the first place because the MSI(X) irqs are zero and it'll try to free them unconditionally. So fix it by checking if we're in legacy mode and freeing the appropriate irqs. CC: Zenghui Shi CC: Ben Hutchings CC: CC: Shradha Shah CC: David S. Miller Fixes: 1899c111a535 ("sfc: Fix IRQ cleanup in case of a probe failure") Reported-by: Zenghui Shi Signed-off-by: Nikolay Aleksandrov Acked-by: Shradha Shah Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/nic.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 32d969e857f7..89b83e59e1dc 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -156,13 +156,15 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) efx->net_dev->rx_cpu_rmap = NULL; #endif - /* Disable MSI/MSI-X interrupts */ - efx_for_each_channel(channel, efx) - free_irq(channel->irq, &efx->msi_context[channel->channel]); - - /* Disable legacy interrupt */ - if (efx->legacy_irq) + if (EFX_INT_MODE_USE_MSI(efx)) { + /* Disable MSI/MSI-X interrupts */ + efx_for_each_channel(channel, efx) + free_irq(channel->irq, + &efx->msi_context[channel->channel]); + } else { + /* Disable legacy interrupt */ free_irq(efx->legacy_irq, efx); + } } /* Register dump */ From c8965932a2e3b70197ec02c6741c29460279e2a8 Mon Sep 17 00:00:00 2001 From: Susant Sahani Date: Sat, 10 May 2014 00:11:32 +0530 Subject: [PATCH 061/116] ip6_tunnel: fix potential NULL pointer dereference The function ip6_tnl_validate assumes that the rtnl attribute IFLA_IPTUN_PROTO always be filled . If this attribute is not filled by the userspace application kernel get crashed with NULL pointer dereference. This patch fixes the potential kernel crash when IFLA_IPTUN_PROTO is missing . Signed-off-by: Susant Sahani Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b05b609f69d1..f6a66bb4114d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1557,7 +1557,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); From 64793110ad4d82e18d88a33307749c6562a6dd04 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 11 May 2014 19:59:43 +0300 Subject: [PATCH 062/116] iwlwifi: mvm: fix off-by-one in scan channels configuration tail should be equal to the last valid index, so decrease it by one. This error causes in "a gap" in some cases (as well as some possible out-of-bound write), finally resulting in ucode assertion. Signed-off-by: Eliad Peller Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index cba88a379fc8..c28de54c75d4 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -732,7 +732,7 @@ int iwl_mvm_config_sched_scan(struct iwl_mvm *mvm, int band_2ghz = mvm->nvm_data->bands[IEEE80211_BAND_2GHZ].n_channels; int band_5ghz = mvm->nvm_data->bands[IEEE80211_BAND_5GHZ].n_channels; int head = 0; - int tail = band_2ghz + band_5ghz; + int tail = band_2ghz + band_5ghz - 1; u32 ssid_bitmap; int cmd_len; int ret; From c52666aef9f2dff39276eb53f15d99e2e229870f Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 13 May 2014 12:54:09 +0300 Subject: [PATCH 063/116] mac80211: fix suspend vs. association race If the association is in progress while we suspend, the stack will be in a messed up state. Clean it before we suspend. This patch completes Johannes's patch: 1a1cb744de160ee70086a77afff605bbc275d291 Author: Johannes Berg mac80211: fix suspend vs. authentication race Cc: Fixes: 12e7f517029d ("mac80211: cleanup generic suspend/resume procedures") Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dee50aefd6e8..27600a9808ba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3598,18 +3598,24 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) sdata_lock(sdata); - if (ifmgd->auth_data) { + if (ifmgd->auth_data || ifmgd->assoc_data) { + const u8 *bssid = ifmgd->auth_data ? + ifmgd->auth_data->bss->bssid : + ifmgd->assoc_data->bss->bssid; + /* - * If we are trying to authenticate while suspending, cfg80211 - * won't know and won't actually abort those attempts, thus we - * need to do that ourselves. + * If we are trying to authenticate / associate while suspending, + * cfg80211 won't know and won't actually abort those attempts, + * thus we need to do that ourselves. */ - ieee80211_send_deauth_disassoc(sdata, - ifmgd->auth_data->bss->bssid, + ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); - ieee80211_destroy_auth_data(sdata, false); + if (ifmgd->assoc_data) + ieee80211_destroy_assoc_data(sdata, false); + if (ifmgd->auth_data) + ieee80211_destroy_auth_data(sdata, false); cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); } From b538b8ce76f69f7fa225bc0817bbb361b877ea23 Mon Sep 17 00:00:00 2001 From: David Spinadel Date: Tue, 13 May 2014 14:29:36 +0300 Subject: [PATCH 064/116] iwlwifi: mvm: prevent sched scan while not idle Prevent sched scan while not idle (including during association or in AP mode) instead of while associated only. This fixes my previous commit which was incomplete: commit bd5e4744a6ca64299b57a2682c720d00a475a734 Author: David Spinadel Date: Thu Apr 24 13:15:29 2014 +0300 iwlwifi: mvm: do no sched scan while associated Currently the FW doesn't support sched scan while associated, Prevent it. Signed-off-by: David Spinadel Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 +- drivers/net/wireless/iwlwifi/mvm/mvm.h | 2 +- drivers/net/wireless/iwlwifi/mvm/utils.c | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 4b0b8b6571ee..b41dc84e9431 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -1807,7 +1807,7 @@ static int iwl_mvm_mac_sched_scan_start(struct ieee80211_hw *hw, mutex_lock(&mvm->mutex); - if (iwl_mvm_is_associated(mvm)) { + if (!iwl_mvm_is_idle(mvm)) { ret = -EBUSY; goto out; } diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index 84c75a1b267e..f1ec0986c3c9 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -1004,7 +1004,7 @@ static inline bool iwl_mvm_vif_low_latency(struct iwl_mvm_vif *mvmvif) } /* Assoc status */ -bool iwl_mvm_is_associated(struct iwl_mvm *mvm); +bool iwl_mvm_is_idle(struct iwl_mvm *mvm); /* Thermal management and CT-kill */ void iwl_mvm_tt_tx_backoff(struct iwl_mvm *mvm, u32 backoff); diff --git a/drivers/net/wireless/iwlwifi/mvm/utils.c b/drivers/net/wireless/iwlwifi/mvm/utils.c index 6fdbef9696d8..2180902266ae 100644 --- a/drivers/net/wireless/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/iwlwifi/mvm/utils.c @@ -645,21 +645,21 @@ bool iwl_mvm_low_latency(struct iwl_mvm *mvm) return result; } -static void iwl_mvm_assoc_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) +static void iwl_mvm_idle_iter(void *_data, u8 *mac, struct ieee80211_vif *vif) { - bool *assoc = _data; + bool *idle = _data; - if (vif->bss_conf.assoc) - *assoc = true; + if (!vif->bss_conf.idle) + *idle = false; } -bool iwl_mvm_is_associated(struct iwl_mvm *mvm) +bool iwl_mvm_is_idle(struct iwl_mvm *mvm) { - bool assoc = false; + bool idle = true; ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, - iwl_mvm_assoc_iter, &assoc); + iwl_mvm_idle_iter, &idle); - return assoc; + return idle; } From f5651986fe438c1ba05fdafa383d38bd86713d13 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 May 2014 15:45:55 +0200 Subject: [PATCH 065/116] nl80211: fix NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL API My commit removing that also removed it from the header file which can break compilation of userspace that needed it, add it back for API/ABI compatibility purposes (but no code to implement anything for it.) Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1ba9d626aa83..194c1eab04d8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3856,6 +3856,8 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_CELL_BASE_REG_HINTS: This driver has been tested * to work properly to suppport receiving regulatory hints from * cellular base stations. + * @NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL: (no longer available, only + * here to reserve the value for API/ABI compatibility) * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station * mode @@ -3897,7 +3899,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_HT_IBSS = 1 << 1, NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, - /* bit 4 is reserved - don't use */ + NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, NL80211_FEATURE_SAE = 1 << 5, NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, NL80211_FEATURE_SCAN_FLUSH = 1 << 7, From 2176d5d41891753774f648b67470398a5acab584 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Fri, 9 May 2014 13:16:48 +0800 Subject: [PATCH 066/116] neigh: set nud_state to NUD_INCOMPLETE when probing router reachability Since commit 7e98056964("ipv6: router reachability probing"), a router falls into NUD_FAILED will be probed. Now if function rt6_select() selects a router which neighbour state is NUD_FAILED, and at the same time function rt6_probe() changes the neighbour state to NUD_PROBE, then function dst_neigh_output() can directly send packets, but actually the neighbour still is unreachable. If we set nud_state to NUD_INCOMPLETE instead NUD_PROBE, packets will not be sent out until the neihbour is reachable. In addition, because the route should be probes with a single NS, so we must set neigh->probes to neigh_max_probes(), then the neigh timer timeout and function neigh_timer_handler() will not send other NS Messages. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8f8a96ef9f3f..32d872eec7f5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1248,8 +1248,8 @@ void __neigh_set_probe_once(struct neighbour *neigh) neigh->updated = jiffies; if (!(neigh->nud_state & NUD_FAILED)) return; - neigh->nud_state = NUD_PROBE; - atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); + neigh->nud_state = NUD_INCOMPLETE; + atomic_set(&neigh->probes, neigh_max_probes(neigh)); neigh_add_timer(neigh, jiffies + NEIGH_VAR(neigh->parms, RETRANS_TIME)); } From 773cd38f40b8834be991dbfed36683acc1dd41ee Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 13 May 2014 15:05:55 -0700 Subject: [PATCH 067/116] net: filter: x86: fix JIT address randomization bpf_alloc_binary() adds 128 bytes of room to JITed program image and rounds it up to the nearest page size. If image size is close to page size (like 4000), it is rounded to two pages: round_up(4000 + 4 + 128) == 8192 then 'hole' is computed as 8192 - (4000 + 4) = 4188 If prandom_u32() % hole selects a number >= PAGE_SIZE - sizeof(*header) then kernel will crash during bpf_jit_free(): kernel BUG at arch/x86/mm/pageattr.c:887! Call Trace: [] change_page_attr_set_clr+0x135/0x460 [] ? _raw_spin_unlock_irq+0x30/0x50 [] set_memory_rw+0x2f/0x40 [] bpf_jit_free_deferred+0x2d/0x60 [] process_one_work+0x1d8/0x6a0 [] ? process_one_work+0x178/0x6a0 [] worker_thread+0x11c/0x370 since bpf_jit_free() does: unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; to compute start address of 'bpf_binary_header' and header->pages will pass junk to: set_memory_rw(addr, header->pages); Fix it by making sure that &header->image[prandom_u32() % hole] and &header are in the same page Fixes: 314beb9bcabfd ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") Signed-off-by: Alexei Starovoitov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index dc017735bb91..6d5663a599a7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -171,7 +171,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ header->pages = sz / PAGE_SIZE; - hole = sz - (proglen + sizeof(*header)); + hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); /* insert a random number of int3 instructions before BPF code */ *image_ptr = &header->image[prandom_u32() % hole]; From 3d4405226d27b3a215e4d03cfa51f536244e5de7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 11 May 2014 22:59:30 +0200 Subject: [PATCH 068/116] net: avoid dependency of net_get_random_once on nop patching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net_get_random_once depends on the static keys infrastructure to patch up the branch to the slow path during boot. This was realized by abusing the static keys api and defining a new initializer to not enable the call site while still indicating that the branch point should get patched up. This was needed to have the fast path considered likely by gcc. The static key initialization during boot up normally walks through all the registered keys and either patches in ideal nops or enables the jump site but omitted that step on x86 if ideal nops where already placed at static_key branch points. Thus net_get_random_once branches not always became active. This patch switches net_get_random_once to the ordinary static_key api and thus places the kernel fast path in the - by gcc considered - unlikely path. Microbenchmarks on Intel and AMD x86-64 showed that the unlikely path actually beats the likely path in terms of cycle cost and that different nop patterns did not make much difference, thus this switch should not be noticeable. Fixes: a48e42920ff38b ("net: introduce new macro net_get_random_once") Reported-by: Tuomas Räsänen Cc: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/net.h | 15 ++++----------- net/core/utils.c | 8 ++++---- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 94734a6259a4..17d83393afcc 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -248,24 +248,17 @@ do { \ bool __net_get_random_once(void *buf, int nbytes, bool *done, struct static_key *done_key); -#ifdef HAVE_JUMP_LABEL -#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \ - { .enabled = ATOMIC_INIT(0), .entries = (void *)1 }) -#else /* !HAVE_JUMP_LABEL */ -#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE -#endif /* HAVE_JUMP_LABEL */ - #define net_get_random_once(buf, nbytes) \ ({ \ bool ___ret = false; \ static bool ___done = false; \ - static struct static_key ___done_key = \ - ___NET_RANDOM_STATIC_KEY_INIT; \ - if (!static_key_true(&___done_key)) \ + static struct static_key ___once_key = \ + STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) \ ___ret = __net_get_random_once(buf, \ nbytes, \ &___done, \ - &___done_key); \ + &___once_key); \ ___ret; \ }) diff --git a/net/core/utils.c b/net/core/utils.c index 2f737bf90b3f..eed34338736c 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w) { struct __net_random_once_work *work = container_of(w, struct __net_random_once_work, work); - if (!static_key_enabled(work->key)) - static_key_slow_inc(work->key); + BUG_ON(!static_key_enabled(work->key)); + static_key_slow_dec(work->key); kfree(work); } @@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key) } bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key) + struct static_key *once_key) { static DEFINE_SPINLOCK(lock); unsigned long flags; @@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done, *done = true; spin_unlock_irqrestore(&lock, flags); - __net_random_once_disable_jump(done_key); + __net_random_once_disable_jump(once_key); return true; } From 3a1cebe7e05027a1c96f2fc1a8eddf5f19b78f42 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 11 May 2014 23:01:13 +0200 Subject: [PATCH 069/116] ipv6: fix calculation of option len in ip6_append_data tot_len does specify the size of struct ipv6_txoptions. We need opt_flen + opt_nflen to calculate the overall length of additional ipv6 extensions. I found this while auditing the ipv6 output path for a memory corruption reported by Alexey Preobrazhensky while he fuzzed an instrumented AddressSanitizer kernel with trinity. This may or may not be the cause of the original bug. Fixes: 4df98e76cde7c6 ("ipv6: pmtudisc setting not respected with UFO/CORK") Reported-by: Alexey Preobrazhensky Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 31a38bde69ef..fbf11562b54c 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1229,7 +1229,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int maxnonfragsize, headersize; headersize = sizeof(struct ipv6hdr) + - (opt ? opt->tot_len : 0) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + (dst_allfrag(&rt->dst) ? sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; From b4b177a5556a686909e643f1e9b6434c10de079f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 May 2014 15:34:41 +0200 Subject: [PATCH 070/116] mac80211: fix on-channel remain-on-channel Jouni reported that if a remain-on-channel was active on the same channel as the current operating channel, then the ROC would start, but any frames transmitted using mgmt-tx on the same channel would get delayed until after the ROC. The reason for this is that the ROC starts, but doesn't have any handling for "remain on the same channel", so it stops the interface queues. The later mgmt-tx then puts the frame on the interface queues (since it's on the current operating channel) and thus they get delayed until after the ROC. To fix this, add some logic to handle remaining on the same channel specially and not stop the queues etc. in this case. This not only fixes the bug but also improves behaviour in this case as data frames etc. can continue to flow. Cc: stable@vger.kernel.org Reported-by: Jouni Malinen Tested-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/offchannel.c | 27 ++++++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 222c28b75315..f169b6ee94ee 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -317,6 +317,7 @@ struct ieee80211_roc_work { bool started, abort, hw_begun, notified; bool to_be_freed; + bool on_channel; unsigned long hw_start_time; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 6fb38558a5e6..7a17decd27f9 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -333,7 +333,7 @@ void ieee80211_sw_roc_work(struct work_struct *work) container_of(work, struct ieee80211_roc_work, work.work); struct ieee80211_sub_if_data *sdata = roc->sdata; struct ieee80211_local *local = sdata->local; - bool started; + bool started, on_channel; mutex_lock(&local->mtx); @@ -354,14 +354,26 @@ void ieee80211_sw_roc_work(struct work_struct *work) if (!roc->started) { struct ieee80211_roc_work *dep; - /* start this ROC */ - ieee80211_offchannel_stop_vifs(local); + WARN_ON(local->use_chanctx); - /* switch channel etc */ + /* If actually operating on the desired channel (with at least + * 20 MHz channel width) don't stop all the operations but still + * treat it as though the ROC operation started properly, so + * other ROC operations won't interfere with this one. + */ + roc->on_channel = roc->chan == local->_oper_chandef.chan && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_5 && + local->_oper_chandef.width != NL80211_CHAN_WIDTH_10; + + /* start this ROC */ ieee80211_recalc_idle(local); - local->tmp_channel = roc->chan; - ieee80211_hw_config(local, 0); + if (!roc->on_channel) { + ieee80211_offchannel_stop_vifs(local); + + local->tmp_channel = roc->chan; + ieee80211_hw_config(local, 0); + } /* tell userspace or send frame */ ieee80211_handle_roc_started(roc); @@ -380,9 +392,10 @@ void ieee80211_sw_roc_work(struct work_struct *work) finish: list_del(&roc->list); started = roc->started; + on_channel = roc->on_channel; ieee80211_roc_notify_destroy(roc, !roc->abort); - if (started) { + if (started && !on_channel) { ieee80211_flush_queues(local, NULL); local->tmp_channel = NULL; From c4b160685fc85e41fe8c08478cc61f4877d26973 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 12 May 2014 10:38:18 -0400 Subject: [PATCH 071/116] jme: Fix unmap loop counting error: In my recent fix (76a691d0a: fix dma unmap warning), Ben Hutchings noted that my loop count was incorrect. Where j started at startidx, it should have started at zero, and gone on for count entries, not to endidx. Additionally, a DMA resource exhaustion should drop the frame and (for now), return NETDEV_TX_OK, not NETEV_TX_BUSY. This patch fixes both of those issues: Signed-off-by: Neil Horman CC: Ben Hutchings CC: "David S. Miller" CC: Guo-Fu Tseng Signed-off-by: David S. Miller --- drivers/net/ethernet/jme.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 6e664d9038d6..b78378cea5e3 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2027,14 +2027,14 @@ jme_fill_tx_map(struct pci_dev *pdev, return 0; } -static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int endidx) +static void jme_drop_tx_map(struct jme_adapter *jme, int startidx, int count) { struct jme_ring *txring = &(jme->txring[0]); struct jme_buffer_info *txbi = txring->bufinf, *ctxbi; int mask = jme->tx_ring_mask; int j; - for (j = startidx ; j < endidx ; ++j) { + for (j = 0 ; j < count ; j++) { ctxbi = txbi + ((startidx + j + 2) & (mask)); pci_unmap_page(jme->pdev, ctxbi->mapping, @@ -2069,7 +2069,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) skb_frag_page(frag), frag->page_offset, skb_frag_size(frag), hidma); if (ret) { - jme_drop_tx_map(jme, idx, idx+i); + jme_drop_tx_map(jme, idx, i); goto out; } @@ -2081,7 +2081,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) ret = jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, virt_to_page(skb->data), offset_in_page(skb->data), len, hidma); if (ret) - jme_drop_tx_map(jme, idx, idx+i); + jme_drop_tx_map(jme, idx, i); out: return ret; @@ -2269,7 +2269,7 @@ jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) } if (jme_fill_tx_desc(jme, skb, idx)) - return NETDEV_TX_BUSY; + return NETDEV_TX_OK; jwrite32(jme, JME_TXCS, jme->reg_txcs | TXCS_SELECT_QUEUE0 | From 03a58baa785f48a85126ab043a14cb80b7e670e0 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 13 May 2014 14:03:11 +0530 Subject: [PATCH 072/116] be2net: enable interrupts in EEH resume On some BE3 FW versions, after a HW reset, interrupts will remain disabled for each function. So, explicitly enable the interrupts in the eeh_resume handler, else after an eeh recovery interrupts wouldn't work. Signed-off-by: Kalesh AP Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a18645407d21..dc19bc5dec77 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4949,6 +4949,12 @@ static void be_eeh_resume(struct pci_dev *pdev) if (status) goto err; + /* On some BE3 FW versions, after a HW reset, + * interrupts will remain disabled for each function. + * So, explicitly enable interrupts + */ + be_intr_set(adapter, true); + /* tell fw we're ready to fire cmds */ status = be_cmd_fw_init(adapter); if (status) From f5738e2ef88070ef1372e6e718124d88e9abe4ac Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 13 May 2014 14:38:02 +0200 Subject: [PATCH 073/116] af_iucv: wrong mapping of sent and confirmed skbs When sending data through IUCV a MESSAGE COMPLETE interrupt signals that sent data memory can be freed or reused again. With commit f9c41a62bba3f3f7ef3541b2a025e3371bcbba97 "af_iucv: fix recvmsg by replacing skb_pull() function" the MESSAGE COMPLETE callback iucv_callback_txdone() identifies the wrong skb as being confirmed, which leads to data corruption. This patch fixes the skb mapping logic in iucv_callback_txdone(). Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Cc: Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 01e77b0ae075..8c9d7302c846 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1830,7 +1830,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_lock_irqsave(&list->lock, flags); while (list_skb != (struct sk_buff *)list) { - if (msg->tag != IUCV_SKB_CB(list_skb)->tag) { + if (msg->tag == IUCV_SKB_CB(list_skb)->tag) { this = list_skb; break; } From faf1dc64e345ac4de5c4429df6ed492255ae2248 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Wed, 14 May 2014 14:39:21 +0530 Subject: [PATCH 074/116] ath9k_htc: Stop ANI before doing hw_reset During remain on channel request, ANI worker thread is not stopped before doing hw reset. This is causing kernel crash in hw_per_calibration. This change ensures that ANI is stopped before doing chip reset and it will be rescheduled later when the chip is configured back to home channel and having valid bss. Reported-by: David Herrmann Tested-by: David Herrmann Signed-off-by: Rajkumar Manoharan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index f46cd0250e48..5627917c5ff7 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -95,8 +95,10 @@ static void ath9k_htc_vif_iter(void *data, u8 *mac, struct ieee80211_vif *vif) if ((vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT) && - bss_conf->enable_beacon) + bss_conf->enable_beacon) { priv->reconfig_beacon = true; + priv->rearm_ani = true; + } if (bss_conf->assoc) { priv->rearm_ani = true; @@ -257,6 +259,7 @@ static int ath9k_htc_set_channel(struct ath9k_htc_priv *priv, ath9k_htc_ps_wakeup(priv); + ath9k_htc_stop_ani(priv); del_timer_sync(&priv->tx.cleanup_timer); ath9k_htc_tx_drain(priv); From e84d2f8d2ae33c8215429824e1ecf24cbca9645e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 May 2014 09:48:21 +0200 Subject: [PATCH 075/116] net: filter: s390: fix JIT address randomization This is the s390 variant of Alexei's JIT bug fix. (patch description below stolen from Alexei's patch) bpf_alloc_binary() adds 128 bytes of room to JITed program image and rounds it up to the nearest page size. If image size is close to page size (like 4000), it is rounded to two pages: round_up(4000 + 4 + 128) == 8192 then 'hole' is computed as 8192 - (4000 + 4) = 4188 If prandom_u32() % hole selects a number >= PAGE_SIZE - sizeof(*header) then kernel will crash during bpf_jit_free(): kernel BUG at arch/x86/mm/pageattr.c:887! Call Trace: [] change_page_attr_set_clr+0x135/0x460 [] ? _raw_spin_unlock_irq+0x30/0x50 [] set_memory_rw+0x2f/0x40 [] bpf_jit_free_deferred+0x2d/0x60 [] process_one_work+0x1d8/0x6a0 [] ? process_one_work+0x178/0x6a0 [] worker_thread+0x11c/0x370 since bpf_jit_free() does: unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; to compute start address of 'bpf_binary_header' and header->pages will pass junk to: set_memory_rw(addr, header->pages); Fix it by making sure that &header->image[prandom_u32() % hole] and &header are in the same page. Fixes: aa2d2c73c21f2 ("s390/bpf,jit: address randomize and write protect jit code") Reported-by: Alexei Starovoitov Cc: # v3.11+ Signed-off-by: Heiko Carstens Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 452d3ebd9d0f..e9f8fa9337fe 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -811,7 +811,7 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, return NULL; memset(header, 0, sz); header->pages = sz / PAGE_SIZE; - hole = sz - (bpfsize + sizeof(*header)); + hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); /* Insert random number of illegal instructions before BPF code * and make sure the first instruction starts at an even address. */ From 3b084e99a3fabaeb0f9c65a0806cde30f0b2835e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 15 May 2014 17:18:26 +0200 Subject: [PATCH 076/116] netfilter: nf_tables: fix trace of matching non-terminal rule Add the corresponding trace if we have a full match in a non-terminal rule. Note that the traces will look slightly different than in x_tables since the log message after all expressions have been evaluated (contrary to x_tables, that emits it before the target action). This manifests in two differences in nf_tables wrt. x_tables: 1) The rule that enables the tracing is included in the trace. 2) If the rule emits some log message, that is shown before the trace log message. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 421c36ac5145..345acfb1720b 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -144,8 +144,10 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) switch (data[NFT_REG_VERDICT].verdict) { case NFT_BREAK: data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; - /* fall through */ + continue; case NFT_CONTINUE: + if (unlikely(pkt->skb->nf_trace)) + nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); continue; } break; From 16a4142363b11952d3aa76ac78004502c0c2fe6e Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Thu, 24 Apr 2014 03:44:25 +0800 Subject: [PATCH 077/116] batman-adv: fix indirect hard_iface NULL dereference If hard_iface is NULL and goto out is made batadv_hardif_free_ref() doesn't check for NULL before dereferencing it to get to refcount. Introduced in cb1c92ec37fb70543d133a1fa7d9b54d6f8a1ecd ("batman-adv: add debugfs support to view multiif tables"). Reported-by: Sven Eckelmann Signed-off-by: Marek Lindner Acked-by: Antonio Quartulli Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 1785da37b82c..6a484514cd3e 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1079,7 +1079,8 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface); out: - batadv_hardif_free_ref(hard_iface); + if (hard_iface) + batadv_hardif_free_ref(hard_iface); return 0; } From be181015a189cd141398b761ba4e79d33fe69949 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 23 Apr 2014 14:05:16 +0200 Subject: [PATCH 078/116] batman-adv: fix reference counting imbalance while sending fragment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the new fragmentation code the batadv_frag_send_packet() function obtains a reference to the primary_if, but it does not release it upon return. This reference imbalance prevents the primary_if (and then the related netdevice) to be properly released on shut down. Fix this by releasing the primary_if in batadv_frag_send_packet(). Introduced by ee75ed88879af88558818a5c6609d85f60ff0df4 ("batman-adv: Fragment and send skbs larger than mtu") Cc: Martin Hundebøll Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner Acked-by: Martin Hundebøll --- net/batman-adv/fragmentation.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index bcc4bea632fa..f14e54a05691 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -418,12 +418,13 @@ bool batadv_frag_send_packet(struct sk_buff *skb, struct batadv_neigh_node *neigh_node) { struct batadv_priv *bat_priv; - struct batadv_hard_iface *primary_if; + struct batadv_hard_iface *primary_if = NULL; struct batadv_frag_packet frag_header; struct sk_buff *skb_fragment; unsigned mtu = neigh_node->if_incoming->net_dev->mtu; unsigned header_size = sizeof(frag_header); unsigned max_fragment_size, max_packet_size; + bool ret = false; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE @@ -483,7 +484,11 @@ bool batadv_frag_send_packet(struct sk_buff *skb, skb->len + ETH_HLEN); batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); - return true; + ret = true; + out_err: - return false; + if (primary_if) + batadv_hardif_free_ref(primary_if); + + return ret; } From 377fe0f968b30a1a714fab53a908061914f30e26 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 2 May 2014 01:35:13 +0200 Subject: [PATCH 079/116] batman-adv: increase orig refcount when storing ref in gw_node A pointer to the orig_node representing a bat-gateway is stored in the gw_node->orig_node member, but the refcount for such orig_node is never increased. This leads to memory faults when gw_node->orig_node is accessed and the originator has already been freed. Fix this by increasing the refcount on gw_node creation and decreasing it on gw_node free. Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/gateway_client.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index c835e137423b..90cff585b37d 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -42,8 +42,10 @@ static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { - if (atomic_dec_and_test(&gw_node->refcount)) + if (atomic_dec_and_test(&gw_node->refcount)) { + batadv_orig_node_free_ref(gw_node->orig_node); kfree_rcu(gw_node, rcu); + } } static struct batadv_gw_node * @@ -406,10 +408,15 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (gateway->bandwidth_down == 0) return; - gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); - if (!gw_node) + if (!atomic_inc_not_zero(&orig_node->refcount)) return; + gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); + if (!gw_node) { + batadv_orig_node_free_ref(orig_node); + return; + } + INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; atomic_set(&gw_node->refcount, 1); From cc2f33860cea0e48ebec096130bd0f7c4bf6e0bc Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Sat, 29 Mar 2014 17:27:38 +0100 Subject: [PATCH 080/116] batman-adv: fix local TT check for outgoing arp requests in DAT Change introduced by 88e48d7b3340ef07b108eb8a8b3813dd093cc7f7 ("batman-adv: make DAT drop ARP requests targeting local clients") implements a check that prevents DAT from using the caching mechanism when the client that is supposed to provide a reply to an arp request is local. However change brought by be1db4f6615b5e6156c807ea8985171c215c2d57 ("batman-adv: make the Distributed ARP Table vlan aware") has not converted the above check into its vlan aware version thus making it useless when the local client is behind a vlan. Fix the behaviour by properly specifying the vlan when checking for a client being local or not. Reported-by: Simon Wunderlich Signed-off-by: Antonio Quartulli Signed-off-by: Marek Lindner --- net/batman-adv/distributed-arp-table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b25fd64d727b..aa5d4946d0d7 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -940,8 +940,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, * additional DAT answer may trigger kernel warnings about * a packet coming from the wrong port. */ - if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, - BATADV_NO_FLAGS)) { + if (batadv_is_my_client(bat_priv, dat_entry->mac_addr, vid)) { ret = true; goto out; } From 200b916f3575bdf11609cb447661b8d5957b0bbf Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 12 May 2014 15:11:20 -0700 Subject: [PATCH 081/116] rtnetlink: wait for unregistering devices in rtnl_link_unregister() From: Cong Wang commit 50624c934db18ab90 (net: Delay default_device_exit_batch until no devices are unregistering) introduced rtnl_lock_unregistering() for default_device_exit_batch(). Same race could happen we when rmmod a driver which calls rtnl_link_unregister() as we call dev->destructor without rtnl lock. For long term, I think we should clean up the mess of netdev_run_todo() and net namespce exit code. Cc: Eric W. Biederman Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 5 +++++ net/core/dev.c | 2 +- net/core/net_namespace.c | 2 +- net/core/rtnetlink.c | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e3e66ac0a52..953937ea5233 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -4,6 +4,7 @@ #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -22,6 +23,10 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); + +extern wait_queue_head_t netdev_unregistering_wq; +extern struct mutex net_mutex; + #ifdef CONFIG_PROVE_LOCKING extern int lockdep_rtnl_is_held(void); #else diff --git a/net/core/dev.c b/net/core/dev.c index c619b8641337..6da649bde4f7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5541,7 +5541,7 @@ static int dev_new_index(struct net *net) /* Delayed registration/unregisteration */ static LIST_HEAD(net_todo_list); -static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); +DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq); static void net_set_todo(struct net_device *dev) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 81d3a9a08453..7c8ffd974961 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -24,7 +24,7 @@ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; -static DEFINE_MUTEX(net_mutex); +DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9837bebf93ce..2d8d8fcfa060 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); +/* Return with the rtnl_lock held when there are no network + * devices unregistering in any network namespace. + */ +static void rtnl_lock_unregistering_all(void) +{ + struct net *net; + bool unregistering; + DEFINE_WAIT(wait); + + for (;;) { + prepare_to_wait(&netdev_unregistering_wq, &wait, + TASK_UNINTERRUPTIBLE); + unregistering = false; + rtnl_lock(); + for_each_net(net) { + if (net->dev_unreg_count > 0) { + unregistering = true; + break; + } + } + if (!unregistering) + break; + __rtnl_unlock(); + schedule(); + } + finish_wait(&netdev_unregistering_wq, &wait); +} + /** * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister */ void rtnl_link_unregister(struct rtnl_link_ops *ops) { - rtnl_lock(); + /* Close the race with cleanup_net() */ + mutex_lock(&net_mutex); + rtnl_lock_unregistering_all(); __rtnl_link_unregister(ops); rtnl_unlock(); + mutex_unlock(&net_mutex); } EXPORT_SYMBOL_GPL(rtnl_link_unregister); From 898305806ad56ae11dc2c80931062e6a2c7bba48 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Wed, 14 May 2014 14:38:36 -0500 Subject: [PATCH 082/116] Altera TSE: Fix sparse errors and warnings This patch fixes the many sparse errors and warnings contained in the initial submission of the Altera Triple Speed Ethernet driver, and a few minor cppcheck warnings. Changes are tested on ARM and NIOS2 example designs, and compile tested against multiple architectures. Typical issues addressed were as follows: altera_tse_ethtool.c:136:19: warning: incorrect type in argument 1 (different address spaces) altera_tse_ethtool.c:136:19: expected void const volatile [noderef] *addr altera_tse_ethtool.c:136:19: got unsigned int * ... altera_sgdma.c:129:31: warning: cast removes address space of expression Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- drivers/net/ethernet/altera/Makefile | 1 + drivers/net/ethernet/altera/altera_msgdma.c | 108 +++++------ drivers/net/ethernet/altera/altera_msgdmahw.h | 13 +- drivers/net/ethernet/altera/altera_sgdma.c | 181 +++++++++--------- drivers/net/ethernet/altera/altera_sgdmahw.h | 26 +-- drivers/net/ethernet/altera/altera_tse.h | 47 +++++ .../net/ethernet/altera/altera_tse_ethtool.c | 108 +++++++---- drivers/net/ethernet/altera/altera_tse_main.c | 128 +++++++------ drivers/net/ethernet/altera/altera_utils.c | 20 +- drivers/net/ethernet/altera/altera_utils.h | 8 +- 10 files changed, 366 insertions(+), 274 deletions(-) diff --git a/drivers/net/ethernet/altera/Makefile b/drivers/net/ethernet/altera/Makefile index d4a187e45369..3eff2fd3997e 100644 --- a/drivers/net/ethernet/altera/Makefile +++ b/drivers/net/ethernet/altera/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_ALTERA_TSE) += altera_tse.o altera_tse-objs := altera_tse_main.o altera_tse_ethtool.o \ altera_msgdma.o altera_sgdma.o altera_utils.o +ccflags-y += -D__CHECK_ENDIAN__ diff --git a/drivers/net/ethernet/altera/altera_msgdma.c b/drivers/net/ethernet/altera/altera_msgdma.c index 4d1f2fdd5c32..0fb986ba3290 100644 --- a/drivers/net/ethernet/altera/altera_msgdma.c +++ b/drivers/net/ethernet/altera/altera_msgdma.c @@ -37,18 +37,16 @@ void msgdma_start_rxdma(struct altera_tse_private *priv) void msgdma_reset(struct altera_tse_private *priv) { int counter; - struct msgdma_csr *txcsr = - (struct msgdma_csr *)priv->tx_dma_csr; - struct msgdma_csr *rxcsr = - (struct msgdma_csr *)priv->rx_dma_csr; /* Reset Rx mSGDMA */ - iowrite32(MSGDMA_CSR_STAT_MASK, &rxcsr->status); - iowrite32(MSGDMA_CSR_CTL_RESET, &rxcsr->control); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->rx_dma_csr, + msgdma_csroffs(status)); + csrwr32(MSGDMA_CSR_CTL_RESET, priv->rx_dma_csr, + msgdma_csroffs(control)); counter = 0; while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - if (tse_bit_is_clear(&rxcsr->status, + if (tse_bit_is_clear(priv->rx_dma_csr, msgdma_csroffs(status), MSGDMA_CSR_STAT_RESETTING)) break; udelay(1); @@ -59,15 +57,18 @@ void msgdma_reset(struct altera_tse_private *priv) "TSE Rx mSGDMA resetting bit never cleared!\n"); /* clear all status bits */ - iowrite32(MSGDMA_CSR_STAT_MASK, &rxcsr->status); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->rx_dma_csr, msgdma_csroffs(status)); /* Reset Tx mSGDMA */ - iowrite32(MSGDMA_CSR_STAT_MASK, &txcsr->status); - iowrite32(MSGDMA_CSR_CTL_RESET, &txcsr->control); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->tx_dma_csr, + msgdma_csroffs(status)); + + csrwr32(MSGDMA_CSR_CTL_RESET, priv->tx_dma_csr, + msgdma_csroffs(control)); counter = 0; while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - if (tse_bit_is_clear(&txcsr->status, + if (tse_bit_is_clear(priv->tx_dma_csr, msgdma_csroffs(status), MSGDMA_CSR_STAT_RESETTING)) break; udelay(1); @@ -78,58 +79,58 @@ void msgdma_reset(struct altera_tse_private *priv) "TSE Tx mSGDMA resetting bit never cleared!\n"); /* clear all status bits */ - iowrite32(MSGDMA_CSR_STAT_MASK, &txcsr->status); + csrwr32(MSGDMA_CSR_STAT_MASK, priv->tx_dma_csr, msgdma_csroffs(status)); } void msgdma_disable_rxirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->rx_dma_csr; - tse_clear_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_clear_bit(priv->rx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_enable_rxirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->rx_dma_csr; - tse_set_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_set_bit(priv->rx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_disable_txirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->tx_dma_csr; - tse_clear_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_clear_bit(priv->tx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_enable_txirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->tx_dma_csr; - tse_set_bit(&csr->control, MSGDMA_CSR_CTL_GLOBAL_INTR); + tse_set_bit(priv->tx_dma_csr, msgdma_csroffs(control), + MSGDMA_CSR_CTL_GLOBAL_INTR); } void msgdma_clear_rxirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->rx_dma_csr; - iowrite32(MSGDMA_CSR_STAT_IRQ, &csr->status); + csrwr32(MSGDMA_CSR_STAT_IRQ, priv->rx_dma_csr, msgdma_csroffs(status)); } void msgdma_clear_txirq(struct altera_tse_private *priv) { - struct msgdma_csr *csr = priv->tx_dma_csr; - iowrite32(MSGDMA_CSR_STAT_IRQ, &csr->status); + csrwr32(MSGDMA_CSR_STAT_IRQ, priv->tx_dma_csr, msgdma_csroffs(status)); } /* return 0 to indicate transmit is pending */ int msgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) { - struct msgdma_extended_desc *desc = priv->tx_dma_desc; - - iowrite32(lower_32_bits(buffer->dma_addr), &desc->read_addr_lo); - iowrite32(upper_32_bits(buffer->dma_addr), &desc->read_addr_hi); - iowrite32(0, &desc->write_addr_lo); - iowrite32(0, &desc->write_addr_hi); - iowrite32(buffer->len, &desc->len); - iowrite32(0, &desc->burst_seq_num); - iowrite32(MSGDMA_DESC_TX_STRIDE, &desc->stride); - iowrite32(MSGDMA_DESC_CTL_TX_SINGLE, &desc->control); + csrwr32(lower_32_bits(buffer->dma_addr), priv->tx_dma_desc, + msgdma_descroffs(read_addr_lo)); + csrwr32(upper_32_bits(buffer->dma_addr), priv->tx_dma_desc, + msgdma_descroffs(read_addr_hi)); + csrwr32(0, priv->tx_dma_desc, msgdma_descroffs(write_addr_lo)); + csrwr32(0, priv->tx_dma_desc, msgdma_descroffs(write_addr_hi)); + csrwr32(buffer->len, priv->tx_dma_desc, msgdma_descroffs(len)); + csrwr32(0, priv->tx_dma_desc, msgdma_descroffs(burst_seq_num)); + csrwr32(MSGDMA_DESC_TX_STRIDE, priv->tx_dma_desc, + msgdma_descroffs(stride)); + csrwr32(MSGDMA_DESC_CTL_TX_SINGLE, priv->tx_dma_desc, + msgdma_descroffs(control)); return 0; } @@ -138,17 +139,16 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) u32 ready = 0; u32 inuse; u32 status; - struct msgdma_csr *txcsr = - (struct msgdma_csr *)priv->tx_dma_csr; /* Get number of sent descriptors */ - inuse = ioread32(&txcsr->rw_fill_level) & 0xffff; + inuse = csrrd32(priv->tx_dma_csr, msgdma_csroffs(rw_fill_level)) + & 0xffff; if (inuse) { /* Tx FIFO is not empty */ ready = priv->tx_prod - priv->tx_cons - inuse - 1; } else { /* Check for buffered last packet */ - status = ioread32(&txcsr->status); + status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status)); if (status & MSGDMA_CSR_STAT_BUSY) ready = priv->tx_prod - priv->tx_cons - 1; else @@ -162,7 +162,6 @@ u32 msgdma_tx_completions(struct altera_tse_private *priv) void msgdma_add_rx_desc(struct altera_tse_private *priv, struct tse_buffer *rxbuffer) { - struct msgdma_extended_desc *desc = priv->rx_dma_desc; u32 len = priv->rx_dma_buf_sz; dma_addr_t dma_addr = rxbuffer->dma_addr; u32 control = (MSGDMA_DESC_CTL_END_ON_EOP @@ -172,14 +171,16 @@ void msgdma_add_rx_desc(struct altera_tse_private *priv, | MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO); - iowrite32(0, &desc->read_addr_lo); - iowrite32(0, &desc->read_addr_hi); - iowrite32(lower_32_bits(dma_addr), &desc->write_addr_lo); - iowrite32(upper_32_bits(dma_addr), &desc->write_addr_hi); - iowrite32(len, &desc->len); - iowrite32(0, &desc->burst_seq_num); - iowrite32(0x00010001, &desc->stride); - iowrite32(control, &desc->control); + csrwr32(0, priv->rx_dma_desc, msgdma_descroffs(read_addr_lo)); + csrwr32(0, priv->rx_dma_desc, msgdma_descroffs(read_addr_hi)); + csrwr32(lower_32_bits(dma_addr), priv->rx_dma_desc, + msgdma_descroffs(write_addr_lo)); + csrwr32(upper_32_bits(dma_addr), priv->rx_dma_desc, + msgdma_descroffs(write_addr_hi)); + csrwr32(len, priv->rx_dma_desc, msgdma_descroffs(len)); + csrwr32(0, priv->rx_dma_desc, msgdma_descroffs(burst_seq_num)); + csrwr32(0x00010001, priv->rx_dma_desc, msgdma_descroffs(stride)); + csrwr32(control, priv->rx_dma_desc, msgdma_descroffs(control)); } /* status is returned on upper 16 bits, @@ -190,14 +191,13 @@ u32 msgdma_rx_status(struct altera_tse_private *priv) u32 rxstatus = 0; u32 pktlength; u32 pktstatus; - struct msgdma_csr *rxcsr = - (struct msgdma_csr *)priv->rx_dma_csr; - struct msgdma_response *rxresp = - (struct msgdma_response *)priv->rx_dma_resp; - if (ioread32(&rxcsr->resp_fill_level) & 0xffff) { - pktlength = ioread32(&rxresp->bytes_transferred); - pktstatus = ioread32(&rxresp->status); + if (csrrd32(priv->rx_dma_csr, msgdma_csroffs(resp_fill_level)) + & 0xffff) { + pktlength = csrrd32(priv->rx_dma_resp, + msgdma_respoffs(bytes_transferred)); + pktstatus = csrrd32(priv->rx_dma_resp, + msgdma_respoffs(status)); rxstatus = pktstatus; rxstatus = rxstatus << 16; rxstatus |= (pktlength & 0xffff); diff --git a/drivers/net/ethernet/altera/altera_msgdmahw.h b/drivers/net/ethernet/altera/altera_msgdmahw.h index d7b59ba4019c..e335626e1b6b 100644 --- a/drivers/net/ethernet/altera/altera_msgdmahw.h +++ b/drivers/net/ethernet/altera/altera_msgdmahw.h @@ -17,15 +17,6 @@ #ifndef __ALTERA_MSGDMAHW_H__ #define __ALTERA_MSGDMAHW_H__ -/* mSGDMA standard descriptor format - */ -struct msgdma_desc { - u32 read_addr; /* data buffer source address */ - u32 write_addr; /* data buffer destination address */ - u32 len; /* the number of bytes to transfer per descriptor */ - u32 control; /* characteristics of the transfer */ -}; - /* mSGDMA extended descriptor format */ struct msgdma_extended_desc { @@ -159,6 +150,10 @@ struct msgdma_response { u32 status; }; +#define msgdma_respoffs(a) (offsetof(struct msgdma_response, a)) +#define msgdma_csroffs(a) (offsetof(struct msgdma_csr, a)) +#define msgdma_descroffs(a) (offsetof(struct msgdma_extended_desc, a)) + /* mSGDMA response register bit definitions */ #define MSGDMA_RESP_EARLY_TERM BIT(8) diff --git a/drivers/net/ethernet/altera/altera_sgdma.c b/drivers/net/ethernet/altera/altera_sgdma.c index 9ce8630692b6..99cc56f451cf 100644 --- a/drivers/net/ethernet/altera/altera_sgdma.c +++ b/drivers/net/ethernet/altera/altera_sgdma.c @@ -20,8 +20,8 @@ #include "altera_sgdmahw.h" #include "altera_sgdma.h" -static void sgdma_setup_descrip(struct sgdma_descrip *desc, - struct sgdma_descrip *ndesc, +static void sgdma_setup_descrip(struct sgdma_descrip __iomem *desc, + struct sgdma_descrip __iomem *ndesc, dma_addr_t ndesc_phys, dma_addr_t raddr, dma_addr_t waddr, @@ -31,17 +31,17 @@ static void sgdma_setup_descrip(struct sgdma_descrip *desc, int wfixed); static int sgdma_async_write(struct altera_tse_private *priv, - struct sgdma_descrip *desc); + struct sgdma_descrip __iomem *desc); static int sgdma_async_read(struct altera_tse_private *priv); static dma_addr_t sgdma_txphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc); + struct sgdma_descrip __iomem *desc); static dma_addr_t sgdma_rxphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc); + struct sgdma_descrip __iomem *desc); static int sgdma_txbusy(struct altera_tse_private *priv); @@ -79,7 +79,8 @@ int sgdma_initialize(struct altera_tse_private *priv) priv->rxdescphys = (dma_addr_t) 0; priv->txdescphys = (dma_addr_t) 0; - priv->rxdescphys = dma_map_single(priv->device, priv->rx_dma_desc, + priv->rxdescphys = dma_map_single(priv->device, + (void __force *)priv->rx_dma_desc, priv->rxdescmem, DMA_BIDIRECTIONAL); if (dma_mapping_error(priv->device, priv->rxdescphys)) { @@ -88,7 +89,8 @@ int sgdma_initialize(struct altera_tse_private *priv) return -EINVAL; } - priv->txdescphys = dma_map_single(priv->device, priv->tx_dma_desc, + priv->txdescphys = dma_map_single(priv->device, + (void __force *)priv->tx_dma_desc, priv->txdescmem, DMA_TO_DEVICE); if (dma_mapping_error(priv->device, priv->txdescphys)) { @@ -98,8 +100,8 @@ int sgdma_initialize(struct altera_tse_private *priv) } /* Initialize descriptor memory to all 0's, sync memory to cache */ - memset(priv->tx_dma_desc, 0, priv->txdescmem); - memset(priv->rx_dma_desc, 0, priv->rxdescmem); + memset_io(priv->tx_dma_desc, 0, priv->txdescmem); + memset_io(priv->rx_dma_desc, 0, priv->rxdescmem); dma_sync_single_for_device(priv->device, priv->txdescphys, priv->txdescmem, DMA_TO_DEVICE); @@ -126,22 +128,15 @@ void sgdma_uninitialize(struct altera_tse_private *priv) */ void sgdma_reset(struct altera_tse_private *priv) { - u32 *ptxdescripmem = (u32 *)priv->tx_dma_desc; - u32 txdescriplen = priv->txdescmem; - u32 *prxdescripmem = (u32 *)priv->rx_dma_desc; - u32 rxdescriplen = priv->rxdescmem; - struct sgdma_csr *ptxsgdma = (struct sgdma_csr *)priv->tx_dma_csr; - struct sgdma_csr *prxsgdma = (struct sgdma_csr *)priv->rx_dma_csr; - /* Initialize descriptor memory to 0 */ - memset(ptxdescripmem, 0, txdescriplen); - memset(prxdescripmem, 0, rxdescriplen); + memset_io(priv->tx_dma_desc, 0, priv->txdescmem); + memset_io(priv->rx_dma_desc, 0, priv->rxdescmem); - iowrite32(SGDMA_CTRLREG_RESET, &ptxsgdma->control); - iowrite32(0, &ptxsgdma->control); + csrwr32(SGDMA_CTRLREG_RESET, priv->tx_dma_csr, sgdma_csroffs(control)); + csrwr32(0, priv->tx_dma_csr, sgdma_csroffs(control)); - iowrite32(SGDMA_CTRLREG_RESET, &prxsgdma->control); - iowrite32(0, &prxsgdma->control); + csrwr32(SGDMA_CTRLREG_RESET, priv->rx_dma_csr, sgdma_csroffs(control)); + csrwr32(0, priv->rx_dma_csr, sgdma_csroffs(control)); } /* For SGDMA, interrupts remain enabled after initially enabling, @@ -167,14 +162,14 @@ void sgdma_disable_txirq(struct altera_tse_private *priv) void sgdma_clear_rxirq(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - tse_set_bit(&csr->control, SGDMA_CTRLREG_CLRINT); + tse_set_bit(priv->rx_dma_csr, sgdma_csroffs(control), + SGDMA_CTRLREG_CLRINT); } void sgdma_clear_txirq(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr; - tse_set_bit(&csr->control, SGDMA_CTRLREG_CLRINT); + tse_set_bit(priv->tx_dma_csr, sgdma_csroffs(control), + SGDMA_CTRLREG_CLRINT); } /* transmits buffer through SGDMA. Returns number of buffers @@ -184,12 +179,11 @@ void sgdma_clear_txirq(struct altera_tse_private *priv) */ int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) { - int pktstx = 0; - struct sgdma_descrip *descbase = - (struct sgdma_descrip *)priv->tx_dma_desc; + struct sgdma_descrip __iomem *descbase = + (struct sgdma_descrip __iomem *)priv->tx_dma_desc; - struct sgdma_descrip *cdesc = &descbase[0]; - struct sgdma_descrip *ndesc = &descbase[1]; + struct sgdma_descrip __iomem *cdesc = &descbase[0]; + struct sgdma_descrip __iomem *ndesc = &descbase[1]; /* wait 'til the tx sgdma is ready for the next transmit request */ if (sgdma_txbusy(priv)) @@ -205,7 +199,7 @@ int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) 0, /* read fixed */ SGDMA_CONTROL_WR_FIXED); /* Generate SOP */ - pktstx = sgdma_async_write(priv, cdesc); + sgdma_async_write(priv, cdesc); /* enqueue the request to the pending transmit queue */ queue_tx(priv, buffer); @@ -219,10 +213,10 @@ int sgdma_tx_buffer(struct altera_tse_private *priv, struct tse_buffer *buffer) u32 sgdma_tx_completions(struct altera_tse_private *priv) { u32 ready = 0; - struct sgdma_descrip *desc = (struct sgdma_descrip *)priv->tx_dma_desc; if (!sgdma_txbusy(priv) && - ((desc->control & SGDMA_CONTROL_HW_OWNED) == 0) && + ((csrrd8(priv->tx_dma_desc, sgdma_descroffs(control)) + & SGDMA_CONTROL_HW_OWNED) == 0) && (dequeue_tx(priv))) { ready = 1; } @@ -246,32 +240,31 @@ void sgdma_add_rx_desc(struct altera_tse_private *priv, */ u32 sgdma_rx_status(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - struct sgdma_descrip *base = (struct sgdma_descrip *)priv->rx_dma_desc; - struct sgdma_descrip *desc = NULL; - int pktsrx; - unsigned int rxstatus = 0; - unsigned int pktlength = 0; - unsigned int pktstatus = 0; + struct sgdma_descrip __iomem *base = + (struct sgdma_descrip __iomem *)priv->rx_dma_desc; + struct sgdma_descrip __iomem *desc = NULL; struct tse_buffer *rxbuffer = NULL; + unsigned int rxstatus = 0; - u32 sts = ioread32(&csr->status); + u32 sts = csrrd32(priv->rx_dma_csr, sgdma_csroffs(status)); desc = &base[0]; if (sts & SGDMA_STSREG_EOP) { + unsigned int pktlength = 0; + unsigned int pktstatus = 0; dma_sync_single_for_cpu(priv->device, priv->rxdescphys, priv->sgdmadesclen, DMA_FROM_DEVICE); - pktlength = desc->bytes_xferred; - pktstatus = desc->status & 0x3f; - rxstatus = pktstatus; + pktlength = csrrd16(desc, sgdma_descroffs(bytes_xferred)); + pktstatus = csrrd8(desc, sgdma_descroffs(status)); + rxstatus = pktstatus & ~SGDMA_STATUS_EOP; rxstatus = rxstatus << 16; rxstatus |= (pktlength & 0xffff); if (rxstatus) { - desc->status = 0; + csrwr8(0, desc, sgdma_descroffs(status)); rxbuffer = dequeue_rx(priv); if (rxbuffer == NULL) @@ -279,12 +272,12 @@ u32 sgdma_rx_status(struct altera_tse_private *priv) "sgdma rx and rx queue empty!\n"); /* Clear control */ - iowrite32(0, &csr->control); + csrwr32(0, priv->rx_dma_csr, sgdma_csroffs(control)); /* clear status */ - iowrite32(0xf, &csr->status); + csrwr32(0xf, priv->rx_dma_csr, sgdma_csroffs(status)); /* kick the rx sgdma after reaping this descriptor */ - pktsrx = sgdma_async_read(priv); + sgdma_async_read(priv); } else { /* If the SGDMA indicated an end of packet on recv, @@ -298,10 +291,11 @@ u32 sgdma_rx_status(struct altera_tse_private *priv) */ netdev_err(priv->dev, "SGDMA RX Error Info: %x, %x, %x\n", - sts, desc->status, rxstatus); + sts, csrrd8(desc, sgdma_descroffs(status)), + rxstatus); } } else if (sts == 0) { - pktsrx = sgdma_async_read(priv); + sgdma_async_read(priv); } return rxstatus; @@ -309,8 +303,8 @@ u32 sgdma_rx_status(struct altera_tse_private *priv) /* Private functions */ -static void sgdma_setup_descrip(struct sgdma_descrip *desc, - struct sgdma_descrip *ndesc, +static void sgdma_setup_descrip(struct sgdma_descrip __iomem *desc, + struct sgdma_descrip __iomem *ndesc, dma_addr_t ndesc_phys, dma_addr_t raddr, dma_addr_t waddr, @@ -320,27 +314,30 @@ static void sgdma_setup_descrip(struct sgdma_descrip *desc, int wfixed) { /* Clear the next descriptor as not owned by hardware */ - u32 ctrl = ndesc->control; - ctrl &= ~SGDMA_CONTROL_HW_OWNED; - ndesc->control = ctrl; - ctrl = 0; + u32 ctrl = csrrd8(ndesc, sgdma_descroffs(control)); + ctrl &= ~SGDMA_CONTROL_HW_OWNED; + csrwr8(ctrl, ndesc, sgdma_descroffs(control)); + ctrl = SGDMA_CONTROL_HW_OWNED; ctrl |= generate_eop; ctrl |= rfixed; ctrl |= wfixed; /* Channel is implicitly zero, initialized to 0 by default */ + csrwr32(lower_32_bits(raddr), desc, sgdma_descroffs(raddr)); + csrwr32(lower_32_bits(waddr), desc, sgdma_descroffs(waddr)); - desc->raddr = raddr; - desc->waddr = waddr; - desc->next = lower_32_bits(ndesc_phys); - desc->control = ctrl; - desc->status = 0; - desc->rburst = 0; - desc->wburst = 0; - desc->bytes = length; - desc->bytes_xferred = 0; + csrwr32(0, desc, sgdma_descroffs(pad1)); + csrwr32(0, desc, sgdma_descroffs(pad2)); + csrwr32(lower_32_bits(ndesc_phys), desc, sgdma_descroffs(next)); + + csrwr8(ctrl, desc, sgdma_descroffs(control)); + csrwr8(0, desc, sgdma_descroffs(status)); + csrwr8(0, desc, sgdma_descroffs(wburst)); + csrwr8(0, desc, sgdma_descroffs(rburst)); + csrwr16(length, desc, sgdma_descroffs(bytes)); + csrwr16(0, desc, sgdma_descroffs(bytes_xferred)); } /* If hardware is busy, don't restart async read. @@ -351,12 +348,11 @@ static void sgdma_setup_descrip(struct sgdma_descrip *desc, */ static int sgdma_async_read(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - struct sgdma_descrip *descbase = - (struct sgdma_descrip *)priv->rx_dma_desc; + struct sgdma_descrip __iomem *descbase = + (struct sgdma_descrip __iomem *)priv->rx_dma_desc; - struct sgdma_descrip *cdesc = &descbase[0]; - struct sgdma_descrip *ndesc = &descbase[1]; + struct sgdma_descrip __iomem *cdesc = &descbase[0]; + struct sgdma_descrip __iomem *ndesc = &descbase[1]; struct tse_buffer *rxbuffer = NULL; @@ -382,11 +378,13 @@ static int sgdma_async_read(struct altera_tse_private *priv) priv->sgdmadesclen, DMA_TO_DEVICE); - iowrite32(lower_32_bits(sgdma_rxphysaddr(priv, cdesc)), - &csr->next_descrip); + csrwr32(lower_32_bits(sgdma_rxphysaddr(priv, cdesc)), + priv->rx_dma_csr, + sgdma_csroffs(next_descrip)); - iowrite32((priv->rxctrlreg | SGDMA_CTRLREG_START), - &csr->control); + csrwr32((priv->rxctrlreg | SGDMA_CTRLREG_START), + priv->rx_dma_csr, + sgdma_csroffs(control)); return 1; } @@ -395,32 +393,32 @@ static int sgdma_async_read(struct altera_tse_private *priv) } static int sgdma_async_write(struct altera_tse_private *priv, - struct sgdma_descrip *desc) + struct sgdma_descrip __iomem *desc) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr; - if (sgdma_txbusy(priv)) return 0; /* clear control and status */ - iowrite32(0, &csr->control); - iowrite32(0x1f, &csr->status); + csrwr32(0, priv->tx_dma_csr, sgdma_csroffs(control)); + csrwr32(0x1f, priv->tx_dma_csr, sgdma_csroffs(status)); dma_sync_single_for_device(priv->device, priv->txdescphys, priv->sgdmadesclen, DMA_TO_DEVICE); - iowrite32(lower_32_bits(sgdma_txphysaddr(priv, desc)), - &csr->next_descrip); + csrwr32(lower_32_bits(sgdma_txphysaddr(priv, desc)), + priv->tx_dma_csr, + sgdma_csroffs(next_descrip)); - iowrite32((priv->txctrlreg | SGDMA_CTRLREG_START), - &csr->control); + csrwr32((priv->txctrlreg | SGDMA_CTRLREG_START), + priv->tx_dma_csr, + sgdma_csroffs(control)); return 1; } static dma_addr_t sgdma_txphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc) + struct sgdma_descrip __iomem *desc) { dma_addr_t paddr = priv->txdescmem_busaddr; uintptr_t offs = (uintptr_t)desc - (uintptr_t)priv->tx_dma_desc; @@ -429,7 +427,7 @@ sgdma_txphysaddr(struct altera_tse_private *priv, static dma_addr_t sgdma_rxphysaddr(struct altera_tse_private *priv, - struct sgdma_descrip *desc) + struct sgdma_descrip __iomem *desc) { dma_addr_t paddr = priv->rxdescmem_busaddr; uintptr_t offs = (uintptr_t)desc - (uintptr_t)priv->rx_dma_desc; @@ -518,8 +516,8 @@ queue_rx_peekhead(struct altera_tse_private *priv) */ static int sgdma_rxbusy(struct altera_tse_private *priv) { - struct sgdma_csr *csr = (struct sgdma_csr *)priv->rx_dma_csr; - return ioread32(&csr->status) & SGDMA_STSREG_BUSY; + return csrrd32(priv->rx_dma_csr, sgdma_csroffs(status)) + & SGDMA_STSREG_BUSY; } /* waits for the tx sgdma to finish it's current operation, returns 0 @@ -528,13 +526,14 @@ static int sgdma_rxbusy(struct altera_tse_private *priv) static int sgdma_txbusy(struct altera_tse_private *priv) { int delay = 0; - struct sgdma_csr *csr = (struct sgdma_csr *)priv->tx_dma_csr; /* if DMA is busy, wait for current transactino to finish */ - while ((ioread32(&csr->status) & SGDMA_STSREG_BUSY) && (delay++ < 100)) + while ((csrrd32(priv->tx_dma_csr, sgdma_csroffs(status)) + & SGDMA_STSREG_BUSY) && (delay++ < 100)) udelay(1); - if (ioread32(&csr->status) & SGDMA_STSREG_BUSY) { + if (csrrd32(priv->tx_dma_csr, sgdma_csroffs(status)) + & SGDMA_STSREG_BUSY) { netdev_err(priv->dev, "timeout waiting for tx dma\n"); return 1; } diff --git a/drivers/net/ethernet/altera/altera_sgdmahw.h b/drivers/net/ethernet/altera/altera_sgdmahw.h index ba3334f35383..85bc33b218d9 100644 --- a/drivers/net/ethernet/altera/altera_sgdmahw.h +++ b/drivers/net/ethernet/altera/altera_sgdmahw.h @@ -19,16 +19,16 @@ /* SGDMA descriptor structure */ struct sgdma_descrip { - unsigned int raddr; /* address of data to be read */ - unsigned int pad1; - unsigned int waddr; - unsigned int pad2; - unsigned int next; - unsigned int pad3; - unsigned short bytes; - unsigned char rburst; - unsigned char wburst; - unsigned short bytes_xferred; /* 16 bits, bytes xferred */ + u32 raddr; /* address of data to be read */ + u32 pad1; + u32 waddr; + u32 pad2; + u32 next; + u32 pad3; + u16 bytes; + u8 rburst; + u8 wburst; + u16 bytes_xferred; /* 16 bits, bytes xferred */ /* bit 0: error * bit 1: length error @@ -39,7 +39,7 @@ struct sgdma_descrip { * bit 6: reserved * bit 7: status eop for recv case */ - unsigned char status; + u8 status; /* bit 0: eop * bit 1: read_fixed @@ -47,7 +47,7 @@ struct sgdma_descrip { * bits 3,4,5,6: Channel (always 0) * bit 7: hardware owned */ - unsigned char control; + u8 control; } __packed; @@ -101,6 +101,8 @@ struct sgdma_csr { u32 pad3[3]; }; +#define sgdma_csroffs(a) (offsetof(struct sgdma_csr, a)) +#define sgdma_descroffs(a) (offsetof(struct sgdma_descrip, a)) #define SGDMA_STSREG_ERR BIT(0) /* Error */ #define SGDMA_STSREG_EOP BIT(1) /* EOP */ diff --git a/drivers/net/ethernet/altera/altera_tse.h b/drivers/net/ethernet/altera/altera_tse.h index 465c4aabebbd..2adb24d4523c 100644 --- a/drivers/net/ethernet/altera/altera_tse.h +++ b/drivers/net/ethernet/altera/altera_tse.h @@ -357,6 +357,8 @@ struct altera_tse_mac { u32 reserved5[42]; }; +#define tse_csroffs(a) (offsetof(struct altera_tse_mac, a)) + /* Transmit and Receive Command Registers Bit Definitions */ #define ALTERA_TSE_TX_CMD_STAT_OMIT_CRC BIT(17) @@ -487,4 +489,49 @@ struct altera_tse_private { */ void altera_tse_set_ethtool_ops(struct net_device *); +static inline +u32 csrrd32(void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + return readl(paddr); +} + +static inline +u16 csrrd16(void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + return readw(paddr); +} + +static inline +u8 csrrd8(void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + return readb(paddr); +} + +static inline +void csrwr32(u32 val, void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + + writel(val, paddr); +} + +static inline +void csrwr16(u16 val, void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + + writew(val, paddr); +} + +static inline +void csrwr8(u8 val, void __iomem *mac, size_t offs) +{ + void __iomem *paddr = (void __iomem *)((uintptr_t)mac + offs); + + writeb(val, paddr); +} + #endif /* __ALTERA_TSE_H__ */ diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c index 76133caffa78..54c25eff7952 100644 --- a/drivers/net/ethernet/altera/altera_tse_ethtool.c +++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c @@ -96,54 +96,89 @@ static void tse_fill_stats(struct net_device *dev, struct ethtool_stats *dummy, u64 *buf) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; u64 ext; - buf[0] = ioread32(&mac->frames_transmitted_ok); - buf[1] = ioread32(&mac->frames_received_ok); - buf[2] = ioread32(&mac->frames_check_sequence_errors); - buf[3] = ioread32(&mac->alignment_errors); + buf[0] = csrrd32(priv->mac_dev, + tse_csroffs(frames_transmitted_ok)); + buf[1] = csrrd32(priv->mac_dev, + tse_csroffs(frames_received_ok)); + buf[2] = csrrd32(priv->mac_dev, + tse_csroffs(frames_check_sequence_errors)); + buf[3] = csrrd32(priv->mac_dev, + tse_csroffs(alignment_errors)); /* Extended aOctetsTransmittedOK counter */ - ext = (u64) ioread32(&mac->msb_octets_transmitted_ok) << 32; - ext |= ioread32(&mac->octets_transmitted_ok); + ext = (u64) csrrd32(priv->mac_dev, + tse_csroffs(msb_octets_transmitted_ok)) << 32; + + ext |= csrrd32(priv->mac_dev, + tse_csroffs(octets_transmitted_ok)); buf[4] = ext; /* Extended aOctetsReceivedOK counter */ - ext = (u64) ioread32(&mac->msb_octets_received_ok) << 32; - ext |= ioread32(&mac->octets_received_ok); + ext = (u64) csrrd32(priv->mac_dev, + tse_csroffs(msb_octets_received_ok)) << 32; + + ext |= csrrd32(priv->mac_dev, + tse_csroffs(octets_received_ok)); buf[5] = ext; - buf[6] = ioread32(&mac->tx_pause_mac_ctrl_frames); - buf[7] = ioread32(&mac->rx_pause_mac_ctrl_frames); - buf[8] = ioread32(&mac->if_in_errors); - buf[9] = ioread32(&mac->if_out_errors); - buf[10] = ioread32(&mac->if_in_ucast_pkts); - buf[11] = ioread32(&mac->if_in_multicast_pkts); - buf[12] = ioread32(&mac->if_in_broadcast_pkts); - buf[13] = ioread32(&mac->if_out_discards); - buf[14] = ioread32(&mac->if_out_ucast_pkts); - buf[15] = ioread32(&mac->if_out_multicast_pkts); - buf[16] = ioread32(&mac->if_out_broadcast_pkts); - buf[17] = ioread32(&mac->ether_stats_drop_events); + buf[6] = csrrd32(priv->mac_dev, + tse_csroffs(tx_pause_mac_ctrl_frames)); + buf[7] = csrrd32(priv->mac_dev, + tse_csroffs(rx_pause_mac_ctrl_frames)); + buf[8] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_errors)); + buf[9] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_errors)); + buf[10] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_ucast_pkts)); + buf[11] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_multicast_pkts)); + buf[12] = csrrd32(priv->mac_dev, + tse_csroffs(if_in_broadcast_pkts)); + buf[13] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_discards)); + buf[14] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_ucast_pkts)); + buf[15] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_multicast_pkts)); + buf[16] = csrrd32(priv->mac_dev, + tse_csroffs(if_out_broadcast_pkts)); + buf[17] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_drop_events)); /* Extended etherStatsOctets counter */ - ext = (u64) ioread32(&mac->msb_ether_stats_octets) << 32; - ext |= ioread32(&mac->ether_stats_octets); + ext = (u64) csrrd32(priv->mac_dev, + tse_csroffs(msb_ether_stats_octets)) << 32; + ext |= csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_octets)); buf[18] = ext; - buf[19] = ioread32(&mac->ether_stats_pkts); - buf[20] = ioread32(&mac->ether_stats_undersize_pkts); - buf[21] = ioread32(&mac->ether_stats_oversize_pkts); - buf[22] = ioread32(&mac->ether_stats_pkts_64_octets); - buf[23] = ioread32(&mac->ether_stats_pkts_65to127_octets); - buf[24] = ioread32(&mac->ether_stats_pkts_128to255_octets); - buf[25] = ioread32(&mac->ether_stats_pkts_256to511_octets); - buf[26] = ioread32(&mac->ether_stats_pkts_512to1023_octets); - buf[27] = ioread32(&mac->ether_stats_pkts_1024to1518_octets); - buf[28] = ioread32(&mac->ether_stats_pkts_1519tox_octets); - buf[29] = ioread32(&mac->ether_stats_jabbers); - buf[30] = ioread32(&mac->ether_stats_fragments); + buf[19] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts)); + buf[20] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_undersize_pkts)); + buf[21] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_oversize_pkts)); + buf[22] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_64_octets)); + buf[23] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_65to127_octets)); + buf[24] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_128to255_octets)); + buf[25] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_256to511_octets)); + buf[26] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_512to1023_octets)); + buf[27] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_1024to1518_octets)); + buf[28] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_pkts_1519tox_octets)); + buf[29] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_jabbers)); + buf[30] = csrrd32(priv->mac_dev, + tse_csroffs(ether_stats_fragments)); } static int tse_sset_count(struct net_device *dev, int sset) @@ -178,7 +213,6 @@ static void tse_get_regs(struct net_device *dev, struct ethtool_regs *regs, { int i; struct altera_tse_private *priv = netdev_priv(dev); - u32 *tse_mac_regs = (u32 *)priv->mac_dev; u32 *buf = regbuf; /* Set version to a known value, so ethtool knows @@ -196,7 +230,7 @@ static void tse_get_regs(struct net_device *dev, struct ethtool_regs *regs, regs->version = 1; for (i = 0; i < TSE_NUM_REGS; i++) - buf[i] = ioread32(&tse_mac_regs[i]); + buf[i] = csrrd32(priv->mac_dev, i * 4); } static int tse_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index e44a4aeb9701..ecc3b4e4588b 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -100,29 +100,30 @@ static inline u32 tse_tx_avail(struct altera_tse_private *priv) */ static int altera_tse_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { - struct altera_tse_mac *mac = (struct altera_tse_mac *)bus->priv; - unsigned int *mdio_regs = (unsigned int *)&mac->mdio_phy0; - u32 data; + struct net_device *ndev = bus->priv; + struct altera_tse_private *priv = netdev_priv(ndev); /* set MDIO address */ - iowrite32((mii_id & 0x1f), &mac->mdio_phy0_addr); + csrwr32((mii_id & 0x1f), priv->mac_dev, + tse_csroffs(mdio_phy0_addr)); /* get the data */ - data = ioread32(&mdio_regs[regnum]) & 0xffff; - return data; + return csrrd32(priv->mac_dev, + tse_csroffs(mdio_phy0) + regnum * 4) & 0xffff; } static int altera_tse_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) { - struct altera_tse_mac *mac = (struct altera_tse_mac *)bus->priv; - unsigned int *mdio_regs = (unsigned int *)&mac->mdio_phy0; + struct net_device *ndev = bus->priv; + struct altera_tse_private *priv = netdev_priv(ndev); /* set MDIO address */ - iowrite32((mii_id & 0x1f), &mac->mdio_phy0_addr); + csrwr32((mii_id & 0x1f), priv->mac_dev, + tse_csroffs(mdio_phy0_addr)); /* write the data */ - iowrite32((u32) value, &mdio_regs[regnum]); + csrwr32(value, priv->mac_dev, tse_csroffs(mdio_phy0) + regnum * 4); return 0; } @@ -168,7 +169,7 @@ static int altera_tse_mdio_create(struct net_device *dev, unsigned int id) for (i = 0; i < PHY_MAX_ADDR; i++) mdio->irq[i] = PHY_POLL; - mdio->priv = priv->mac_dev; + mdio->priv = dev; mdio->parent = priv->device; ret = of_mdiobus_register(mdio, mdio_node); @@ -563,7 +564,6 @@ static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int nopaged_len = skb_headlen(skb); enum netdev_tx ret = NETDEV_TX_OK; dma_addr_t dma_addr; - int txcomplete = 0; spin_lock_bh(&priv->tx_lock); @@ -599,7 +599,7 @@ static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_sync_single_for_device(priv->device, buffer->dma_addr, buffer->len, DMA_TO_DEVICE); - txcomplete = priv->dmaops->tx_buffer(priv, buffer); + priv->dmaops->tx_buffer(priv, buffer); skb_tx_timestamp(skb); @@ -698,7 +698,6 @@ static struct phy_device *connect_local_phy(struct net_device *dev) struct altera_tse_private *priv = netdev_priv(dev); struct phy_device *phydev = NULL; char phy_id_fmt[MII_BUS_ID_SIZE + 3]; - int ret; if (priv->phy_addr != POLL_PHY) { snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, @@ -712,6 +711,7 @@ static struct phy_device *connect_local_phy(struct net_device *dev) netdev_err(dev, "Could not attach to PHY\n"); } else { + int ret; phydev = phy_find_first(priv->mdio); if (phydev == NULL) { netdev_err(dev, "No PHY found\n"); @@ -791,7 +791,6 @@ static int init_phy(struct net_device *dev) static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr) { - struct altera_tse_mac *mac = priv->mac_dev; u32 msb; u32 lsb; @@ -799,8 +798,8 @@ static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr) lsb = ((addr[5] << 8) | addr[4]) & 0xffff; /* Set primary MAC address */ - iowrite32(msb, &mac->mac_addr_0); - iowrite32(lsb, &mac->mac_addr_1); + csrwr32(msb, priv->mac_dev, tse_csroffs(mac_addr_0)); + csrwr32(lsb, priv->mac_dev, tse_csroffs(mac_addr_1)); } /* MAC software reset. @@ -811,26 +810,26 @@ static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr) */ static int reset_mac(struct altera_tse_private *priv) { - void __iomem *cmd_cfg_reg = &priv->mac_dev->command_config; int counter; u32 dat; - dat = ioread32(cmd_cfg_reg); + dat = csrrd32(priv->mac_dev, tse_csroffs(command_config)); dat &= ~(MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA); dat |= MAC_CMDCFG_SW_RESET | MAC_CMDCFG_CNT_RESET; - iowrite32(dat, cmd_cfg_reg); + csrwr32(dat, priv->mac_dev, tse_csroffs(command_config)); counter = 0; while (counter++ < ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - if (tse_bit_is_clear(cmd_cfg_reg, MAC_CMDCFG_SW_RESET)) + if (tse_bit_is_clear(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_SW_RESET)) break; udelay(1); } if (counter >= ALTERA_TSE_SW_RESET_WATCHDOG_CNTR) { - dat = ioread32(cmd_cfg_reg); + dat = csrrd32(priv->mac_dev, tse_csroffs(command_config)); dat &= ~MAC_CMDCFG_SW_RESET; - iowrite32(dat, cmd_cfg_reg); + csrwr32(dat, priv->mac_dev, tse_csroffs(command_config)); return -1; } return 0; @@ -840,41 +839,57 @@ static int reset_mac(struct altera_tse_private *priv) */ static int init_mac(struct altera_tse_private *priv) { - struct altera_tse_mac *mac = priv->mac_dev; unsigned int cmd = 0; u32 frm_length; /* Setup Rx FIFO */ - iowrite32(priv->rx_fifo_depth - ALTERA_TSE_RX_SECTION_EMPTY, - &mac->rx_section_empty); - iowrite32(ALTERA_TSE_RX_SECTION_FULL, &mac->rx_section_full); - iowrite32(ALTERA_TSE_RX_ALMOST_EMPTY, &mac->rx_almost_empty); - iowrite32(ALTERA_TSE_RX_ALMOST_FULL, &mac->rx_almost_full); + csrwr32(priv->rx_fifo_depth - ALTERA_TSE_RX_SECTION_EMPTY, + priv->mac_dev, tse_csroffs(rx_section_empty)); + + csrwr32(ALTERA_TSE_RX_SECTION_FULL, priv->mac_dev, + tse_csroffs(rx_section_full)); + + csrwr32(ALTERA_TSE_RX_ALMOST_EMPTY, priv->mac_dev, + tse_csroffs(rx_almost_empty)); + + csrwr32(ALTERA_TSE_RX_ALMOST_FULL, priv->mac_dev, + tse_csroffs(rx_almost_full)); /* Setup Tx FIFO */ - iowrite32(priv->tx_fifo_depth - ALTERA_TSE_TX_SECTION_EMPTY, - &mac->tx_section_empty); - iowrite32(ALTERA_TSE_TX_SECTION_FULL, &mac->tx_section_full); - iowrite32(ALTERA_TSE_TX_ALMOST_EMPTY, &mac->tx_almost_empty); - iowrite32(ALTERA_TSE_TX_ALMOST_FULL, &mac->tx_almost_full); + csrwr32(priv->tx_fifo_depth - ALTERA_TSE_TX_SECTION_EMPTY, + priv->mac_dev, tse_csroffs(tx_section_empty)); + + csrwr32(ALTERA_TSE_TX_SECTION_FULL, priv->mac_dev, + tse_csroffs(tx_section_full)); + + csrwr32(ALTERA_TSE_TX_ALMOST_EMPTY, priv->mac_dev, + tse_csroffs(tx_almost_empty)); + + csrwr32(ALTERA_TSE_TX_ALMOST_FULL, priv->mac_dev, + tse_csroffs(tx_almost_full)); /* MAC Address Configuration */ tse_update_mac_addr(priv, priv->dev->dev_addr); /* MAC Function Configuration */ frm_length = ETH_HLEN + priv->dev->mtu + ETH_FCS_LEN; - iowrite32(frm_length, &mac->frm_length); - iowrite32(ALTERA_TSE_TX_IPG_LENGTH, &mac->tx_ipg_length); + csrwr32(frm_length, priv->mac_dev, tse_csroffs(frm_length)); + + csrwr32(ALTERA_TSE_TX_IPG_LENGTH, priv->mac_dev, + tse_csroffs(tx_ipg_length)); /* Disable RX/TX shift 16 for alignment of all received frames on 16-bit * start address */ - tse_set_bit(&mac->rx_cmd_stat, ALTERA_TSE_RX_CMD_STAT_RX_SHIFT16); - tse_clear_bit(&mac->tx_cmd_stat, ALTERA_TSE_TX_CMD_STAT_TX_SHIFT16 | - ALTERA_TSE_TX_CMD_STAT_OMIT_CRC); + tse_set_bit(priv->mac_dev, tse_csroffs(rx_cmd_stat), + ALTERA_TSE_RX_CMD_STAT_RX_SHIFT16); + + tse_clear_bit(priv->mac_dev, tse_csroffs(tx_cmd_stat), + ALTERA_TSE_TX_CMD_STAT_TX_SHIFT16 | + ALTERA_TSE_TX_CMD_STAT_OMIT_CRC); /* Set the MAC options */ - cmd = ioread32(&mac->command_config); + cmd = csrrd32(priv->mac_dev, tse_csroffs(command_config)); cmd &= ~MAC_CMDCFG_PAD_EN; /* No padding Removal on Receive */ cmd &= ~MAC_CMDCFG_CRC_FWD; /* CRC Removal */ cmd |= MAC_CMDCFG_RX_ERR_DISC; /* Automatically discard frames @@ -889,9 +904,10 @@ static int init_mac(struct altera_tse_private *priv) cmd &= ~MAC_CMDCFG_ETH_SPEED; cmd &= ~MAC_CMDCFG_ENA_10; - iowrite32(cmd, &mac->command_config); + csrwr32(cmd, priv->mac_dev, tse_csroffs(command_config)); - iowrite32(ALTERA_TSE_PAUSE_QUANTA, &mac->pause_quanta); + csrwr32(ALTERA_TSE_PAUSE_QUANTA, priv->mac_dev, + tse_csroffs(pause_quanta)); if (netif_msg_hw(priv)) dev_dbg(priv->device, @@ -904,15 +920,14 @@ static int init_mac(struct altera_tse_private *priv) */ static void tse_set_mac(struct altera_tse_private *priv, bool enable) { - struct altera_tse_mac *mac = priv->mac_dev; - u32 value = ioread32(&mac->command_config); + u32 value = csrrd32(priv->mac_dev, tse_csroffs(command_config)); if (enable) value |= MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA; else value &= ~(MAC_CMDCFG_TX_ENA | MAC_CMDCFG_RX_ENA); - iowrite32(value, &mac->command_config); + csrwr32(value, priv->mac_dev, tse_csroffs(command_config)); } /* Change the MTU @@ -942,13 +957,12 @@ static int tse_change_mtu(struct net_device *dev, int new_mtu) static void altera_tse_set_mcfilter(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; int i; struct netdev_hw_addr *ha; /* clear the hash filter */ for (i = 0; i < 64; i++) - iowrite32(0, &(mac->hash_table[i])); + csrwr32(0, priv->mac_dev, tse_csroffs(hash_table) + i * 4); netdev_for_each_mc_addr(ha, dev) { unsigned int hash = 0; @@ -964,7 +978,7 @@ static void altera_tse_set_mcfilter(struct net_device *dev) hash = (hash << 1) | xor_bit; } - iowrite32(1, &(mac->hash_table[hash])); + csrwr32(1, priv->mac_dev, tse_csroffs(hash_table) + hash * 4); } } @@ -972,12 +986,11 @@ static void altera_tse_set_mcfilter(struct net_device *dev) static void altera_tse_set_mcfilterall(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; int i; /* set the hash filter */ for (i = 0; i < 64; i++) - iowrite32(1, &(mac->hash_table[i])); + csrwr32(1, priv->mac_dev, tse_csroffs(hash_table) + i * 4); } /* Set or clear the multicast filter for this adaptor @@ -985,12 +998,12 @@ static void altera_tse_set_mcfilterall(struct net_device *dev) static void tse_set_rx_mode_hashfilter(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; spin_lock(&priv->mac_cfg_lock); if (dev->flags & IFF_PROMISC) - tse_set_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN); + tse_set_bit(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_PROMIS_EN); if (dev->flags & IFF_ALLMULTI) altera_tse_set_mcfilterall(dev); @@ -1005,15 +1018,16 @@ static void tse_set_rx_mode_hashfilter(struct net_device *dev) static void tse_set_rx_mode(struct net_device *dev) { struct altera_tse_private *priv = netdev_priv(dev); - struct altera_tse_mac *mac = priv->mac_dev; spin_lock(&priv->mac_cfg_lock); if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) || !netdev_mc_empty(dev) || !netdev_uc_empty(dev)) - tse_set_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN); + tse_set_bit(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_PROMIS_EN); else - tse_clear_bit(&mac->command_config, MAC_CMDCFG_PROMIS_EN); + tse_clear_bit(priv->mac_dev, tse_csroffs(command_config), + MAC_CMDCFG_PROMIS_EN); spin_unlock(&priv->mac_cfg_lock); } @@ -1493,7 +1507,7 @@ static int altera_tse_remove(struct platform_device *pdev) return 0; } -struct altera_dmaops altera_dtype_sgdma = { +static const struct altera_dmaops altera_dtype_sgdma = { .altera_dtype = ALTERA_DTYPE_SGDMA, .dmamask = 32, .reset_dma = sgdma_reset, @@ -1512,7 +1526,7 @@ struct altera_dmaops altera_dtype_sgdma = { .start_rxdma = sgdma_start_rxdma, }; -struct altera_dmaops altera_dtype_msgdma = { +static const struct altera_dmaops altera_dtype_msgdma = { .altera_dtype = ALTERA_DTYPE_MSGDMA, .dmamask = 64, .reset_dma = msgdma_reset, diff --git a/drivers/net/ethernet/altera/altera_utils.c b/drivers/net/ethernet/altera/altera_utils.c index 70fa13f486b2..d7eeb1713ad2 100644 --- a/drivers/net/ethernet/altera/altera_utils.c +++ b/drivers/net/ethernet/altera/altera_utils.c @@ -17,28 +17,28 @@ #include "altera_tse.h" #include "altera_utils.h" -void tse_set_bit(void __iomem *ioaddr, u32 bit_mask) +void tse_set_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); value |= bit_mask; - iowrite32(value, ioaddr); + csrwr32(value, ioaddr, offs); } -void tse_clear_bit(void __iomem *ioaddr, u32 bit_mask) +void tse_clear_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); value &= ~bit_mask; - iowrite32(value, ioaddr); + csrwr32(value, ioaddr, offs); } -int tse_bit_is_set(void __iomem *ioaddr, u32 bit_mask) +int tse_bit_is_set(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); return (value & bit_mask) ? 1 : 0; } -int tse_bit_is_clear(void __iomem *ioaddr, u32 bit_mask) +int tse_bit_is_clear(void __iomem *ioaddr, size_t offs, u32 bit_mask) { - u32 value = ioread32(ioaddr); + u32 value = csrrd32(ioaddr, offs); return (value & bit_mask) ? 0 : 1; } diff --git a/drivers/net/ethernet/altera/altera_utils.h b/drivers/net/ethernet/altera/altera_utils.h index ce1db36d3583..baf100ccf587 100644 --- a/drivers/net/ethernet/altera/altera_utils.h +++ b/drivers/net/ethernet/altera/altera_utils.h @@ -19,9 +19,9 @@ #ifndef __ALTERA_UTILS_H__ #define __ALTERA_UTILS_H__ -void tse_set_bit(void __iomem *ioaddr, u32 bit_mask); -void tse_clear_bit(void __iomem *ioaddr, u32 bit_mask); -int tse_bit_is_set(void __iomem *ioaddr, u32 bit_mask); -int tse_bit_is_clear(void __iomem *ioaddr, u32 bit_mask); +void tse_set_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); +void tse_clear_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); +int tse_bit_is_set(void __iomem *ioaddr, size_t offs, u32 bit_mask); +int tse_bit_is_clear(void __iomem *ioaddr, size_t offs, u32 bit_mask); #endif /* __ALTERA_UTILS_H__*/ From d91e5c02555ae838e9d783b9e83e125fd10dca73 Mon Sep 17 00:00:00 2001 From: Vince Bridgers Date: Wed, 14 May 2014 14:38:37 -0500 Subject: [PATCH 083/116] Altera TSE: Disable Multicast filtering to workaround problem This patch disables multicast hash filtering if present in the hardware and uses promiscuous mode instead until the problem with multicast filtering has been debugged, integrated and tested. Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller --- drivers/net/ethernet/altera/altera_tse_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index ecc3b4e4588b..7330681574d2 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1376,6 +1376,11 @@ static int altera_tse_probe(struct platform_device *pdev) of_property_read_bool(pdev->dev.of_node, "altr,has-hash-multicast-filter"); + /* Set hash filter to not set for now until the + * multicast filter receive issue is debugged + */ + priv->hash_filter = 0; + /* get supplemental address settings for this instance */ priv->added_unicast = of_property_read_bool(pdev->dev.of_node, From b394745df2d9d4c30bf1bcc55773bec6f3bc7c67 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 14 May 2014 13:12:49 -0700 Subject: [PATCH 084/116] net: phy: Don't call phy_resume if phy_init_hw failed After the call to phy_init_hw failed in phy_attach_direct, phy_detach is called to detach the phy device from its network device. If the attached driver is a generic phy driver, this also detaches the driver. Subsequently phy_resume is called, which assumes without checking that a driver is attached to the device. This will result in a crash such as Unable to handle kernel paging request for data at address 0xffffffffffffff90 Faulting instruction address: 0xc0000000003a0e18 Oops: Kernel access of bad area, sig: 11 [#1] ... NIP [c0000000003a0e18] .phy_attach_direct+0x68/0x17c LR [c0000000003a0e6c] .phy_attach_direct+0xbc/0x17c Call Trace: [c0000003fc0475d0] [c0000000003a0e6c] .phy_attach_direct+0xbc/0x17c (unreliable) [c0000003fc047670] [c0000000003a0ff8] .phy_connect_direct+0x28/0x98 [c0000003fc047700] [c0000000003f0074] .of_phy_connect+0x4c/0xa4 Only call phy_resume if phy_init_hw was successful. Signed-off-by: Guenter Roeck Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 0ce606624296..4987a1c6dc52 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -614,8 +614,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, err = phy_init_hw(phydev); if (err) phy_detach(phydev); - - phy_resume(phydev); + else + phy_resume(phydev); return err; } From be7a010d6fa33dca9327ad8e91844278dfd1e712 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Thu, 15 May 2014 15:56:14 +0800 Subject: [PATCH 085/116] ipv6: update Destination Cache entries when gateway turn into host RFC 4861 states in 7.2.5: The IsRouter flag in the cache entry MUST be set based on the Router flag in the received advertisement. In those cases where the IsRouter flag changes from TRUE to FALSE as a result of this update, the node MUST remove that router from the Default Router List and update the Destination Cache entries for all destinations using that neighbor as a router as specified in Section 7.3.3. This is needed to detect when a node that is used as a router stops forwarding packets due to being configured as a host. Currently, when dealing with NA Message which IsRouter flag changes from TRUE to FALSE, the kernel only removes router from the Default Router List, and don't update the Destination Cache entries. Now in order to update those Destination Cache entries, i introduce function rt6_clean_tohost(). Signed-off-by: Duan Jiong Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + net/ipv6/ndisc.c | 7 ++----- net/ipv6/route.c | 21 +++++++++++++++++++++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 6c4f5eac98e7..216cecce65e9 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,6 +127,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg); void rt6_ifdown(struct net *net, struct net_device *dev); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); /* diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 09a22f4f36c9..ca8d4ea48a5d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -851,7 +851,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) static void ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); - const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; + struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + @@ -944,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* * Change: router to host */ - struct rt6_info *rt; - rt = rt6_get_dflt_router(saddr, dev); - if (rt) - ip6_del_rt(rt); + rt6_clean_tohost(dev_net(dev), saddr); } out: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 004fffb6c221..58c449ad7f6e 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2234,6 +2234,27 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) fib6_clean_all(net, fib6_remove_prefsrc, &adni); } +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) + +/* Remove routers and update dst entries when gateway turn into host. */ +static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +{ + struct in6_addr *gateway = (struct in6_addr *)arg; + + if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || + ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && + ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + return -1; + } + return 0; +} + +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) +{ + fib6_clean_all(net, fib6_clean_tohost, gateway); +} + struct arg_dev_net { struct net_device *dev; struct net *net; From 583757446ba6850eff96cef6565d729266da9c5b Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Thu, 15 May 2014 11:08:34 +0100 Subject: [PATCH 086/116] xen-netback: Fix grant ref resolution in RX path The original series for reintroducing grant mapping for netback had a patch [1] to handle receiving of packets from an another VIF. Grant copy on the receiving side needs the grant ref of the page to set up the op. The original patch assumed (wrongly) that the frags array haven't changed. In the case reported by Sander, the sending guest sent a packet where the linear buffer and the first frag were under PKT_PROT_LEN (=128) bytes. xenvif_tx_submit() then pulled up the linear area to 128 bytes, and ditched the first frag. The receiving side had an off-by-one problem when gathered the grant refs. This patch fixes that by checking whether the actual frag's page pointer is the same as the page in the original frag list. It can handle any kind of changes on the original frags array, like: - removing granted frags from the array at any point - adding local pages to the frags list anywhere - reordering the frags It's optimized to the most common case, when there is 1:1 relation between the frags and the list, plus works optimal when frags are removed from the end or the beginning. [1]: 3e2234: xen-netback: Handle foreign mapped pages on the guest RX path Reported-by: Sander Eikelenboom Signed-off-by: Zoltan Kiss Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 98 +++++++++++++++++++++++++------ 1 file changed, 80 insertions(+), 18 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 76665405c5aa..64ab1d141f1c 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,7 +104,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif, /* Find the containing VIF's structure from a pointer in pending_tx_info array */ -static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf) +static inline struct xenvif *ubuf_to_vif(const struct ubuf_info *ubuf) { u16 pending_idx = ubuf->desc; struct pending_tx_info *temp = @@ -322,6 +322,35 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, } } +/* + * Find the grant ref for a given frag in a chain of struct ubuf_info's + * skb: the skb itself + * i: the frag's number + * ubuf: a pointer to an element in the chain. It should not be NULL + * + * Returns a pointer to the element in the chain where the page were found. If + * not found, returns NULL. + * See the definition of callback_struct in common.h for more details about + * the chain. + */ +static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb, + const int i, + const struct ubuf_info *ubuf) +{ + struct xenvif *foreign_vif = ubuf_to_vif(ubuf); + + do { + u16 pending_idx = ubuf->desc; + + if (skb_shinfo(skb)->frags[i].page.p == + foreign_vif->mmap_pages[pending_idx]) + break; + ubuf = (struct ubuf_info *) ubuf->ctx; + } while (ubuf); + + return ubuf; +} + /* * Prepare an SKB to be transmitted to the frontend. * @@ -346,9 +375,8 @@ static int xenvif_gop_skb(struct sk_buff *skb, int head = 1; int old_meta_prod; int gso_type; - struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; - grant_ref_t foreign_grefs[MAX_SKB_FRAGS]; - struct xenvif *foreign_vif = NULL; + const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; + const struct ubuf_info *const head_ubuf = ubuf; old_meta_prod = npo->meta_prod; @@ -386,19 +414,6 @@ static int xenvif_gop_skb(struct sk_buff *skb, npo->copy_off = 0; npo->copy_gref = req->gref; - if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && - (ubuf->callback == &xenvif_zerocopy_callback)) { - int i = 0; - foreign_vif = ubuf_to_vif(ubuf); - - do { - u16 pending_idx = ubuf->desc; - foreign_grefs[i++] = - foreign_vif->pending_tx_info[pending_idx].req.gref; - ubuf = (struct ubuf_info *) ubuf->ctx; - } while (ubuf); - } - data = skb->data; while (data < skb_tail_pointer(skb)) { unsigned int offset = offset_in_page(data); @@ -415,13 +430,60 @@ static int xenvif_gop_skb(struct sk_buff *skb, } for (i = 0; i < nr_frags; i++) { + /* This variable also signals whether foreign_gref has a real + * value or not. + */ + struct xenvif *foreign_vif = NULL; + grant_ref_t foreign_gref; + + if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && + (ubuf->callback == &xenvif_zerocopy_callback)) { + const struct ubuf_info *const startpoint = ubuf; + + /* Ideally ubuf points to the chain element which + * belongs to this frag. Or if frags were removed from + * the beginning, then shortly before it. + */ + ubuf = xenvif_find_gref(skb, i, ubuf); + + /* Try again from the beginning of the list, if we + * haven't tried from there. This only makes sense in + * the unlikely event of reordering the original frags. + * For injected local pages it's an unnecessary second + * run. + */ + if (unlikely(!ubuf) && startpoint != head_ubuf) + ubuf = xenvif_find_gref(skb, i, head_ubuf); + + if (likely(ubuf)) { + u16 pending_idx = ubuf->desc; + + foreign_vif = ubuf_to_vif(ubuf); + foreign_gref = foreign_vif->pending_tx_info[pending_idx].req.gref; + /* Just a safety measure. If this was the last + * element on the list, the for loop will + * iterate again if a local page were added to + * the end. Using head_ubuf here prevents the + * second search on the chain. Or the original + * frags changed order, but that's less likely. + * In any way, ubuf shouldn't be NULL. + */ + ubuf = ubuf->ctx ? + (struct ubuf_info *) ubuf->ctx : + head_ubuf; + } else + /* This frag was a local page, added to the + * array after the skb left netback. + */ + ubuf = head_ubuf; + } xenvif_gop_frag_copy(vif, skb, npo, skb_frag_page(&skb_shinfo(skb)->frags[i]), skb_frag_size(&skb_shinfo(skb)->frags[i]), skb_shinfo(skb)->frags[i].page_offset, &head, foreign_vif, - foreign_grefs[i]); + foreign_vif ? foreign_gref : UINT_MAX); } return npo->meta_prod - old_meta_prod; From 81c708068dfedece038e07d818ba68333d8d885d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 15 May 2014 13:35:23 +0200 Subject: [PATCH 087/116] bonding: fix out of range parameters for bond_intmax_tbl I've missed to add a NULL entry to the bond_intmax_tbl when I introduced it with the conversion of arp_interval so add it now. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Fixes: 7bdb04ed0dbf ("bonding: convert arp_interval to use the new option API") Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 724e30fa20b9..832070298446 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -125,6 +125,7 @@ static const struct bond_opt_value bond_fail_over_mac_tbl[] = { static const struct bond_opt_value bond_intmax_tbl[] = { { "off", 0, BOND_VALFLAG_DEFAULT}, { "maxval", INT_MAX, BOND_VALFLAG_MAX}, + { NULL, -1, 0} }; static const struct bond_opt_value bond_lacp_rate_tbl[] = { From ce8d9e0d6746ff67c1870386b7121a4448f21130 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 May 2014 15:29:27 +0300 Subject: [PATCH 088/116] net/mlx4_core: Add UPDATE_QP SRIOV wrapper support This patch adds UPDATE_QP SRIOV wrapper support. The mechanism is a general one, but currently only source MAC index changes are allowed for VFs. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 4 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 6 +++ drivers/net/ethernet/mellanox/mlx4/qp.c | 35 ++++++++++++ .../ethernet/mellanox/mlx4/resource_tracker.c | 54 +++++++++++++++++++ include/linux/mlx4/qp.h | 11 ++++ 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78099eab7673..92d3249f63f1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1253,12 +1253,12 @@ static struct mlx4_cmd_info cmd_info[] = { }, { .opcode = MLX4_CMD_UPDATE_QP, - .has_inbox = false, + .has_inbox = true, .has_outbox = false, .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_CMD_EPERM_wrapper + .wrapper = mlx4_UPDATE_QP_wrapper }, { .opcode = MLX4_CMD_GET_OP_REQ, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index f9c465101963..212cea440f90 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1195,6 +1195,12 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); + int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 61d64ebffd56..fbd32af89c7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -389,6 +389,41 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp) EXPORT_SYMBOL_GPL(mlx4_qp_alloc); +#define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC +int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, + enum mlx4_update_qp_attr attr, + struct mlx4_update_qp_params *params) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_update_qp_context *cmd; + u64 pri_addr_path_mask = 0; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + cmd = (struct mlx4_update_qp_context *)mailbox->buf; + + if (!attr || (attr & ~MLX4_UPDATE_QP_SUPPORTED_ATTRS)) + return -EINVAL; + + if (attr & MLX4_UPDATE_QP_SMAC) { + pri_addr_path_mask |= 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX; + cmd->qp_context.pri_path.grh_mylmc = params->smac_index; + } + + cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask); + + err = mlx4_cmd(dev, mailbox->dma, qp->qpn & 0xffffff, 0, + MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_update_qp); + void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp) { struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1c3fdd4a1f7d..8f1254a79832 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -3895,6 +3895,60 @@ static int add_eth_header(struct mlx4_dev *dev, int slave, } +#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX) +int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd_info) +{ + int err; + u32 qpn = vhcr->in_modifier & 0xffffff; + struct res_qp *rqp; + u64 mac; + unsigned port; + u64 pri_addr_path_mask; + struct mlx4_update_qp_context *cmd; + int smac_index; + + cmd = (struct mlx4_update_qp_context *)inbox->buf; + + pri_addr_path_mask = be64_to_cpu(cmd->primary_addr_path_mask); + if (cmd->qp_mask || cmd->secondary_addr_path_mask || + (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED)) + return -EPERM; + + /* Just change the smac for the QP */ + err = get_res(dev, slave, qpn, RES_QP, &rqp); + if (err) { + mlx4_err(dev, "Updating qpn 0x%x for slave %d rejected\n", qpn, slave); + return err; + } + + port = (rqp->sched_queue >> 6 & 1) + 1; + smac_index = cmd->qp_context.pri_path.grh_mylmc; + err = mac_find_smac_ix_in_slave(dev, slave, port, + smac_index, &mac); + if (err) { + mlx4_err(dev, "Failed to update qpn 0x%x, MAC is invalid. smac_ix: %d\n", + qpn, smac_index); + goto err_mac; + } + + err = mlx4_cmd(dev, inbox->dma, + vhcr->in_modifier, 0, + MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); + if (err) { + mlx4_err(dev, "Failed to update qpn on qpn 0x%x, command failed\n", qpn); + goto err_mac; + } + +err_mac: + put_res(dev, slave, qpn, RES_QP); + return err; +} + int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index b66e7610d4ee..7040dc98ff8b 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -421,6 +421,17 @@ struct mlx4_wqe_inline_seg { __be32 byte_count; }; +enum mlx4_update_qp_attr { + MLX4_UPDATE_QP_SMAC = 1 << 0, +}; + +struct mlx4_update_qp_params { + u8 smac_index; +}; + +int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp, + enum mlx4_update_qp_attr attr, + struct mlx4_update_qp_params *params); int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state, struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar, From 9433c188915c1383ee36259119bc3a9c6f98cfc3 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 15 May 2014 15:29:28 +0300 Subject: [PATCH 089/116] IB/mlx4: Invoke UPDATE_QP for proxy QP1 on MAC changes When we receive a netdev event indicating a netdev change and/or a netdev address change, we must change the MAC index used by the proxy QP1 (in the QP context), otherwise RoCE CM packets sent by the VF will not carry the same source MAC address as the non-CM packets. We use the UPDATE_QP command to perform this change. In order to avoid modifying a QP context based on netdev event, while the driver attempts to destroy this QP (e.g either the mlx4_ib or ib_mad modules are unloaded), we use mutex locking in both flows. Since the relevant mlx4 proxy GSI QP is created indirectly by the mad module when they create their GSI QP, the mlx4 didn't need to keep track on that QP prior to this change. Now, when QP modifications are needed to this QP from within the driver, we added refernece to it. Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/main.c | 67 ++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 ++ drivers/infiniband/hw/mlx4/qp.c | 8 ++++ 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1b6dbe156a37..199c7896f081 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -48,6 +48,7 @@ #include #include +#include #include "mlx4_ib.h" #include "user.h" @@ -1614,6 +1615,53 @@ static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event, } #endif +#define MLX4_IB_INVALID_MAC ((u64)-1) +static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + int port) +{ + u64 new_smac = 0; + u64 release_mac = MLX4_IB_INVALID_MAC; + struct mlx4_ib_qp *qp; + + read_lock(&dev_base_lock); + new_smac = mlx4_mac_to_u64(dev->dev_addr); + read_unlock(&dev_base_lock); + + mutex_lock(&ibdev->qp1_proxy_lock[port - 1]); + qp = ibdev->qp1_proxy[port - 1]; + if (qp) { + int new_smac_index; + u64 old_smac = qp->pri.smac; + struct mlx4_update_qp_params update_params; + + if (new_smac == old_smac) + goto unlock; + + new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac); + + if (new_smac_index < 0) + goto unlock; + + update_params.smac_index = new_smac_index; + if (mlx4_update_qp(ibdev->dev, &qp->mqp, MLX4_UPDATE_QP_SMAC, + &update_params)) { + release_mac = new_smac; + goto unlock; + } + + qp->pri.smac = new_smac; + qp->pri.smac_index = new_smac_index; + + release_mac = old_smac; + } + +unlock: + mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); + if (release_mac != MLX4_IB_INVALID_MAC) + mlx4_unregister_mac(ibdev->dev, port, release_mac); +} + static void mlx4_ib_get_dev_addr(struct net_device *dev, struct mlx4_ib_dev *ibdev, u8 port) { @@ -1689,9 +1737,13 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev) return 0; } -static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) +static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, + struct net_device *dev, + unsigned long event) + { struct mlx4_ib_iboe *iboe; + int update_qps_port = -1; int port; iboe = &ibdev->iboe; @@ -1719,6 +1771,11 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) } curr_master = iboe->masters[port - 1]; + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || + event == NETDEV_UP || event == NETDEV_CHANGE)) + update_qps_port = port; + if (curr_netdev) { port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; @@ -1752,6 +1809,9 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev) } spin_unlock(&iboe->lock); + + if (update_qps_port > 0) + mlx4_ib_update_qps(ibdev, dev, update_qps_port); } static int mlx4_ib_netdev_event(struct notifier_block *this, @@ -1764,7 +1824,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, return NOTIFY_DONE; ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); - mlx4_ib_scan_netdevs(ibdev); + mlx4_ib_scan_netdevs(ibdev, dev, event); return NOTIFY_DONE; } @@ -2043,6 +2103,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_map; for (i = 0; i < ibdev->num_ports; ++i) { + mutex_init(&ibdev->qp1_proxy_lock[i]); if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == IB_LINK_LAYER_ETHERNET) { err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); @@ -2126,7 +2187,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 1 ; i <= ibdev->num_ports ; ++i) reset_gid_table(ibdev, i); rtnl_lock(); - mlx4_ib_scan_netdevs(ibdev); + mlx4_ib_scan_netdevs(ibdev, NULL, 0); rtnl_unlock(); mlx4_ib_init_gid_table(ibdev); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index f589522fddfd..66b0b7dbd9f4 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -522,6 +522,9 @@ struct mlx4_ib_dev { int steer_qpn_count; int steer_qpn_base; int steering_support; + struct mlx4_ib_qp *qp1_proxy[MLX4_MAX_PORTS]; + /* lock when destroying qp1_proxy and getting netdev events */ + struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; }; struct ib_event_work { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 41308af4163c..dc57482ae7af 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1132,6 +1132,12 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); + if (dev->qp1_proxy[mqp->port - 1] == mqp) { + mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); + dev->qp1_proxy[mqp->port - 1] = NULL; + mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); + } + pd = get_pd(mqp); destroy_qp_common(dev, mqp, !!pd->ibpd.uobject); @@ -1646,6 +1652,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context); if (err) return -EINVAL; + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) + dev->qp1_proxy[qp->port - 1] = qp; } } } From 6e14a5eeb158215881ef4507833a3574d0dbad19 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Thu, 15 May 2014 13:35:34 +0800 Subject: [PATCH 090/116] net: phy: resume phydev when going to RESUMING With commit be9dad1f9f26604fb ("net: phy: suspend phydev when going to HALTED"), an unused PHY device will be put in a low-power mode using BMCR_PDOWN. Some Ethernet drivers might be calling phy_start() and phy_stop() from ndo_open and ndo_close() respectively, while calling phy_connect() and phy_disconnect() from probe and remove. In such a case, the PHY will be powered down during the phy_stop() call, but will fail to be powered up in phy_start(). This patch fixes this scenario. Signed-off-by: Jiancheng Xue Signed-off-by: Zhangfei Gao Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a972056b2249..3bc079a67a3d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -715,7 +715,7 @@ void phy_state_machine(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct phy_device *phydev = container_of(dwork, struct phy_device, state_queue); - int needs_aneg = 0, do_suspend = 0; + bool needs_aneg = false, do_suspend = false, do_resume = false; int err = 0; mutex_lock(&phydev->lock); @@ -727,7 +727,7 @@ void phy_state_machine(struct work_struct *work) case PHY_PENDING: break; case PHY_UP: - needs_aneg = 1; + needs_aneg = true; phydev->link_timeout = PHY_AN_TIMEOUT; @@ -757,7 +757,7 @@ void phy_state_machine(struct work_struct *work) phydev->adjust_link(phydev->attached_dev); } else if (0 == phydev->link_timeout--) - needs_aneg = 1; + needs_aneg = true; break; case PHY_NOLINK: err = phy_read_status(phydev); @@ -791,7 +791,7 @@ void phy_state_machine(struct work_struct *work) netif_carrier_on(phydev->attached_dev); } else { if (0 == phydev->link_timeout--) - needs_aneg = 1; + needs_aneg = true; } phydev->adjust_link(phydev->attached_dev); @@ -827,7 +827,7 @@ void phy_state_machine(struct work_struct *work) phydev->link = 0; netif_carrier_off(phydev->attached_dev); phydev->adjust_link(phydev->attached_dev); - do_suspend = 1; + do_suspend = true; } break; case PHY_RESUMING: @@ -876,6 +876,7 @@ void phy_state_machine(struct work_struct *work) } phydev->adjust_link(phydev->attached_dev); } + do_resume = true; break; } @@ -883,9 +884,10 @@ void phy_state_machine(struct work_struct *work) if (needs_aneg) err = phy_start_aneg(phydev); - - if (do_suspend) + else if (do_suspend) phy_suspend(phydev); + else if (do_resume) + phy_resume(phydev); if (err < 0) phy_error(phydev); From fde0133b9cfa4e01b275e942ffc32fd78e27d27c Mon Sep 17 00:00:00 2001 From: Nathaniel W Filardo Date: Thu, 15 May 2014 15:51:22 +0100 Subject: [PATCH 091/116] af_rxrpc: Fix XDR length check in rxrpc key demarshalling. There may be padding on the ticket contained in the key payload, so just ensure that the claimed token length is large enough, rather than exactly the right size. Signed-off-by: Nathaniel Wesley Filardo Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 7633a752c65e..0ad080790a32 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -99,7 +99,7 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr, _debug("tktlen: %x", tktlen); if (tktlen > AFSTOKEN_RK_TIX_MAX) return -EKEYREJECTED; - if (8 * 4 + tktlen != toklen) + if (toklen < 8 * 4 + tktlen) return -EKEYREJECTED; plen = sizeof(*token) + sizeof(*token->kad) + tktlen; From 0d08fceb2e21c30ca3e1e462e678723f806acf18 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 16 May 2014 12:26:04 +0100 Subject: [PATCH 092/116] xen-netback: fix race between napi_complete() and interrupt handler When the NAPI budget was not all used, xenvif_poll() would call napi_complete() /after/ enabling the interrupt. This resulted in a race between the napi_complete() and the napi_schedule() in the interrupt handler. The use of local_irq_save/restore() avoided by race iff the handler is running on the same CPU but not if it was running on a different CPU. Fix this properly by calling napi_complete() before reenabling interrupts (in the xenvif_napi_schedule_or_enable_irq() call). Signed-off-by: David Vrabel Acked-by: Wei Liu Acked-by: Ian Campbell Signed-off-by: David S. Miller --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/interface.c | 30 +++-------------------------- drivers/net/xen-netback/netback.c | 4 ++-- 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 630a3fcf65bc..0d4a285cbd7e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -226,7 +226,7 @@ int xenvif_map_frontend_rings(struct xenvif *vif, grant_ref_t rx_ring_ref); /* Check for SKBs from frontend and schedule backend processing */ -void xenvif_check_rx_xenvif(struct xenvif *vif); +void xenvif_napi_schedule_or_enable_events(struct xenvif *vif); /* Prevent the device from generating any further traffic. */ void xenvif_carrier_off(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index ef05c5c49d41..20e9defa1060 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -75,32 +75,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget) work_done = xenvif_tx_action(vif, budget); if (work_done < budget) { - int more_to_do = 0; - unsigned long flags; - - /* It is necessary to disable IRQ before calling - * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might - * lose event from the frontend. - * - * Consider: - * RING_HAS_UNCONSUMED_REQUESTS - * - * __napi_complete - * - * This handler is still in scheduled state so the - * event has no effect at all. After __napi_complete - * this handler is descheduled and cannot get - * scheduled again. We lose event in this case and the ring - * will be completely stalled. - */ - - local_irq_save(flags); - - RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do); - if (!more_to_do) - __napi_complete(napi); - - local_irq_restore(flags); + napi_complete(napi); + xenvif_napi_schedule_or_enable_events(vif); } return work_done; @@ -194,7 +170,7 @@ static void xenvif_up(struct xenvif *vif) enable_irq(vif->tx_irq); if (vif->tx_irq != vif->rx_irq) enable_irq(vif->rx_irq); - xenvif_check_rx_xenvif(vif); + xenvif_napi_schedule_or_enable_events(vif); } static void xenvif_down(struct xenvif *vif) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 64ab1d141f1c..7367208ee8cd 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -716,7 +716,7 @@ static void xenvif_rx_action(struct xenvif *vif) notify_remote_via_irq(vif->rx_irq); } -void xenvif_check_rx_xenvif(struct xenvif *vif) +void xenvif_napi_schedule_or_enable_events(struct xenvif *vif) { int more_to_do; @@ -750,7 +750,7 @@ static void tx_credit_callback(unsigned long data) { struct xenvif *vif = (struct xenvif *)data; tx_add_credit(vif); - xenvif_check_rx_xenvif(vif); + xenvif_napi_schedule_or_enable_events(vif); } static void xenvif_tx_err(struct xenvif *vif, From 2e47b291953c35afa4e20a65475954c1a1b9afe1 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 15 May 2014 16:38:41 -0700 Subject: [PATCH 093/116] net: ipv6: make "ip -6 route get mark xyz" work. Currently, "ip -6 route get mark xyz" ignores the mark passed in by userspace. Make it honour the mark, just like IPv4 does. Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58c449ad7f6e..6ebdb7b6744c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2730,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (iif) { struct net_device *dev; int flags = 0; From d1c0b471b340f43fa857d19150e029257d7bb475 Mon Sep 17 00:00:00 2001 From: Fabian Godehardt Date: Fri, 16 May 2014 06:21:44 +0200 Subject: [PATCH 094/116] net/dsa/dsa.c: increment chip_index during of_node handling on dsa_of_probe() Adding more than one chip on device-tree currently causes the probing routine to always use the first chips data pointer. Signed-off-by: Fabian Godehardt Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0eb5d5e76dfb..5db37cef50a9 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -406,8 +406,9 @@ static int dsa_of_probe(struct platform_device *pdev) goto out_free; } - chip_index = 0; + chip_index = -1; for_each_available_child_of_node(np, child) { + chip_index++; cd = &pd->chip[chip_index]; cd->mii_bus = &mdio_bus->dev; From 22fb22eaebf4d16987f3fd9c3484c436ee0badf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Fri, 16 May 2014 08:34:39 +0300 Subject: [PATCH 095/116] ipv4: ip_tunnels: disable cache for nbma gre tunnels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The connected check fails to check for ip_gre nbma mode tunnels properly. ip_gre creates temporary tnl_params with daddr specified to pass-in the actual target on per-packet basis from neighbor layer. Detect these tunnels by inspecting the actual tunnel configuration. Minimal test case: ip route add 192.168.1.1/32 via 10.0.0.1 ip route add 192.168.1.2/32 via 10.0.0.2 ip tunnel add nbma0 mode gre key 1 tos c0 ip addr add 172.17.0.0/16 dev nbma0 ip link set nbma0 up ip neigh add 172.17.0.1 lladdr 192.168.1.1 dev nbma0 ip neigh add 172.17.0.2 lladdr 192.168.1.2 dev nbma0 ping 172.17.0.1 ping 172.17.0.2 The second ping should be going to 192.168.1.2 and head 10.0.0.2; but cached gre tunnel level route is used and it's actually going to 192.168.1.1 via 10.0.0.1. The lladdr's need to go to separate dst for the bug to trigger. Test case uses separate route entries, but this can also happen when the route entry is same: if there is a nexthop exception or the GRE tunnel is IPsec'ed in which case the dst points to xfrm bundle unique to the gre lladdr. Fixes: 7d442fab0a67 ("ipv4: Cache dst in tunnels") Signed-off-by: Timo Teräs Cc: Tom Herbert Cc: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index b3f859731c60..49721d78df75 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -540,9 +540,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int max_headroom; /* The extra header space needed */ __be32 dst; int err; - bool connected = true; + bool connected; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + connected = (tunnel->parms.iph.daddr != 0); dst = tnl_params->daddr; if (dst == 0) { From 29e98242783ed3ba569797846a606ba66f781625 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 May 2014 11:34:37 -0700 Subject: [PATCH 096/116] net: gro: make sure skb->cb[] initial content has not to be zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from linux-3.13, GRO attempts to build full size skbs. Problem is the commit assumed one particular field in skb->cb[] was clean, but it is not the case on some stacked devices. Timo reported a crash in case traffic is decrypted before reaching a GRE device. Fix this by initializing NAPI_GRO_CB(skb)->last at the right place, this also removes one conditional. Thanks a lot to Timo for providing full reports and bisecting this. Fixes: 8a29111c7ca6 ("net: gro: allow to build full sized skb") Bisected-by: Timo Teras Signed-off-by: Eric Dumazet Tested-by: Timo Teräs Signed-off-by: David S. Miller --- net/core/dev.c | 1 + net/core/skbuff.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 6da649bde4f7..ed928e846559 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3951,6 +3951,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff } NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; + NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1b62343f5837..8383b2bddeb9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3076,7 +3076,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (unlikely(p->len + len >= 65536)) return -E2BIG; - lp = NAPI_GRO_CB(p)->last ?: p; + lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); if (headlen <= offset) { @@ -3192,7 +3192,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) __skb_pull(skb, offset); - if (!NAPI_GRO_CB(p)->last) + if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; From 4085ebe8c31face855fd01ee40372cb4aab1df3a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:53 -0400 Subject: [PATCH 097/116] net: Find the nesting level of a given device by type. Multiple devices in the kernel can be stacked/nested and they need to know their nesting level for the purposes of lockdep. This patch provides a generic function that determines a nesting level of a particular device by its type (ex: vlan, macvlan, etc). We only care about nesting of the same type of devices. For example: eth0 <- vlan0.10 <- macvlan0 <- vlan1.20 The nesting level of vlan1.20 would be 1, since there is another vlan in the stack under it. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ++++++++ net/core/dev.c | 50 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 20e99efb1ca6..fb912e8e5c7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3077,6 +3077,14 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) +void *netdev_lower_get_next(struct net_device *dev, + struct list_head **iter); +#define netdev_for_each_lower_dev(dev, ldev, iter) \ + for (iter = &(dev)->adj_list.lower, \ + ldev = netdev_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_lower_get_next(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); @@ -3092,6 +3100,8 @@ void netdev_upper_dev_unlink(struct net_device *dev, void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); diff --git a/net/core/dev.c b/net/core/dev.c index ed928e846559..6ee3ac25ed72 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4622,6 +4622,32 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); +/** + * netdev_lower_get_next - Get the next device from the lower neighbour + * list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's lower neighbour + * list, starting from iter position. The caller must hold RTNL lock or + * its own locking that guarantees that the neighbour lower + * list will remain unchainged. + */ +void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry((*iter)->next, struct netdev_adjacent, list); + + if (&lower->list == &dev->adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; +} +EXPORT_SYMBOL(netdev_lower_get_next); + /** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU @@ -5072,6 +5098,30 @@ void *netdev_lower_dev_get_private(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_dev_get_private); + +int dev_get_nest_level(struct net_device *dev, + bool (*type_check)(struct net_device *dev)) +{ + struct net_device *lower = NULL; + struct list_head *iter; + int max_nest = -1; + int nest; + + ASSERT_RTNL(); + + netdev_for_each_lower_dev(dev, lower, iter) { + nest = dev_get_nest_level(lower, type_check); + if (max_nest < nest) + max_nest = nest; + } + + if (type_check(dev)) + max_nest++; + + return max_nest; +} +EXPORT_SYMBOL(dev_get_nest_level); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; From 25175ba5c9bff9aaf0229df34bb5d54c81633ec3 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:54 -0400 Subject: [PATCH 098/116] net: Allow for more then a single subclass for netif_addr_lock Currently netif_addr_lock_nested assumes that there can be only a single nesting level between 2 devices. However, if we have multiple devices of the same type stacked, this fails. For example: eth0 <-- vlan0.10 <-- vlan0.10.20 A more complicated configuration may stack more then one type of device in different order. Ex: eth0 <-- vlan0.10 <-- macvlan0 <-- vlan1.10.20 <-- macvlan1 This patch adds an ndo_* function that allows each stackable device to report its nesting level. If the device doesn't provide this function default subclass of 1 is used. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fb912e8e5c7f..9d4b1f1b6b75 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1144,6 +1144,7 @@ struct net_device_ops { netdev_tx_t (*ndo_dfwd_start_xmit) (struct sk_buff *skb, struct net_device *dev, void *priv); + int (*ndo_get_lock_subclass)(struct net_device *dev); }; /** @@ -2950,7 +2951,12 @@ static inline void netif_addr_lock(struct net_device *dev) static inline void netif_addr_lock_nested(struct net_device *dev) { - spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING); + int subclass = SINGLE_DEPTH_NESTING; + + if (dev->netdev_ops->ndo_get_lock_subclass) + subclass = dev->netdev_ops->ndo_get_lock_subclass(dev); + + spin_lock_nested(&dev->addr_list_lock, subclass); } static inline void netif_addr_lock_bh(struct net_device *dev) From d38569ab2bba6e6b3233acfc3a84cdbcfbd1f79f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:55 -0400 Subject: [PATCH 099/116] vlan: Fix lockdep warning with stacked vlan devices. This reverts commit dc8eaaa006350d24030502a4521542e74b5cb39f. vlan: Fix lockdep warning when vlan dev handle notification Instead we use the new new API to find the lock subclass of our vlan device. This way we can support configurations where vlans are interspersed with other devices: bond -> vlan -> macvlan -> vlan Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 3 ++- net/8021q/vlan.c | 1 + net/8021q/vlan_dev.c | 52 +++++++---------------------------------- net/core/dev.c | 1 - 4 files changed, 12 insertions(+), 45 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 13bbbde00e68..724bde8477b2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) /* found in socket.c */ extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); -static inline int is_vlan_dev(struct net_device *dev) +static inline bool is_vlan_dev(struct net_device *dev) { return dev->priv_flags & IFF_802_1Q_VLAN; } @@ -159,6 +159,7 @@ struct vlan_dev_priv { #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; #endif + unsigned int nest_level; }; static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 175273f38cb1..44ebd5c2cd4a 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -169,6 +169,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; + vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 733ec283ed1b..019efb79708f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -493,48 +493,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change) } } -static int vlan_calculate_locking_subclass(struct net_device *real_dev) -{ - int subclass = 0; - - while (is_vlan_dev(real_dev)) { - subclass++; - real_dev = vlan_dev_priv(real_dev)->real_dev; - } - - return subclass; -} - -static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - -static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from) -{ - int err = 0, subclass; - - subclass = vlan_calculate_locking_subclass(to); - - spin_lock_nested(&to->addr_list_lock, subclass); - err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); - if (!err) - __dev_set_rx_mode(to); - spin_unlock(&to->addr_list_lock); -} - static void vlan_dev_set_rx_mode(struct net_device *vlan_dev) { - vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); - vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); + dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev); } /* @@ -562,6 +524,11 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass) netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass); } +static int vlan_dev_get_lock_subclass(struct net_device *dev) +{ + return vlan_dev_priv(dev)->nest_level; +} + static const struct header_ops vlan_header_ops = { .create = vlan_dev_hard_header, .rebuild = vlan_dev_rebuild_header, @@ -597,7 +564,6 @@ static const struct net_device_ops vlan_netdev_ops; static int vlan_dev_init(struct net_device *dev) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; - int subclass = 0; netif_carrier_off(dev); @@ -646,8 +612,7 @@ static int vlan_dev_init(struct net_device *dev) SET_NETDEV_DEVTYPE(dev, &vlan_type); - subclass = vlan_calculate_locking_subclass(dev); - vlan_dev_set_lockdep_class(dev, subclass); + vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev)); vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats); if (!vlan_dev_priv(dev)->vlan_pcpu_stats) @@ -819,6 +784,7 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_get_lock_subclass = vlan_dev_get_lock_subclass, }; void vlan_setup(struct net_device *dev) diff --git a/net/core/dev.c b/net/core/dev.c index 6ee3ac25ed72..2b872bfbd172 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5287,7 +5287,6 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); } -EXPORT_SYMBOL(__dev_set_rx_mode); void dev_set_rx_mode(struct net_device *dev) { From c674ac30c549596295eb0a5af7f4714c0b905b6f Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:04:56 -0400 Subject: [PATCH 100/116] macvlan: Fix lockdep warnings with stacked macvlan devices Macvlan devices try to avoid stacking, but that's not always successfull or even desired. As an example, the following configuration is perefectly legal and valid: eth0 <--- macvlan0 <---- vlan0.10 <--- macvlan1 However, this configuration produces the following lockdep trace: [ 115.620418] ====================================================== [ 115.620477] [ INFO: possible circular locking dependency detected ] [ 115.620516] 3.15.0-rc1+ #24 Not tainted [ 115.620540] ------------------------------------------------------- [ 115.620577] ip/1704 is trying to acquire lock: [ 115.620604] (&vlan_netdev_addr_lock_key/1){+.....}, at: [] dev_uc_sync+0x3c/0x80 [ 115.620686] but task is already holding lock: [ 115.620723] (&macvlan_netdev_addr_lock_key){+.....}, at: [] dev_set_rx_mode+0x1e/0x40 [ 115.620795] which lock already depends on the new lock. [ 115.620853] the existing dependency chain (in reverse order) is: [ 115.620894] -> #1 (&macvlan_netdev_addr_lock_key){+.....}: [ 115.620935] [] lock_acquire+0xa2/0x130 [ 115.620974] [] _raw_spin_lock_nested+0x37/0x50 [ 115.621019] [] vlan_dev_set_rx_mode+0x53/0x110 [8021q] [ 115.621066] [] __dev_set_rx_mode+0x57/0xa0 [ 115.621105] [] dev_set_rx_mode+0x26/0x40 [ 115.621143] [] __dev_open+0xde/0x140 [ 115.621174] [] __dev_change_flags+0x9d/0x170 [ 115.621174] [] dev_change_flags+0x29/0x60 [ 115.621174] [] do_setlink+0x321/0x9a0 [ 115.621174] [] rtnl_newlink+0x51f/0x730 [ 115.621174] [] rtnetlink_rcv_msg+0x95/0x250 [ 115.621174] [] netlink_rcv_skb+0xa9/0xc0 [ 115.621174] [] rtnetlink_rcv+0x2a/0x40 [ 115.621174] [] netlink_unicast+0xf0/0x1c0 [ 115.621174] [] netlink_sendmsg+0x2ff/0x740 [ 115.621174] [] sock_sendmsg+0x8b/0xc0 [ 115.621174] [] ___sys_sendmsg+0x369/0x380 [ 115.621174] [] __sys_sendmsg+0x42/0x80 [ 115.621174] [] SyS_sendmsg+0x12/0x20 [ 115.621174] [] system_call_fastpath+0x16/0x1b [ 115.621174] -> #0 (&vlan_netdev_addr_lock_key/1){+.....}: [ 115.621174] [] __lock_acquire+0x1773/0x1a60 [ 115.621174] [] lock_acquire+0xa2/0x130 [ 115.621174] [] _raw_spin_lock_nested+0x37/0x50 [ 115.621174] [] dev_uc_sync+0x3c/0x80 [ 115.621174] [] macvlan_set_mac_lists+0xca/0x110 [macvlan] [ 115.621174] [] __dev_set_rx_mode+0x57/0xa0 [ 115.621174] [] dev_set_rx_mode+0x26/0x40 [ 115.621174] [] __dev_open+0xde/0x140 [ 115.621174] [] __dev_change_flags+0x9d/0x170 [ 115.621174] [] dev_change_flags+0x29/0x60 [ 115.621174] [] do_setlink+0x321/0x9a0 [ 115.621174] [] rtnl_newlink+0x51f/0x730 [ 115.621174] [] rtnetlink_rcv_msg+0x95/0x250 [ 115.621174] [] netlink_rcv_skb+0xa9/0xc0 [ 115.621174] [] rtnetlink_rcv+0x2a/0x40 [ 115.621174] [] netlink_unicast+0xf0/0x1c0 [ 115.621174] [] netlink_sendmsg+0x2ff/0x740 [ 115.621174] [] sock_sendmsg+0x8b/0xc0 [ 115.621174] [] ___sys_sendmsg+0x369/0x380 [ 115.621174] [] __sys_sendmsg+0x42/0x80 [ 115.621174] [] SyS_sendmsg+0x12/0x20 [ 115.621174] [] system_call_fastpath+0x16/0x1b [ 115.621174] other info that might help us debug this: [ 115.621174] Possible unsafe locking scenario: [ 115.621174] CPU0 CPU1 [ 115.621174] ---- ---- [ 115.621174] lock(&macvlan_netdev_addr_lock_key); [ 115.621174] lock(&vlan_netdev_addr_lock_key/1); [ 115.621174] lock(&macvlan_netdev_addr_lock_key); [ 115.621174] lock(&vlan_netdev_addr_lock_key/1); [ 115.621174] *** DEADLOCK *** [ 115.621174] 2 locks held by ip/1704: [ 115.621174] #0: (rtnl_mutex){+.+.+.}, at: [] rtnetlink_rcv+0x1b/0x40 [ 115.621174] #1: (&macvlan_netdev_addr_lock_key){+.....}, at: [] dev_set_rx_mode+0x1e/0x40 [ 115.621174] stack backtrace: [ 115.621174] CPU: 3 PID: 1704 Comm: ip Not tainted 3.15.0-rc1+ #24 [ 115.621174] Hardware name: Hewlett-Packard HP xw8400 Workstation/0A08h, BIOS 786D5 v02.38 10/25/2010 [ 115.621174] ffffffff82339ae0 ffff880465f79568 ffffffff816ee20c ffffffff82339ae0 [ 115.621174] ffff880465f795a8 ffffffff816e9e1b ffff880465f79600 ffff880465b019c8 [ 115.621174] 0000000000000001 0000000000000002 ffff880465b019c8 ffff880465b01230 [ 115.621174] Call Trace: [ 115.621174] [] dump_stack+0x4d/0x66 [ 115.621174] [] print_circular_bug+0x200/0x20e [ 115.621174] [] __lock_acquire+0x1773/0x1a60 [ 115.621174] [] ? trace_hardirqs_on_caller+0xb2/0x1d0 [ 115.621174] [] lock_acquire+0xa2/0x130 [ 115.621174] [] ? dev_uc_sync+0x3c/0x80 [ 115.621174] [] _raw_spin_lock_nested+0x37/0x50 [ 115.621174] [] ? dev_uc_sync+0x3c/0x80 [ 115.621174] [] dev_uc_sync+0x3c/0x80 [ 115.621174] [] macvlan_set_mac_lists+0xca/0x110 [macvlan] [ 115.621174] [] __dev_set_rx_mode+0x57/0xa0 [ 115.621174] [] dev_set_rx_mode+0x26/0x40 [ 115.621174] [] __dev_open+0xde/0x140 [ 115.621174] [] __dev_change_flags+0x9d/0x170 [ 115.621174] [] dev_change_flags+0x29/0x60 [ 115.621174] [] ? mem_cgroup_bad_page_check+0x21/0x30 [ 115.621174] [] do_setlink+0x321/0x9a0 [ 115.621174] [] ? __lock_acquire+0x37c/0x1a60 [ 115.621174] [] rtnl_newlink+0x51f/0x730 [ 115.621174] [] ? rtnl_newlink+0xe9/0x730 [ 115.621174] [] rtnetlink_rcv_msg+0x95/0x250 [ 115.621174] [] ? trace_hardirqs_on+0xd/0x10 [ 115.621174] [] ? rtnetlink_rcv+0x1b/0x40 [ 115.621174] [] ? rtnetlink_rcv+0x40/0x40 [ 115.621174] [] netlink_rcv_skb+0xa9/0xc0 [ 115.621174] [] rtnetlink_rcv+0x2a/0x40 [ 115.621174] [] netlink_unicast+0xf0/0x1c0 [ 115.621174] [] netlink_sendmsg+0x2ff/0x740 [ 115.621174] [] sock_sendmsg+0x8b/0xc0 [ 115.621174] [] ? might_fault+0x5f/0xb0 [ 115.621174] [] ? might_fault+0xa8/0xb0 [ 115.621174] [] ? might_fault+0x5f/0xb0 [ 115.621174] [] ? verify_iovec+0x5e/0xe0 [ 115.621174] [] ___sys_sendmsg+0x369/0x380 [ 115.621174] [] ? __do_page_fault+0x11d/0x570 [ 115.621174] [] ? up_read+0x1f/0x40 [ 115.621174] [] ? __do_page_fault+0x214/0x570 [ 115.621174] [] ? mntput_no_expire+0x6b/0x1c0 [ 115.621174] [] ? mntput_no_expire+0x17/0x1c0 [ 115.621174] [] ? mntput+0x24/0x40 [ 115.621174] [] __sys_sendmsg+0x42/0x80 [ 115.621174] [] SyS_sendmsg+0x12/0x20 [ 115.621174] [] system_call_fastpath+0x16/0x1b Fix this by correctly providing macvlan lockdep class. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 12 ++++++++++-- include/linux/if_macvlan.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c5fb9cf95c12..d53e299ae1d9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -517,6 +517,11 @@ static struct lock_class_key macvlan_netdev_addr_lock_key; #define MACVLAN_STATE_MASK \ ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT)) +static int macvlan_get_nest_level(struct net_device *dev) +{ + return ((struct macvlan_dev *)netdev_priv(dev))->nest_level; +} + static void macvlan_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, void *_unused) @@ -527,8 +532,9 @@ static void macvlan_set_lockdep_class_one(struct net_device *dev, static void macvlan_set_lockdep_class(struct net_device *dev) { - lockdep_set_class(&dev->addr_list_lock, - &macvlan_netdev_addr_lock_key); + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &macvlan_netdev_addr_lock_key, + macvlan_get_nest_level(dev)); netdev_for_each_tx_queue(dev, macvlan_set_lockdep_class_one, NULL); } @@ -723,6 +729,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_fdb_add = macvlan_fdb_add, .ndo_fdb_del = macvlan_fdb_del, .ndo_fdb_dump = ndo_dflt_fdb_dump, + .ndo_get_lock_subclass = macvlan_get_nest_level, }; void macvlan_common_setup(struct net_device *dev) @@ -851,6 +858,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->dev = dev; vlan->port = port; vlan->set_features = MACVLAN_FEATURES; + vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 7c8b20b120ea..a9a53b12397b 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -56,6 +56,7 @@ struct macvlan_dev { int numqueues; netdev_features_t tap_features; int minor; + int nest_level; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, From 44a4085538c844e79d6ee6bcf46fabf7c57a9a38 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:20:38 -0400 Subject: [PATCH 101/116] bonding: Fix stacked device detection in arp monitoring Prior to commit fbd929f2dce460456807a51e18d623db3db9f077 bonding: support QinQ for bond arp interval the arp monitoring code allowed for proper detection of devices stacked on top of vlans. Since the above commit, the code can still detect a device stacked on top of single vlan, but not a device stacked on top of Q-in-Q configuration. The search will only set the inner vlan tag if the route device is the vlan device. However, this is not always the case, as it is possible to extend the stacked configuration. With this patch it is possible to provision devices on top Q-in-Q vlan configuration that should be used as a source of ARP monitoring information. For example: ip link add link bond0 vlan10 type vlan proto 802.1q id 10 ip link add link vlan10 vlan100 type vlan proto 802.1q id 100 ip link add link vlan100 type macvlan Note: This patch limites the number of stacked VLANs to 2, just like before. The original, however had another issue in that if we had more then 2 levels of VLANs, we would end up generating incorrectly tagged traffic. This is no longer possible. Fixes: fbd929f2dce460456807a51e18d623db3db9f077 (bonding: support QinQ for bond arp interval) CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Ding Tianhong CC: Patric McHardy Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 134 ++++++++++++++++---------------- drivers/net/bonding/bonding.h | 1 + include/linux/if_vlan.h | 6 ++ include/linux/netdevice.h | 9 +++ net/core/dev.c | 26 +++++++ 5 files changed, 107 insertions(+), 69 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 69aff72c8957..d3a67896d435 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2126,10 +2126,10 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip) */ static void bond_arp_send(struct net_device *slave_dev, int arp_op, __be32 dest_ip, __be32 src_ip, - struct bond_vlan_tag *inner, - struct bond_vlan_tag *outer) + struct bond_vlan_tag *tags) { struct sk_buff *skb; + int i; pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n", arp_op, slave_dev->name, &dest_ip, &src_ip); @@ -2141,21 +2141,26 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, net_err_ratelimited("ARP packet allocation failed\n"); return; } - if (outer->vlan_id) { - if (inner->vlan_id) { - pr_debug("inner tag: proto %X vid %X\n", - ntohs(inner->vlan_proto), inner->vlan_id); - skb = __vlan_put_tag(skb, inner->vlan_proto, - inner->vlan_id); - if (!skb) { - net_err_ratelimited("failed to insert inner VLAN tag\n"); - return; - } - } - pr_debug("outer reg: proto %X vid %X\n", - ntohs(outer->vlan_proto), outer->vlan_id); - skb = vlan_put_tag(skb, outer->vlan_proto, outer->vlan_id); + /* Go through all the tags backwards and add them to the packet */ + for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) { + if (!tags[i].vlan_id) + continue; + + pr_debug("inner tag: proto %X vid %X\n", + ntohs(tags[i].vlan_proto), tags[i].vlan_id); + skb = __vlan_put_tag(skb, tags[i].vlan_proto, + tags[i].vlan_id); + if (!skb) { + net_err_ratelimited("failed to insert inner VLAN tag\n"); + return; + } + } + /* Set the outer tag */ + if (tags[0].vlan_id) { + pr_debug("outer tag: proto %X vid %X\n", + ntohs(tags[0].vlan_proto), tags[0].vlan_id); + skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id); if (!skb) { net_err_ratelimited("failed to insert outer VLAN tag\n"); return; @@ -2164,22 +2169,52 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op, arp_xmit(skb); } +/* Validate the device path between the @start_dev and the @end_dev. + * The path is valid if the @end_dev is reachable through device + * stacking. + * When the path is validated, collect any vlan information in the + * path. + */ +static bool bond_verify_device_path(struct net_device *start_dev, + struct net_device *end_dev, + struct bond_vlan_tag *tags) +{ + struct net_device *upper; + struct list_head *iter; + int idx; + + if (start_dev == end_dev) + return true; + + netdev_for_each_upper_dev_rcu(start_dev, upper, iter) { + if (bond_verify_device_path(upper, end_dev, tags)) { + if (is_vlan_dev(upper)) { + idx = vlan_get_encap_level(upper); + if (idx >= BOND_MAX_VLAN_ENCAP) + return false; + + tags[idx].vlan_proto = + vlan_dev_vlan_proto(upper); + tags[idx].vlan_id = vlan_dev_vlan_id(upper); + } + return true; + } + } + + return false; +} static void bond_arp_send_all(struct bonding *bond, struct slave *slave) { - struct net_device *upper, *vlan_upper; - struct list_head *iter, *vlan_iter; struct rtable *rt; - struct bond_vlan_tag inner, outer; + struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP]; __be32 *targets = bond->params.arp_targets, addr; int i; + bool ret; for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) { pr_debug("basa: target %pI4\n", &targets[i]); - inner.vlan_proto = 0; - inner.vlan_id = 0; - outer.vlan_proto = 0; - outer.vlan_id = 0; + memset(tags, 0, sizeof(tags)); /* Find out through which dev should the packet go */ rt = ip_route_output(dev_net(bond->dev), targets[i], 0, @@ -2192,7 +2227,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n", bond->dev->name, &targets[i]); - bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 0, &inner, &outer); + bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], + 0, tags); continue; } @@ -2201,52 +2237,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) goto found; rcu_read_lock(); - /* first we search only for vlan devices. for every vlan - * found we verify its upper dev list, searching for the - * rt->dst.dev. If found we save the tag of the vlan and - * proceed to send the packet. - */ - netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper, - vlan_iter) { - if (!is_vlan_dev(vlan_upper)) - continue; - - if (vlan_upper == rt->dst.dev) { - outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper); - outer.vlan_id = vlan_dev_vlan_id(vlan_upper); - rcu_read_unlock(); - goto found; - } - netdev_for_each_all_upper_dev_rcu(vlan_upper, upper, - iter) { - if (upper == rt->dst.dev) { - /* If the upper dev is a vlan dev too, - * set the vlan tag to inner tag. - */ - if (is_vlan_dev(upper)) { - inner.vlan_proto = vlan_dev_vlan_proto(upper); - inner.vlan_id = vlan_dev_vlan_id(upper); - } - outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper); - outer.vlan_id = vlan_dev_vlan_id(vlan_upper); - rcu_read_unlock(); - goto found; - } - } - } - - /* if the device we're looking for is not on top of any of - * our upper vlans, then just search for any dev that - * matches, and in case it's a vlan - save the id - */ - netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (upper == rt->dst.dev) { - rcu_read_unlock(); - goto found; - } - } + ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags); rcu_read_unlock(); + if (ret) + goto found; + /* Not our device - skip */ pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n", bond->dev->name, &targets[i], @@ -2259,7 +2255,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) addr = bond_confirm_addr(rt->dst.dev, targets[i], 0); ip_rt_put(rt); bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], - addr, &inner, &outer); + addr, tags); } } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index b8bdd0acc8f3..00bea320e3b5 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -36,6 +36,7 @@ #define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n" +#define BOND_MAX_VLAN_ENCAP 2 #define BOND_MAX_ARP_TARGETS 16 #define BOND_DEFAULT_MIIMON 100 diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 724bde8477b2..c901b13b6f03 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -484,4 +484,10 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, */ skb->protocol = htons(ETH_P_802_2); } + +static inline int vlan_get_encap_level(struct net_device *dev) +{ + BUG_ON(!is_vlan_dev(dev)); + return vlan_dev_priv(dev)->nest_level; +} #endif /* !(_LINUX_IF_VLAN_H_) */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d4b1f1b6b75..b42d07b0390b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3056,9 +3056,18 @@ extern int weight_p; extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter); struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); +/* iterate through upper list, must be called under RCU read lock */ +#define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ + for (iter = &(dev)->adj_list.upper, \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ + updev; \ + updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) + /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->all_adj_list.upper, \ diff --git a/net/core/dev.c b/net/core/dev.c index 2b872bfbd172..9abc503b19b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4541,6 +4541,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list) } EXPORT_SYMBOL(netdev_adjacent_get_private); +/** + * netdev_upper_get_next_dev_rcu - Get the next dev from upper list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next device from the dev's upper list, starting from iter + * position. The caller must hold RCU read lock. + */ +struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *upper; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + + upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); + + if (&upper->list == &dev->adj_list.upper) + return NULL; + + *iter = &upper->list; + + return upper->dev; +} +EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu); + /** * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list * @dev: device From f60c3704e87d39356d00c71bf51e55c2c55ad4f5 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 16 May 2014 17:20:39 -0400 Subject: [PATCH 102/116] bonding: Fix alb mode to only use first level vlans. ALB/TLB learning packets use all vlans configured on top of the bond. This ends up being incorrect if we have a stack of vlans on top of the bond. ALB/TLB should only use first level/outer most vlans in its announcements. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 9f69e818b000..e53847884319 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1045,7 +1045,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) /* loop through vlans and send one packet for each */ rcu_read_lock(); netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (upper->priv_flags & IFF_802_1Q_VLAN) + if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_id(upper)); } From 02948344fbf511c6eec28687dc76bb64b758ff93 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 15 May 2014 23:51:43 -0700 Subject: [PATCH 103/116] bnx2x: Convert return 0 to return rc These "return 0;" uses seem wrong as there are rc variables where error return values are set but unused. Signed-off-by: Joe Perches Acked-by: Dmitry Kravkov Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 81cc2d9831c2..b8078d50261b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2695,7 +2695,7 @@ int bnx2x_set_vf_mac(struct net_device *dev, int vfidx, u8 *mac) bnx2x_unlock_vf_pf_channel(bp, vf, CHANNEL_TLV_PF_SET_MAC); } - return 0; + return rc; } int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 0c067e8564dd..784c7155b98a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -747,7 +747,7 @@ int bnx2x_vfpf_config_mac(struct bnx2x *bp, u8 *addr, u8 vf_qid, bool set) out: bnx2x_vfpf_finalize(bp, &req->first_tlv); - return 0; + return rc; } /* request pf to config rss table for vf queues*/ From 4f337ed5c014752b000f593182fdcfa3ecf0d166 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 16 May 2014 15:57:59 -0700 Subject: [PATCH 104/116] MAINTAINERS: Pravin Shelar is Open vSwitch maintainer. Pravin will be maintaining Open vSwitch going forward. CC: Pravin Shelar Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7578deb8ff20..0eb0adb16f1e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6511,10 +6511,10 @@ T: git git://openrisc.net/~jonas/linux F: arch/openrisc/ OPENVSWITCH -M: Jesse Gross +M: Pravin Shelar L: dev@openvswitch.org W: http://openvswitch.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pshelar/openvswitch.git S: Maintained F: net/openvswitch/ From 524369e2391f4b422d0efdd11d526a373a11a43a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 6 May 2014 19:45:38 +0200 Subject: [PATCH 105/116] can: c_can: remove obsolete STRICT_FRAME_ORDERING Kconfig option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 2b9aecdce2 ("can: c_can: Disable rx split as workaround") a new Kconfig option was introduced as a workaround. The tests performed by Alexander Stein confirmed this option to be obsolete with all the other cleanups and fixes that had been discussed that time: http://marc.info/?l=linux-can&m=139746476821294&w=2 Both (author and tester) agreed to remove this Kconfig option again: http://marc.info/?l=linux-can&m=139883820714229&w=2 As some more cleanups took place since then a simple revert is not possible. This patch removes the entire option as it would behave when disabled. Further beautification’s can be done later. Signed-off-by: Oliver Hartkopp Tested-by: Alexander Stein Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/Kconfig | 7 ------- drivers/net/can/c_can/c_can.c | 36 ----------------------------------- 2 files changed, 43 deletions(-) diff --git a/drivers/net/can/c_can/Kconfig b/drivers/net/can/c_can/Kconfig index 8ab7103d4f44..61ffc12d8fd8 100644 --- a/drivers/net/can/c_can/Kconfig +++ b/drivers/net/can/c_can/Kconfig @@ -14,13 +14,6 @@ config CAN_C_CAN_PLATFORM SPEAr1310 and SPEAr320 evaluation boards & TI (www.ti.com) boards like am335x, dm814x, dm813x and dm811x. -config CAN_C_CAN_STRICT_FRAME_ORDERING - bool "Force a strict RX CAN frame order (may cause frame loss)" - ---help--- - The RX split buffer prevents packet reordering but can cause packet - loss. Only enable this option when you accept to lose CAN frames - in favour of getting the received CAN frames in the correct order. - config CAN_C_CAN_PCI tristate "Generic PCI Bus based C_CAN/D_CAN driver" depends on PCI diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index a2ca820b5373..95e04e2002da 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -732,26 +732,12 @@ static u32 c_can_adjust_pending(u32 pend) static inline void c_can_rx_object_get(struct net_device *dev, struct c_can_priv *priv, u32 obj) { -#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING - if (obj < C_CAN_MSG_RX_LOW_LAST) - c_can_object_get(dev, IF_RX, obj, IF_COMM_RCV_LOW); - else -#endif c_can_object_get(dev, IF_RX, obj, priv->comm_rcv_high); } static inline void c_can_rx_finalize(struct net_device *dev, struct c_can_priv *priv, u32 obj) { -#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING - if (obj < C_CAN_MSG_RX_LOW_LAST) - priv->rxmasked |= BIT(obj - 1); - else if (obj == C_CAN_MSG_RX_LOW_LAST) { - priv->rxmasked = 0; - /* activate all lower message objects */ - c_can_activate_all_lower_rx_msg_obj(dev, IF_RX); - } -#endif if (priv->type != BOSCH_D_CAN) c_can_object_get(dev, IF_RX, obj, IF_COMM_CLR_NEWDAT); } @@ -799,9 +785,6 @@ static inline u32 c_can_get_pending(struct c_can_priv *priv) { u32 pend = priv->read_reg(priv, C_CAN_NEWDAT1_REG); -#ifdef CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING - pend &= ~priv->rxmasked; -#endif return pend; } @@ -814,25 +797,6 @@ static inline u32 c_can_get_pending(struct c_can_priv *priv) * has arrived. To work-around this issue, we keep two groups of message * objects whose partitioning is defined by C_CAN_MSG_OBJ_RX_SPLIT. * - * If CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING = y - * - * To ensure in-order frame reception we use the following - * approach while re-activating a message object to receive further - * frames: - * - if the current message object number is lower than - * C_CAN_MSG_RX_LOW_LAST, do not clear the NEWDAT bit while clearing - * the INTPND bit. - * - if the current message object number is equal to - * C_CAN_MSG_RX_LOW_LAST then clear the NEWDAT bit of all lower - * receive message objects. - * - if the current message object number is greater than - * C_CAN_MSG_RX_LOW_LAST then clear the NEWDAT bit of - * only this message object. - * - * This can cause packet loss! - * - * If CONFIG_CAN_C_CAN_STRICT_FRAME_ORDERING = n - * * We clear the newdat bit right away. * * This can result in packet reordering when the readout is slow. From e1618d461ca18d40f9c3ef70598abb72e75d27ae Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 20 May 2014 10:59:26 -0400 Subject: [PATCH 106/116] vlan: Fix build error wth vlan_get_encap_level() The new function vlan_get_encap_level() uses vlan_dev_priv() which is only conditionally avaialble when VLAN support is enabled. Make vlan_get_encap_level() conditionally available as well. Fixes: 44a4085538c8 ("bonding: Fix stacked device detection in arp monitoring") Reported-by: Stephen Rothwell CC: Stephen Rothwell Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c901b13b6f03..b2acc4a1b13c 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -198,6 +198,12 @@ extern void vlan_vids_del_by_dev(struct net_device *dev, const struct net_device *by_dev); extern bool vlan_uses_dev(const struct net_device *dev); + +static inline int vlan_get_encap_level(struct net_device *dev) +{ + BUG_ON(!is_vlan_dev(dev)); + return vlan_dev_priv(dev)->nest_level; +} #else static inline struct net_device * __vlan_find_dev_deep(struct net_device *real_dev, @@ -264,6 +270,11 @@ static inline bool vlan_uses_dev(const struct net_device *dev) { return false; } +static inline int vlan_get_encap_level(struct net_device *dev) +{ + BUG(); + return 0; +} #endif static inline bool vlan_hw_offload_capable(netdev_features_t features, @@ -485,9 +496,4 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, skb->protocol = htons(ETH_P_802_2); } -static inline int vlan_get_encap_level(struct net_device *dev) -{ - BUG_ON(!is_vlan_dev(dev)); - return vlan_dev_priv(dev)->nest_level; -} #endif /* !(_LINUX_IF_VLAN_H_) */ From 78ff4be45a4c51d8fb21ad92e4fabb467c6c3eeb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 19 May 2014 11:36:56 +0200 Subject: [PATCH 107/116] ip_tunnel: Initialize the fallback device properly We need to initialize the fallback device to have a correct mtu set on this device. Otherwise the mtu is set to null and the device is unusable. Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.") Cc: Pravin B Shelar Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 49721d78df75..2acc2337d38b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -883,6 +883,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, */ if (!IS_ERR(itn->fb_tunnel_dev)) { itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); } rtnl_unlock(); From 0b5a958cf4df3a5cd578b861471e62138f55c85e Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Tue, 20 May 2014 11:38:56 +0200 Subject: [PATCH 108/116] can: peak_pci: prevent use after free at netdev removal As remarked by Christopher R. Baker in his post at http://marc.info/?l=linux-can&m=139707295706465&w=2 there's a possibility for an use after free condition at device removal. This simplified patch introduces an additional variable to prevent the issue. Thanks for catching this. Cc: linux-stable Reported-by: Christopher R. Baker Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sja1000/peak_pci.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index c540e3d12e3d..564933ae218c 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -551,7 +551,7 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct sja1000_priv *priv; struct peak_pci_chan *chan; - struct net_device *dev; + struct net_device *dev, *prev_dev; void __iomem *cfg_base, *reg_base; u16 sub_sys_id, icr; int i, err, channels; @@ -688,11 +688,13 @@ static int peak_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) writew(0x0, cfg_base + PITA_ICR + 2); chan = NULL; - for (dev = pci_get_drvdata(pdev); dev; dev = chan->prev_dev) { - unregister_sja1000dev(dev); - free_sja1000dev(dev); + for (dev = pci_get_drvdata(pdev); dev; dev = prev_dev) { priv = netdev_priv(dev); chan = priv->priv; + prev_dev = chan->prev_dev; + + unregister_sja1000dev(dev); + free_sja1000dev(dev); } /* free any PCIeC resources too */ @@ -726,10 +728,12 @@ static void peak_pci_remove(struct pci_dev *pdev) /* Loop over all registered devices */ while (1) { + struct net_device *prev_dev = chan->prev_dev; + dev_info(&pdev->dev, "removing device %s\n", dev->name); unregister_sja1000dev(dev); free_sja1000dev(dev); - dev = chan->prev_dev; + dev = prev_dev; if (!dev) { /* do that only for first channel */ From bf63ac73b3e132e6bf0c8798aba7b277c3316e19 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 19 May 2014 12:15:49 -0700 Subject: [PATCH 109/116] net_sched: fix an oops in tcindex filter Kelly reported the following crash: IP: [] tcf_action_exec+0x46/0x90 PGD 3009067 PUD 300c067 PMD 11ff30067 PTE 800000011634b060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC CPU: 1 PID: 639 Comm: dhclient Not tainted 3.15.0-rc4+ #342 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8801169ecd00 ti: ffff8800d21b8000 task.ti: ffff8800d21b8000 RIP: 0010:[] [] tcf_action_exec+0x46/0x90 RSP: 0018:ffff8800d21b9b90 EFLAGS: 00010283 RAX: 00000000ffffffff RBX: ffff88011634b8e8 RCX: ffff8800cf7133d8 RDX: ffff88011634b900 RSI: ffff8800cf7133e0 RDI: ffff8800d210f840 RBP: ffff8800d21b9bb0 R08: ffffffff8287bf60 R09: 0000000000000001 R10: ffff8800d2b22b24 R11: 0000000000000001 R12: ffff8800d210f840 R13: ffff8800d21b9c50 R14: ffff8800cf7133e0 R15: ffff8800cad433d8 FS: 00007f49723e1840(0000) GS:ffff88011a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88011634b8f0 CR3: 00000000ce469000 CR4: 00000000000006e0 Stack: ffff8800d2170188 ffff8800d210f840 ffff8800d2171b90 0000000000000000 ffff8800d21b9be8 ffffffff817c55bb ffff8800d21b9c50 ffff8800d2171b90 ffff8800d210f840 ffff8800d21b0300 ffff8800d21b9c50 ffff8800d21b9c18 Call Trace: [] tcindex_classify+0x88/0x9b [] tc_classify_compat+0x3e/0x7b [] tc_classify+0x25/0x9f [] htb_enqueue+0x55/0x27a [] dsmark_enqueue+0x165/0x1a4 [] __dev_queue_xmit+0x35e/0x536 [] dev_queue_xmit+0x10/0x12 [] packet_sendmsg+0xb26/0xb9a [] ? __lock_acquire+0x3ae/0xdf3 [] __sock_sendmsg_nosec+0x25/0x27 [] sock_aio_write+0xd0/0xe7 [] do_sync_write+0x59/0x78 [] vfs_write+0xb5/0x10a [] SyS_write+0x49/0x7f [] system_call_fastpath+0x16/0x1b This is because we memcpy struct tcindex_filter_result which contains struct tcf_exts, obviously struct list_head can not be simply copied. This is a regression introduced by commit 33be627159913b094bb578 (net_sched: act: use standard struct list_head). It's not very easy to fix it as the code is a mess: if (old_r) memcpy(&cr, r, sizeof(cr)); else { memset(&cr, 0, sizeof(cr)); tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } ... tcf_exts_change(tp, &cr.exts, &e); ... memcpy(r, &cr, sizeof(cr)); the above code should equal to: tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; ... if (old_r) tcf_exts_change(tp, &r->exts, &e); else tcf_exts_change(tp, &cr.exts, &e); ... r->res = cr.res; after this change, since there is no need to copy struct tcf_exts. And it also fixes other places zero'ing struct's contains struct tcf_exts. Fixes: commit 33be627159913b0 (net_sched: act: use standard struct list_head) Reported-by: Kelly Anderson Tested-by: Kelly Anderson Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index eed8404443d8..f435a88d899a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -188,6 +188,12 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, }; +static void tcindex_filter_result_init(struct tcindex_filter_result *r) +{ + memset(r, 0, sizeof(*r)); + tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, @@ -207,15 +213,11 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, return err; memcpy(&cp, p, sizeof(cp)); - memset(&new_filter_result, 0, sizeof(new_filter_result)); - tcf_exts_init(&new_filter_result.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + tcindex_filter_result_init(&new_filter_result); + tcindex_filter_result_init(&cr); if (old_r) - memcpy(&cr, r, sizeof(cr)); - else { - memset(&cr, 0, sizeof(cr)); - tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - } + cr.res = r->res; if (tb[TCA_TCINDEX_HASH]) cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -267,9 +269,14 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp.perfect && !cp.h) { if (valid_perfect_hash(&cp)) { + int i; + cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); if (!cp.perfect) goto errout; + for (i = 0; i < cp.hash; i++) + tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT, + TCA_TCINDEX_POLICE); balloc = 1; } else { cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); @@ -295,14 +302,17 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcf_bind_filter(tp, &cr.res, base); } - tcf_exts_change(tp, &cr.exts, &e); + if (old_r) + tcf_exts_change(tp, &r->exts, &e); + else + tcf_exts_change(tp, &cr.exts, &e); tcf_tree_lock(tp); if (old_r && old_r != r) - memset(old_r, 0, sizeof(*old_r)); + tcindex_filter_result_init(old_r); memcpy(p, &cp, sizeof(cp)); - memcpy(r, &cr, sizeof(cr)); + r->res = cr.res; if (r == &new_filter_result) { struct tcindex_filter **fp; From 4de462ab63e23953fd05da511aeb460ae10cc726 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 May 2014 21:56:34 -0700 Subject: [PATCH 110/116] ipv6: gro: fix CHECKSUM_COMPLETE support When GRE support was added in linux-3.14, CHECKSUM_COMPLETE handling broke on GRE+IPv6 because we did not update/use the appropriate csum : GRO layer is supposed to use/update NAPI_GRO_CB(skb)->csum instead of skb->csum Tested using a GRE tunnel and IPv6 traffic. GRO aggregation now happens at the first level (ethernet device) instead of being done in gre tunnel. Native IPv6+TCP is still properly aggregated. Fixes: bf5a755f5e918 ("net-gre-gro: Add GRE support to the GRO stack") Signed-off-by: Eric Dumazet Cc: Jerry Chu Signed-off-by: David S. Miller --- net/ipv6/ip6_offload.c | 6 +----- net/ipv6/tcpv6_offload.c | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 59f95affceb0..b2f091566f88 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -196,7 +196,6 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int off; u16 flush = 1; int proto; - __wsum csum; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); @@ -264,13 +263,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(skb)->flush |= flush; - csum = skb->csum; - skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + skb_gro_postpull_rcsum(skb, iph, nlen); pp = ops->callbacks.gro_receive(head, skb); - skb->csum = csum; - out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 0d78132ff18a..8517d3cd1aed 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -42,7 +42,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, if (NAPI_GRO_CB(skb)->flush) goto skip_csum; - wsum = skb->csum; + wsum = NAPI_GRO_CB(skb)->csum; switch (skb->ip_summed) { case CHECKSUM_NONE: From 89df20d951b310f3e33422554054ae6e58a3c7a9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 20 May 2014 11:38:18 +0200 Subject: [PATCH 111/116] stmmac: Remove unbalanced clk_disable call The stmmac_open call was calling clk_disable_unprepare on phy init failure, but it never calls clk_prepare_enable, this causes a WARN_ON in the clk framework to trigger if for some reason phy init fails. Signed-off-by: Hans de Goede Acked-by: Giuseppe Cavallaro Acked-by: Chen-Yu Tsai Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d940034acdd4..0f4841d2e8dc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1704,7 +1704,7 @@ static int stmmac_open(struct net_device *dev) if (ret) { pr_err("%s: Cannot attach to PHY (error: %d)\n", __func__, ret); - goto phy_error; + return ret; } } @@ -1779,8 +1779,6 @@ static int stmmac_open(struct net_device *dev) dma_desc_error: if (priv->phydev) phy_disconnect(priv->phydev); -phy_error: - clk_disable_unprepare(priv->stmmac_clk); return ret; } From b0db5cdf3975134ca967e8b8370a8f0c91ce2329 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 20 May 2014 13:52:13 +0200 Subject: [PATCH 112/116] net: doc: Update references to skb->rxhash In commit 61b905da33 ("net: Rename skb->rxhash to skb->hash"), skb->rxhash was renamed to skb->hash. Update references in Documentation accordingly. Signed-off-by: Tobias Klauser Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 2 +- Documentation/networking/packet_mmap.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 81f940f4e884..e3ba753cb714 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -277,7 +277,7 @@ Possible BPF extensions are shown in the following table: mark skb->mark queue skb->queue_mapping hatype skb->dev->type - rxhash skb->rxhash + rxhash skb->hash cpu raw_smp_processor_id() vlan_tci vlan_tx_tag_get(skb) vlan_pr vlan_tx_tag_present(skb) diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 6fea79efb4cb..38112d512f47 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -578,7 +578,7 @@ processes. This also works in combination with mmap(2) on packet sockets. Currently implemented fanout policies are: - - PACKET_FANOUT_HASH: schedule to socket by skb's rxhash + - PACKET_FANOUT_HASH: schedule to socket by skb's packet hash - PACKET_FANOUT_LB: schedule to socket by round-robin - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on - PACKET_FANOUT_RND: schedule to socket by random selection From d6b694c0b3f20c3ee54d29d4109fa1978d11a033 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 May 2014 11:24:39 -0400 Subject: [PATCH 113/116] bonding: Don't assume 802.1Q when sending alb learning packets. TLB/ALB learning packets always assume 802.1Q vlan protocol, but that is no longer the case since we now have support for Q-in-Q on top of bonding. Pass the vlan protocol to alb_send_lp_vid() so that the packets are properly tagged. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Signed-off-by: Vlad Yasevich Acked-by: Veaceslav Falico Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index e53847884319..2ec945c3b9ba 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -995,7 +995,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id) /*********************** tlb/rlb shared functions *********************/ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], - u16 vid) + __be16 vlan_proto, u16 vid) { struct learning_pkt pkt; struct sk_buff *skb; @@ -1021,7 +1021,7 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], skb->dev = slave->dev; if (vid) { - skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vid); + skb = vlan_put_tag(skb, vlan_proto, vid); if (!skb) { pr_err("%s: Error: failed to insert VLAN tag\n", slave->bond->dev->name); @@ -1040,13 +1040,14 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) struct list_head *iter; /* send untagged */ - alb_send_lp_vid(slave, mac_addr, 0); + alb_send_lp_vid(slave, mac_addr, 0, 0); /* loop through vlans and send one packet for each */ rcu_read_lock(); netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); } rcu_read_unlock(); From d0c21d43a5a12aaebb1e42e10cf78e6491fc9e5a Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 21 May 2014 13:19:48 -0400 Subject: [PATCH 114/116] bonding: Send ALB learning packets using the right source ALB learning packets are currentlyalways sent using the slave mac address for all vlans configured on top of bond. This is not always correct, as vlans may change their mac address. This patch introduced a concept of strict matching where the source of learning packets can either strictly match the address passed in, or it can determine a more correct address to use. There are 3 casese to consider: 1) Switchover. In this case, we have a new active slave and we need tell the switch about all addresses available on the slave. 2) Monitor. We'll periodically refresh learning info for all slaves. In this case, we refresh all addresses for current active, and just the slave address for other slaves. 3) Teaching of disabled adddress. This happens as part of the failover and in this case, we alwyas to use just the address provided. CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 49 ++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 2ec945c3b9ba..93580a47cc54 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -82,7 +82,8 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) } /* Forward declaration */ -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]); +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], + bool strict_match); static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp); static void rlb_src_unlink(struct bonding *bond, u32 index); static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, @@ -459,7 +460,7 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) bond->alb_info.rlb_promisc_timeout_counter = 0; - alb_send_learning_packets(bond->curr_active_slave, addr); + alb_send_learning_packets(bond->curr_active_slave, addr, true); } /* slave being removed should not be active at this point @@ -1032,8 +1033,8 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], dev_queue_xmit(skb); } - -static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) +static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], + bool strict_match) { struct bonding *bond = bond_get_bond_by_slave(slave); struct net_device *upper; @@ -1045,10 +1046,19 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]) /* loop through vlans and send one packet for each */ rcu_read_lock(); netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) { - if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) - alb_send_lp_vid(slave, mac_addr, - vlan_dev_vlan_proto(upper), - vlan_dev_vlan_id(upper)); + if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { + if (strict_match && + ether_addr_equal_64bits(mac_addr, + upper->dev_addr)) { + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } else if (!strict_match) { + alb_send_lp_vid(slave, upper->dev_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); + } + } } rcu_read_unlock(); } @@ -1108,7 +1118,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, /* fasten the change in the switch */ if (SLAVE_IS_OK(slave1)) { - alb_send_learning_packets(slave1, slave1->dev->dev_addr); + alb_send_learning_packets(slave1, slave1->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed @@ -1120,7 +1130,7 @@ static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1, } if (SLAVE_IS_OK(slave2)) { - alb_send_learning_packets(slave2, slave2->dev->dev_addr); + alb_send_learning_packets(slave2, slave2->dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform the clients that the mac address * has changed @@ -1491,6 +1501,8 @@ void bond_alb_monitor(struct work_struct *work) /* send learning packets */ if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) { + bool strict_match; + /* change of curr_active_slave involves swapping of mac addresses. * in order to avoid this swapping from happening while * sending the learning packets, the curr_slave_lock must be held for @@ -1498,8 +1510,15 @@ void bond_alb_monitor(struct work_struct *work) */ read_lock(&bond->curr_slave_lock); - bond_for_each_slave_rcu(bond, slave, iter) - alb_send_learning_packets(slave, slave->dev->dev_addr); + bond_for_each_slave_rcu(bond, slave, iter) { + /* If updating current_active, use all currently + * user mac addreses (!strict_match). Otherwise, only + * use mac of the slave device. + */ + strict_match = (slave != bond->curr_active_slave); + alb_send_learning_packets(slave, slave->dev->dev_addr, + strict_match); + } read_unlock(&bond->curr_slave_lock); @@ -1722,7 +1741,8 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave } else { /* set the new_slave to the bond mac address */ alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); - alb_send_learning_packets(new_slave, bond->dev->dev_addr); + alb_send_learning_packets(new_slave, bond->dev->dev_addr, + false); } write_lock_bh(&bond->curr_slave_lock); @@ -1765,7 +1785,8 @@ int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr) alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr); read_lock(&bond->lock); - alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr); + alb_send_learning_packets(bond->curr_active_slave, + bond_dev->dev_addr, false); if (bond->alb_info.rlb_enabled) { /* inform clients mac address has changed */ rlb_req_update_slave_clients(bond, bond->curr_active_slave); From fbdc0ad095c0a299e9abf5d8ac8f58374951149a Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 22 May 2014 16:36:55 +0800 Subject: [PATCH 115/116] ipv4: initialise the itag variable in __mkroute_input the value of itag is a random value from stack, and may not be initiated by fib_validate_source, which called fib_combine_itag if CONFIG_IP_ROUTE_CLASSID is not set This will make the cached dst uncertainty Signed-off-by: Li RongQing Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index db1e0da871f4..5e676be3daeb 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1519,7 +1519,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *out_dev; unsigned int flags = 0; bool do_cache; - u32 itag; + u32 itag = 0; /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); From b6ed5498601df40489606dbc14a9c7011c16630b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 22 May 2014 11:57:17 -0700 Subject: [PATCH 116/116] batman: fix a bogus warning from batadv_is_on_batman_iface() batman tries to search dev->iflink to check if it's a batman interface, but ->iflink could be 0, which is not a valid ifindex. It should just avoid iflink == 0 case. Reported-by: Jet Chen Tested-by: Jet Chen Cc: David S. Miller Cc: Steffen Klassert Cc: Antonio Quartulli Cc: Marek Lindner Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/batman-adv/hard-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b851cc580853..fbda6b54baff 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -83,7 +83,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) return true; /* no more parents..stop recursion */ - if (net_dev->iflink == net_dev->ifindex) + if (net_dev->iflink == 0 || net_dev->iflink == net_dev->ifindex) return false; /* recurse over the parent device */