From 5745b8232e942abd5e16e85fa9b27cc21324acf0 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sun, 3 Apr 2016 22:09:23 +0800 Subject: [PATCH 01/44] ipv4: l2tp: fix a potential issue in l2tp_ip_recv pskb_may_pull() can change skb->data, so we have to load ptr/optr at the right place. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index ec22078b0914..42de4ccd159f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } From be447f305494e019dfc37ea4cdf3b0e4200b4eba Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Sun, 3 Apr 2016 22:09:24 +0800 Subject: [PATCH 02/44] ipv6: l2tp: fix a potential issue in l2tp_ip6_recv pskb_may_pull() can change skb->data, so we have to load ptr/optr at the right place. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 6b54ff3ff4cb..cd479903d943 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,12 +136,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb) struct l2tp_tunnel *tunnel = NULL; int length; - /* Point to L2TP header */ - optr = ptr = skb->data; - if (!pskb_may_pull(skb, 4)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; session_id = ntohl(*((__be32 *) ptr)); ptr += 4; @@ -169,6 +168,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb) if (!pskb_may_pull(skb, length)) goto discard; + /* Point to L2TP header */ + optr = ptr = skb->data; + ptr += 4; pr_debug("%s: ip recv\n", tunnel->name); print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length); } From c862cc9b70526a71d07e7bd86d9b61d1c792cead Mon Sep 17 00:00:00 2001 From: Bastien Philbert Date: Sun, 3 Apr 2016 19:04:26 -0400 Subject: [PATCH 03/44] bridge: Fix incorrect variable assignment on error path in br_sysfs_addbr This fixes the incorrect variable assignment on error path in br_sysfs_addbr for when the call to kobject_create_and_add fails to assign the value of -EINVAL to the returned variable of err rather then incorrectly return zero making callers think this function has succeededed due to the previous assignment being assigned zero when assigning it the successful return value of the call to sysfs_create_group which is zero. Signed-off-by: Bastien Philbert Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 6b8091407ca3..f4d40edbe8a9 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -870,6 +870,7 @@ int br_sysfs_addbr(struct net_device *dev) br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, brobj); if (!br->ifobj) { + err = -EINVAL; pr_info("%s: can't add kobject (directory) %s/%s\n", __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); goto out3; From c3732a7b37bca52bdd23aacde001f2038f9cb11a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 28 Feb 2016 15:19:53 +0100 Subject: [PATCH 04/44] mac80211: fix AP buffered multicast frames with queue control and txq Buffered multicast frames must be passed to the driver directly via drv_tx instead of going through the txq, otherwise they cannot easily be scheduled to be sent after DTIM. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 62ad5321257d..664624ef9925 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1247,7 +1247,8 @@ static void ieee80211_drv_tx(struct ieee80211_local *local, struct txq_info *txqi; u8 ac; - if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE) + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) goto tx_normal; if (!ieee80211_is_data(hdr->frame_control)) From db8d99774c2682559b7648857697b9b588c6795a Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 2 Mar 2016 23:28:31 +0200 Subject: [PATCH 05/44] mac80211: TDLS: always downgrade invalid chandefs Even if the current chandef width is equal to the station's max-BW, it doesn't mean it's a valid width for TDLS. Make sure to always check regulatory constraints in these cases. Fixes: 0fabfaafec3a ("mac80211: upgrade BW of TDLS peers when possible") Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index c9eeb3f12808..43f13abe89c7 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -332,7 +332,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, return; /* proceed to downgrade the chandef until usable or the same */ - while (uc.width > max_width && + while (uc.width > max_width || !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, sdata->wdev.iftype)) ieee80211_chandef_downgrade(&uc); From 59021c675995281d453eee45b3e2e1e3edbc0ec2 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 2 Mar 2016 23:28:32 +0200 Subject: [PATCH 06/44] mac80211: TDLS: change BW calculation for WIDER_BW peers The previous approach simply ignored chandef restrictions when calculating the appropriate peer BW for a WIDER_BW peer. This could result in a regulatory violation if both peers indicated 80MHz support, but the regdomain forbade it. Change the approach to setting a WIDER_BW peer's BW. Don't exempt it from the chandef width at first. If during TDLS negotiation the chandef width is upgraded, update the peer's BW to match. Fixes: 0fabfaafec3a ("mac80211: upgrade BW of TDLS peers when possible") Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/tdls.c | 38 ++++++++++++++++++++++++++++++++------ net/mac80211/vht.c | 30 +++++++++++++++++++++++++----- 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 804575ff7af5..422003540169 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1719,6 +1719,10 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta); enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta); void ieee80211_sta_set_rx_nss(struct sta_info *sta); +enum ieee80211_sta_rx_bandwidth +ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width); +enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta); +void ieee80211_sta_set_rx_nss(struct sta_info *sta); void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt); u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 43f13abe89c7..eed1152efb42 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -4,7 +4,7 @@ * Copyright 2006-2010 Johannes Berg * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015 - 2016 Intel Deutschland GmbH * * This file is GPLv2 as found in COPYING. */ @@ -15,6 +15,7 @@ #include #include "ieee80211_i.h" #include "driver-ops.h" +#include "rate.h" /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) @@ -302,7 +303,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, /* IEEE802.11ac-2013 Table E-4 */ u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; struct cfg80211_chan_def uc = sta->tdls_chandef; - enum nl80211_chan_width max_width = ieee80211_get_sta_bw(&sta->sta); + enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(sta); int i; /* only support upgrading non-narrow channels up to 80Mhz */ @@ -1242,18 +1243,44 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, return ret; } -static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata) +static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) { struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + enum nl80211_chan_width width; + struct ieee80211_supported_band *sband; mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (conf) { + width = conf->def.width; + sband = local->hw.wiphy->bands[conf->def.chan->band]; ctx = container_of(conf, struct ieee80211_chanctx, conf); ieee80211_recalc_chanctx_chantype(local, ctx); + + /* if width changed and a peer is given, update its BW */ + if (width != conf->def.width && sta && + test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) { + enum ieee80211_sta_rx_bandwidth bw; + + bw = ieee80211_chan_width_to_rx_bw(conf->def.width); + bw = min(bw, ieee80211_sta_cap_rx_bw(sta)); + if (bw != sta->sta.bandwidth) { + sta->sta.bandwidth = bw; + rate_control_rate_update(local, sband, sta, + IEEE80211_RC_BW_CHANGED); + /* + * if a TDLS peer BW was updated, we need to + * recalc the chandef width again, to get the + * correct chanctx min_def + */ + ieee80211_recalc_chanctx_chantype(local, ctx); + } + } + } mutex_unlock(&local->chanctx_mtx); } @@ -1350,8 +1377,6 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, break; } - iee80211_tdls_recalc_chanctx(sdata); - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, peer); if (!sta) { @@ -1360,6 +1385,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, break; } + iee80211_tdls_recalc_chanctx(sdata, sta); iee80211_tdls_recalc_ht_protection(sdata, sta); set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); @@ -1390,7 +1416,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, iee80211_tdls_recalc_ht_protection(sdata, NULL); mutex_unlock(&local->sta_mtx); - iee80211_tdls_recalc_chanctx(sdata); + iee80211_tdls_recalc_chanctx(sdata, NULL); break; default: ret = -ENOTSUPP; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 89e04d55aa18..e590e2ef9eaf 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -319,7 +319,30 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) return IEEE80211_STA_RX_BW_80; } -static enum ieee80211_sta_rx_bandwidth +enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct sta_info *sta) +{ + struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + u32 cap_width; + + if (!vht_cap->vht_supported) { + if (!sta->sta.ht_cap.ht_supported) + return NL80211_CHAN_WIDTH_20_NOHT; + + return sta->sta.ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? + NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20; + } + + cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + + if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) + return NL80211_CHAN_WIDTH_160; + else if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) + return NL80211_CHAN_WIDTH_80P80; + + return NL80211_CHAN_WIDTH_80; +} + +enum ieee80211_sta_rx_bandwidth ieee80211_chan_width_to_rx_bw(enum nl80211_chan_width width) { switch (width) { @@ -347,10 +370,7 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) bw = ieee80211_sta_cap_rx_bw(sta); bw = min(bw, sta->cur_max_bandwidth); - - /* do not cap the BW of TDLS WIDER_BW peers by the bss */ - if (!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) - bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; } From aa507a7bc5ab7513b83fc37ed040b5254737a518 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 2 Mar 2016 23:28:33 +0200 Subject: [PATCH 07/44] mac80211: recalc min_def chanctx even when chandef is identical The min_def chanctx is affected not only by the current chandef, but sometimes also by other stations on the vif. There's a valid scenario where a TDLS peer can widen its BW, thereby causing the min_def to increase. Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 283981108ca8..74142d07ad31 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -343,8 +343,10 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *chandef) { - if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) + if (cfg80211_chandef_identical(&ctx->conf.def, chandef)) { + ieee80211_recalc_chanctx_min_def(local, ctx); return; + } WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef)); From 62b14b241ca6f790a17ccd9dd9f62ce1b006d406 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 31 Mar 2016 17:22:45 +0200 Subject: [PATCH 08/44] mac80211: properly deal with station hashtable insert errors The original hand-implemented hash-table in mac80211 couldn't result in insertion errors, and while converting to rhashtable I evidently forgot to check the errors. This surfaced now only because Ben is adding many identical keys and that resulted in hidden insertion errors. Cc: stable@vger.kernel.org Fixes: 7bedd0cfad4e1 ("mac80211: use rhashtable for station table") Reported-by: Ben Greear Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index d20bab5c146c..18b56d7a2dbd 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -258,11 +258,11 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) } /* Caller must hold local->sta_mtx */ -static void sta_info_hash_add(struct ieee80211_local *local, - struct sta_info *sta) +static int sta_info_hash_add(struct ieee80211_local *local, + struct sta_info *sta) { - rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -524,7 +524,9 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ - sta_info_hash_add(local, sta); + err = sta_info_hash_add(local, sta); + if (err) + goto out_drop_sta; list_add_tail_rcu(&sta->list, &local->sta_list); @@ -557,6 +559,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) out_remove: sta_info_hash_del(local, sta); list_del_rcu(&sta->list); + out_drop_sta: local->num_sta--; synchronize_net(); __cleanup_single_sta(sta); From b6bf8c688e07c056f3059d870f6db44777259fe7 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 1 Apr 2016 14:13:31 -0700 Subject: [PATCH 09/44] mac80211: ensure no limits on station rhashtable By default, the rhashtable logic will fail to insert objects if the key-chains are too long and un-balanced. In the degenerate case where mac80211 is creating many virtual interfaces connected to the same peer(s), this case can happen. St insecure_elasticity to true to allow chains to grow as long as needed. Signed-off-by: Ben Greear [remove message, change commit message slightly] Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 18b56d7a2dbd..861b93ffbe92 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -67,6 +67,7 @@ static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ + .insecure_elasticity = true, /* Disable chain-length checks. */ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), From 84ea3a18c06c7ae94b76f763c462bf1e0ce4dc79 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Mar 2016 16:09:29 +0000 Subject: [PATCH 10/44] mac80211: add doc for RX_FLAG_DUP_VALIDATED flag Add documentation for the flag for duplication check. Fixes the following warning when running make htmldocs: warning: Enum value 'RX_FLAG_DUP_VALIDATED' not described in enum 'mac80211_rx_flags' Signed-off-by: Luis de Bethencourt [fix description] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0c09da34b67a..e385eb3076a1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1001,6 +1001,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * flag indicates that the PN was verified for replay protection. * Note that this flag is also currently only supported when a frame * is also decrypted (ie. @RX_FLAG_DECRYPTED must be set) + * @RX_FLAG_DUP_VALIDATED: The driver should set this flag if it did + * de-duplication by itself. * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on * the frame. * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on From c2d45923e3df43b58bddf80debd8e22edd5077bf Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Mar 2016 19:23:18 +0000 Subject: [PATCH 11/44] mac80211: remove description of dropped member Commit 976bd9efdae6 ("mac80211: move beacon_loss_count into ifmgd") removed the member from the sta_info struct but the description stayed lingering. Remove it. Signed-off-by: Luis de Bethencourt Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 053f5c4fa495..62193f4bc37b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -377,7 +377,6 @@ DECLARE_EWMA(signal, 1024, 8) * @uploaded: set to true when sta is uploaded to the driver * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) - * @beacon_loss_count: number of times beacon loss has triggered * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification From facde7f332f91353c7a6d34c9cff6b329cc0c3ab Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 17 Mar 2016 16:51:41 +0200 Subject: [PATCH 12/44] mac80211: don't send deferred frames outside the SP Frames that are sent between ampdu_action(IEEE80211_AMPDU_TX_START) and the move to the HT_AGG_STATE_OPERATIONAL state are buffered. If we try to start an A-MPDU session while the peer is sleeping and polling frames with U-APSD, we may have frames that will be buffered by ieee80211_tx_prep_agg. These frames have IEEE80211_TX_CTL_NO_PS_BUFFER set since they are sent to a sleeping client and possibly IEEE80211_TX_STATUS_EOSP. If the frame is buffered, we need clear these two flags since they will be re-sent after the move to HT_AGG_STATE_OPERATIONAL state which is very likely to happen after the SP ends. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 664624ef9925..6ce686b96ec0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1118,9 +1118,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, queued = true; info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; - info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS | - IEEE80211_TX_CTL_NO_PS_BUFFER | - IEEE80211_TX_STATUS_EOSP; + info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; __skb_queue_tail(&tid_tx->pending, skb); if (skb_queue_len(&tid_tx->pending) > STA_MAX_TX_BUFFER) purge_skb = __skb_dequeue(&tid_tx->pending); From 4b559ec0bfc3a9f41a127cea6964f38b2b4bb323 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 8 Mar 2016 13:35:31 +0200 Subject: [PATCH 13/44] mac80211: Fix BW upgrade for TDLS peers It is possible that the station is connected to an AP with bandwidth of 80+80MHz or 160MHz. In such cases there is no need to perform an upgrade as the maximal supported bandwidth is 80MHz. In addition, when upgrading and setting center_freq1 and bandwidth to 80MHz also set center_freq2 to 0. Fixes: 0fabfaafec3a ("mac80211: upgrade BW of TDLS peers when possible" Signed-off-by: Ilan Peer Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/tdls.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index eed1152efb42..a29ea813b7d5 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -314,7 +314,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, if (max_width > NL80211_CHAN_WIDTH_80) max_width = NL80211_CHAN_WIDTH_80; - if (uc.width == max_width) + if (uc.width >= max_width) return; /* * Channel usage constrains in the IEEE802.11ac-2013 specification only @@ -325,6 +325,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, for (i = 0; i < ARRAY_SIZE(centers_80mhz); i++) if (abs(uc.chan->center_freq - centers_80mhz[i]) <= 30) { uc.center_freq1 = centers_80mhz[i]; + uc.center_freq2 = 0; uc.width = NL80211_CHAN_WIDTH_80; break; } From f6d4671a08810ff5111099dd1febe57e7eb9ba59 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 17 Mar 2016 16:51:42 +0200 Subject: [PATCH 14/44] mac80211: close the SP when we enqueue frames during the SP Since we enqueued the frame that was supposed to be sent during the SP, and that frame may very well cary the IEEE80211_TX_STATUS_EOSP bit, we may never close the SP (WLAN_STA_SP will never be cleared). If that happens, we will not open any new SP and will never respond to any poll frame from the client. Clear WLAN_STA_SP manually if a frame that was polled during the SP is queued because of a starting A-MPDU session. The client may not see the EOSP bit, but it will at least be able to poll new frames in another SP. Reported-by: Alesya Shapira Signed-off-by: Emmanuel Grumbach [remove erroneous comment] Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6ce686b96ec0..21f6602395f7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1116,6 +1116,12 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, reset_agg_timer = true; } else { queued = true; + if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) { + clear_sta_flag(tx->sta, WLAN_STA_SP); + ps_dbg(tx->sta->sdata, + "STA %pM aid %d: SP frame queued, close the SP w/o telling the peer\n", + tx->sta->sta.addr, tx->sta->sta.aid); + } info->control.vif = &tx->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; From e43569e6d3c71eb266641c6297ea54f7ac66954f Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 1 Apr 2016 14:05:48 -0300 Subject: [PATCH 15/44] sctp: flush if we can't fit another DATA chunk There is no point on delaying the packet if we can't fit a single byte of data on it anymore. So lets just reduce the threshold by the amount that a data chunk with 4 bytes (rounding) would use. v2: based on the right tree Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 97745351d58c..9844fe573029 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -705,7 +705,8 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, /* Check whether this chunk and all the rest of pending data will fit * or delay in hopes of bundling a full sized packet. */ - if (chunk->skb->len + q->out_qlen >= transport->pathmtu - packet->overhead) + if (chunk->skb->len + q->out_qlen > + transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4) /* Enough data queued to fill a packet */ return SCTP_XMIT_OK; From eb8e97715f29a1240cdf67b0df725be27433259f Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 1 Apr 2016 14:30:32 -0300 Subject: [PATCH 16/44] sctp: use list_* in sctp_list_dequeue Use list_* helpers in sctp_list_dequeue, more readable. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65521cfdcade..03fb33efcae2 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -386,11 +386,9 @@ static inline struct list_head *sctp_list_dequeue(struct list_head *list) { struct list_head *result = NULL; - if (list->next != list) { + if (!list_empty(list)) { result = list->next; - list->next = result->next; - list->next->prev = list; - INIT_LIST_HEAD(result); + list_del_init(result); } return result; } From 8e2cc0e67f4aa33931cd15a553938163a19dcab3 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 29 Feb 2016 11:00:24 -0800 Subject: [PATCH 17/44] i40e: fix errant PCIe bandwidth message There was an error introduced with commit 3fced535079a ("i40e: X722 is on the IOSF bus and does not report the PCI bus info"), where code was added but the enabling flag is never set. CC: Anjali Singhai Jain CC: Stefan Assman Fixes: 3fced535079a ("i40e: X722 is on the IOSF bus ...") Reported-by: Steve Best Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 67006431726a..344912957cab 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -8559,6 +8559,7 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_OUTER_UDP_CSUM_CAPABLE | I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | + I40E_FLAG_NO_PCI_LINK_CHECK | I40E_FLAG_100M_SGMII_CAPABLE | I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; From 847a1d6796c767f8b697ead60997b847a84b897b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Mar 2016 16:16:01 -0500 Subject: [PATCH 18/44] e1000: Do not overestimate descriptor counts in Tx pre-check The current code path is capable of grossly overestimating the number of descriptors needed to transmit a new frame. This specifically occurs if the skb contains a number of 4K pages. The issue is that the logic for determining the descriptors needed is ((S) >> (X)) + 1. When X is 12 it means that we were indicating that we required 2 descriptors for each 4K page when we only needed one. This change corrects this by instead adding (1 << (X)) - 1 to the S value instead of adding 1 after the fact. This way we get an accurate descriptor needed count as we are essentially doing a DIV_ROUNDUP(). Reported-by: Ivan Suzdal Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 3fc7bde699ba..d213fb45855d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3106,7 +3106,7 @@ static int e1000_maybe_stop_tx(struct net_device *netdev, return __e1000_maybe_stop_tx(netdev, size); } -#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1) +#define TXD_USE_COUNT(S, X) (((S) + ((1 << (X)) - 1)) >> (X)) static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { From a4605fef7132f19afded76ee025c957558271a7d Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 2 Mar 2016 16:16:08 -0500 Subject: [PATCH 19/44] e1000: Double Tx descriptors needed check for 82544 The 82544 has code that adds one additional descriptor per data buffer. However we weren't taking that into account when determining the descriptors needed for the next transmit at the end of the xmit_frame path. This change takes that into account by doubling the number of descriptors needed for the 82544 so that we can avoid a potential issue where we could hang the Tx ring by loading frames with xmit_more enabled and then stopping the ring without writing the tail. In addition it adds a few more descriptors to account for some additional workarounds that have been added over time. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index d213fb45855d..ae90d4f12b70 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3256,12 +3256,29 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, nr_frags, mss); if (count) { + /* The descriptors needed is higher than other Intel drivers + * due to a number of workarounds. The breakdown is below: + * Data descriptors: MAX_SKB_FRAGS + 1 + * Context Descriptor: 1 + * Keep head from touching tail: 2 + * Workarounds: 3 + */ + int desc_needed = MAX_SKB_FRAGS + 7; + netdev_sent_queue(netdev, skb->len); skb_tx_timestamp(skb); e1000_tx_queue(adapter, tx_ring, tx_flags, count); + + /* 82544 potentially requires twice as many data descriptors + * in order to guarantee buffers don't end on evenly-aligned + * dwords + */ + if (adapter->pcix_82544) + desc_needed += MAX_SKB_FRAGS + 1; + /* Make sure there is space in the ring for the next send. */ - e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); + e1000_maybe_stop_tx(netdev, tx_ring, desc_needed); if (!skb->xmit_more || netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { From 727ceaa49bb86518470c19640ed7f067c5aa9485 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 5 Apr 2016 15:58:22 -0500 Subject: [PATCH 20/44] Revert "netpoll: Fix extra refcount release in netpoll_cleanup()" This reverts commit 543e3a8da5a4c453e992d5351ef405d5e32f27d7. Direct callers of __netpoll_setup() depend on it to set np->dev, so we can't simply move that assignment up to netpoll_stup(). Reported-by: Bart Van Assche Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- net/core/netpoll.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a57bd17805b4..94acfc89ad97 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -603,6 +603,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) const struct net_device_ops *ops; int err; + np->dev = ndev; strlcpy(np->dev_name, ndev->name, IFNAMSIZ); INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); @@ -669,7 +670,6 @@ int netpoll_setup(struct netpoll *np) goto unlock; } dev_hold(ndev); - np->dev = ndev; if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); @@ -770,7 +770,6 @@ int netpoll_setup(struct netpoll *np) return 0; put: - np->dev = NULL; dev_put(ndev); unlock: rtnl_unlock(); From b6ee376cb0b7fb4e7e07d6cd248bd40436fb9ba6 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 1 Apr 2016 17:17:50 -0300 Subject: [PATCH 21/44] ip6_tunnel: set rtnl_link_ops before calling register_netdevice When creating an ip6tnl tunnel with ip tunnel, rtnl_link_ops is not set before ip6_tnl_create2 is called. When register_netdevice is called, there is no linkinfo attribute in the NEWLINK message because of that. Setting rtnl_link_ops before calling register_netdevice fixes that. Fixes: 0b112457229d ("ip6tnl: add support of link creation via rtnl") Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index eb2ac4bb09ce..1f20345cbc97 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -252,12 +252,12 @@ static int ip6_tnl_create2(struct net_device *dev) t = netdev_priv(dev); + dev->rtnl_link_ops = &ip6_link_ops; err = register_netdevice(dev); if (err < 0) goto out; strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); From b4201cc4fc6e1c57d6d306b1f787865043d60129 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 4 Apr 2016 14:15:23 -0400 Subject: [PATCH 22/44] =?UTF-8?q?mac80211:=20fix=20"warning:=20=E2=80=98ta?= =?UTF-8?q?rget=5Fmetric=E2=80=99=20may=20be=20used=20uninitialized"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes: net/mac80211/mesh_hwmp.c:603:26: warning: ‘target_metric’ may be used uninitialized in this function target_metric is only consumed when reply = true so no bug exists here, but not all versions of gcc realize it. Initialize to 0 to remove the warning. Signed-off-by: Jeff Mahoney Signed-off-by: Johannes Berg --- net/mac80211/mesh_hwmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 5b6aec1a0630..002244bca948 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -530,7 +530,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, const u8 *target_addr, *orig_addr; const u8 *da; u8 target_flags, ttl, flags; - u32 orig_sn, target_sn, lifetime, target_metric; + u32 orig_sn, target_sn, lifetime, target_metric = 0; bool reply = false; bool forward = true; bool root_is_gate; From 529927f952c55f399823fc500e8ea6cac47b7fbf Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 4 Apr 2016 09:54:53 +0530 Subject: [PATCH 23/44] cxgb4: Add pci device id for chelsio t520-cr adapter Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index 06bc2d2e7a73..a2cdfc1261dc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -166,6 +166,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x5099), /* Custom 2x40G QSFP */ CH_PCI_ID_TABLE_FENTRY(0x509a), /* Custom T520-CR */ CH_PCI_ID_TABLE_FENTRY(0x509b), /* Custom T540-CR LOM */ + CH_PCI_ID_TABLE_FENTRY(0x509c), /* Custom T520-CR*/ /* T6 adapters: */ From 32fa270c8a3939a9c6cbcf8bc18a0db83a3b40d4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 6 Apr 2016 15:42:45 -0400 Subject: [PATCH 24/44] Revert "bridge: Fix incorrect variable assignment on error path in br_sysfs_addbr" This reverts commit c862cc9b70526a71d07e7bd86d9b61d1c792cead. Patch lacks a real-name Signed-off-by. Signed-off-by: David S. Miller --- net/bridge/br_sysfs_br.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index f4d40edbe8a9..6b8091407ca3 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -870,7 +870,6 @@ int br_sysfs_addbr(struct net_device *dev) br->ifobj = kobject_create_and_add(SYSFS_BRIDGE_PORT_SUBDIR, brobj); if (!br->ifobj) { - err = -EINVAL; pr_info("%s: can't add kobject (directory) %s/%s\n", __func__, dev->name, SYSFS_BRIDGE_PORT_SUBDIR); goto out3; From 77e63534d679e281bf200dd9ee2a422bd4865a2b Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 4 Apr 2016 22:31:32 +0530 Subject: [PATCH 25/44] samples/bpf: Fix build breakage with map_perf_test_user.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building BPF samples is failing with the below error: samples/bpf/map_perf_test_user.c: In function ‘main’: samples/bpf/map_perf_test_user.c:134:9: error: variable ‘r’ has initializer but incomplete type struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; ^ samples/bpf/map_perf_test_user.c:134:21: error: ‘RLIM_INFINITY’ undeclared (first use in this function) struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; ^ samples/bpf/map_perf_test_user.c:134:21: note: each undeclared identifier is reported only once for each function it appears in samples/bpf/map_perf_test_user.c:134:9: warning: excess elements in struct initializer [enabled by default] struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; ^ samples/bpf/map_perf_test_user.c:134:9: warning: (near initialization for ‘r’) [enabled by default] samples/bpf/map_perf_test_user.c:134:9: warning: excess elements in struct initializer [enabled by default] samples/bpf/map_perf_test_user.c:134:9: warning: (near initialization for ‘r’) [enabled by default] samples/bpf/map_perf_test_user.c:134:16: error: storage size of ‘r’ isn’t known struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; ^ samples/bpf/map_perf_test_user.c:139:2: warning: implicit declaration of function ‘setrlimit’ [-Wimplicit-function-declaration] setrlimit(RLIMIT_MEMLOCK, &r); ^ samples/bpf/map_perf_test_user.c:139:12: error: ‘RLIMIT_MEMLOCK’ undeclared (first use in this function) setrlimit(RLIMIT_MEMLOCK, &r); ^ samples/bpf/map_perf_test_user.c:134:16: warning: unused variable ‘r’ [-Wunused-variable] struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; ^ make[2]: *** [samples/bpf/map_perf_test_user.o] Error 1 Fix this by including the necessary header file. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: David S. Miller Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Acked-by: Alexei Starovoitov Signed-off-by: Naveen N. Rao Signed-off-by: David S. Miller --- samples/bpf/map_perf_test_user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 95af56ec5739..3147377e8fd3 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "libbpf.h" #include "bpf_load.h" From 128d1514be3583c3cfd56d66d7cdfba413b867ae Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 4 Apr 2016 22:31:33 +0530 Subject: [PATCH 26/44] samples/bpf: Use llc in PATH, rather than a hardcoded value While at it, remove the generation of .s files and fix some typos in the related comment. Cc: Alexei Starovoitov Cc: David S. Miller Cc: Daniel Borkmann Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/Makefile | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 502c9fc8db85..b820cc96a3bc 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -76,16 +76,10 @@ HOSTLOADLIBES_offwaketime += -lelf HOSTLOADLIBES_spintest += -lelf HOSTLOADLIBES_map_perf_test += -lelf -lrt -# point this to your LLVM backend with bpf support -LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc - -# asm/sysreg.h inline assmbly used by it is incompatible with llvm. -# But, ehere is not easy way to fix it, so just exclude it since it is +# asm/sysreg.h - inline assembly used by it is incompatible with llvm. +# But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. $(obj)/%.o: $(src)/%.c clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ - -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ - clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \ - -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ - -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s + -O2 -emit-llvm -c $< -o -| llc -march=bpf -filetype=obj -o $@ From 138d6153a139c318739f20e61309e5778427a73c Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 4 Apr 2016 22:31:34 +0530 Subject: [PATCH 27/44] samples/bpf: Enable powerpc support Add the necessary definitions for building bpf samples on ppc. Since ppc doesn't store function return address on the stack, modify how PT_REGS_RET() and PT_REGS_FP() work. Also, introduce PT_REGS_IP() to access the instruction pointer. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: David S. Miller Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/bpf_helpers.h | 26 ++++++++++++++++++++++++++ samples/bpf/spintest_kern.c | 2 +- samples/bpf/tracex2_kern.c | 4 ++-- samples/bpf/tracex4_kern.c | 2 +- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 9363500131a7..7904a2a493de 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -82,6 +82,7 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag #define PT_REGS_FP(x) ((x)->bp) #define PT_REGS_RC(x) ((x)->ax) #define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->ip) #elif defined(__s390x__) @@ -94,6 +95,7 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag #define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */ #define PT_REGS_RC(x) ((x)->gprs[2]) #define PT_REGS_SP(x) ((x)->gprs[15]) +#define PT_REGS_IP(x) ((x)->ip) #elif defined(__aarch64__) @@ -106,6 +108,30 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag #define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */ #define PT_REGS_RC(x) ((x)->regs[0]) #define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->pc) + +#elif defined(__powerpc__) + +#define PT_REGS_PARM1(x) ((x)->gpr[3]) +#define PT_REGS_PARM2(x) ((x)->gpr[4]) +#define PT_REGS_PARM3(x) ((x)->gpr[5]) +#define PT_REGS_PARM4(x) ((x)->gpr[6]) +#define PT_REGS_PARM5(x) ((x)->gpr[7]) +#define PT_REGS_RC(x) ((x)->gpr[3]) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->nip) #endif + +#ifdef __powerpc__ +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#else +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ + bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) +#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ + bpf_probe_read(&(ip), sizeof(ip), \ + (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +#endif + #endif diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c index 4b27619d91a4..ce0167d09cdc 100644 --- a/samples/bpf/spintest_kern.c +++ b/samples/bpf/spintest_kern.c @@ -34,7 +34,7 @@ struct bpf_map_def SEC("maps") stackmap = { #define PROG(foo) \ int foo(struct pt_regs *ctx) \ { \ - long v = ctx->ip, *val; \ + long v = PT_REGS_IP(ctx), *val; \ \ val = bpf_map_lookup_elem(&my_map, &v); \ bpf_map_update_elem(&my_map, &v, &v, BPF_ANY); \ diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c index 09c1adc27d42..6d6eefd0d465 100644 --- a/samples/bpf/tracex2_kern.c +++ b/samples/bpf/tracex2_kern.c @@ -27,10 +27,10 @@ int bpf_prog2(struct pt_regs *ctx) long init_val = 1; long *value; - /* x64/s390x specific: read ip of kfree_skb caller. + /* read ip of kfree_skb caller. * non-portable version of __builtin_return_address(0) */ - bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx)); + BPF_KPROBE_READ_RET_IP(loc, ctx); value = bpf_map_lookup_elem(&my_map, &loc); if (value) diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c index ac4671420cf1..6dd8e384de96 100644 --- a/samples/bpf/tracex4_kern.c +++ b/samples/bpf/tracex4_kern.c @@ -40,7 +40,7 @@ int bpf_prog2(struct pt_regs *ctx) long ip = 0; /* get ip address of kmem_cache_alloc_node() caller */ - bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); + BPF_KRETPROBE_READ_RET_IP(ip, ctx); struct pair v = { .val = bpf_ktime_get_ns(), From 18fcf49f87f4b280d3cd695fc77766004b223af5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 4 Apr 2016 10:32:48 -0700 Subject: [PATCH 28/44] net_sched: fix a memory leak in tc action Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/act_api.h b/include/net/act_api.h index 2a19fe111c78..03e322b30218 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -135,6 +135,7 @@ void tcf_hashinfo_destroy(const struct tc_action_ops *ops, static inline void tc_action_net_exit(struct tc_action_net *tn) { tcf_hashinfo_destroy(tn->ops, tn->hinfo); + kfree(tn->hinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, From 6ae81ced378820c4c6434b1dedba14a7122df310 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 4 Apr 2016 15:11:50 -0400 Subject: [PATCH 29/44] af_packet: tone down the Tx-ring unsupported spew. Trinity and other fuzzers can hit this WARN on far too easily, resulting in a tainted kernel that hinders automated fuzzing. Replace it with a rate-limited printk. Signed-off-by: Dave Jones Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1ecfa710ca98..f12c17f355d9 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4151,7 +4151,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { - WARN(1, "Tx-ring is not supported.\n"); + net_warn_ratelimited("Tx-ring is not supported.\n"); goto out; } From 52f95bbfcf72126a9f90a386a974fcbe6c6cae46 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Tue, 5 Apr 2016 08:46:57 +0200 Subject: [PATCH 30/44] stmmac: fix adjust link call in case of a switch is attached While initializing the phy, the stmmac driver sets the PHY_IGNORE_INTERRUPT so the PAL won't call the adjust hook that is needed, on some platforms, e.g. STi, to invoke the glue. The patch allows the PAL to poll the stmmac_adjust_link just one time in case of a switch is attached, setting later the PHY_IGNORE_INTERRUPT flag. Moving this kind of logic inside the adjust_link it makes sense to anticipate the check for EEE that will never initialized in this scenario. Reported-by: Gabriel Fernandez Signed-off-by: Giuseppe Cavallaro Tested-by: Gabriel Fernandez Cc: Alexandre TORGUE Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 78464fa7fe1f..fcbd4be562e2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -288,10 +288,6 @@ bool stmmac_eee_init(struct stmmac_priv *priv) (priv->pcs == STMMAC_PCS_RTBI)) goto out; - /* Never init EEE in case of a switch is attached */ - if (priv->phydev->is_pseudo_fixed_link) - goto out; - /* MAC core supports the EEE feature. */ if (priv->dma_cap.eee) { int tx_lpi_timer = priv->tx_lpi_timer; @@ -771,10 +767,16 @@ static void stmmac_adjust_link(struct net_device *dev) spin_unlock_irqrestore(&priv->lock, flags); - /* At this stage, it could be needed to setup the EEE or adjust some - * MAC related HW registers. - */ - priv->eee_enabled = stmmac_eee_init(priv); + if (phydev->is_pseudo_fixed_link) + /* Stop PHY layer to call the hook to adjust the link in case + * of a switch is attached to the stmmac driver. + */ + phydev->irq = PHY_IGNORE_INTERRUPT; + else + /* At this stage, init the EEE if supported. + * Never called in case of fixed_link. + */ + priv->eee_enabled = stmmac_eee_init(priv); } /** @@ -865,10 +867,6 @@ static int stmmac_init_phy(struct net_device *dev) return -ENODEV; } - /* If attached to a switch, there is no reason to poll phy handler */ - if (phydev->is_pseudo_fixed_link) - phydev->irq = PHY_IGNORE_INTERRUPT; - pr_debug("stmmac_init_phy: %s: attached to PHY (UID 0x%x)" " Link = %d\n", dev->name, phydev->phy_id, phydev->link); From 8ab18d71de8b07d2c4d6f984b718418c09ea45c5 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Tue, 5 Apr 2016 01:59:32 -0700 Subject: [PATCH 31/44] VSOCK: Detach QP check should filter out non matching QPs. The check in vmci_transport_peer_detach_cb should only allow a detach when the qp handle of the transport matches the one in the detach message. Testing: Before this change, a detach from a peer on a different socket would cause an active stream socket to register a detach. Reviewed-by: George Zhang Signed-off-by: Jorgen Hansen Signed-off-by: David S. Miller --- net/vmw_vsock/vmci_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 0a369bb440e7..662bdd20a748 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -842,7 +842,7 @@ static void vmci_transport_peer_detach_cb(u32 sub_id, * qp_handle. */ if (vmci_handle_is_invalid(e_payload->handle) || - vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) + !vmci_handle_is_equal(trans->qp_handle, e_payload->handle)) return; /* We don't ask for delayed CBs when we subscribe to this event (we @@ -2154,7 +2154,7 @@ module_exit(vmci_transport_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMCI transport for Virtual Sockets"); -MODULE_VERSION("1.0.2.0-k"); +MODULE_VERSION("1.0.3.0-k"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("vmware_vsock"); MODULE_ALIAS_NETPROTO(PF_VSOCK); From 9f134c34fbce58d863f60995cc13728af28b741a Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 5 Apr 2016 15:32:53 +0530 Subject: [PATCH 32/44] lib/test_bpf: Fix JMP_JSET tests JMP_JSET tests incorrectly used BPF_JNE. Fix the same. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: "David S. Miller" Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Paul Mackerras Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/test_bpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 27a7a26b1ece..e76fa4d224d8 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4303,7 +4303,7 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_ALU32_IMM(BPF_MOV, R0, 0), BPF_LD_IMM64(R1, 3), - BPF_JMP_IMM(BPF_JNE, R1, 2, 1), + BPF_JMP_IMM(BPF_JSET, R1, 2, 1), BPF_EXIT_INSN(), BPF_ALU32_IMM(BPF_MOV, R0, 1), BPF_EXIT_INSN(), @@ -4317,7 +4317,7 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_ALU32_IMM(BPF_MOV, R0, 0), BPF_LD_IMM64(R1, 3), - BPF_JMP_IMM(BPF_JNE, R1, 0xffffffff, 1), + BPF_JMP_IMM(BPF_JSET, R1, 0xffffffff, 1), BPF_EXIT_INSN(), BPF_ALU32_IMM(BPF_MOV, R0, 1), BPF_EXIT_INSN(), @@ -4474,7 +4474,7 @@ static struct bpf_test tests[] = { BPF_ALU32_IMM(BPF_MOV, R0, 0), BPF_LD_IMM64(R1, 3), BPF_LD_IMM64(R2, 2), - BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_JMP_REG(BPF_JSET, R1, R2, 1), BPF_EXIT_INSN(), BPF_ALU32_IMM(BPF_MOV, R0, 1), BPF_EXIT_INSN(), @@ -4489,7 +4489,7 @@ static struct bpf_test tests[] = { BPF_ALU32_IMM(BPF_MOV, R0, 0), BPF_LD_IMM64(R1, 3), BPF_LD_IMM64(R2, 0xffffffff), - BPF_JMP_REG(BPF_JNE, R1, R2, 1), + BPF_JMP_REG(BPF_JSET, R1, R2, 1), BPF_EXIT_INSN(), BPF_ALU32_IMM(BPF_MOV, R0, 1), BPF_EXIT_INSN(), From c7395d6bd7cc1ca6a47b733439c364f7a423a489 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 5 Apr 2016 15:32:54 +0530 Subject: [PATCH 33/44] lib/test_bpf: Add tests for unsigned BPF_JGT Unsigned Jump-if-Greater-Than. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: "David S. Miller" Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Paul Mackerras Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/test_bpf.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index e76fa4d224d8..7e6fb49530dc 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4222,6 +4222,20 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "JMP_JGT_K: Unsigned jump: if (-1 > 1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_JMP_IMM(BPF_JGT, R1, 1, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGE | BPF_K */ { "JMP_JGE_K: if (3 >= 2) return 1", @@ -4404,6 +4418,21 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "JMP_JGT_X: Unsigned jump: if (-1 > 1) return 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_LD_IMM64(R1, -1), + BPF_LD_IMM64(R2, 1), + BPF_JMP_REG(BPF_JGT, R1, R2, 1), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_JMP | BPF_JGE | BPF_X */ { "JMP_JGE_X: if (3 >= 2) return 1", From b64b50eac47046f1ecd23adac650317ccadf400f Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 5 Apr 2016 15:32:55 +0530 Subject: [PATCH 34/44] lib/test_bpf: Add test to check for result of 32-bit add that overflows BPF_ALU32 and BPF_ALU64 tests for adding two 32-bit values that results in 32-bit overflow. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: "David S. Miller" Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Paul Mackerras Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/test_bpf.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 7e6fb49530dc..2fb31aa89b9d 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2443,6 +2443,22 @@ static struct bpf_test tests[] = { { }, { { 0, 4294967295U } }, }, + { + "ALU_ADD_X: 2 + 4294967294 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_LD_IMM64(R1, 4294967294U), + BPF_ALU32_REG(BPF_ADD, R0, R1), + BPF_JMP_IMM(BPF_JEQ, R0, 0, 2), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, { "ALU64_ADD_X: 1 + 2 = 3", .u.insns_int = { @@ -2467,6 +2483,23 @@ static struct bpf_test tests[] = { { }, { { 0, 4294967295U } }, }, + { + "ALU64_ADD_X: 2 + 4294967294 = 4294967296", + .u.insns_int = { + BPF_LD_IMM64(R0, 2), + BPF_LD_IMM64(R1, 4294967294U), + BPF_LD_IMM64(R2, 4294967296ULL), + BPF_ALU64_REG(BPF_ADD, R0, R1), + BPF_JMP_REG(BPF_JEQ, R0, R2, 2), + BPF_MOV32_IMM(R0, 0), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, /* BPF_ALU | BPF_ADD | BPF_K */ { "ALU_ADD_K: 1 + 2 = 3", @@ -2501,6 +2534,21 @@ static struct bpf_test tests[] = { { }, { { 0, 4294967295U } }, }, + { + "ALU_ADD_K: 4294967294 + 2 = 0", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967294U), + BPF_ALU32_IMM(BPF_ADD, R0, 2), + BPF_JMP_IMM(BPF_JEQ, R0, 0, 2), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, { "ALU_ADD_K: 0 + (-1) = 0x00000000ffffffff", .u.insns_int = { @@ -2550,6 +2598,22 @@ static struct bpf_test tests[] = { { }, { { 0, 2147483647 } }, }, + { + "ALU64_ADD_K: 4294967294 + 2 = 4294967296", + .u.insns_int = { + BPF_LD_IMM64(R0, 4294967294U), + BPF_LD_IMM64(R1, 4294967296ULL), + BPF_ALU64_IMM(BPF_ADD, R0, 2), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, { "ALU64_ADD_K: 2147483646 + -2147483647 = -1", .u.insns_int = { From 9c94f6c8e0d3d4b1e44eb7ad90849a1c964aabe7 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Tue, 5 Apr 2016 15:32:56 +0530 Subject: [PATCH 35/44] lib/test_bpf: Add additional BPF_ADD tests Some of these tests proved useful with the powerpc eBPF JIT port due to sign-extended 16-bit immediate loads. Though some of these aspects get covered in other tests, it is better to have explicit tests so as to quickly tag the precise problem. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: "David S. Miller" Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Paul Mackerras Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- lib/test_bpf.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2fb31aa89b9d..8f22fbedc3a6 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2565,6 +2565,70 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU_ADD_K: 0 + 0xffff = 0xffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffff), + BPF_ALU32_IMM(BPF_ADD, R2, 0xffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_ADD_K: 0 + 0x7fffffff = 0x7fffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x7fffffff), + BPF_ALU32_IMM(BPF_ADD, R2, 0x7fffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_ADD_K: 0 + 0x80000000 = 0x80000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x80000000), + BPF_ALU32_IMM(BPF_ADD, R2, 0x80000000), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_ADD_K: 0 + 0x80008000 = 0x80008000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x80008000), + BPF_ALU32_IMM(BPF_ADD, R2, 0x80008000), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, { "ALU64_ADD_K: 1 + 2 = 3", .u.insns_int = { @@ -2657,6 +2721,70 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_ADD_K: 0 + 0xffff = 0xffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffff), + BPF_ALU64_IMM(BPF_ADD, R2, 0xffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 0 + 0x7fffffff = 0x7fffffff", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0x7fffffff), + BPF_ALU64_IMM(BPF_ADD, R2, 0x7fffffff), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU64_ADD_K: 0 + 0x80000000 = 0xffffffff80000000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffffffff80000000LL), + BPF_ALU64_IMM(BPF_ADD, R2, 0x80000000), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, + { + "ALU_ADD_K: 0 + 0x80008000 = 0xffffffff80008000", + .u.insns_int = { + BPF_LD_IMM64(R2, 0x0), + BPF_LD_IMM64(R3, 0xffffffff80008000LL), + BPF_ALU64_IMM(BPF_ADD, R2, 0x80008000), + BPF_JMP_REG(BPF_JEQ, R2, R3, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x1 } }, + }, /* BPF_ALU | BPF_SUB | BPF_X */ { "ALU_SUB_X: 3 - 1 = 2", From eff471b1b9475cfda38078308e71a9e2d811ff44 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 5 Apr 2016 16:39:37 +0200 Subject: [PATCH 36/44] MAINTAINERS: intel-wired-lan list is moderated I got the following message: > Your mail to 'Intel-wired-lan' with the subject > > [PATCH net-next] net: intel: remove dead links > > Is being held until the list moderator can review it for approval. > > The reason it is being held: > > Post by non-member to a members-only list Mark the list as moderated. Signed-off-by: Jiri Benc Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7ba7bc485d74..1d8ce9b9e4f1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5750,7 +5750,7 @@ R: Don Skidmore R: Bruce Allan R: John Ronciak R: Mitch Williams -L: intel-wired-lan@lists.osuosl.org +L: intel-wired-lan@lists.osuosl.org (moderated for non-subscribers) W: http://www.intel.com/support/feedback.htm W: http://e1000.sourceforge.net/ Q: http://patchwork.ozlabs.org/project/intel-wired-lan/list/ From 0a1a37b6d62e6864a77a82e925217c720f91f963 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Tue, 5 Apr 2016 07:41:11 -0700 Subject: [PATCH 37/44] net: add the AF_KCM entries to family name tables This is for the recent kcm driver, which introduces AF_KCM(41) in b7ac4eb(kcm: Kernel Connection Multiplexor module). Signed-off-by: Dexuan Cui Cc: Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/sock.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index b67b9aedb230..7e73c26b6bb4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -221,7 +221,8 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , - "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX" + "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , + "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -237,7 +238,8 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , - "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX" + "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , + "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -253,7 +255,8 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , - "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX" + "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , + "clock-AF_MAX" }; /* From a0ca153f98db8cf25298565a09e11fe9d82846ad Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 5 Apr 2016 09:13:39 -0700 Subject: [PATCH 38/44] GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU This patch fixes an issue I found in which we were dropping frames if we had enabled checksums on GRE headers that were encapsulated by either FOU or GUE. Without this patch I was barely able to get 1 Gb/s of throughput. With this patch applied I am now at least getting around 6 Gb/s. The issue is due to the fact that with FOU or GUE applied we do not provide a transport offset pointing to the GRE header, nor do we offload it in software as the GRE header is completely skipped by GSO and treated like a VXLAN or GENEVE type header. As such we need to prevent the stack from generating it and also prevent GRE from generating it via any interface we create. Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- net/core/dev.c | 1 + net/ipv4/fou.c | 6 ++++++ net/ipv4/gre_offload.c | 8 ++++++++ net/ipv4/ip_gre.c | 13 ++++++++++--- 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d09c2e4..8395308a2445 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2120,7 +2120,10 @@ struct napi_gro_cb { /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; - /* 7 bit hole */ + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* 6 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; diff --git a/net/core/dev.c b/net/core/dev.c index b9bcbe77d913..77a71cd68535 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4439,6 +4439,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; /* Setup for GRO checksum validation */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 5a94aea280d3..a39068b4a4d9 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -203,6 +203,9 @@ static struct sk_buff **fou_gro_receive(struct sk_buff **head, */ NAPI_GRO_CB(skb)->encap_mark = 0; + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); @@ -368,6 +371,9 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head, */ NAPI_GRO_CB(skb)->encap_mark = 0; + /* Flag this frame as already having an outer encap header */ + NAPI_GRO_CB(skb)->is_fou = 1; + rcu_read_lock(); offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[guehdr->proto_ctype]); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index c47539d04b88..6a5bd4317866 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -150,6 +150,14 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, if ((greh->flags & ~(GRE_KEY|GRE_CSUM)) != 0) goto out; + /* We can only support GRE_CSUM if we can track the location of + * the GRE header. In the case of FOU/GUE we cannot because the + * outer UDP header displaces the GRE header leaving us in a state + * of limbo. + */ + if ((greh->flags & GRE_CSUM) && NAPI_GRO_CB(skb)->is_fou) + goto out; + type = greh->protocol; rcu_read_lock(); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 31936d387cfd..af5d1f38217f 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -862,9 +862,16 @@ static void __gre_tunnel_init(struct net_device *dev) dev->hw_features |= GRE_FEATURES; if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { - /* TCP offload with GRE SEQ is not supported. */ - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* TCP offload with GRE SEQ is not supported, nor + * can we support 2 levels of outer headers requiring + * an update. + */ + if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || + (tunnel->encap.type == TUNNEL_ENCAP_NONE)) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + /* Can use a lockless transmit, unless we generate * output sequences */ From e98499ac63977c75331b198a18979944a532c9f2 Mon Sep 17 00:00:00 2001 From: shamir rabinovitch Date: Thu, 7 Apr 2016 07:57:35 -0400 Subject: [PATCH 39/44] RDS: memory allocated must be align to 8 Fix issue in 'rds_ib_cong_recv' when accessing unaligned memory allocated by 'rds_page_remainder_alloc' using uint64_t pointer. Signed-off-by: Shamir Rabinovitch Signed-off-by: David S. Miller --- net/rds/page.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/page.c b/net/rds/page.c index 616f21f4e7d7..e2b5a5832d3d 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -135,8 +135,8 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, if (rem->r_offset != 0) rds_stats_inc(s_page_remainder_hit); - rem->r_offset += bytes; - if (rem->r_offset == PAGE_SIZE) { + rem->r_offset += ALIGN(bytes, 8); + if (rem->r_offset >= PAGE_SIZE) { __free_page(rem->r_page); rem->r_page = NULL; } From 579ba855524c49d30929871b63c1f7637ffab4b6 Mon Sep 17 00:00:00 2001 From: shamir rabinovitch Date: Thu, 7 Apr 2016 07:57:36 -0400 Subject: [PATCH 40/44] RDS: fix congestion map corruption for PAGE_SIZE > 4k When PAGE_SIZE > 4k single page can contain 2 RDS fragments. If 'rds_ib_cong_recv' ignore the RDS fragment offset in to the page it then read the data fragment as far congestion map update and lead to corruption of the RDS connection far congestion map. Signed-off-by: Shamir Rabinovitch Signed-off-by: David S. Miller --- net/rds/ib_recv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 977fb86065b7..abc8cc805e8d 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -796,7 +796,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, addr = kmap_atomic(sg_page(&frag->f_sg)); - src = addr + frag_off; + src = addr + frag->f_sg.offset + frag_off; dst = (void *)map->m_page_addrs[map_page] + map_off; for (k = 0; k < to_copy; k += 8) { /* Record ports that became uncongested, ie From 3ba3458fb9c050718b95275a3310b74415e767e2 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Tue, 5 Apr 2016 18:41:08 +0200 Subject: [PATCH 41/44] ipv6: Count in extension headers in skb->network_header When sending a UDPv6 message longer than MTU, account for the length of fragmentable IPv6 extension headers in skb->network_header offset. Same as we do in alloc_new_skb path in __ip6_append_data(). This ensures that later on __ip6_make_skb() will make space in headroom for fragmentable extension headers: /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); Prevents a splat due to skb_under_panic: skbuff: skb_under_panic: text:ffffffff8143397b len:2126 put:14 \ head:ffff880005bacf50 data:ffff880005bacf4a tail:0x48 end:0xc0 dev:lo ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:104! invalid opcode: 0000 [#1] KASAN CPU: 0 PID: 160 Comm: reproducer Not tainted 4.6.0-rc2 #65 [...] Call Trace: [] skb_push+0x79/0x80 [] eth_header+0x2b/0x100 [] neigh_resolve_output+0x210/0x310 [] ip6_finish_output2+0x4a7/0x7c0 [] ip6_output+0x16a/0x280 [] ip6_local_out+0xb1/0xf0 [] ip6_send_skb+0x45/0xd0 [] udp_v6_send_skb+0x246/0x5d0 [] udpv6_sendmsg+0xa6e/0x1090 [...] Reported-by: Ji Jianwen Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9428345d3a07..bc972e7152c7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1090,8 +1090,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu, unsigned int flags, - const struct flowi6 *fl6) + int exthdrlen, int transhdrlen, int mtu, + unsigned int flags, const struct flowi6 *fl6) { struct sk_buff *skb; @@ -1116,7 +1116,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_put(skb, fragheaderlen + transhdrlen); /* initialize network header pointer */ - skb_reset_network_header(skb); + skb_set_network_header(skb, exthdrlen); /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; @@ -1358,7 +1358,7 @@ static int __ip6_append_data(struct sock *sk, (rt->dst.dev->features & NETIF_F_UFO) && (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, + hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); if (err) goto error; From 94a57f1f8a9de90ab4b0f8748361ff8be706c80c Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 7 Apr 2016 21:28:38 -0700 Subject: [PATCH 42/44] mpls: find_outdev: check for err ptr in addition to NULL check find_outdev calls inet{,6}_fib_lookup_dev() or dev_get_by_index() to find the output device. In case of an error, inet{,6}_fib_lookup_dev() returns error pointer and dev_get_by_index() returns NULL. But the function only checks for NULL and thus can end up calling dev_put on an ERR_PTR. This patch adds an additional check for err ptr after the NULL check. Before: Trying to add an mpls route with no oif from user, no available path to 10.1.1.8 and no default route: $ip -f mpls route add 100 as 200 via inet 10.1.1.8 [ 822.337195] BUG: unable to handle kernel NULL pointer dereference at 00000000000003a3 [ 822.340033] IP: [] mpls_nh_assign_dev+0x10b/0x182 [ 822.340033] PGD 1db38067 PUD 1de9e067 PMD 0 [ 822.340033] Oops: 0000 [#1] SMP [ 822.340033] Modules linked in: [ 822.340033] CPU: 0 PID: 11148 Comm: ip Not tainted 4.5.0-rc7+ #54 [ 822.340033] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5.1-0-g8936dbb-20141113_115728-nilsson.home.kraxel.org 04/01/2014 [ 822.340033] task: ffff88001db82580 ti: ffff88001dad4000 task.ti: ffff88001dad4000 [ 822.340033] RIP: 0010:[] [] mpls_nh_assign_dev+0x10b/0x182 [ 822.340033] RSP: 0018:ffff88001dad7a88 EFLAGS: 00010282 [ 822.340033] RAX: ffffffffffffff9b RBX: ffffffffffffff9b RCX: 0000000000000002 [ 822.340033] RDX: 00000000ffffff9b RSI: 0000000000000008 RDI: 0000000000000000 [ 822.340033] RBP: ffff88001ddc9ea0 R08: ffff88001e9f1768 R09: 0000000000000000 [ 822.340033] R10: ffff88001d9c1100 R11: ffff88001e3c89f0 R12: ffffffff8187e0c0 [ 822.340033] R13: ffffffff8187e0c0 R14: ffff88001ddc9e80 R15: 0000000000000004 [ 822.340033] FS: 00007ff9ed798700(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 822.340033] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 822.340033] CR2: 00000000000003a3 CR3: 000000001de89000 CR4: 00000000000006f0 [ 822.340033] Stack: [ 822.340033] 0000000000000000 0000000100000000 0000000000000000 0000000000000000 [ 822.340033] 0000000000000000 0801010a00000000 0000000000000000 0000000000000000 [ 822.340033] 0000000000000004 ffffffff8148749b ffffffff8187e0c0 000000000000001c [ 822.340033] Call Trace: [ 822.340033] [] ? mpls_rt_alloc+0x2b/0x3e [ 822.340033] [] ? mpls_rtm_newroute+0x358/0x3e2 [ 822.340033] [] ? get_page+0x5/0xa [ 822.340033] [] ? rtnetlink_rcv_msg+0x17e/0x191 [ 822.340033] [] ? __kmalloc_track_caller+0x8c/0x9e [ 822.340033] [] ? rht_key_hashfn.isra.20.constprop.57+0x14/0x1f [ 822.340033] [] ? __rtnl_unlock+0xc/0xc [ 822.340033] [] ? netlink_rcv_skb+0x36/0x82 [ 822.340033] [] ? rtnetlink_rcv+0x1f/0x28 [ 822.340033] [] ? netlink_unicast+0x106/0x189 [ 822.340033] [] ? netlink_sendmsg+0x27f/0x2c8 [ 822.340033] [] ? sock_sendmsg_nosec+0x10/0x1b [ 822.340033] [] ? ___sys_sendmsg+0x182/0x1e3 [ 822.340033] [] ? __alloc_pages_nodemask+0x11c/0x1e4 [ 822.340033] [] ? PageAnon+0x5/0xd [ 822.340033] [] ? __page_set_anon_rmap+0x45/0x52 [ 822.340033] [] ? get_page+0x5/0xa [ 822.340033] [] ? __lru_cache_add+0x1a/0x3a [ 822.340033] [] ? current_kernel_time64+0x9/0x30 [ 822.340033] [] ? __sys_sendmsg+0x3c/0x5a [ 822.340033] [] ? entry_SYSCALL_64_fastpath+0x12/0x6a [ 822.340033] Code: 83 08 04 00 00 65 ff 00 48 8b 3c 24 e8 40 7c f2 ff eb 13 48 c7 c3 9f ff ff ff eb 0f 89 ce e8 f1 ae f1 ff 48 89 c3 48 85 db 74 15 <48> 8b 83 08 04 00 00 65 ff 08 48 81 fb 00 f0 ff ff 76 0d eb 07 [ 822.340033] RIP [] mpls_nh_assign_dev+0x10b/0x182 [ 822.340033] RSP [ 822.340033] CR2: 00000000000003a3 [ 822.435363] ---[ end trace 98cc65e6f6b8bf11 ]--- After patch: $ip -f mpls route add 100 as 200 via inet 10.1.1.8 RTNETLINK answers: Network is unreachable Signed-off-by: Roopa Prabhu Reported-by: David Miller Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index b18c5ed42d95..0b80a7140cc4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -543,6 +543,9 @@ static struct net_device *find_outdev(struct net *net, if (!dev) return ERR_PTR(-ENODEV); + if (IS_ERR(dev)) + return dev; + /* The caller is holding rtnl anyways, so release the dev reference */ dev_put(dev); From 016adb7260f481168c03e09f785184d6d5278894 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 8 Apr 2016 13:26:48 +0800 Subject: [PATCH 43/44] tuntap: restore default qdisc After commit f84bb1eac027 ("net: fix IFF_NO_QUEUE for drivers using alloc_netdev"), default qdisc was changed to noqueue because tuntap does not set tx_queue_len during .setup(). This patch restores default qdisc by setting tx_queue_len in tun_setup(). Fixes: f84bb1eac027 ("net: fix IFF_NO_QUEUE for drivers using alloc_netdev") Cc: Phil Sutter Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Acked-by: Phil Sutter Signed-off-by: David S. Miller --- drivers/net/tun.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 510e90a6bb26..2c9e45f50edb 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1015,7 +1015,6 @@ static void tun_net_init(struct net_device *dev) /* Zero header length */ dev->type = ARPHRD_NONE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; - dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; case IFF_TAP: @@ -1027,7 +1026,6 @@ static void tun_net_init(struct net_device *dev) eth_hw_addr_random(dev); - dev->tx_queue_len = TUN_READQ_SIZE; /* We prefer our own queue length */ break; } } @@ -1481,6 +1479,8 @@ static void tun_setup(struct net_device *dev) dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = tun_free_netdev; + /* We prefer our own queue length */ + dev->tx_queue_len = TUN_READQ_SIZE; } /* Trivial set of netlink ops to allow deleting tun or tap From 3430284feab3caacf94984400c6e84d0858de880 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 5 Apr 2016 13:43:53 -0700 Subject: [PATCH 44/44] bridge, netem: mark mailing lists as moderated I moderate these (lightly loaded) lists to block spam. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1d8ce9b9e4f1..8cc99ff05b97 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4302,7 +4302,7 @@ F: drivers/net/ethernet/agere/ ETHERNET BRIDGE M: Stephen Hemminger -L: bridge@lists.linux-foundation.org +L: bridge@lists.linux-foundation.org (moderated for non-subscribers) L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net:Bridge S: Maintained @@ -7575,7 +7575,7 @@ F: drivers/infiniband/hw/nes/ NETEM NETWORK EMULATOR M: Stephen Hemminger -L: netem@lists.linux-foundation.org +L: netem@lists.linux-foundation.org (moderated for non-subscribers) S: Maintained F: net/sched/sch_netem.c