From 570b90fa230b8021f51a67fab2245fe8df6fe37d Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Mon, 12 Dec 2016 12:55:55 -0800 Subject: [PATCH 01/35] orinoco: Use shash instead of ahash for MIC calculations Eric Biggers pointed out that the orinoco driver pointed scatterlists at the stack. Fix it by switching from ahash to shash. The result should be simpler, faster, and more correct. kvalo: cherry picked from commit 1fef293b8a9850cfa124a53c1d8878d355010403 as I accidentally applied this patch to wireless-drivers-next when I was supposed to apply this wireless-drivers Cc: stable@vger.kernel.org # 4.9 only Reported-by: Eric Biggers Signed-off-by: Andy Lutomirski Signed-off-by: Kalle Valo --- drivers/net/wireless/intersil/orinoco/mic.c | 44 +++++++++++-------- drivers/net/wireless/intersil/orinoco/mic.h | 3 +- .../net/wireless/intersil/orinoco/orinoco.h | 4 +- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/intersil/orinoco/mic.c b/drivers/net/wireless/intersil/orinoco/mic.c index bc7397d709d3..08bc7822f820 100644 --- a/drivers/net/wireless/intersil/orinoco/mic.c +++ b/drivers/net/wireless/intersil/orinoco/mic.c @@ -16,7 +16,7 @@ /********************************************************************/ int orinoco_mic_init(struct orinoco_private *priv) { - priv->tx_tfm_mic = crypto_alloc_ahash("michael_mic", 0, + priv->tx_tfm_mic = crypto_alloc_shash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " @@ -25,7 +25,7 @@ int orinoco_mic_init(struct orinoco_private *priv) return -ENOMEM; } - priv->rx_tfm_mic = crypto_alloc_ahash("michael_mic", 0, + priv->rx_tfm_mic = crypto_alloc_shash("michael_mic", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->rx_tfm_mic)) { printk(KERN_DEBUG "orinoco_mic_init: could not allocate " @@ -40,17 +40,16 @@ int orinoco_mic_init(struct orinoco_private *priv) void orinoco_mic_free(struct orinoco_private *priv) { if (priv->tx_tfm_mic) - crypto_free_ahash(priv->tx_tfm_mic); + crypto_free_shash(priv->tx_tfm_mic); if (priv->rx_tfm_mic) - crypto_free_ahash(priv->rx_tfm_mic); + crypto_free_shash(priv->rx_tfm_mic); } -int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, +int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *da, u8 *sa, u8 priority, u8 *data, size_t data_len, u8 *mic) { - AHASH_REQUEST_ON_STACK(req, tfm_michael); - struct scatterlist sg[2]; + SHASH_DESC_ON_STACK(desc, tfm_michael); u8 hdr[ETH_HLEN + 2]; /* size of header + padding */ int err; @@ -67,18 +66,27 @@ int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, hdr[ETH_ALEN * 2 + 2] = 0; hdr[ETH_ALEN * 2 + 3] = 0; - /* Use scatter gather to MIC header and data in one go */ - sg_init_table(sg, 2); - sg_set_buf(&sg[0], hdr, sizeof(hdr)); - sg_set_buf(&sg[1], data, data_len); + desc->tfm = tfm_michael; + desc->flags = 0; - if (crypto_ahash_setkey(tfm_michael, key, MIC_KEYLEN)) - return -1; + err = crypto_shash_setkey(tfm_michael, key, MIC_KEYLEN); + if (err) + return err; + + err = crypto_shash_init(desc); + if (err) + return err; + + err = crypto_shash_update(desc, hdr, sizeof(hdr)); + if (err) + return err; + + err = crypto_shash_update(desc, data, data_len); + if (err) + return err; + + err = crypto_shash_final(desc, mic); + shash_desc_zero(desc); - ahash_request_set_tfm(req, tfm_michael); - ahash_request_set_callback(req, 0, NULL, NULL); - ahash_request_set_crypt(req, sg, mic, data_len + sizeof(hdr)); - err = crypto_ahash_digest(req); - ahash_request_zero(req); return err; } diff --git a/drivers/net/wireless/intersil/orinoco/mic.h b/drivers/net/wireless/intersil/orinoco/mic.h index ce731d05cc98..e8724e889219 100644 --- a/drivers/net/wireless/intersil/orinoco/mic.h +++ b/drivers/net/wireless/intersil/orinoco/mic.h @@ -6,6 +6,7 @@ #define _ORINOCO_MIC_H_ #include +#include #define MICHAEL_MIC_LEN 8 @@ -15,7 +16,7 @@ struct crypto_ahash; int orinoco_mic_init(struct orinoco_private *priv); void orinoco_mic_free(struct orinoco_private *priv); -int orinoco_mic(struct crypto_ahash *tfm_michael, u8 *key, +int orinoco_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *da, u8 *sa, u8 priority, u8 *data, size_t data_len, u8 *mic); diff --git a/drivers/net/wireless/intersil/orinoco/orinoco.h b/drivers/net/wireless/intersil/orinoco/orinoco.h index 2f0c84b1c440..5fa1c3e3713f 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco.h +++ b/drivers/net/wireless/intersil/orinoco/orinoco.h @@ -152,8 +152,8 @@ struct orinoco_private { u8 *wpa_ie; int wpa_ie_len; - struct crypto_ahash *rx_tfm_mic; - struct crypto_ahash *tx_tfm_mic; + struct crypto_shash *rx_tfm_mic; + struct crypto_shash *tx_tfm_mic; unsigned int wpa_enabled:1; unsigned int tkip_cm_active:1; From 60f59ce0278557f7896d5158ae6d12a4855a72cc Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 21 Dec 2016 11:18:55 -0600 Subject: [PATCH 02/35] rtlwifi: rtl_usb: Fix missing entry in USB driver's private data These drivers need to be able to reference "struct ieee80211_hw" from the driver's private data, and vice versa. The USB driver failed to store the address of ieee80211_hw in the private data. Although this bug has been present for a long time, it was not exposed until commit ba9f93f82aba ("rtlwifi: Fix enter/exit power_save"). Fixes: ba9f93f82aba ("rtlwifi: Fix enter/exit power_save") Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 0a508649903d..49015b05f3d1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1063,6 +1063,7 @@ int rtl_usb_probe(struct usb_interface *intf, return -ENOMEM; } rtlpriv = hw->priv; + rtlpriv->hw = hw; rtlpriv->usb_data = kzalloc(RTL_USB_MAX_RX_COUNT * sizeof(u32), GFP_KERNEL); if (!rtlpriv->usb_data) From 811a919135b980bac8009d042acdccf10dc1ef5e Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Fri, 6 Jan 2017 12:14:48 +0100 Subject: [PATCH 03/35] phy state machine: failsafe leave invalid RUNNING state While in RUNNING state, phy_state_machine() checks for link changes by comparing phydev->link before and after calling phy_read_status(). This works as long as it is guaranteed that phydev->link is never changed outside the phy_state_machine(). If in some setups this happens, it causes the state machine to miss a link loss and remain RUNNING despite phydev->link being 0. This has been observed running a dsa setup with a process continuously polling the link states over ethtool each second (SNMPD RFC-1213 agent). Disconnecting the link on a phy followed by a ETHTOOL_GSET causes dsa_slave_get_settings() / dsa_slave_get_link_ksettings() to call phy_read_status() and with that modify the link status - and with that bricking the phy state machine. This patch adds a fail-safe check while in RUNNING, which causes to move to CHANGELINK when the link is gone and we are still RUNNING. Signed-off-by: Zefir Kurtisi Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 25f93a98863b..48da6e93c3f7 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1065,6 +1065,15 @@ void phy_state_machine(struct work_struct *work) if (old_link != phydev->link) phydev->state = PHY_CHANGELINK; } + /* + * Failsafe: check that nobody set phydev->link=0 between two + * poll cycles, otherwise we won't leave RUNNING state as long + * as link remains down. + */ + if (!phydev->link && phydev->state == PHY_RUNNING) { + phydev->state = PHY_CHANGELINK; + phydev_err(phydev, "no link in PHY_RUNNING\n"); + } break; case PHY_CHANGELINK: err = phy_read_status(phydev); From 67c408cfa8862fe7e45b3a1f762f7140e03b7217 Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Sat, 7 Jan 2017 23:53:00 +0100 Subject: [PATCH 04/35] ipv6: fix typos o s/approriate/appropriate o s/discouvery/discovery Signed-off-by: Alexander Alemayhu Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8417c41d8ec8..ce5aaf448c54 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1464,7 +1464,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_node *fn; /* Get the "current" route for this destination and - * check if the redirect has come from approriate router. + * check if the redirect has come from appropriate router. * * RFC 4861 specifies that redirects should only be * accepted if they come from the nexthop to the target. @@ -2768,7 +2768,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) old MTU is the lowest MTU in the path, update the route PMTU to reflect the increase. In this case if the other nodes' MTU also have the lowest MTU, TOO BIG MESSAGE will be lead to - PMTU discouvery. + PMTU discovery. */ if (rt->dst.dev == arg->dev && dst_metric_raw(&rt->dst, RTAX_MTU) && From b007f09072ca8afa118ade333e717ba443e8d807 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 9 Jan 2017 10:45:49 +0300 Subject: [PATCH 05/35] ipv4: make tcp_notsent_lowat sysctl knob behave as true unsigned int > cat /proc/sys/net/ipv4/tcp_notsent_lowat -1 > echo 4294967295 > /proc/sys/net/ipv4/tcp_notsent_lowat -bash: echo: write error: Invalid argument > echo -2147483648 > /proc/sys/net/ipv4/tcp_notsent_lowat > cat /proc/sys/net/ipv4/tcp_notsent_lowat -2147483648 but in documentation we have "tcp_notsent_lowat - UNSIGNED INTEGER" v2: simplify to just proc_douintvec Signed-off-by: Pavel Tikhomirov Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 22cbd61079b5..b2fa498b15d1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -951,7 +951,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec, }, { .procname = "tcp_tw_reuse", From ce7e40c432ba84da104438f6799d460a4cad41bc Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Mon, 9 Jan 2017 20:57:48 +0700 Subject: [PATCH 06/35] net/appletalk: Fix kernel memory disclosure ipddp_route structs contain alignment padding so kernel heap memory is leaked when they are copied to user space in ipddp_ioctl(SIOCFINDIPDDPRT). Change kmalloc() to kzalloc() to clear that memory. Signed-off-by: Vlad Tsyrklevich Signed-off-by: David S. Miller --- drivers/net/appletalk/ipddp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/appletalk/ipddp.c b/drivers/net/appletalk/ipddp.c index b8c293373ecc..a306de4318d7 100644 --- a/drivers/net/appletalk/ipddp.c +++ b/drivers/net/appletalk/ipddp.c @@ -190,7 +190,7 @@ static netdev_tx_t ipddp_xmit(struct sk_buff *skb, struct net_device *dev) */ static int ipddp_create(struct ipddp_route *new_rt) { - struct ipddp_route *rt = kmalloc(sizeof(*rt), GFP_KERNEL); + struct ipddp_route *rt = kzalloc(sizeof(*rt), GFP_KERNEL); if (rt == NULL) return -ENOMEM; From 2ebae8bd60188f57e26e95e7fde6b8943297d348 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 9 Jan 2017 15:17:27 +0100 Subject: [PATCH 07/35] net: phy: Add Meson GXL PHY hardware dependency As I understand it the Meson GXL PHY driver is only useful on one architecture so only make it visible on that architecture. Signed-off-by: Jean Delvare Fixes: 7334b3e47aee ("net: phy: Add Meson GXL Internal PHY driver") Cc: Neil Armstrong Cc: Florian Fainelli Cc: Andrew Lunn Cc: David S. Miller Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index d361835b315d..8dbd59baa34d 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -279,6 +279,7 @@ config MARVELL_PHY config MESON_GXL_PHY tristate "Amlogic Meson GXL Internal PHY" + depends on ARCH_MESON || COMPILE_TEST ---help--- Currently has a driver for the Amlogic Meson GXL Internal PHY From 6bb629db5e7daa619f5242b6ad93e4dd9bf7432c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Jan 2017 08:51:32 -0800 Subject: [PATCH 08/35] tcp: do not export tcp_peer_is_proven() After commit 1fb6f159fd21 ("tcp: add tcp_conn_request"), tcp_peer_is_proven() no longer needs to be exported. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_metrics.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index d46f4d5b1c62..ba8f02d0f283 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -606,7 +606,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, return ret; } -EXPORT_SYMBOL_GPL(tcp_peer_is_proven); void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst) { From dc647ec88e029307e60e6bf9988056605f11051a Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 10 Jan 2017 09:30:51 +0100 Subject: [PATCH 09/35] net: socket: Make unnecessarily global sockfs_setattr() static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sockfs_setattr() static as it is not used outside of net/socket.c This fixes the following GCC warning: net/socket.c:534:5: warning: no previous prototype for ‘sockfs_setattr’ [-Wmissing-prototypes] Fixes: 86741ec25462 ("net: core: Add a UID field to struct sock.") Cc: Lorenzo Colitti Signed-off-by: Tobias Klauser Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index a8c2307590b8..0758e13754e2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -533,7 +533,7 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, return used; } -int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) +static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) { int err = simple_setattr(dentry, iattr); From 8fb280616878b81c0790a0c33acbeec59c5711f4 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 10 Jan 2017 17:04:06 +0800 Subject: [PATCH 10/35] r8152: split rtl8152_suspend function Split rtl8152_suspend() into rtl8152_system_suspend() and rtl8152_rumtime_suspend(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 57 +++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7dc61228c55b..c5e6d88de4e4 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3576,39 +3576,62 @@ static bool delay_autosuspend(struct r8152 *tp) return false; } -static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) +static int rtl8152_rumtime_suspend(struct r8152 *tp) { - struct r8152 *tp = usb_get_intfdata(intf); struct net_device *netdev = tp->netdev; int ret = 0; - mutex_lock(&tp->control); - - if (PMSG_IS_AUTO(message)) { - if (netif_running(netdev) && delay_autosuspend(tp)) { + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { + if (delay_autosuspend(tp)) { ret = -EBUSY; goto out1; } - set_bit(SELECTIVE_SUSPEND, &tp->flags); - } else { - netif_device_detach(netdev); + clear_bit(WORK_ENABLE, &tp->flags); + usb_kill_urb(tp->intr_urb); + napi_disable(&tp->napi); + rtl_stop_rx(tp); + tp->rtl_ops.autosuspend_en(tp, true); + napi_enable(&tp->napi); } + set_bit(SELECTIVE_SUSPEND, &tp->flags); + +out1: + return ret; +} + +static int rtl8152_system_suspend(struct r8152 *tp) +{ + struct net_device *netdev = tp->netdev; + int ret = 0; + + netif_device_detach(netdev); + if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); napi_disable(&tp->napi); - if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { - rtl_stop_rx(tp); - tp->rtl_ops.autosuspend_en(tp, true); - } else { - cancel_delayed_work_sync(&tp->schedule); - tp->rtl_ops.down(tp); - } + cancel_delayed_work_sync(&tp->schedule); + tp->rtl_ops.down(tp); napi_enable(&tp->napi); } -out1: + + return ret; +} + +static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct r8152 *tp = usb_get_intfdata(intf); + int ret; + + mutex_lock(&tp->control); + + if (PMSG_IS_AUTO(message)) + ret = rtl8152_rumtime_suspend(tp); + else + ret = rtl8152_system_suspend(tp); + mutex_unlock(&tp->control); return ret; From 75dc692eda114cb234a46cb11893a9c3ea520934 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 10 Jan 2017 17:04:07 +0800 Subject: [PATCH 11/35] r8152: fix rx issue for runtime suspend Pause the rx and make sure the rx fifo is empty when the autosuspend occurs. If the rx data comes when the driver is canceling the rx urb, the host controller would stop getting the data from the device and continue it after next rx urb is submitted. That is, one continuing data is split into two different urb buffers. That let the driver take the data as a rx descriptor, and unexpected behavior happens. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c5e6d88de4e4..be418563cb18 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3582,17 +3582,42 @@ static int rtl8152_rumtime_suspend(struct r8152 *tp) int ret = 0; if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) { + u32 rcr = 0; + if (delay_autosuspend(tp)) { ret = -EBUSY; goto out1; } + if (netif_carrier_ok(netdev)) { + u32 ocp_data; + + rcr = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR); + ocp_data = rcr & ~RCR_ACPT_ALL; + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); + rxdy_gated_en(tp, true); + ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, + PLA_OOB_CTRL); + if (!(ocp_data & RXFIFO_EMPTY)) { + rxdy_gated_en(tp, false); + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + ret = -EBUSY; + goto out1; + } + } + clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); - napi_disable(&tp->napi); - rtl_stop_rx(tp); + tp->rtl_ops.autosuspend_en(tp, true); - napi_enable(&tp->napi); + + if (netif_carrier_ok(netdev)) { + napi_disable(&tp->napi); + rtl_stop_rx(tp); + rxdy_gated_en(tp, false); + ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr); + napi_enable(&tp->napi); + } } set_bit(SELECTIVE_SUSPEND, &tp->flags); From d9584d8ccc06ba98f4fad8ec720de66b6659fd35 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Jan 2017 11:18:01 -0800 Subject: [PATCH 12/35] net: skb_flow_get_be16() can be static Removes following sparse complain : net/core/flow_dissector.c:70:8: warning: symbol 'skb_flow_get_be16' was not declared. Should it be static? Fixes: 972d3876faa8 ("flow dissector: ICMP support") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index fe4e1531976c..1b7673aac59d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -67,8 +67,8 @@ EXPORT_SYMBOL(skb_flow_dissector_init); * The function will try to retrieve a be32 entity at * offset poff */ -__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data, - int hlen) +static __be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, + void *data, int hlen) { __be16 *u, _u; From faf3a932fbeb77860226a8323eacb835edc98648 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 9 Jan 2017 11:58:34 -0800 Subject: [PATCH 13/35] net: dsa: Ensure validity of dst->ds[0] It is perfectly possible to have non zero indexed switches being present in a DSA switch tree, in such a case, we will be deferencing a NULL pointer while dsa_cpu_port_ethtool_{setup,restore}. Be more defensive and ensure that dst->ds[0] is valid before doing anything with it. Fixes: 0c73c523cf73 ("net: dsa: Initialize CPU port ethtool ops per tree") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/dsa2.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 5fff951a0a49..da3862124545 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -394,9 +394,11 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst) return err; } - err = dsa_cpu_port_ethtool_setup(dst->ds[0]); - if (err) - return err; + if (dst->ds[0]) { + err = dsa_cpu_port_ethtool_setup(dst->ds[0]); + if (err) + return err; + } /* If we use a tagging format that doesn't have an ethertype * field, make sure that all packets from this point on get @@ -433,7 +435,8 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst) dsa_ds_unapply(dst, ds); } - dsa_cpu_port_ethtool_restore(dst->ds[0]); + if (dst->ds[0]) + dsa_cpu_port_ethtool_restore(dst->ds[0]); pr_info("DSA: tree %d unapplied\n", dst->tree); dst->applied = false; From 3512a1ad56174308a9fd3e10f4b1e3e152e9ec01 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 9 Jan 2017 14:31:58 -0800 Subject: [PATCH 14/35] net: qrtr: Mark 'buf' as little endian Failure to mark this pointer as __le32 causes checkers like sparse to complain: net/qrtr/qrtr.c:274:16: warning: incorrect type in assignment (different base types) net/qrtr/qrtr.c:274:16: expected unsigned int [unsigned] [usertype] net/qrtr/qrtr.c:274:16: got restricted __le32 [usertype] net/qrtr/qrtr.c:275:16: warning: incorrect type in assignment (different base types) net/qrtr/qrtr.c:275:16: expected unsigned int [unsigned] [usertype] net/qrtr/qrtr.c:275:16: got restricted __le32 [usertype] net/qrtr/qrtr.c:276:16: warning: incorrect type in assignment (different base types) net/qrtr/qrtr.c:276:16: expected unsigned int [unsigned] [usertype] net/qrtr/qrtr.c:276:16: got restricted __le32 [usertype] Silence it. Cc: Bjorn Andersson Signed-off-by: Stephen Boyd Acked-by: Bjorn Andersson Signed-off-by: David S. Miller --- net/qrtr/qrtr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index c985ecbe9bd6..ae5ac175b2be 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -252,7 +252,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, const int pkt_len = 20; struct qrtr_hdr *hdr; struct sk_buff *skb; - u32 *buf; + __le32 *buf; skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); if (!skb) @@ -269,7 +269,7 @@ static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, hdr->dst_node_id = cpu_to_le32(dst_node); hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); - buf = (u32 *)skb_put(skb, pkt_len); + buf = (__le32 *)skb_put(skb, pkt_len); memset(buf, 0, pkt_len); buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); buf[1] = cpu_to_le32(src_node); From 5d722b3024f6762addb8642ffddc9f275b5107ae Mon Sep 17 00:00:00 2001 From: "Anna, Suman" Date: Mon, 9 Jan 2017 21:48:56 -0600 Subject: [PATCH 15/35] net: add the AF_QIPCRTR entries to family name tables Commit bdabad3e363d ("net: Add Qualcomm IPC router") introduced a new address family. Update the family name tables accordingly so that the lockdep initialization can use the proper names for this family. Cc: Courtney Cavin Cc: Bjorn Andersson Signed-off-by: Suman Anna Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index f560e0826009..4eca27dc5c94 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -222,7 +222,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" , - "sk_lock-AF_MAX" + "sk_lock-AF_QIPCRTR", "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -239,7 +239,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" , - "slock-AF_MAX" + "slock-AF_QIPCRTR", "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -256,7 +256,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" , - "clock-AF_MAX" + "clock-AF_QIPCRTR", "clock-AF_MAX" }; /* From dc5367bcc556e97555fc94a32cd1aadbebdff47e Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Tue, 10 Jan 2017 17:10:34 +0100 Subject: [PATCH 16/35] net/af_iucv: don't use paged skbs for TX on HiperSockets With commit e53743994e21 ("af_iucv: use paged SKBs for big outbound messages"), we transmit paged skbs for both of AF_IUCV's transport modes (IUCV or HiperSockets). The qeth driver for Layer 3 HiperSockets currently doesn't support NETIF_F_SG, so these skbs would just be linearized again by the stack. Avoid that overhead by using paged skbs only for IUCV transport. cc stable, since this also circumvents a significant skb leak when sending large messages (where the skb then needs to be linearized). Signed-off-by: Julian Wiedmann Signed-off-by: Ursula Braun Cc: # v4.8+ Fixes: e53743994e21 ("af_iucv: use paged SKBs for big outbound messages") Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index cfb9e5f4e28f..13190b38f22e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1044,7 +1044,8 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; struct iucv_sock *iucv = iucv_sk(sk); - size_t headroom, linear; + size_t headroom = 0; + size_t linear; struct sk_buff *skb; struct iucv_message txmsg = {0}; struct cmsghdr *cmsg; @@ -1122,18 +1123,20 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - headroom = (iucv->transport == AF_IUCV_TRANS_HIPER) - ? sizeof(struct af_iucv_trans_hdr) + ETH_HLEN : 0; - if (headroom + len < PAGE_SIZE) { + if (iucv->transport == AF_IUCV_TRANS_HIPER) { + headroom = sizeof(struct af_iucv_trans_hdr) + ETH_HLEN; linear = len; } else { - /* In nonlinear "classic" iucv skb, - * reserve space for iucv_array - */ - if (iucv->transport != AF_IUCV_TRANS_HIPER) - headroom += sizeof(struct iucv_array) * - (MAX_SKB_FRAGS + 1); - linear = PAGE_SIZE - headroom; + if (len < PAGE_SIZE) { + linear = len; + } else { + /* In nonlinear "classic" iucv skb, + * reserve space for iucv_array + */ + headroom = sizeof(struct iucv_array) * + (MAX_SKB_FRAGS + 1); + linear = PAGE_SIZE - headroom; + } } skb = sock_alloc_send_pskb(sk, headroom + linear, len - linear, noblock, &err, 0); From 9f9b74ef896792399dc7b5121896b9c963db80fb Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 10 Jan 2017 09:41:49 -0800 Subject: [PATCH 17/35] mlx4: Return EOPNOTSUPP instead of ENOTSUPP In commit b45f0674b997 ("mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs"), it changed EOPNOTSUPP to ENOTSUPP by mistake. This patch fixes it. Fixes: b45f0674b997 ("mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs") Signed-off-by: Martin KaFai Lau Acked-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index edbe200ac2fa..4910d9af1933 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2277,7 +2277,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (priv->tx_ring_num[TX_XDP] && !mlx4_en_check_xdp_mtu(dev, new_mtu)) - return -ENOTSUPP; + return -EOPNOTSUPP; dev->mtu = new_mtu; From 1272ce87fa017ca4cf32920764d879656b7a005a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2017 12:24:01 -0800 Subject: [PATCH 18/35] gro: Enter slow-path if there is no tailroom The GRO path has a fast-path where we avoid calling pskb_may_pull and pskb_expand by directly accessing frag0. However, this should only be done if we have enough tailroom in the skb as otherwise we'll have to expand it later anyway. This patch adds the check by capping frag0_len with the skb tailroom. Fixes: cb18978cbf45 ("gro: Open-code final pskb_may_pull") Reported-by: Slava Shwartsman Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 8db5a0b4b520..88d2907ca2cd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4441,7 +4441,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); + NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), + skb->end - skb->tail); } } From 57ea52a865144aedbcd619ee0081155e658b6f7d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2017 12:24:15 -0800 Subject: [PATCH 19/35] gro: Disable frag0 optimization on IPv6 ext headers The GRO fast path caches the frag0 address. This address becomes invalid if frag0 is modified by pskb_may_pull or its variants. So whenever that happens we must disable the frag0 optimization. This is usually done through the combination of gro_header_hard and gro_header_slow, however, the IPv6 extension header path did the pulling directly and would continue to use the GRO fast path incorrectly. This patch fixes it by disabling the fast path when we enter the IPv6 extension header path. Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address") Reported-by: Slava Shwartsman Signed-off-by: Herbert Xu Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 +++++++-- net/ipv6/ip6_offload.c | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 994f7423a74b..9bde9558b596 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2477,14 +2477,19 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) return NAPI_GRO_CB(skb)->frag0_len < hlen; } +static inline void skb_gro_frag0_invalidate(struct sk_buff *skb) +{ + NAPI_GRO_CB(skb)->frag0 = NULL; + NAPI_GRO_CB(skb)->frag0_len = 0; +} + static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { if (!pskb_may_pull(skb, hlen)) return NULL; - NAPI_GRO_CB(skb)->frag0 = NULL; - NAPI_GRO_CB(skb)->frag0_len = 0; + skb_gro_frag0_invalidate(skb); return skb->data + offset; } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 89c59e656f44..fc7b4017ba24 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -191,6 +191,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { __pskb_pull(skb, skb_gro_offset(skb)); + skb_gro_frag0_invalidate(skb); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); skb_reset_transport_header(skb); From cd3776638003b3362d9d7d1f27bcb80c276e2c28 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 10 Jan 2017 22:33:30 +0200 Subject: [PATCH 20/35] net/mlx5e: Properly handle offloading of source udp port for IP tunnels We can offload the matching on source udp port of ip tunnels for decapsulation. We can not offload setting source udp port for tunnels as part of encapsulation. Fix both the code that deals with matching offload (decap) and the code that deal with encap offload to align with that. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Fixes: bbd00f7e2349 ('net/mlx5e: Add TC tunnel release action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f8829b517156..b60feceab63b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -239,10 +239,6 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) return -EOPNOTSUPP; - /* udp src port isn't supported */ - if (memchr_inv(&mask->src, 0, sizeof(mask->src))) - return -EOPNOTSUPP; - if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) parse_vxlan_attr(spec, f); @@ -254,6 +250,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, + udp_sport, ntohs(mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, + udp_sport, ntohs(key->src)); } else { /* udp dst port must be given */ return -EOPNOTSUPP; } @@ -796,6 +796,10 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) return -EOPNOTSUPP; + /* setting udp src port isn't supported */ + if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) + return -EOPNOTSUPP; + if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { info.tp_dst = key->tp_dst; From 2fcd82e9be133e4ec777f66fd67a8fb8e7748b1b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 10 Jan 2017 22:33:31 +0200 Subject: [PATCH 21/35] net/mlx5e: Warn when rejecting offload attempts of IP tunnels We silently reject offloading of IPv6 tunnels, non vxlan tunnels, vxlan tunnels where the dst port to match is not provided, etc. Be a bit more verbose and print a warning so the user better realizes what went wrong here and can fix it. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Fixes: bbd00f7e2349 ('net/mlx5e: Add TC tunnel release action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b60feceab63b..d2fc055f054a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -237,13 +237,16 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, /* Full udp dst port must be given */ if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) - return -EOPNOTSUPP; + goto vxlan_match_offload_err; if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) parse_vxlan_attr(spec, f); - else + else { + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); return -EOPNOTSUPP; + } MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); @@ -255,7 +258,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src)); } else { /* udp dst port must be given */ - return -EOPNOTSUPP; +vxlan_match_offload_err: + netdev_warn(priv->netdev, + "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); + return -EOPNOTSUPP; } if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { @@ -346,6 +352,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (parse_tunnel_attr(priv, spec, f)) return -EOPNOTSUPP; break; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + netdev_warn(priv->netdev, + "IPv6 tunnel decap offload isn't supported\n"); default: return -EOPNOTSUPP; } @@ -792,13 +801,17 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, int tunnel_type; int err; - /* udp dst port must be given */ + /* udp dst port must be set */ if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst))) - return -EOPNOTSUPP; + goto vxlan_encap_offload_err; /* setting udp src port isn't supported */ - if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) + if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { +vxlan_encap_offload_err: + netdev_warn(priv->netdev, + "must set udp dst port and not set udp src port\n"); return -EOPNOTSUPP; + } if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { @@ -806,6 +819,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, info.tun_id = tunnel_id_to_key32(key->tun_id); tunnel_type = MLX5_HEADER_TYPE_VXLAN; } else { + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); return -EOPNOTSUPP; } @@ -813,6 +828,9 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, case AF_INET: info.daddr = key->u.ipv4.dst; break; + case AF_INET6: + netdev_warn(priv->netdev, + "IPv6 tunnel encap offload isn't supported\n"); default: return -EOPNOTSUPP; } From a42485eb0ee458da3a0df82b0e42ab58ce76be05 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 10 Jan 2017 22:33:32 +0200 Subject: [PATCH 22/35] net/mlx5e: TC ipv4 tunnel encap offload error flow fixes When the route lookup fails we should return the actual error. When the neigh isn't valid, we should return -EOPNOTSUPP as done in similar cases along the code. When the offload can't take place as of invalid neigh etc, we must release the neigh. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index d2fc055f054a..b62f06f3f7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -656,17 +656,14 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv, #if IS_ENABLED(CONFIG_INET) rt = ip_route_output_key(dev_net(mirred_dev), fl4); - if (IS_ERR(rt)) { - pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr); - return -EOPNOTSUPP; - } + if (IS_ERR(rt)) + return PTR_ERR(rt); #else return -EOPNOTSUPP; #endif if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) { - pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n", - __func__); + pr_warn("%s: can't offload, devices not on same HW e-switch\n", __func__); ip_rt_put(rt); return -EOPNOTSUPP; } @@ -727,8 +724,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device **out_dev) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); + struct neighbour *n = NULL; struct flowi4 fl4 = {}; - struct neighbour *n; char *encap_header; int encap_size; __be32 saddr; @@ -759,7 +756,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, e->out_dev = *out_dev; if (!(n->nud_state & NUD_VALID)) { - err = -ENOTSUPP; + pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); + err = -EOPNOTSUPP; goto out; } @@ -781,6 +779,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, encap_size, encap_header, &e->encap_id); out: + if (err && n) + neigh_release(n); kfree(encap_header); return err; } From 2e72eb438ce5ea9fa118edfd9a5f628c2a69111a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 10 Jan 2017 22:33:33 +0200 Subject: [PATCH 23/35] net/mlx5e: Properly get address type of encapsulation IP headers As done elsewhere in our TC/flower offload code, the address type of the encapsulation IP headers should be realized accroding to the addr_type field of the encapsulation control dissector key, do that. Fixes: bbd00f7e2349 ('net/mlx5e: Add TC tunnel release action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b62f06f3f7e0..9cfddd9fc097 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -225,6 +225,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + struct flow_dissector_key_control *enc_control = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + f->key); + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { struct flow_dissector_key_ports *key = skb_flow_dissector_target(f->dissector, @@ -264,7 +269,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return -EOPNOTSUPP; } - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_dissector_key_ipv4_addrs *key = skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, @@ -286,10 +291,10 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, ntohl(key->dst)); - } - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); + } /* Enforce DMAC when offloading incoming tunneled flows. * Flow counters require a match on the DMAC. From 0827444d052ba5347900376dbdbf5d9065d091d4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 10 Jan 2017 22:33:34 +0200 Subject: [PATCH 24/35] net/mlx5e: Set inline mode requirements for matching on IP fragments For e-switch level matching on packets being an IP fragment, we need to make sure the source vport inline mode is L3, fix that. Fixes: 3f7d0eb42d59 ('net/mlx5e: Offload TC matching on packets being IP fragments') Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9cfddd9fc097..a35fa1eb0694 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -389,6 +389,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, key->flags & FLOW_DIS_IS_FRAGMENT); + + /* the HW doesn't need L3 inline to match on frag=no */ + if (key->flags & FLOW_DIS_IS_FRAGMENT) + *min_inline = MLX5_INLINE_MODE_IP; } } From a757d108dc1a053722215ee89116f8af9bba1525 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Tue, 10 Jan 2017 22:33:35 +0200 Subject: [PATCH 25/35] net/mlx5e: Fix kbuild warnings for uninitialized parameters kbuild warn about parameters that may be used uninitialized, fix it. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a35fa1eb0694..5dbc81de34ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -737,8 +737,8 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct flowi4 fl4 = {}; char *encap_header; int encap_size; - __be32 saddr; - int ttl; + __be32 saddr = 0; + int ttl = 0; int err; encap_header = kzalloc(max_encap_size, GFP_KERNEL); From 5e86397abe10aa4c884478a45e9a35b6a37d8d5d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 10 Jan 2017 22:33:36 +0200 Subject: [PATCH 26/35] net/mlx5e: Properly handle FW errors while adding TC rules When the firmware returns an error (common example is an attempt to add twice the same rule which is refused by the some FWs), we are not properly derefing/cleaning few resources allocated on the way. Examples are vport vlan deref under eswitch vlan offloads, and encap entry/neighbour deref under eswitch encapsulation offloads, fix that. Fixes: a54e20b4fcae ('net/mlx5e: Add basic TC tunnel set action for SRIOV offloads') Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5dbc81de34ee..118cea5e5489 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -161,15 +161,21 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, } } +/* we get here also when setting rule to the FW failed, etc. It means that the + * flow rule itself might not exist, but some offloading related to the actions + * should be cleaned. + */ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_fc *counter = NULL; - counter = mlx5_flow_rule_counter(flow->rule); - - mlx5_del_flow_rules(flow->rule); + if (!IS_ERR(flow->rule)) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } if (esw && esw->mode == SRIOV_OFFLOADS) { mlx5_eswitch_del_vlan_action(esw, flow->attr); @@ -177,8 +183,6 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5e_detach_encap(priv, flow); } - mlx5_fc_destroy(priv->mdev, counter); - if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; @@ -1017,7 +1021,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_free; + goto err_del_rule; } err = rhashtable_insert_fast(&tc->ht, &flow->node, @@ -1028,7 +1032,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto out; err_del_rule: - mlx5_del_flow_rules(flow->rule); + mlx5e_tc_del_flow(priv, flow); err_free: kfree(flow); From 3deef8cea3efcaeeae240bb00541de66abb9bfa0 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Jan 2017 22:33:37 +0200 Subject: [PATCH 27/35] net/mlx5e: Un-register uplink representor on nic_disable The code before this patch registered uplink e-Switch representor on nic_enable and unregistered on nic_cleanup, the right place for this unregister is in nic_disable. Fixes: 127ea380acc9 ("net/mlx5: Add Representors registration API") Signed-off-by: Saeed Mahameed Reviewed-by: Mohamad Haj Yahia Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1236b27b1493..2b7dd315020c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3675,14 +3675,8 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - mlx5e_vxlan_cleanup(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5_eswitch_unregister_vport_rep(esw, 0); - if (priv->xdp_prog) bpf_prog_put(priv->xdp_prog); } @@ -3807,9 +3801,14 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) static void mlx5e_nic_disable(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + queue_work(priv->wq, &priv->set_rx_mode_work); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); mlx5e_disable_async_events(priv); - mlx5_lag_remove(priv->mdev); + mlx5_lag_remove(mdev); } static const struct mlx5e_profile mlx5e_nic_profile = { From 0bbcc0a8fc394d01988fe0263ccf7fddb77a12c3 Mon Sep 17 00:00:00 2001 From: Gil Rockah Date: Tue, 10 Jan 2017 22:33:38 +0200 Subject: [PATCH 28/35] net/mlx5e: Remove WARN_ONCE from adaptive moderation code When trying to do interface down or changing interface configuration under heavy traffic, some of the adaptive moderation corner cases can occur and leave a WARN_ONCE call trace in the kernel log. Those WARN_ONCE are meant for debug only, and should have been inserted only under debug. We avoid such call traces by removing those WARN_ONCE. Fixes: cb3c7fd4f839 ("net/mlx5e: Support adaptive RX coalescing") Signed-off-by: Gil Rockah Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index 1fffe48a93cc..cbfac06b7ffd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -109,7 +109,6 @@ static bool mlx5e_am_on_top(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); return true; case MLX5E_AM_GOING_RIGHT: return (am->steps_left > 1) && (am->steps_right == 1); @@ -123,7 +122,6 @@ static void mlx5e_am_turn(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: am->tune_state = MLX5E_AM_GOING_LEFT; @@ -144,7 +142,6 @@ static int mlx5e_am_step(struct mlx5e_rx_am *am) switch (am->tune_state) { case MLX5E_AM_PARKING_ON_TOP: case MLX5E_AM_PARKING_TIRED: - WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); break; case MLX5E_AM_GOING_RIGHT: if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) @@ -282,10 +279,8 @@ static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, u32 delta_us = ktime_us_delta(end->time, start->time); unsigned int npkts = end->pkt_ctr - start->pkt_ctr; - if (!delta_us) { - WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); + if (!delta_us) return; - } curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; From 5e44fca5047054f1762813751626b5245e0da022 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 10 Jan 2017 22:33:39 +0200 Subject: [PATCH 29/35] net/mlx5: Only cancel recovery work when cleaning up device Do not attempt to drain the health workqueue when unloading the device in the recovery flow, this can cause a deadlock when the recovery work tries to cancel itself with sync. Because the work is no longer unconditionally canceled when unloading, it must be explicitly canceled in the AER flow. fixes: 689a248df83b ("net/mlx5: Cancel recovery work in remove flow") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6547f22e6b9b..d01e9f21d469 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1195,7 +1195,8 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, { int err = 0; - mlx5_drain_health_wq(dev); + if (cleanup) + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { @@ -1359,9 +1360,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - /* In case of kernel call save the pci state */ + /* In case of kernel call save the pci state and drain the health wq */ if (state) { pci_save_state(pdev); + mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); } From 7cfd5fd5a9813f1430290d20c0fead9b4582a307 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Jan 2017 19:52:43 -0800 Subject: [PATCH 30/35] gro: use min_t() in skb_gro_reset_offset() On 32bit arches, (skb->end - skb->data) is not 'unsigned int', so we shall use min_t() instead of min() to avoid a compiler error. Fixes: 1272ce87fa01 ("gro: Enter slow-path if there is no tailroom") Reported-by: kernel test robot Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 88d2907ca2cd..07b307b0b414 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4441,8 +4441,9 @@ static void skb_gro_reset_offset(struct sk_buff *skb) pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); - NAPI_GRO_CB(skb)->frag0_len = min(skb_frag_size(frag0), - skb->end - skb->tail); + NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, + skb_frag_size(frag0), + skb->end - skb->tail); } } From 73b351473547e543e9c8166dd67fd99c64c15b0b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jan 2017 13:08:06 +0100 Subject: [PATCH 31/35] cgroup: move CONFIG_SOCK_CGROUP_DATA to init/Kconfig We now 'select SOCK_CGROUP_DATA' but Kconfig complains that this is not right when CONFIG_NET is disabled and there is no socket interface: warning: (CGROUP_BPF) selects SOCK_CGROUP_DATA which has unmet direct dependencies (NET) I don't know what the correct solution for this is, but simply removing the dependency on NET from SOCK_CGROUP_DATA by moving it out of the 'if NET' section avoids the warning and does not produce other build errors. Fixes: 483c4933ea09 ("cgroup: Fix CGROUP_BPF config") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- init/Kconfig | 4 ++++ net/Kconfig | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 223b734abccd..e1a937348a3e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1176,6 +1176,10 @@ config CGROUP_DEBUG Say N. +config SOCK_CGROUP_DATA + bool + default n + endif # CGROUPS config CHECKPOINT_RESTORE diff --git a/net/Kconfig b/net/Kconfig index a1005007224c..a29bb4b41c50 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -258,10 +258,6 @@ config XPS config HWBM bool -config SOCK_CGROUP_DATA - bool - default n - config CGROUP_NET_PRIO bool "Network priority cgroup" depends on CGROUPS From 7a18c5b9fb31a999afc62b0e60978aa896fc89e9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 10 Jan 2017 14:37:35 -0800 Subject: [PATCH 32/35] net: ipv4: Fix multipath selection with vrf fib_select_path does not call fib_select_multipath if oif is set in the flow struct. For VRF use cases oif is always set, so multipath route selection is bypassed. Use the FLOWI_FLAG_SKIP_NH_OIF to skip the oif check similar to what is done in fib_table_lookup. Add saddr and proto to the flow struct for the fib lookup done by the VRF driver to better match hash computation for a flow. Fixes: 613d09b30f8b ("net: Use VRF device index for lookups on TX") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 ++ net/ipv4/fib_semantics.c | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 23dfb0eac098..0a067708aa39 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -263,7 +263,9 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_proto = ip4h->protocol, .daddr = ip4h->daddr, + .saddr = ip4h->saddr, }; struct net *net = dev_net(vrf_dev); struct rtable *rt; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7a5b4c7d9a87..eba1546b5031 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1618,8 +1618,13 @@ void fib_select_multipath(struct fib_result *res, int hash) void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, int mp_hash) { + bool oif_check; + + oif_check = (fl4->flowi4_oif == 0 || + fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF); + #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + if (res->fi->fib_nhs > 1 && oif_check) { if (mp_hash < 0) mp_hash = get_hash_from_flowi4(fl4) >> 1; @@ -1629,7 +1634,7 @@ void fib_select_path(struct net *net, struct fib_result *res, #endif if (!res->prefixlen && res->table->tb_num_default > 1 && - res->type == RTN_UNICAST && !fl4->flowi4_oif) + res->type == RTN_UNICAST && oif_check) fib_select_default(fl4, res); if (!fl4->saddr) From eb004603c857f3e3bfcda437b6c68fd258c54960 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 10 Jan 2017 22:53:06 +0000 Subject: [PATCH 33/35] sctp: Fix spelling mistake: "Atempt" -> "Attempt" Trivial fix to spelling mistake in WARN_ONCE message Signed-off-by: Colin Ian King Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e54082699520..34efaa4ef2f6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1048,7 +1048,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) (new_transport->state == SCTP_PF))) new_transport = asoc->peer.active_path; if (new_transport->state == SCTP_UNCONFIRMED) { - WARN_ONCE(1, "Atempt to send packet on unconfirmed path."); + WARN_ONCE(1, "Attempt to send packet on unconfirmed path."); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; From a13c06525ab9ff442924e67df9393a5efa914c56 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 10 Jan 2017 23:13:45 +0000 Subject: [PATCH 34/35] net: phy: marvell: fix Marvell 88E1512 used in SGMII mode When an Marvell 88E1512 PHY is connected to a nic in SGMII mode, the fiber page is used for the SGMII host-side connection. The PHY driver notices that SUPPORTED_FIBRE is set, so it tries reading the fiber page for the link status, and ends up reading the MAC-side status instead of the outgoing (copper) link. This leads to incorrect results reported via ethtool. If the PHY is connected via SGMII to the host, ignore the fiber page. However, continue to allow the existing power management code to suspend and resume the fiber page. Fixes: 6cfb3bcc0641 ("Marvell phy: check link status in case of fiber link.") Signed-off-by: Russell King Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index e269262471a4..0b78210c0fa7 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1192,7 +1192,8 @@ static int marvell_read_status(struct phy_device *phydev) int err; /* Check the fiber mode first */ - if (phydev->supported & SUPPORTED_FIBRE) { + if (phydev->supported & SUPPORTED_FIBRE && + phydev->interface != PHY_INTERFACE_MODE_SGMII) { err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_FIBER); if (err < 0) goto error; From 24c63bbc18e25d5d8439422aa5fd2d66390b88eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 10 Jan 2017 15:22:25 -0800 Subject: [PATCH 35/35] net: vrf: do not allow table id 0 Frank reported that vrf devices can be created with a table id of 0. This breaks many of the run time table id checks and should not be allowed. Detect this condition at create time and fail with EINVAL. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Reported-by: Frank Kellermann Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0a067708aa39..454f907d419a 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1252,6 +1252,8 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); + if (vrf->tb_id == RT_TABLE_UNSPEC) + return -EINVAL; dev->priv_flags |= IFF_L3MDEV_MASTER;