From 197afc631413d96dc60acfc7970bdd4125d38cd3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 6 Nov 2020 16:02:51 -0800
Subject: [PATCH 001/115] libbpf: Don't attempt to load unused subprog as an
 entry-point BPF program

If BPF code contains unused BPF subprogram and there are no other subprogram
calls (which can realistically happen in real-world applications given
sufficiently smart Clang code optimizations), libbpf will erroneously assume
that subprograms are entry-point programs and will attempt to load them with
UNSPEC program type.

Fix by not relying on subcall instructions and rather detect it based on the
structure of BPF object's sections.

Fixes: 9a94f277c4fb ("tools: libbpf: restore the ability to load programs from .text section")
Reported-by: Dmitrii Banshchikov <dbanschikov@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20201107000251.256821-1-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c                        | 23 +++++++++++--------
 .../selftests/bpf/prog_tests/subprogs.c       |  6 +++++
 .../bpf/progs/test_subprogs_unused.c          | 21 +++++++++++++++++
 3 files changed, 40 insertions(+), 10 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_subprogs_unused.c

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 313034117070..28baee7ba1ca 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -560,8 +560,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		      const char *name, size_t sec_idx, const char *sec_name,
 		      size_t sec_off, void *insn_data, size_t insn_data_sz)
 {
-	int i;
-
 	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
 		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
 			sec_name, name, sec_off, insn_data_sz);
@@ -600,13 +598,6 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		goto errout;
 	memcpy(prog->insns, insn_data, insn_data_sz);
 
-	for (i = 0; i < prog->insns_cnt; i++) {
-		if (insn_is_subprog_call(&prog->insns[i])) {
-			obj->has_subcalls = true;
-			break;
-		}
-	}
-
 	return 0;
 errout:
 	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
@@ -3280,7 +3271,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
 static bool prog_is_subprog(const struct bpf_object *obj,
 			    const struct bpf_program *prog)
 {
-	return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls;
+	/* For legacy reasons, libbpf supports an entry-point BPF programs
+	 * without SEC() attribute, i.e., those in the .text section. But if
+	 * there are 2 or more such programs in the .text section, they all
+	 * must be subprograms called from entry-point BPF programs in
+	 * designated SEC()'tions, otherwise there is no way to distinguish
+	 * which of those programs should be loaded vs which are a subprogram.
+	 * Similarly, if there is a function/program in .text and at least one
+	 * other BPF program with custom SEC() attribute, then we just assume
+	 * .text programs are subprograms (even if they are not called from
+	 * other programs), because libbpf never explicitly supported mixing
+	 * SEC()-designated BPF programs and .text entry-point BPF programs.
+	 */
+	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 }
 
 struct bpf_program *
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
index a00abf58c037..3f3d2ac4dd57 100644
--- a/tools/testing/selftests/bpf/prog_tests/subprogs.c
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -3,12 +3,14 @@
 #include <test_progs.h>
 #include <time.h>
 #include "test_subprogs.skel.h"
+#include "test_subprogs_unused.skel.h"
 
 static int duration;
 
 void test_subprogs(void)
 {
 	struct test_subprogs *skel;
+	struct test_subprogs_unused *skel2;
 	int err;
 
 	skel = test_subprogs__open_and_load();
@@ -26,6 +28,10 @@ void test_subprogs(void)
 	CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
 	CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
 
+	skel2 = test_subprogs_unused__open_and_load();
+	ASSERT_OK_PTR(skel2, "unused_progs_skel");
+	test_subprogs_unused__destroy(skel2);
+
 cleanup:
 	test_subprogs__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
new file mode 100644
index 000000000000..75d975f8cf90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
@@ -0,0 +1,21 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__attribute__((maybe_unused)) __noinline int unused1(int x)
+{
+	return x + 1;
+}
+
+static __attribute__((maybe_unused)) __noinline int unused2(int x)
+{
+	return x + 2;
+}
+
+SEC("raw_tp/sys_enter")
+int main_prog(void *ctx)
+{
+	return 0;
+}

From abbaa433de07076fb8ef524b77ce55d94bad5fc5 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Sat, 7 Nov 2020 15:45:44 +0800
Subject: [PATCH 002/115] bpf: Fix passing zero to PTR_ERR() in
 bpf_btf_printf_prepare

There is a bug when passing zero to PTR_ERR() and return.
Fix the smatch error.

Fixes: c4d0bfb45068 ("bpf: Add bpf_snprintf_btf helper")
Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/1604735144-686-1-git-send-email-wangqing@vivo.com
---
 kernel/trace/bpf_trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4517c8b66518..5113fd423cdf 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1198,7 +1198,7 @@ static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
 	*btf = bpf_get_btf_vmlinux();
 
 	if (IS_ERR_OR_NULL(*btf))
-		return PTR_ERR(*btf);
+		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;
 
 	if (ptr->type_id > 0)
 		*btf_id = ptr->type_id;

From f16e631333a8f12ae8128826e695db4b2a528407 Mon Sep 17 00:00:00 2001
From: Kaixu Xia <kaixuxia@tencent.com>
Date: Wed, 11 Nov 2020 13:03:46 +0800
Subject: [PATCH 003/115] bpf: Fix unsigned 'datasec_id' compared with zero in
 check_pseudo_btf_id

The unsigned variable datasec_id is assigned a return value from the call
to check_pseudo_btf_id(), which may return negative error code.

This fixes the following coccicheck warning:

  ./kernel/bpf/verifier.c:9616:5-15: WARNING: Unsigned expression compared with zero: datasec_id > 0

Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()")
Reported-by: Tosk Robot <tencent_os_robot@tencent.com>
Signed-off-by: Kaixu Xia <kaixuxia@tencent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Cc: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/1605071026-25906-1-git-send-email-kaixuxia@tencent.com
---
 kernel/bpf/verifier.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6200519582a6..6204ec705d80 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9572,12 +9572,13 @@ static int check_pseudo_btf_id(struct bpf_verifier_env *env,
 			       struct bpf_insn *insn,
 			       struct bpf_insn_aux_data *aux)
 {
-	u32 datasec_id, type, id = insn->imm;
 	const struct btf_var_secinfo *vsi;
 	const struct btf_type *datasec;
 	const struct btf_type *t;
 	const char *sym_name;
 	bool percpu = false;
+	u32 type, id = insn->imm;
+	s32 datasec_id;
 	u64 addr;
 	int i;
 

From 94e2bd0b259ed39a755fdded47e6734acf1ce464 Mon Sep 17 00:00:00 2001
From: Claire Chang <tientzu@chromium.org>
Date: Tue, 10 Nov 2020 16:49:08 +0800
Subject: [PATCH 004/115] rfkill: Fix use-after-free in rfkill_resume()

If a device is getting removed or reprobed during resume, use-after-free
might happen. For example, h5_btrtl_resume() schedules a work queue for
device reprobing, which of course requires removal first.

If the removal happens in parallel with the device_resume() and wins the
race to acquire device_lock(), removal may remove the device from the PM
lists and all, but device_resume() is already running and will continue
when the lock can be acquired, thus calling rfkill_resume().

During this, if rfkill_set_block() is then called after the corresponding
*_unregister() and kfree() are called, there will be an use-after-free
in hci_rfkill_set_block():

BUG: KASAN: use-after-free in hci_rfkill_set_block+0x58/0xc0 [bluetooth]
...
Call trace:
  dump_backtrace+0x0/0x154
  show_stack+0x20/0x2c
  dump_stack+0xbc/0x12c
  print_address_description+0x88/0x4b0
  __kasan_report+0x144/0x168
  kasan_report+0x10/0x18
  check_memory_region+0x19c/0x1ac
  __kasan_check_write+0x18/0x24
  hci_rfkill_set_block+0x58/0xc0 [bluetooth]
  rfkill_set_block+0x9c/0x120
  rfkill_resume+0x34/0x70
  dpm_run_callback+0xf0/0x1f4
  device_resume+0x210/0x22c

Fix this by checking rfkill->registered in rfkill_resume(). device_del()
in rfkill_unregister() requires device_lock() and the whole rfkill_resume()
is also protected by the same lock via device_resume(), we can make sure
either the rfkill->registered is false before rfkill_resume() starts or the
rfkill device won't be unregistered before rfkill_resume() returns.

As async_resume() holds a reference to the device, at this level there can
be no use-after-free; only in the user that doesn't expect this scenario.

Fixes: 8589086f4efd ("Bluetooth: hci_h5: Turn off RTL8723BS on suspend, reprobe on resume")
Signed-off-by: Claire Chang <tientzu@chromium.org>
Link: https://lore.kernel.org/r/20201110084908.219088-1-tientzu@chromium.org
[edit commit message for clarity and add more info provided later]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/rfkill/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 971c73c7d34c..97101c55763d 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -876,6 +876,9 @@ static int rfkill_resume(struct device *dev)
 
 	rfkill->suspended = false;
 
+	if (!rfkill->registered)
+		return 0;
+
 	if (!rfkill->persistent) {
 		cur = !!(rfkill->state & RFKILL_BLOCK_SW);
 		rfkill_set_block(rfkill, cur);

From 1d18288555b3265f84d08f1f75582415e4ec343a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 11 Nov 2020 19:33:57 +0100
Subject: [PATCH 005/115] mac80211: fix memory leak on filtered powersave
 frames

After the status rework, ieee80211_tx_status_ext is leaking un-acknowledged
packets for stations in powersave mode.
To fix this, move the code handling those packets from __ieee80211_tx_status
into ieee80211_tx_status_ext

Reported-by: Tobias Waldvogel <tobias.waldvogel@gmail.com>
Fixes: 3318111cf63d ("mac80211: reduce duplication in tx status functions")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20201111183359.43528-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/status.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 6feb45135020..3485610755ef 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -49,7 +49,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
 	int ac;
 
 	if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER |
-			   IEEE80211_TX_CTL_AMPDU)) {
+			   IEEE80211_TX_CTL_AMPDU |
+			   IEEE80211_TX_CTL_HW_80211_ENCAP)) {
 		ieee80211_free_txskb(&local->hw, skb);
 		return;
 	}
@@ -915,15 +916,6 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
 			ieee80211_mpsp_trigger_process(
 				ieee80211_get_qos_ctl(hdr), sta, true, acked);
 
-		if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) {
-			/*
-			 * The STA is in power save mode, so assume
-			 * that this TX packet failed because of that.
-			 */
-			ieee80211_handle_filtered_frame(local, sta, skb);
-			return;
-		}
-
 		if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
 		    (ieee80211_is_data(hdr->frame_control)) &&
 		    (rates_idx != -1))
@@ -1150,6 +1142,12 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
 							    -info->status.ack_signal);
 				}
 			} else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
+				/*
+				 * The STA is in power save mode, so assume
+				 * that this TX packet failed because of that.
+				 */
+				if (skb)
+					ieee80211_handle_filtered_frame(local, sta, skb);
 				return;
 			} else if (noack_success) {
 				/* nothing to do here, do not account as lost */

From 4fe40b8e1566dad04c87fbf299049a1d0d4bd58d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 11 Nov 2020 19:33:58 +0100
Subject: [PATCH 006/115] mac80211: minstrel: remove deferred sampling code

Deferring sampling attempts to the second stage has some bad interactions
with drivers that process the rate table in hardware and use the probe flag
to indicate probing packets (e.g. most mt76 drivers). On affected drivers
it can lead to probing not working at all.

If the link conditions turn worse, it might not be such a good idea to
do a lot of sampling for lower rates in this case.

Fix this by simply skipping the sample attempt instead of deferring it,
but keep the checks that would allow it to be sampled if it was skipped
too often, but only if it has less than 95% success probability.

Also ensure that IEEE80211_TX_CTL_RATE_CTRL_PROBE is set for all probing
packets.

Cc: stable@vger.kernel.org
Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20201111183359.43528-2-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel.c | 25 ++++---------------------
 net/mac80211/rc80211_minstrel.h |  1 -
 2 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 86bc469a28bc..a8a940e97a9a 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -287,12 +287,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
 			mi->r[ndx].stats.success += success;
 	}
 
-	if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0))
-		mi->sample_packets++;
-
-	if (mi->sample_deferred > 0)
-		mi->sample_deferred--;
-
 	if (time_after(jiffies, mi->last_stats_update +
 				mp->update_interval / (mp->new_avg ? 2 : 1)))
 		minstrel_update_stats(mp, mi);
@@ -367,7 +361,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
 		return;
 
 	delta = (mi->total_packets * sampling_ratio / 100) -
-			(mi->sample_packets + mi->sample_deferred / 2);
+			mi->sample_packets;
 
 	/* delta < 0: no sampling required */
 	prev_sample = mi->prev_sample;
@@ -376,7 +370,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
 		return;
 
 	if (mi->total_packets >= 10000) {
-		mi->sample_deferred = 0;
 		mi->sample_packets = 0;
 		mi->total_packets = 0;
 	} else if (delta > mi->n_rates * 2) {
@@ -401,19 +394,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
 	 * rate sampling method should be used.
 	 * Respect such rates that are not sampled for 20 interations.
 	 */
-	if (mrr_capable &&
-	    msr->perfect_tx_time > mr->perfect_tx_time &&
-	    msr->stats.sample_skipped < 20) {
-		/* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
-		 * packets that have the sampling rate deferred to the
-		 * second MRR stage. Increase the sample counter only
-		 * if the deferred sample rate was actually used.
-		 * Use the sample_deferred counter to make sure that
-		 * the sampling is not done in large bursts */
-		info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-		rate++;
-		mi->sample_deferred++;
-	} else {
+	if (msr->perfect_tx_time < mr->perfect_tx_time ||
+	    msr->stats.sample_skipped >= 20) {
 		if (!msr->sample_limit)
 			return;
 
@@ -433,6 +415,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
 
 	rate->idx = mi->r[ndx].rix;
 	rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
+	info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
 }
 
 
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index dbb43bcd3c45..86cd80b3ffde 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -126,7 +126,6 @@ struct minstrel_sta_info {
 	u8 max_prob_rate;
 	unsigned int total_packets;
 	unsigned int sample_packets;
-	int sample_deferred;
 
 	unsigned int sample_row;
 	unsigned int sample_column;

From b2911a84396f72149dce310a3b64d8948212c1b3 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 11 Nov 2020 19:33:59 +0100
Subject: [PATCH 007/115] mac80211: minstrel: fix tx status processing corner
 case

Some drivers fill the status rate list without setting the rate index after
the final rate to -1. minstrel_ht already deals with this, but minstrel
doesn't, which causes it to get stuck at the lowest rate on these drivers.

Fix this by checking the count as well.

Cc: stable@vger.kernel.org
Fixes: cccf129f820e ("mac80211: add the 'minstrel' rate control algorithm")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20201111183359.43528-3-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index a8a940e97a9a..b13b1da19386 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -274,7 +274,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
 	success = !!(info->flags & IEEE80211_TX_STAT_ACK);
 
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
-		if (ar[i].idx < 0)
+		if (ar[i].idx < 0 || !ar[i].count)
 			break;
 
 		ndx = rix_to_ndx(mi, ar[i].idx);

From fd63729cc0a6872bdabd393ee933a969642e4076 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 11 Nov 2020 15:12:15 -0800
Subject: [PATCH 008/115] selftests/bpf: Fix unused attribute usage in
 subprogs_unused test

Correct attribute name is "unused". maybe_unused is a C++17 addition.
This patch fixes compilation warning during selftests compilation.

Fixes: 197afc631413 ("libbpf: Don't attempt to load unused subprog as an entry-point BPF program")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201111231215.1779147-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/test_subprogs_unused.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
index 75d975f8cf90..bc49e050d342 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
@@ -4,12 +4,12 @@
 
 const char LICENSE[] SEC("license") = "GPL";
 
-__attribute__((maybe_unused)) __noinline int unused1(int x)
+__attribute__((unused)) __noinline int unused1(int x)
 {
 	return x + 1;
 }
 
-static __attribute__((maybe_unused)) __noinline int unused2(int x)
+static __attribute__((unused)) __noinline int unused2(int x)
 {
 	return x + 2;
 }

From 9602182810cc15e241f06c63c90b828ef63d0507 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 12 Nov 2020 10:03:40 -0800
Subject: [PATCH 009/115] MAINTAINERS/bpf: Update Andrii's entry.

Andrii has been a de-facto maintainer for libbpf and other components.
Update maintainers entry to acknowledge his work de-jure.

The folks with git write permissions will continue to follow the rule
of not applying their own patches unless absolutely trivial.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201112180340.45265-1-alexei.starovoitov@gmail.com
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cd123d0a6a2d..008ee2bf753b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3243,10 +3243,10 @@ F:	drivers/iio/accel/bma400*
 BPF (Safe dynamic programs and tools)
 M:	Alexei Starovoitov <ast@kernel.org>
 M:	Daniel Borkmann <daniel@iogearbox.net>
+M:	Andrii Nakryiko <andrii@kernel.org>
 R:	Martin KaFai Lau <kafai@fb.com>
 R:	Song Liu <songliubraving@fb.com>
 R:	Yonghong Song <yhs@fb.com>
-R:	Andrii Nakryiko <andrii@kernel.org>
 R:	John Fastabend <john.fastabend@gmail.com>
 R:	KP Singh <kpsingh@chromium.org>
 L:	netdev@vger.kernel.org

From 1922a46b8c18cb09d33e06a6cc2e43844ac1b9d0 Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Thu, 12 Nov 2020 16:42:10 +1030
Subject: [PATCH 010/115] net/ncsi: Fix netlink registration

If a user unbinds and re-binds a NC-SI aware driver the kernel will
attempt to register the netlink interface at runtime. The structure is
marked __ro_after_init so registration fails spectacularly at this point.

 # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/unbind
 # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/bind
  ftgmac100 1e660000.ethernet: Read MAC address 52:54:00:12:34:56 from chip
  ftgmac100 1e660000.ethernet: Using NCSI interface
  8<--- cut here ---
  Unable to handle kernel paging request at virtual address 80a8f858
  pgd = 8c768dd6
  [80a8f858] *pgd=80a0841e(bad)
  Internal error: Oops: 80d [#1] SMP ARM
  CPU: 0 PID: 116 Comm: sh Not tainted 5.10.0-rc3-next-20201111-00003-gdd25b227ec1e #51
  Hardware name: Generic DT based system
  PC is at genl_register_family+0x1f8/0x6d4
  LR is at 0xff26ffff
  pc : [<8073f930>]    lr : [<ff26ffff>]    psr: 20000153
  sp : 8553bc80  ip : 81406244  fp : 8553bd04
  r10: 8085d12c  r9 : 80a8f73c  r8 : 85739000
  r7 : 00000017  r6 : 80a8f860  r5 : 80c8ab98  r4 : 80a8f858
  r3 : 00000000  r2 : 00000000  r1 : 81406130  r0 : 00000017
  Flags: nzCv  IRQs on  FIQs off  Mode SVC_32  ISA ARM  Segment none
  Control: 00c5387d  Table: 85524008  DAC: 00000051
  Process sh (pid: 116, stack limit = 0x1f1988d6)
 ...
  Backtrace:
  [<8073f738>] (genl_register_family) from [<80860ac0>] (ncsi_init_netlink+0x20/0x48)
   r10:8085d12c r9:80c8fb0c r8:85739000 r7:00000000 r6:81218000 r5:85739000
   r4:8121c000
  [<80860aa0>] (ncsi_init_netlink) from [<8085d740>] (ncsi_register_dev+0x1b0/0x210)
   r5:8121c400 r4:8121c000
  [<8085d590>] (ncsi_register_dev) from [<805a8060>] (ftgmac100_probe+0x6e0/0x778)
   r10:00000004 r9:80950228 r8:8115bc10 r7:8115ab00 r6:9eae2c24 r5:813b6f88
   r4:85739000
  [<805a7980>] (ftgmac100_probe) from [<805355ec>] (platform_drv_probe+0x58/0xa8)
   r9:80c76bb0 r8:00000000 r7:80cd4974 r6:80c76bb0 r5:8115bc10 r4:00000000
  [<80535594>] (platform_drv_probe) from [<80532d58>] (really_probe+0x204/0x514)
   r7:80cd4974 r6:00000000 r5:80cd4868 r4:8115bc10

Jakub pointed out that ncsi_register_dev is obviously broken, because
there is only one family so it would never work if there was more than
one ncsi netdev.

Fix the crash by registering the netlink family once on boot, and drop
the code to unregister it.

Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family")
Signed-off-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Samuel Mendoza-Jonas <sam@mendozajonas.com>
Link: https://lore.kernel.org/r/20201112061210.914621-1-joel@jms.id.au
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ncsi/ncsi-manage.c  |  5 -----
 net/ncsi/ncsi-netlink.c | 22 +++-------------------
 net/ncsi/ncsi-netlink.h |  3 ---
 3 files changed, 3 insertions(+), 27 deletions(-)

diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index f1be3e3f6425..a9cb355324d1 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1726,9 +1726,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 	ndp->ptype.dev = dev;
 	dev_add_pack(&ndp->ptype);
 
-	/* Set up generic netlink interface */
-	ncsi_init_netlink(dev);
-
 	pdev = to_platform_device(dev->dev.parent);
 	if (pdev) {
 		np = pdev->dev.of_node;
@@ -1892,8 +1889,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
 	list_del_rcu(&ndp->node);
 	spin_unlock_irqrestore(&ncsi_dev_lock, flags);
 
-	ncsi_unregister_netlink(nd->dev);
-
 	kfree(ndp);
 }
 EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index adddc7707aa4..bb5f1650f11c 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -766,24 +766,8 @@ static struct genl_family ncsi_genl_family __ro_after_init = {
 	.n_small_ops = ARRAY_SIZE(ncsi_ops),
 };
 
-int ncsi_init_netlink(struct net_device *dev)
+static int __init ncsi_init_netlink(void)
 {
-	int rc;
-
-	rc = genl_register_family(&ncsi_genl_family);
-	if (rc)
-		netdev_err(dev, "ncsi: failed to register netlink family\n");
-
-	return rc;
-}
-
-int ncsi_unregister_netlink(struct net_device *dev)
-{
-	int rc;
-
-	rc = genl_unregister_family(&ncsi_genl_family);
-	if (rc)
-		netdev_err(dev, "ncsi: failed to unregister netlink family\n");
-
-	return rc;
+	return genl_register_family(&ncsi_genl_family);
 }
+subsys_initcall(ncsi_init_netlink);
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
index 7502723fba83..39a1a9d7bf77 100644
--- a/net/ncsi/ncsi-netlink.h
+++ b/net/ncsi/ncsi-netlink.h
@@ -22,7 +22,4 @@ int ncsi_send_netlink_err(struct net_device *dev,
 			  struct nlmsghdr *nlhdr,
 			  int err);
 
-int ncsi_init_netlink(struct net_device *dev);
-int ncsi_unregister_netlink(struct net_device *dev);
-
 #endif /* __NCSI_NETLINK_H__ */

From e8aa6d520b448efc88670a98eccd196713639f2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= <vincent.stehle@laposte.net>
Date: Thu, 12 Nov 2020 09:48:33 +0100
Subject: [PATCH 011/115] net: ethernet: mtk-star-emac: return ok when xmit
 drops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ndo_start_xmit() method must return NETDEV_TX_OK if the DMA mapping
fails, after freeing the socket buffer.
Fix the mtk_star_netdev_start_xmit() function accordingly.

Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver")
Signed-off-by: Vincent Stehlé <vincent.stehle@laposte.net>
Acked-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Link: https://lore.kernel.org/r/20201112084833.21842-1-vincent.stehle@laposte.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 13250553263b..e56a26f797f2 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1053,7 +1053,7 @@ static int mtk_star_netdev_start_xmit(struct sk_buff *skb,
 err_drop_packet:
 	dev_kfree_skb(skb);
 	ndev->stats.tx_dropped++;
-	return NETDEV_TX_BUSY;
+	return NETDEV_TX_OK;
 }
 
 /* Returns the number of bytes sent or a negative number on the first

From baee1991fad928d6c8dd5be3197ecb413c420c97 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Thu, 12 Nov 2020 19:34:39 +0800
Subject: [PATCH 012/115] net: ethernet: mtk-star-emac: fix error return code
 in mtk_star_enable()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Acked-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Link: https://lore.kernel.org/r/1605180879-2573-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index e56a26f797f2..a8641a407c06 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -966,6 +966,7 @@ static int mtk_star_enable(struct net_device *ndev)
 				      mtk_star_adjust_link, 0, priv->phy_intf);
 	if (!priv->phydev) {
 		netdev_err(ndev, "failed to connect to PHY\n");
+		ret = -ENODEV;
 		goto err_free_irq;
 	}
 

From 4ee18c179e5e815fa5575e0d2db0c05795a804ee Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Thu, 12 Nov 2020 02:35:06 -0800
Subject: [PATCH 013/115] net: x25: Increase refcnt of "struct x25_neigh" in
 x25_rx_call_request

The x25_disconnect function in x25_subr.c would decrease the refcount of
"x25->neighbour" (struct x25_neigh) and reset this pointer to NULL.

However, the x25_rx_call_request function in af_x25.c, which is called
when we receive a connection request, does not increase the refcount when
it assigns the pointer.

Fix this issue by increasing the refcount of "struct x25_neigh" in
x25_rx_call_request.

This patch fixes frequent kernel crashes when using AF_X25 sockets.

Fixes: 4becb7ee5b3d ("net/x25: Fix x25_neigh refcnt leak when x25 disconnect")
Cc: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Link: https://lore.kernel.org/r/20201112103506.5875-1-xie.he.0141@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/x25/af_x25.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 046d3fee66a9..a10487e7574c 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1050,6 +1050,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	makex25->lci           = lci;
 	makex25->dest_addr     = dest_addr;
 	makex25->source_addr   = source_addr;
+	x25_neigh_hold(nb);
 	makex25->neighbour     = nb;
 	makex25->facilities    = facilities;
 	makex25->dte_facilities= dte_facilities;

From 7bc40aedf24d31d8bea80e1161e996ef4299fb10 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 12 Nov 2020 11:22:04 +0100
Subject: [PATCH 014/115] mac80211: free sta in sta_info_insert_finish() on
 errors

If sta_info_insert_finish() fails, we currently keep the station
around and free it only in the caller, but there's only one such
caller and it always frees it immediately.

As syzbot found, another consequence of this split is that we can
put things that sleep only into __cleanup_single_sta() and not in
sta_info_free(), but this is the only place that requires such of
sta_info_free() now.

Change this to free the station in sta_info_insert_finish(), in
which case we can still sleep. This will also let us unify the
cleanup code later.

Cc: stable@vger.kernel.org
Fixes: dcd479e10a05 ("mac80211: always wind down STA state")
Reported-by: syzbot+32c6c38c4812d22f2f0b@syzkaller.appspotmail.com
Reported-by: syzbot+4c81fe92e372d26c4246@syzkaller.appspotmail.com
Reported-by: syzbot+6a7fe9faf0d1d61bc24a@syzkaller.appspotmail.com
Reported-by: syzbot+abed06851c5ffe010921@syzkaller.appspotmail.com
Reported-by: syzbot+b7aeb9318541a1c709f1@syzkaller.appspotmail.com
Reported-by: syzbot+d5a9416c6cafe53b5dd0@syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/20201112112201.ee6b397b9453.I9c31d667a0ea2151441cc64ed6613d36c18a48e0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4fe284ff1ea3..ec6973ee88ef 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -705,7 +705,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
  out_drop_sta:
 	local->num_sta--;
 	synchronize_net();
-	__cleanup_single_sta(sta);
+	cleanup_single_sta(sta);
  out_err:
 	mutex_unlock(&local->sta_mtx);
 	kfree(sinfo);
@@ -724,19 +724,13 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
 
 	err = sta_info_insert_check(sta);
 	if (err) {
+		sta_info_free(local, sta);
 		mutex_unlock(&local->sta_mtx);
 		rcu_read_lock();
-		goto out_free;
+		return err;
 	}
 
-	err = sta_info_insert_finish(sta);
-	if (err)
-		goto out_free;
-
-	return 0;
- out_free:
-	sta_info_free(local, sta);
-	return err;
+	return sta_info_insert_finish(sta);
 }
 
 int sta_info_insert(struct sta_info *sta)

From 50431b45685b600fc2851a3f2b53e24643efe6d3 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Fri, 13 Nov 2020 19:51:52 +0800
Subject: [PATCH 015/115] tools, bpftool: Add missing close before bpftool net
 attach exit

progfd is created by prog_parse_fd() in do_attach() and before the latter
returns in case of success, the file descriptor should be closed.

Fixes: 04949ccc273e ("tools: bpftool: add net attach command to attach XDP on interface")
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201113115152.53178-1-wanghai38@huawei.com
---
 tools/bpf/bpftool/net.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 910e7bac6e9e..3fae61ef6339 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -578,8 +578,8 @@ static int do_attach(int argc, char **argv)
 
 	ifindex = net_parse_dev(&argc, &argv);
 	if (ifindex < 1) {
-		close(progfd);
-		return -EINVAL;
+		err = -EINVAL;
+		goto cleanup;
 	}
 
 	if (argc) {
@@ -587,8 +587,8 @@ static int do_attach(int argc, char **argv)
 			overwrite = true;
 		} else {
 			p_err("expected 'overwrite', got: '%s'?", *argv);
-			close(progfd);
-			return -EINVAL;
+			err = -EINVAL;
+			goto cleanup;
 		}
 	}
 
@@ -596,17 +596,17 @@ static int do_attach(int argc, char **argv)
 	if (is_prefix("xdp", attach_type_strings[attach_type]))
 		err = do_attach_detach_xdp(progfd, attach_type, ifindex,
 					   overwrite);
-
-	if (err < 0) {
+	if (err) {
 		p_err("interface %s attach failed: %s",
 		      attach_type_strings[attach_type], strerror(-err));
-		return err;
+		goto cleanup;
 	}
 
 	if (json_output)
 		jsonw_null(json_wtr);
-
-	return 0;
+cleanup:
+	close(progfd);
+	return err;
 }
 
 static int do_detach(int argc, char **argv)

From 2b5668733050fca85f0ab458c5b91732f9496a38 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 12 Nov 2020 13:15:46 +0200
Subject: [PATCH 016/115] net: ethernet: ti: cpsw: fix cpts irq after suspend

Depending on the SoC/platform the CPSW can completely lose context after a
suspend/resume cycle, including CPSW wrapper (WR) which will cause reset of
WR_C0_MISC_EN register, so CPTS IRQ will became disabled.

Fix it by moving CPTS IRQ enabling in cpsw_ndo_open() where CPTS is
actually started.

Fixes: 84ea9c0a95d7 ("net: ethernet: ti: cpsw: enable cpts irq")
Reported-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Tested-by: Tony Lindgren <tony@atomide.com>
Link: https://lore.kernel.org/r/20201112111546.20343-1-grygorii.strashko@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/cpsw.c     | 10 ++++++----
 drivers/net/ethernet/ti/cpsw_new.c |  9 ++++++---
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 9fd1f77190ad..fa2d1025cbb2 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -838,9 +838,12 @@ static int cpsw_ndo_open(struct net_device *ndev)
 		if (ret < 0)
 			goto err_cleanup;
 
-		if (cpts_register(cpsw->cpts))
-			dev_err(priv->dev, "error registering cpts device\n");
-
+		if (cpsw->cpts) {
+			if (cpts_register(cpsw->cpts))
+				dev_err(priv->dev, "error registering cpts device\n");
+			else
+				writel(0x10, &cpsw->wr_regs->misc_en);
+		}
 	}
 
 	cpsw_restore(priv);
@@ -1716,7 +1719,6 @@ static int cpsw_probe(struct platform_device *pdev)
 
 	/* Enable misc CPTS evnt_pend IRQ */
 	cpts_set_irqpoll(cpsw->cpts, false);
-	writel(0x10, &cpsw->wr_regs->misc_en);
 
 skip_cpts:
 	cpsw_notice(priv, probe,
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index f779d2e1b5c5..2f5e0ad23ad7 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -873,8 +873,12 @@ static int cpsw_ndo_open(struct net_device *ndev)
 		if (ret < 0)
 			goto err_cleanup;
 
-		if (cpts_register(cpsw->cpts))
-			dev_err(priv->dev, "error registering cpts device\n");
+		if (cpsw->cpts) {
+			if (cpts_register(cpsw->cpts))
+				dev_err(priv->dev, "error registering cpts device\n");
+			else
+				writel(0x10, &cpsw->wr_regs->misc_en);
+		}
 
 		napi_enable(&cpsw->napi_rx);
 		napi_enable(&cpsw->napi_tx);
@@ -2006,7 +2010,6 @@ static int cpsw_probe(struct platform_device *pdev)
 
 	/* Enable misc CPTS evnt_pend IRQ */
 	cpts_set_irqpoll(cpsw->cpts, false);
-	writel(0x10, &cpsw->wr_regs->misc_en);
 
 skip_cpts:
 	ret = cpsw_register_notifiers(cpsw);

From 8cf8821e15cd553339a5b48ee555a0439c2b2742 Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@akamai.com>
Date: Thu, 12 Nov 2020 20:58:15 -0500
Subject: [PATCH 017/115] net: Exempt multicast addresses from five-second
 neighbor lifetime

Commit 58956317c8de ("neighbor: Improve garbage collection")
guarantees neighbour table entries a five-second lifetime.  Processes
which make heavy use of multicast can fill the neighour table with
multicast addresses in five seconds.  At that point, neighbour entries
can't be GC-ed because they aren't five seconds old yet, the kernel
log starts to fill up with "neighbor table overflow!" messages, and
sends start to fail.

This patch allows multicast addresses to be thrown out before they've
lived out their five seconds.  This makes room for non-multicast
addresses and makes messages to all addresses more reliable in these
circumstances.

Fixes: 58956317c8de ("neighbor: Improve garbage collection")
Signed-off-by: Jeff Dike <jdike@akamai.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20201113015815.31397-1-jdike@akamai.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/neighbour.h | 1 +
 net/core/neighbour.c    | 2 ++
 net/ipv4/arp.c          | 6 ++++++
 net/ipv6/ndisc.c        | 7 +++++++
 4 files changed, 16 insertions(+)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 81ee17594c32..22ced1381ede 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -204,6 +204,7 @@ struct neigh_table {
 	int			(*pconstructor)(struct pneigh_entry *);
 	void			(*pdestructor)(struct pneigh_entry *);
 	void			(*proxy_redo)(struct sk_buff *skb);
+	int			(*is_multicast)(const void *pkey);
 	bool			(*allow_add)(const struct net_device *dev,
 					     struct netlink_ext_ack *extack);
 	char			*id;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e39e28b0a8d..9500d28a43b0 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -235,6 +235,8 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 
 			write_lock(&n->lock);
 			if ((n->nud_state == NUD_FAILED) ||
+			    (tbl->is_multicast &&
+			     tbl->is_multicast(n->primary_key)) ||
 			    time_after(tref, n->updated))
 				remove = true;
 			write_unlock(&n->lock);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 687971d83b4e..922dd73e5740 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -125,6 +125,7 @@ static int arp_constructor(struct neighbour *neigh);
 static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
 static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static void parp_redo(struct sk_buff *skb);
+static int arp_is_multicast(const void *pkey);
 
 static const struct neigh_ops arp_generic_ops = {
 	.family =		AF_INET,
@@ -156,6 +157,7 @@ struct neigh_table arp_tbl = {
 	.key_eq		= arp_key_eq,
 	.constructor	= arp_constructor,
 	.proxy_redo	= parp_redo,
+	.is_multicast	= arp_is_multicast,
 	.id		= "arp_cache",
 	.parms		= {
 		.tbl			= &arp_tbl,
@@ -928,6 +930,10 @@ static void parp_redo(struct sk_buff *skb)
 	arp_process(dev_net(skb->dev), NULL, skb);
 }
 
+static int arp_is_multicast(const void *pkey)
+{
+	return ipv4_is_multicast(*((__be32 *)pkey));
+}
 
 /*
  *	Receive an arp request from the device layer.
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 27f29b957ee7..76717478f173 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -81,6 +81,7 @@ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
 static int pndisc_constructor(struct pneigh_entry *n);
 static void pndisc_destructor(struct pneigh_entry *n);
 static void pndisc_redo(struct sk_buff *skb);
+static int ndisc_is_multicast(const void *pkey);
 
 static const struct neigh_ops ndisc_generic_ops = {
 	.family =		AF_INET6,
@@ -115,6 +116,7 @@ struct neigh_table nd_tbl = {
 	.pconstructor =	pndisc_constructor,
 	.pdestructor =	pndisc_destructor,
 	.proxy_redo =	pndisc_redo,
+	.is_multicast =	ndisc_is_multicast,
 	.allow_add  =   ndisc_allow_add,
 	.id =		"ndisc_cache",
 	.parms = {
@@ -1706,6 +1708,11 @@ static void pndisc_redo(struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+static int ndisc_is_multicast(const void *pkey)
+{
+	return ipv6_addr_is_multicast((struct in6_addr *)pkey);
+}
+
 static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
 {
 	struct inet6_dev *idev = __in6_dev_get(skb->dev);

From aa6306a8481e0223f3783d24045daea80897238e Mon Sep 17 00:00:00 2001
From: Steen Hegelund <steen.hegelund@microchip.com>
Date: Fri, 13 Nov 2020 10:11:16 +0100
Subject: [PATCH 018/115] net: phy: mscc: remove non-MACSec compatible phy

Selecting VSC8575 as a MACSec PHY was not correct

The relevant datasheet can be found here:
  - VSC8575: https://www.microchip.com/wwwproducts/en/VSC8575

History:
v1 -> v2:
   - Corrected the sha in the "Fixes:" tag

Fixes: 1bbe0ecc2a1a ("net: phy: mscc: macsec initialization")
Signed-off-by: Steen Hegelund <steen.hegelund@microchip.com>
Reviewed-by: Antoine Tenart <atenart@kernel.org>
Link: https://lore.kernel.org/r/20201113091116.1102450-1-steen.hegelund@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/mscc/mscc_macsec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c
index 6cf9b798b710..10be266e48e8 100644
--- a/drivers/net/phy/mscc/mscc_macsec.c
+++ b/drivers/net/phy/mscc/mscc_macsec.c
@@ -981,7 +981,6 @@ int vsc8584_macsec_init(struct phy_device *phydev)
 
 	switch (phydev->phy_id & phydev->drv->phy_id_mask) {
 	case PHY_ID_VSC856X:
-	case PHY_ID_VSC8575:
 	case PHY_ID_VSC8582:
 	case PHY_ID_VSC8584:
 		INIT_LIST_HEAD(&vsc8531->macsec_flows);

From 8c07205aea36ccebe9fc5f97287a8bc416cea197 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Fri, 13 Nov 2020 19:32:36 +0800
Subject: [PATCH 019/115] net: marvell: prestera: fix error return code in
 prestera_pci_probe()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 4c2703dfd7fa ("net: marvell: prestera: Add PCI interface support")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Reviewed-by: Vadym Kochan <vadym.kochan@plvision.eu>
Acked-by: Vadym Kochan <vadym.kochan@plvision.eu>
Link: https://lore.kernel.org/r/20201113113236.71678-1-wanghai38@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/prestera/prestera_pci.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index 1b97adae542e..be5677623455 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -676,7 +676,8 @@ static int prestera_pci_probe(struct pci_dev *pdev,
 	if (err)
 		return err;
 
-	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(30))) {
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(30));
+	if (err) {
 		dev_err(&pdev->dev, "fail to set DMA mask\n");
 		goto err_dma_mask;
 	}
@@ -702,8 +703,10 @@ static int prestera_pci_probe(struct pci_dev *pdev,
 	dev_info(fw->dev.dev, "Prestera FW is ready\n");
 
 	fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI, 1);
-	if (!fw->wq)
+	if (!fw->wq) {
+		err = -ENOMEM;
 		goto err_wq_alloc;
+	}
 
 	INIT_WORK(&fw->evt_work, prestera_fw_evt_work_fn);
 

From 81e329e93b860b31c216b40eb5e1373db0ffe0ba Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Thu, 12 Nov 2020 18:45:41 +0200
Subject: [PATCH 020/115] net: ethernet: ti: am65-cpts: update ret when
 ptp_clock is ERROR

We always have to update the value of ret, otherwise the
 error value may be the previous one.

Fixes: f6bd59526ca5 ("net: ethernet: ti: introduce am654 common platform time sync driver")
Signed-off-by: Wang Qing <wangqing@vivo.com>
[grygorii.strashko@ti.com: fix build warn, subj add fixes tag]
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20201112164541.3223-1-grygorii.strashko@ti.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/am65-cpts.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
index 75056c14b161..5dc60ecabe56 100644
--- a/drivers/net/ethernet/ti/am65-cpts.c
+++ b/drivers/net/ethernet/ti/am65-cpts.c
@@ -1001,8 +1001,7 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs,
 	if (IS_ERR_OR_NULL(cpts->ptp_clock)) {
 		dev_err(dev, "Failed to register ptp clk %ld\n",
 			PTR_ERR(cpts->ptp_clock));
-		if (!cpts->ptp_clock)
-			ret = -ENODEV;
+		ret = cpts->ptp_clock ? PTR_ERR(cpts->ptp_clock) : -ENODEV;
 		goto refclk_disable;
 	}
 	cpts->phc_index = ptp_clock_index(cpts->ptp_clock);

From 9c2e14b48119b39446031d29d994044ae958d8fc Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Tue, 10 Nov 2020 16:16:40 -0800
Subject: [PATCH 021/115] ip_tunnels: Set tunnel option flag when tunnel
 metadata is present

Currently, we may set the tunnel option flag when the size of metadata
is zero.  For example, we set TUNNEL_GENEVE_OPT in the receive function
no matter the geneve option is present or not.  As this may result in
issues on the tunnel flags consumers, this patch fixes the issue.

Related discussion:
* https://lore.kernel.org/netdev/1604448694-19351-1-git-send-email-yihung.wei@gmail.com/T/#u

Fixes: 256c87c17c53 ("net: check tunnel option type in tunnel flags")
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Link: https://lore.kernel.org/r/1605053800-74072-1-git-send-email-yihung.wei@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/geneve.c     | 3 +--
 include/net/ip_tunnels.h | 7 ++++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index d07008a818df..1426bfc009bc 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -224,8 +224,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
 	if (ip_tunnel_collect_metadata() || gs->collect_md) {
 		__be16 flags;
 
-		flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
-			(gnvh->oam ? TUNNEL_OAM : 0) |
+		flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
 			(gnvh->critical ? TUNNEL_CRIT_OPT : 0);
 
 		tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 02ccd32542d0..61620677b034 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -478,9 +478,11 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
 					   const void *from, int len,
 					   __be16 flags)
 {
-	memcpy(ip_tunnel_info_opts(info), from, len);
 	info->options_len = len;
-	info->key.tun_flags |= flags;
+	if (len > 0) {
+		memcpy(ip_tunnel_info_opts(info), from, len);
+		info->key.tun_flags |= flags;
+	}
 }
 
 static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
@@ -526,7 +528,6 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
 					   __be16 flags)
 {
 	info->options_len = 0;
-	info->key.tun_flags |= flags;
 }
 
 #endif /* CONFIG_INET */

From ceb736e1d45c253f5e86b185ca9b497cdd43063f Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Thu, 12 Nov 2020 16:09:50 +0800
Subject: [PATCH 022/115] ipv6: Fix error path to cancel the meseage

genlmsg_cancel() needs to be called in the error path of
inet6_fill_ifmcaddr and inet6_fill_ifacaddr to cancel
the message.

Fixes: 6ecf4c37eb3e ("ipv6: enable IFA_TARGET_NETNSID for RTM_GETADDR")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Link: https://lore.kernel.org/r/20201112080950.1476302-1-zhangqilong3@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/addrconf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 01146b66d666..8b6eb384bac7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5022,8 +5022,10 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
 		return -EMSGSIZE;
 
 	if (args->netnsid >= 0 &&
-	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) {
+		nlmsg_cancel(skb, nlh);
 		return -EMSGSIZE;
+	}
 
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 ||
@@ -5054,8 +5056,10 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 		return -EMSGSIZE;
 
 	if (args->netnsid >= 0 &&
-	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
+	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) {
+		nlmsg_cancel(skb, nlh);
 		return -EMSGSIZE;
+	}
 
 	put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex);
 	if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 ||

From f782e2c300a717233b64697affda3ea7aac00b2b Mon Sep 17 00:00:00 2001
From: Dmitrii Banshchikov <me@ubique.spb.ru>
Date: Fri, 13 Nov 2020 17:17:56 +0000
Subject: [PATCH 023/115] bpf: Relax return code check for subprograms

Currently verifier enforces return code checks for subprograms in the
same manner as it does for program entry points. This prevents returning
arbitrary scalar values from subprograms. Scalar type of returned values
is checked by btf_prepare_func_args() and hence it should be safe to
allow only scalars for now. Relax return code checks for subprograms and
allow any correct scalar values.

Fixes: 51c39bb1d5d10 (bpf: Introduce function-by-function verification)
Signed-off-by: Dmitrii Banshchikov <me@ubique.spb.ru>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201113171756.90594-1-me@ubique.spb.ru
---
 kernel/bpf/verifier.c                         | 15 +++++++++++++--
 .../bpf/prog_tests/test_global_funcs.c        |  1 +
 .../selftests/bpf/progs/test_global_func8.c   | 19 +++++++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_func8.c

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6204ec705d80..1388bf733071 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7786,9 +7786,11 @@ static int check_return_code(struct bpf_verifier_env *env)
 	struct tnum range = tnum_range(0, 1);
 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 	int err;
+	const bool is_subprog = env->cur_state->frame[0]->subprogno;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
-	if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+	if (!is_subprog &&
+	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
 	     prog_type == BPF_PROG_TYPE_LSM) &&
 	    !prog->aux->attach_func_proto->type)
 		return 0;
@@ -7808,6 +7810,16 @@ static int check_return_code(struct bpf_verifier_env *env)
 		return -EACCES;
 	}
 
+	reg = cur_regs(env) + BPF_REG_0;
+	if (is_subprog) {
+		if (reg->type != SCALAR_VALUE) {
+			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
+				reg_type_str[reg->type]);
+			return -EINVAL;
+		}
+		return 0;
+	}
+
 	switch (prog_type) {
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
@@ -7861,7 +7873,6 @@ static int check_return_code(struct bpf_verifier_env *env)
 		return 0;
 	}
 
-	reg = cur_regs(env) + BPF_REG_0;
 	if (reg->type != SCALAR_VALUE) {
 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
 			reg_type_str[reg->type]);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 193002b14d7f..32e4348b714b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -60,6 +60,7 @@ void test_test_global_funcs(void)
 		{ "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
 		{ "test_global_func6.o" , "modified ctx ptr R2" },
 		{ "test_global_func7.o" , "foo() doesn't return scalar" },
+		{ "test_global_func8.o" },
 	};
 	libbpf_print_fn_t old_print_fn = NULL;
 	int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c
new file mode 100644
index 000000000000..d55a6544b1ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func8.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline int foo(struct __sk_buff *skb)
+{
+	return bpf_get_prandom_u32();
+}
+
+SEC("cgroup_skb/ingress")
+int test_cls(struct __sk_buff *skb)
+{
+	if (!foo(skb))
+		return 0;
+
+	return 1;
+}

From 37344718bd7032639a02053e06b51697f90154ce Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Thu, 12 Nov 2020 19:23:59 +0800
Subject: [PATCH 024/115] net: phy: smsc: add missed clk_disable_unprepare in
 smsc_phy_probe()

Add the missing clk_disable_unprepare() before return from
smsc_phy_probe() in the error handling case.

Fixes: bedd8d78aba3 ("net: phy: smsc: LAN8710/20: add phy refclk in support")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605180239-1792-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/smsc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index ec97669be5c2..0fc39ac5ca88 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -291,8 +291,10 @@ static int smsc_phy_probe(struct phy_device *phydev)
 		return ret;
 
 	ret = clk_set_rate(priv->refclk, 50 * 1000 * 1000);
-	if (ret)
+	if (ret) {
+		clk_disable_unprepare(priv->refclk);
 		return ret;
+	}
 
 	return 0;
 }

From 92307069a96c07d9b6e74b96b79390e7cd7d2111 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Thu, 12 Nov 2020 12:43:35 +0100
Subject: [PATCH 025/115] net: dsa: mv88e6xxx: Avoid VTU corruption on 6097

As soon as you add the second port to a VLAN, all other port
membership configuration is overwritten with zeroes. The HW interprets
this as all ports being "unmodified members" of the VLAN.

In the simple case when all ports belong to the same VLAN, switching
will still work. But using multiple VLANs or trying to set multiple
ports as tagged members will not work.

On the 6352, doing a VTU GetNext op, followed by an STU GetNext op
will leave you with both the member- and state- data in the VTU/STU
data registers. But on the 6097 (which uses the same implementation),
the STU GetNext will override the information gathered from the VTU
GetNext.

Separate the two stages, parsing the result of the VTU GetNext before
doing the STU GetNext.

We opt to update the existing implementation for all applicable chips,
as opposed to creating a separate callback for 6097, because although
the previous implementation did work for (at least) 6352, the
datasheet does not mention the masking behavior.

Fixes: ef6fcea37f01 ("net: dsa: mv88e6xxx: get STU entry on VTU GetNext")
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Link: https://lore.kernel.org/r/20201112114335.27371-1-tobias@waldekranz.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mv88e6xxx/global1_vtu.c | 61 ++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 11 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index 48390b7b18ad..1048509a849b 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -125,11 +125,9 @@ static int mv88e6xxx_g1_vtu_vid_write(struct mv88e6xxx_chip *chip,
  * Offset 0x08: VTU/STU Data Register 2
  * Offset 0x09: VTU/STU Data Register 3
  */
-
-static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip,
-				      struct mv88e6xxx_vtu_entry *entry)
+static int mv88e6185_g1_vtu_stu_data_read(struct mv88e6xxx_chip *chip,
+					  u16 *regs)
 {
-	u16 regs[3];
 	int i;
 
 	/* Read all 3 VTU/STU Data registers */
@@ -142,12 +140,45 @@ static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip,
 			return err;
 	}
 
-	/* Extract MemberTag and PortState data */
+	return 0;
+}
+
+static int mv88e6185_g1_vtu_data_read(struct mv88e6xxx_chip *chip,
+				      struct mv88e6xxx_vtu_entry *entry)
+{
+	u16 regs[3];
+	int err;
+	int i;
+
+	err = mv88e6185_g1_vtu_stu_data_read(chip, regs);
+	if (err)
+		return err;
+
+	/* Extract MemberTag data */
 	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		unsigned int member_offset = (i % 4) * 4;
-		unsigned int state_offset = member_offset + 2;
 
 		entry->member[i] = (regs[i / 4] >> member_offset) & 0x3;
+	}
+
+	return 0;
+}
+
+static int mv88e6185_g1_stu_data_read(struct mv88e6xxx_chip *chip,
+				      struct mv88e6xxx_vtu_entry *entry)
+{
+	u16 regs[3];
+	int err;
+	int i;
+
+	err = mv88e6185_g1_vtu_stu_data_read(chip, regs);
+	if (err)
+		return err;
+
+	/* Extract PortState data */
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+		unsigned int state_offset = (i % 4) * 4 + 2;
+
 		entry->state[i] = (regs[i / 4] >> state_offset) & 0x3;
 	}
 
@@ -349,6 +380,10 @@ int mv88e6185_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
 		if (err)
 			return err;
 
+		err = mv88e6185_g1_stu_data_read(chip, entry);
+		if (err)
+			return err;
+
 		/* VTU DBNum[3:0] are located in VTU Operation 3:0
 		 * VTU DBNum[7:4] are located in VTU Operation 11:8
 		 */
@@ -374,11 +409,6 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
 		return err;
 
 	if (entry->valid) {
-		/* Fetch (and mask) VLAN PortState data from the STU */
-		err = mv88e6xxx_g1_vtu_stu_get(chip, entry);
-		if (err)
-			return err;
-
 		err = mv88e6185_g1_vtu_data_read(chip, entry);
 		if (err)
 			return err;
@@ -386,6 +416,15 @@ int mv88e6352_g1_vtu_getnext(struct mv88e6xxx_chip *chip,
 		err = mv88e6xxx_g1_vtu_fid_read(chip, entry);
 		if (err)
 			return err;
+
+		/* Fetch VLAN PortState data from the STU */
+		err = mv88e6xxx_g1_vtu_stu_get(chip, entry);
+		if (err)
+			return err;
+
+		err = mv88e6185_g1_stu_data_read(chip, entry);
+		if (err)
+			return err;
 	}
 
 	return 0;

From 65b422d9b61ba12c08150784e8012fa1892ad03e Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Thu, 12 Nov 2020 14:38:37 +0100
Subject: [PATCH 026/115] vsock: forward all packets to the host when no H2G is
 registered

Before commit c0cfa2d8a788 ("vsock: add multi-transports support"),
if a G2H transport was loaded (e.g. virtio transport), every packets
was forwarded to the host, regardless of the destination CID.
The H2G transports implemented until then (vhost-vsock, VMCI) always
responded with an error, if the destination CID was not
VMADDR_CID_HOST.

From that commit, we are using the remote CID to decide which
transport to use, so packets with remote CID > VMADDR_CID_HOST(2)
are sent only through H2G transport. If no H2G is available, packets
are discarded directly in the guest.

Some use cases (e.g. Nitro Enclaves [1]) rely on the old behaviour
to implement sibling VMs communication, so we restore the old
behavior when no H2G is registered.
It will be up to the host to discard packets if the destination is
not the right one. As it was already implemented before adding
multi-transport support.

Tested with nested QEMU/KVM by me and Nitro Enclaves by Andra.

[1] Documentation/virt/ne_overview.rst

Cc: Jorgen Hansen <jhansen@vmware.com>
Cc: Dexuan Cui <decui@microsoft.com>
Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
Reported-by: Andra Paraschiv <andraprs@amazon.com>
Tested-by: Andra Paraschiv <andraprs@amazon.com>
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20201112133837.34183-1-sgarzare@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/vmw_vsock/af_vsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index b4d7b8aba003..d10916ab4526 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -438,7 +438,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
 	case SOCK_STREAM:
 		if (vsock_use_local_transport(remote_cid))
 			new_transport = transport_local;
-		else if (remote_cid <= VMADDR_CID_HOST)
+		else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g)
 			new_transport = transport_g2h;
 		else
 			new_transport = transport_h2g;

From 057a10fa1f73d745c8e69aa54ab147715f5630ae Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 14 Nov 2020 13:22:53 +0800
Subject: [PATCH 027/115] sctp: change to hold/put transport for
 proto_unreach_timer

A call trace was found in Hangbin's Codenomicon testing with debug kernel:

  [ 2615.981988] ODEBUG: free active (active state 0) object type: timer_list hint: sctp_generate_proto_unreach_event+0x0/0x3a0 [sctp]
  [ 2615.995050] WARNING: CPU: 17 PID: 0 at lib/debugobjects.c:328 debug_print_object+0x199/0x2b0
  [ 2616.095934] RIP: 0010:debug_print_object+0x199/0x2b0
  [ 2616.191533] Call Trace:
  [ 2616.194265]  <IRQ>
  [ 2616.202068]  debug_check_no_obj_freed+0x25e/0x3f0
  [ 2616.207336]  slab_free_freelist_hook+0xeb/0x140
  [ 2616.220971]  kfree+0xd6/0x2c0
  [ 2616.224293]  rcu_do_batch+0x3bd/0xc70
  [ 2616.243096]  rcu_core+0x8b9/0xd00
  [ 2616.256065]  __do_softirq+0x23d/0xacd
  [ 2616.260166]  irq_exit+0x236/0x2a0
  [ 2616.263879]  smp_apic_timer_interrupt+0x18d/0x620
  [ 2616.269138]  apic_timer_interrupt+0xf/0x20
  [ 2616.273711]  </IRQ>

This is because it holds asoc when transport->proto_unreach_timer starts
and puts asoc when the timer stops, and without holding transport the
transport could be freed when the timer is still running.

So fix it by holding/putting transport instead for proto_unreach_timer
in transport, just like other timers in transport.

v1->v2:
  - Also use sctp_transport_put() for the "out_unlock:" path in
    sctp_generate_proto_unreach_event(), as Marcelo noticed.

Fixes: 50b5d6ad6382 ("sctp: Fix a race between ICMP protocol unreachable and connect()")
Reported-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Link: https://lore.kernel.org/r/102788809b554958b13b95d33440f5448113b8d6.1605331373.git.lucien.xin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sctp/input.c         | 4 ++--
 net/sctp/sm_sideeffect.c | 4 ++--
 net/sctp/transport.c     | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 55d4fc6f371d..d508f6f3dd08 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -449,7 +449,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
 		else {
 			if (!mod_timer(&t->proto_unreach_timer,
 						jiffies + (HZ/20)))
-				sctp_association_hold(asoc);
+				sctp_transport_hold(t);
 		}
 	} else {
 		struct net *net = sock_net(sk);
@@ -458,7 +458,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
 			 "encountered!\n", __func__);
 
 		if (del_timer(&t->proto_unreach_timer))
-			sctp_association_put(asoc);
+			sctp_transport_put(t);
 
 		sctp_do_sm(net, SCTP_EVENT_T_OTHER,
 			   SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 813d30767204..0948f14ce221 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -419,7 +419,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t)
 		/* Try again later.  */
 		if (!mod_timer(&transport->proto_unreach_timer,
 				jiffies + (HZ/20)))
-			sctp_association_hold(asoc);
+			sctp_transport_hold(transport);
 		goto out_unlock;
 	}
 
@@ -435,7 +435,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t)
 
 out_unlock:
 	bh_unlock_sock(sk);
-	sctp_association_put(asoc);
+	sctp_transport_put(transport);
 }
 
  /* Handle the timeout of the RE-CONFIG timer. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 806af58f4375..60fcf31cdcfb 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -133,7 +133,7 @@ void sctp_transport_free(struct sctp_transport *transport)
 
 	/* Delete the ICMP proto unreachable timer if it's active. */
 	if (del_timer(&transport->proto_unreach_timer))
-		sctp_association_put(transport->asoc);
+		sctp_transport_put(transport);
 
 	sctp_transport_put(transport);
 }

From 1ba86d4366e023d96df3dbe415eea7f1dc08c303 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 13 Nov 2020 16:30:40 -0500
Subject: [PATCH 028/115] netlabel: fix an uninitialized warning in
 netlbl_unlabel_staticlist()

Static checking revealed that a previous fix to
netlbl_unlabel_staticlist() leaves a stack variable uninitialized,
this patches fixes that.

Fixes: 866358ec331f ("netlabel: fix our progress tracking in netlbl_unlabel_staticlist()")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
Reviewed-by: James Morris <jamorris@linux.microsoft.com>
Link: https://lore.kernel.org/r/160530304068.15651.18355773009751195447.stgit@sifl
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/netlabel/netlabel_unlabeled.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index fc55c9116da0..ccb491642811 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1167,7 +1167,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	u32 skip_bkt = cb->args[0];
 	u32 skip_chain = cb->args[1];
 	u32 skip_addr4 = cb->args[2];
-	u32 iter_bkt, iter_chain, iter_addr4 = 0, iter_addr6 = 0;
+	u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
 	struct netlbl_unlhsh_iface *iface;
 	struct list_head *iter_list;
 	struct netlbl_af4list *addr4;

From e35df62e04cc6fc4b9d90d054732f138349ff9b1 Mon Sep 17 00:00:00 2001
From: Sven Van Asbroeck <thesven73@gmail.com>
Date: Thu, 12 Nov 2020 13:59:49 -0500
Subject: [PATCH 029/115] lan743x: fix issue causing intermittent kernel log
 warnings

When running this chip on arm imx6, we intermittently observe
the following kernel warning in the log, especially when the
system is under high load:

[   50.119484] ------------[ cut here ]------------
[   50.124377] WARNING: CPU: 0 PID: 303 at kernel/softirq.c:169 __local_bh_enable_ip+0x100/0x184
[   50.132925] IRQs not enabled as expected
[   50.159250] CPU: 0 PID: 303 Comm: rngd Not tainted 5.7.8 #1
[   50.164837] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   50.171395] [<c0111a38>] (unwind_backtrace) from [<c010be28>] (show_stack+0x10/0x14)
[   50.179162] [<c010be28>] (show_stack) from [<c05b9dec>] (dump_stack+0xac/0xd8)
[   50.186408] [<c05b9dec>] (dump_stack) from [<c0122e40>] (__warn+0xd0/0x10c)
[   50.193391] [<c0122e40>] (__warn) from [<c0123238>] (warn_slowpath_fmt+0x98/0xc4)
[   50.200892] [<c0123238>] (warn_slowpath_fmt) from [<c012b010>] (__local_bh_enable_ip+0x100/0x184)
[   50.209860] [<c012b010>] (__local_bh_enable_ip) from [<bf09ecbc>] (destroy_conntrack+0x48/0xd8 [nf_conntrack])
[   50.220038] [<bf09ecbc>] (destroy_conntrack [nf_conntrack]) from [<c0ac9b58>] (nf_conntrack_destroy+0x94/0x168)
[   50.230160] [<c0ac9b58>] (nf_conntrack_destroy) from [<c0a4aaa0>] (skb_release_head_state+0xa0/0xd0)
[   50.239314] [<c0a4aaa0>] (skb_release_head_state) from [<c0a4aadc>] (skb_release_all+0xc/0x24)
[   50.247946] [<c0a4aadc>] (skb_release_all) from [<c0a4b4cc>] (consume_skb+0x74/0x17c)
[   50.255796] [<c0a4b4cc>] (consume_skb) from [<c081a2dc>] (lan743x_tx_release_desc+0x120/0x124)
[   50.264428] [<c081a2dc>] (lan743x_tx_release_desc) from [<c081a98c>] (lan743x_tx_napi_poll+0x5c/0x18c)
[   50.273755] [<c081a98c>] (lan743x_tx_napi_poll) from [<c0a6b050>] (net_rx_action+0x118/0x4a4)
[   50.282306] [<c0a6b050>] (net_rx_action) from [<c0101364>] (__do_softirq+0x13c/0x53c)
[   50.290157] [<c0101364>] (__do_softirq) from [<c012b29c>] (irq_exit+0x150/0x17c)
[   50.297575] [<c012b29c>] (irq_exit) from [<c0196a08>] (__handle_domain_irq+0x60/0xb0)
[   50.305423] [<c0196a08>] (__handle_domain_irq) from [<c05d44fc>] (gic_handle_irq+0x4c/0x90)
[   50.313790] [<c05d44fc>] (gic_handle_irq) from [<c0100ed4>] (__irq_usr+0x54/0x80)
[   50.321287] Exception stack(0xecd99fb0 to 0xecd99ff8)
[   50.326355] 9fa0:                                     1cf1aa74 00000001 00000001 00000000
[   50.334547] 9fc0: 00000001 00000000 00000000 00000000 00000000 00000000 00004097 b6d17d14
[   50.342738] 9fe0: 00000001 b6d17c60 00000000 b6e71f94 800b0010 ffffffff
[   50.349364] irq event stamp: 2525027
[   50.352955] hardirqs last  enabled at (2525026): [<c0a6afec>] net_rx_action+0xb4/0x4a4
[   50.360892] hardirqs last disabled at (2525027): [<c0d6d2fc>] _raw_spin_lock_irqsave+0x1c/0x50
[   50.369517] softirqs last  enabled at (2524660): [<c01015b4>] __do_softirq+0x38c/0x53c
[   50.377446] softirqs last disabled at (2524693): [<c012b29c>] irq_exit+0x150/0x17c
[   50.385027] ---[ end trace c0b571db4bc8087d ]---

The driver is calling dev_kfree_skb() from code inside a spinlock,
where h/w interrupts are disabled. This is forbidden, as documented
in include/linux/netdevice.h. The correct function to use
dev_kfree_skb_irq(), or dev_kfree_skb_any().

Fix by using the correct dev_kfree_skb_xxx() functions:

in lan743x_tx_release_desc():
  called by lan743x_tx_release_completed_descriptors()
    called by in lan743x_tx_napi_poll()
    which holds a spinlock
  called by lan743x_tx_release_all_descriptors()
    called by lan743x_tx_close()
    which can-sleep
conclusion: use dev_kfree_skb_any()

in lan743x_tx_xmit_frame():
  which holds a spinlock
conclusion: use dev_kfree_skb_irq()

in lan743x_tx_close():
  which can-sleep
conclusion: use dev_kfree_skb()

in lan743x_rx_release_ring_element():
  called by lan743x_rx_close()
    which can-sleep
  called by lan743x_rx_open()
    which can-sleep
conclusion: use dev_kfree_skb()

Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
Signed-off-by: Sven Van Asbroeck <thesven73@gmail.com>
Link: https://lore.kernel.org/r/20201112185949.11315-1-TheSven73@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index e2c99d909247..18c053b860ab 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1307,13 +1307,13 @@ static void lan743x_tx_release_desc(struct lan743x_tx *tx,
 		goto clear_active;
 
 	if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_TIMESTAMP_REQUESTED)) {
-		dev_kfree_skb(buffer_info->skb);
+		dev_kfree_skb_any(buffer_info->skb);
 		goto clear_skb;
 	}
 
 	if (cleanup) {
 		lan743x_ptp_unrequest_tx_timestamp(tx->adapter);
-		dev_kfree_skb(buffer_info->skb);
+		dev_kfree_skb_any(buffer_info->skb);
 	} else {
 		ignore_sync = (buffer_info->flags &
 			       TX_BUFFER_INFO_FLAG_IGNORE_SYNC) != 0;
@@ -1623,7 +1623,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
 	if (required_number_of_descriptors >
 		lan743x_tx_get_avail_desc(tx)) {
 		if (required_number_of_descriptors > (tx->ring_size - 1)) {
-			dev_kfree_skb(skb);
+			dev_kfree_skb_irq(skb);
 		} else {
 			/* save to overflow buffer */
 			tx->overflow_skb = skb;
@@ -1656,7 +1656,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
 				   start_frame_length,
 				   do_timestamp,
 				   skb->ip_summed == CHECKSUM_PARTIAL)) {
-		dev_kfree_skb(skb);
+		dev_kfree_skb_irq(skb);
 		goto unlock;
 	}
 
@@ -1675,7 +1675,7 @@ static netdev_tx_t lan743x_tx_xmit_frame(struct lan743x_tx *tx,
 			 * frame assembler clean up was performed inside
 			 *	lan743x_tx_frame_add_fragment
 			 */
-			dev_kfree_skb(skb);
+			dev_kfree_skb_irq(skb);
 			goto unlock;
 		}
 	}

From 796a2665ca3e91ebaba7222f76fd9a035714e2d8 Mon Sep 17 00:00:00 2001
From: Sven Van Asbroeck <thesven73@gmail.com>
Date: Thu, 12 Nov 2020 15:47:41 -0500
Subject: [PATCH 030/115] lan743x: prevent entire kernel HANG on open, for some
 platforms

On arm imx6, when opening the chip's netdev, the whole Linux
kernel intermittently hangs/freezes.

This is caused by a bug in the driver code which tests if pcie
interrupts are working correctly, using the software interrupt:

1. open: enable the software interrupt
2. open: tell the chip to assert the software interrupt
3. open: wait for flag
4. ISR: acknowledge s/w interrupt, set flag
5. open: notice flag, disable the s/w interrupt, continue

Unfortunately the ISR only acknowledges the s/w interrupt, but
does not disable it. This will re-trigger the ISR in a tight
loop.

On some (lucky) platforms, open proceeds to disable the s/w
interrupt even while the ISR is 'spinning'. On arm imx6,
the spinning ISR does not allow open to proceed, resulting
in a hung Linux kernel.

Fix minimally by disabling the s/w interrupt in the ISR, which
will prevent it from spinning. This won't break anything because
the s/w interrupt is used as a one-shot interrupt.

Note that this is a minimal fix, overlooking many possible
cleanups, e.g.:
- lan743x_intr_software_isr() is completely redundant and reads
  INT_STS twice for no apparent reason
- disabling the s/w interrupt in lan743x_intr_test_isr() is now
  redundant, but harmless
- waiting on software_isr_flag can be converted from a sleeping
  poll loop to wait_event_timeout()

Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
Tested-by: Sven Van Asbroeck <thesven73@gmail.com> # arm imx6 lan7430
Signed-off-by: Sven Van Asbroeck <thesven73@gmail.com>
Link: https://lore.kernel.org/r/20201112204741.12375-1-TheSven73@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 18c053b860ab..b319c22c211c 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -148,7 +148,8 @@ static void lan743x_intr_software_isr(void *context)
 
 	int_sts = lan743x_csr_read(adapter, INT_STS);
 	if (int_sts & INT_BIT_SW_GP_) {
-		lan743x_csr_write(adapter, INT_STS, INT_BIT_SW_GP_);
+		/* disable the interrupt to prevent repeated re-triggering */
+		lan743x_csr_write(adapter, INT_EN_CLR, INT_BIT_SW_GP_);
 		intr->software_isr_flag = 1;
 	}
 }

From 56311a315da7ebc668dbcc2f1c99689cc10796c4 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Fri, 13 Nov 2020 09:09:02 +0800
Subject: [PATCH 031/115] net: stmmac: dwmac_lib: enlarge dma reset timeout

If the phy enables power saving technology, the dwmac's software reset
needs more time to complete, enlarge dma reset timeout to 200000us.

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Link: https://lore.kernel.org/r/20201113090902.5c7aab1a@xhacker.debian
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index cb87d31a99df..57a53a600aa5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -23,7 +23,7 @@ int dwmac_dma_reset(void __iomem *ioaddr)
 
 	return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value,
 				 !(value & DMA_BUS_MODE_SFT_RESET),
-				 10000, 100000);
+				 10000, 200000);
 }
 
 /* CSR1 enables the transmit DMA to check for new descriptor */

From 849920c703392957f94023f77ec89ca6cf119d43 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Fri, 13 Nov 2020 19:16:22 +0800
Subject: [PATCH 032/115] devlink: Add missing genlmsg_cancel() in
 devlink_nl_sb_port_pool_fill()

If sb_occ_port_pool_get() failed in devlink_nl_sb_port_pool_fill(),
msg should be canceled by genlmsg_cancel().

Fixes: df38dafd2559 ("devlink: implement shared buffer occupancy monitoring interface")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Link: https://lore.kernel.org/r/20201113111622.11040-1-wanghai38@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/devlink.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index ab4b1368904f..4b0211590aac 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1448,7 +1448,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
 		err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
 						pool_index, &cur, &max);
 		if (err && err != -EOPNOTSUPP)
-			return err;
+			goto sb_occ_get_failure;
 		if (!err) {
 			if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
 				goto nla_put_failure;
@@ -1461,8 +1461,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
 	return 0;
 
 nla_put_failure:
+	err = -EMSGSIZE;
+sb_occ_get_failure:
 	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
+	return err;
 }
 
 static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,

From c8c958a58fc67f353289986850a0edf553435702 Mon Sep 17 00:00:00 2001
From: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Date: Wed, 4 Nov 2020 03:09:05 +0530
Subject: [PATCH 033/115] can: af_can: prevent potential access of
 uninitialized member in can_rcv()

In can_rcv(), cfd->len is uninitialized when skb->len = 0, and this
uninitialized cfd->len is accessed nonetheless by pr_warn_once().

Fix this uninitialized variable access by checking cfd->len's validity
condition (cfd->len > CAN_MAX_DLEN) separately after the skb->len's
condition is checked, and appropriately modify the log messages that
are generated as well.
In case either of the required conditions fail, the skb is freed and
NET_RX_DROP is returned, same as before.

Fixes: 8cb68751c115 ("can: af_can: can_rcv(): replace WARN_ONCE by pr_warn_once")
Reported-by: syzbot+9bcb0c9409066696d3aa@syzkaller.appspotmail.com
Tested-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Signed-off-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Link: https://lore.kernel.org/r/20201103213906.24219-2-anant.thazhemadam@gmail.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/af_can.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/can/af_can.c b/net/can/af_can.c
index 6373ab9c5507..e8d4e3ef5322 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -677,16 +677,25 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
-		     cfd->len > CAN_MAX_DLEN)) {
-		pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
+			     dev->type, skb->len);
+		goto free_skb;
+	}
+
+	/* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */
+	if (unlikely(cfd->len > CAN_MAX_DLEN)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n",
 			     dev->type, skb->len, cfd->len);
-		kfree_skb(skb);
-		return NET_RX_DROP;
+		goto free_skb;
 	}
 
 	can_receive(skb, dev);
 	return NET_RX_SUCCESS;
+
+free_skb:
+	kfree_skb(skb);
+	return NET_RX_DROP;
 }
 
 static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,

From 9aa9379d8f868e91719333a7f063ccccc0579acc Mon Sep 17 00:00:00 2001
From: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Date: Wed, 4 Nov 2020 03:09:06 +0530
Subject: [PATCH 034/115] can: af_can: prevent potential access of
 uninitialized member in canfd_rcv()

In canfd_rcv(), cfd->len is uninitialized when skb->len = 0, and this
uninitialized cfd->len is accessed nonetheless by pr_warn_once().

Fix this uninitialized variable access by checking cfd->len's validity
condition (cfd->len > CANFD_MAX_DLEN) separately after the skb->len's
condition is checked, and appropriately modify the log messages that
are generated as well.
In case either of the required conditions fail, the skb is freed and
NET_RX_DROP is returned, same as before.

Fixes: d4689846881d ("can: af_can: canfd_rcv(): replace WARN_ONCE by pr_warn_once")
Reported-by: syzbot+9bcb0c9409066696d3aa@syzkaller.appspotmail.com
Tested-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Signed-off-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Link: https://lore.kernel.org/r/20201103213906.24219-3-anant.thazhemadam@gmail.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/af_can.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/can/af_can.c b/net/can/af_can.c
index e8d4e3ef5322..5d124c155904 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -703,16 +703,25 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
 
-	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
-		     cfd->len > CANFD_MAX_DLEN)) {
-		pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+	if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
+			     dev->type, skb->len);
+		goto free_skb;
+	}
+
+	/* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */
+	if (unlikely(cfd->len > CANFD_MAX_DLEN)) {
+		pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n",
 			     dev->type, skb->len, cfd->len);
-		kfree_skb(skb);
-		return NET_RX_DROP;
+		goto free_skb;
 	}
 
 	can_receive(skb, dev);
 	return NET_RX_SUCCESS;
+
+free_skb:
+	kfree_skb(skb);
+	return NET_RX_DROP;
 }
 
 /* af_can protocol functions */

From a1e654070a60d5d4f7cce59c38f4ca790bb79121 Mon Sep 17 00:00:00 2001
From: Alejandro Concepcion Rodriguez <alejandro@acoro.eu>
Date: Thu, 5 Nov 2020 21:51:47 +0000
Subject: [PATCH 035/115] can: dev: can_restart(): post buffer from the right
 context

netif_rx() is meant to be called from interrupt contexts. can_restart() may be
called by can_restart_work(), which is called from a worqueue, so it may run in
process context. Use netif_rx_ni() instead.

Fixes: 39549eef3587 ("can: CAN Network device driver and Netlink interface")
Co-developed-by: Loris Fauster <loris.fauster@ttcontrol.com>
Signed-off-by: Loris Fauster <loris.fauster@ttcontrol.com>
Signed-off-by: Alejandro Concepcion Rodriguez <alejandro@acoro.eu>
Link: https://lore.kernel.org/r/4e84162b-fb31-3a73-fa9a-9438b4bd5234@acoro.eu
[mkl: use netif_rx_ni() instead of netif_rx_any_context()]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 6dee4f8f2024..81e39d7507d8 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -592,7 +592,7 @@ static void can_restart(struct net_device *dev)
 
 	cf->can_id |= CAN_ERR_RESTARTED;
 
-	netif_rx(skb);
+	netif_rx_ni(skb);
 
 	stats->rx_packets++;
 	stats->rx_bytes += cf->can_dlc;

From 7968c7c79d3be8987feb8021f0c46e6866831408 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Sat, 14 Nov 2020 19:17:08 +0800
Subject: [PATCH 036/115] can: ti_hecc: Fix memleak in ti_hecc_probe

In the error handling, we should goto the probe_exit_candev
to free ndev to prevent memory leak.

Fixes: dabf54dd1c63 ("can: ti_hecc: Convert TI HECC driver to DT only driver")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Link: https://lore.kernel.org/r/20201114111708.3465543-1-zhangqilong3@huawei.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/ti_hecc.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 9913f5458279..2c22f40e12bd 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -881,7 +881,8 @@ static int ti_hecc_probe(struct platform_device *pdev)
 	priv->base = devm_platform_ioremap_resource_byname(pdev, "hecc");
 	if (IS_ERR(priv->base)) {
 		dev_err(&pdev->dev, "hecc ioremap failed\n");
-		return PTR_ERR(priv->base);
+		err = PTR_ERR(priv->base);
+		goto probe_exit_candev;
 	}
 
 	/* handle hecc-ram memory */
@@ -889,20 +890,22 @@ static int ti_hecc_probe(struct platform_device *pdev)
 							       "hecc-ram");
 	if (IS_ERR(priv->hecc_ram)) {
 		dev_err(&pdev->dev, "hecc-ram ioremap failed\n");
-		return PTR_ERR(priv->hecc_ram);
+		err = PTR_ERR(priv->hecc_ram);
+		goto probe_exit_candev;
 	}
 
 	/* handle mbx memory */
 	priv->mbx = devm_platform_ioremap_resource_byname(pdev, "mbx");
 	if (IS_ERR(priv->mbx)) {
 		dev_err(&pdev->dev, "mbx ioremap failed\n");
-		return PTR_ERR(priv->mbx);
+		err = PTR_ERR(priv->mbx);
+		goto probe_exit_candev;
 	}
 
 	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!irq) {
 		dev_err(&pdev->dev, "No irq resource\n");
-		goto probe_exit;
+		goto probe_exit_candev;
 	}
 
 	priv->ndev = ndev;
@@ -966,7 +969,7 @@ static int ti_hecc_probe(struct platform_device *pdev)
 	clk_put(priv->clk);
 probe_exit_candev:
 	free_candev(ndev);
-probe_exit:
+
 	return err;
 }
 

From 81c9c8e0adef3285336b942f93287c554c89e6c6 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 28 Aug 2019 21:16:55 +0200
Subject: [PATCH 037/115] can: mcba_usb: mcba_usb_start_xmit(): first fill skb,
 then pass to can_put_echo_skb()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver has to first fill the skb with data and then handle it to
can_put_echo_skb(). This patch moves the can_put_echo_skb() down, right before
sending the skb out via USB.

Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer")
Cc: Remigiusz Kołłątaj <remigiusz.kollataj@mobica.com>
Link: https://lore.kernel.org/r/20201111221204.1639007-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/mcba_usb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 5857b37dcd96..e97f2e0da6b0 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -326,8 +326,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb,
 	if (!ctx)
 		return NETDEV_TX_BUSY;
 
-	can_put_echo_skb(skb, priv->netdev, ctx->ndx);
-
 	if (cf->can_id & CAN_EFF_FLAG) {
 		/* SIDH    | SIDL                 | EIDH   | EIDL
 		 * 28 - 21 | 20 19 18 x x x 17 16 | 15 - 8 | 7 - 0
@@ -357,6 +355,8 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb,
 	if (cf->can_id & CAN_RTR_FLAG)
 		usb_msg.dlc |= MCBA_DLC_RTR_MASK;
 
+	can_put_echo_skb(skb, priv->netdev, ctx->ndx);
+
 	err = mcba_usb_xmit(priv, (struct mcba_usb_msg *)&usb_msg, ctx);
 	if (err)
 		goto xmit_failed;

From 8a68cc0d690c9e5730d676b764c6f059343b842c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 5 Nov 2020 11:24:27 +0000
Subject: [PATCH 038/115] can: peak_usb: fix potential integer overflow on
 shift of a int

The left shift of int 32 bit integer constant 1 is evaluated using 32 bit
arithmetic and then assigned to a signed 64 bit variable. In the case where
time_ref->adapter->ts_used_bits is 32 or more this can lead to an oveflow.
Avoid this by shifting using the BIT_ULL macro instead.

Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20201105112427.40688-1-colin.king@canonical.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/peak_usb/pcan_usb_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index c2764799f9ef..204ccb27d6d9 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -156,7 +156,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time)
 		if (time_ref->ts_dev_1 < time_ref->ts_dev_2) {
 			/* case when event time (tsw) wraps */
 			if (ts < time_ref->ts_dev_1)
-				delta_ts = 1 << time_ref->adapter->ts_used_bits;
+				delta_ts = BIT_ULL(time_ref->adapter->ts_used_bits);
 
 		/* Otherwise, sync time counter (ts_dev_2) has wrapped:
 		 * handle case when event time (tsn) hasn't.
@@ -168,7 +168,7 @@ void peak_usb_get_ts_time(struct peak_time_ref *time_ref, u32 ts, ktime_t *time)
 		 *              tsn            ts
 		 */
 		} else if (time_ref->ts_dev_1 < ts) {
-			delta_ts = -(1 << time_ref->adapter->ts_used_bits);
+			delta_ts = -BIT_ULL(time_ref->adapter->ts_used_bits);
 		}
 
 		/* add delay between last sync and event timestamps */

From 499aa923c56769274f81e60414b8de4912864b8d Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 14 Oct 2020 13:41:36 +0200
Subject: [PATCH 039/115] can: flexcan: flexcan_setup_stop_mode(): add missing
 "req_bit" to stop mode property comment

In the patch

    d9b081e3fc4b ("can: flexcan: remove ack_grp and ack_bit handling from driver")

the unused ack_grp and ack_bit were removed from the driver. However in the
comment, the "req_bit" was accidentally removed, too.

This patch adds back the "req_bit" bit.

Fixes: d9b081e3fc4b ("can: flexcan: remove ack_grp and ack_bit handling from driver")
Reported-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Link: http://lore.kernel.org/r/20201014114810.2911135-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 881799bd9c5e..4e8fdb6064bd 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1852,7 +1852,7 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev)
 		return -EINVAL;
 
 	/* stop mode property format is:
-	 * <&gpr req_gpr>.
+	 * <&gpr req_gpr req_bit>.
 	 */
 	ret = of_property_read_u32_array(np, "fsl,stop-mode", out_val,
 					 ARRAY_SIZE(out_val));

From b7ee5bc3e1006433601a058a6a7c24c5272635f4 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Sun, 8 Nov 2020 16:30:00 +0800
Subject: [PATCH 040/115] can: flexcan: fix failure handling of
 pm_runtime_get_sync()

pm_runtime_get_sync() will increment pm usage at first and it will resume the
device later. If runtime of the device has error or device is in inaccessible
state(or other error state), resume operation will fail. If we do not call put
operation to decrease the reference, it will result in reference leak in the
two functions flexcan_get_berr_counter() and flexcan_open().

Moreover, this device cannot enter the idle state and always stay busy or other
non-idle state later. So we should fix it through adding
pm_runtime_put_noidle().

Fixes: ca10989632d88 ("can: flexcan: implement can Runtime PM")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Link: https://lore.kernel.org/r/20201108083000.2599705-1-zhangqilong3@huawei.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 4e8fdb6064bd..d6a9cf0e9b60 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -728,8 +728,10 @@ static int flexcan_get_berr_counter(const struct net_device *dev,
 	int err;
 
 	err = pm_runtime_get_sync(priv->dev);
-	if (err < 0)
+	if (err < 0) {
+		pm_runtime_put_noidle(priv->dev);
 		return err;
+	}
 
 	err = __flexcan_get_berr_counter(dev, bec);
 
@@ -1654,8 +1656,10 @@ static int flexcan_open(struct net_device *dev)
 	}
 
 	err = pm_runtime_get_sync(priv->dev);
-	if (err < 0)
+	if (err < 0) {
+		pm_runtime_put_noidle(priv->dev);
 		return err;
+	}
 
 	err = open_candev(dev);
 	if (err)

From 3fcce133f0d9a50d3a23f8e2bc950197b4e03900 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 13 Apr 2020 16:10:13 +0200
Subject: [PATCH 041/115] can: tcan4x5x: replace depends on REGMAP_SPI with
 depends on SPI

regmap is a library function that gets selected by drivers that need it. No
driver modules should depend on it. Instead depends on SPI and select
REGMAP_SPI. Depending on REGMAP_SPI makes this driver only build if another
driver already selected REGMAP_SPI, as the symbol can't be selected through the
menu kernel configuration.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Link: http://lore.kernel.org/r/20200413141013.506613-1-enric.balletbo@collabora.com
Reviewed-by: Dan Murphy <dmurphy@ti.com>
Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/m_can/Kconfig b/drivers/net/can/m_can/Kconfig
index 48be627c85c2..5f9f8192dd0b 100644
--- a/drivers/net/can/m_can/Kconfig
+++ b/drivers/net/can/m_can/Kconfig
@@ -16,7 +16,8 @@ config CAN_M_CAN_PLATFORM
 
 config CAN_M_CAN_TCAN4X5X
 	depends on CAN_M_CAN
-	depends on REGMAP_SPI
+	depends on SPI
+	select REGMAP_SPI
 	tristate "TCAN4X5X M_CAN device"
 	help
 	  Say Y here if you want support for Texas Instruments TCAN4x5x

From 1ff203badbbf1738027c8395d5b40b0d462b6e4d Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Fri, 3 Jan 2020 11:30:34 +0100
Subject: [PATCH 042/115] can: tcan4x5x: tcan4x5x_can_probe(): add missing
 error checking for devm_regmap_init()

This patch adds the missing error checking when initializing the regmap
interface fails.

Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel")
Cc: Dan Murphy <dmurphy@ti.com>
Link: http://lore.kernel.org/r/20201019154233.1262589-7-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/tcan4x5x.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c
index eacd428e07e9..f058bd9104e9 100644
--- a/drivers/net/can/m_can/tcan4x5x.c
+++ b/drivers/net/can/m_can/tcan4x5x.c
@@ -487,6 +487,10 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
 
 	priv->regmap = devm_regmap_init(&spi->dev, &tcan4x5x_bus,
 					&spi->dev, &tcan4x5x_regmap);
+	if (IS_ERR(priv->regmap)) {
+		ret = PTR_ERR(priv->regmap);
+		goto out_clk;
+	}
 
 	ret = tcan4x5x_power_enable(priv->power, 1);
 	if (ret)

From c81d0b6ca665477c761f227807010762630b089f Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 10 Aug 2020 22:23:49 +0200
Subject: [PATCH 043/115] can: tcan4x5x: tcan4x5x_can_remove(): fix order of
 deregistration

Change the order in tcan4x5x_can_remove() to be the exact inverse of
tcan4x5x_can_probe(). First m_can_class_unregister(), then power down the
device.

Fixes: 5443c226ba91 ("can: tcan4x5x: Add tcan4x5x driver to the kernel")
Cc: Dan Murphy <dmurphy@ti.com>
Link: http://lore.kernel.org/r/20201019154233.1262589-10-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/tcan4x5x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c
index f058bd9104e9..4fdb7121403a 100644
--- a/drivers/net/can/m_can/tcan4x5x.c
+++ b/drivers/net/can/m_can/tcan4x5x.c
@@ -527,10 +527,10 @@ static int tcan4x5x_can_remove(struct spi_device *spi)
 {
 	struct tcan4x5x_priv *priv = spi_get_drvdata(spi);
 
-	tcan4x5x_power_enable(priv->power, 0);
-
 	m_can_class_unregister(priv->mcan_dev);
 
+	tcan4x5x_power_enable(priv->power, 0);
+
 	return 0;
 }
 

From cd0d83eab2e0c26fe87a10debfedbb23901853c1 Mon Sep 17 00:00:00 2001
From: Wu Bo <wubo.oduw@gmail.com>
Date: Wed, 29 Jan 2020 10:23:30 +0800
Subject: [PATCH 044/115] can: m_can: m_can_handle_state_change(): fix state
 change

m_can_handle_state_change() is called with the new_state as an argument.

In the switch statements for CAN_STATE_ERROR_ACTIVE, the comment and the
following code indicate that a CAN_STATE_ERROR_WARNING is handled.

This patch fixes this problem by changing the case to CAN_STATE_ERROR_WARNING.

Signed-off-by: Wu Bo <wubo.oduw@gmail.com>
Link: http://lore.kernel.org/r/20200129022330.21248-2-wubo.oduw@gmail.com
Cc: Dan Murphy <dmurphy@ti.com>
Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support")
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 02c5795b7393..63887e23d89c 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -665,7 +665,7 @@ static int m_can_handle_state_change(struct net_device *dev,
 	unsigned int ecr;
 
 	switch (new_state) {
-	case CAN_STATE_ERROR_ACTIVE:
+	case CAN_STATE_ERROR_WARNING:
 		/* error warning state */
 		cdev->can.can_stats.error_warning++;
 		cdev->can.state = CAN_STATE_ERROR_WARNING;
@@ -694,7 +694,7 @@ static int m_can_handle_state_change(struct net_device *dev,
 	__m_can_get_berr_counter(dev, &bec);
 
 	switch (new_state) {
-	case CAN_STATE_ERROR_ACTIVE:
+	case CAN_STATE_ERROR_WARNING:
 		/* error warning state */
 		cf->can_id |= CAN_ERR_CRTL;
 		cf->data[1] = (bec.txerr > bec.rxerr) ?

From a8c22f5b0c689a29f45ef4a110d09fd391debcbc Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Thu, 27 Feb 2020 12:38:29 -0600
Subject: [PATCH 045/115] can: m_can: m_can_class_free_dev(): introduce new
 function

This patch creates a common function that peripherials can call to free the
netdev device when failures occur.

Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework")
Reported-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Dan Murphy <dmurphy@ti.com>
Link: http://lore.kernel.org/r/20200227183829.21854-2-dmurphy@ti.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 6 ++++++
 drivers/net/can/m_can/m_can.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 63887e23d89c..f2c87b76e569 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1812,6 +1812,12 @@ struct m_can_classdev *m_can_class_allocate_dev(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(m_can_class_allocate_dev);
 
+void m_can_class_free_dev(struct net_device *net)
+{
+	free_candev(net);
+}
+EXPORT_SYMBOL_GPL(m_can_class_free_dev);
+
 int m_can_class_register(struct m_can_classdev *m_can_dev)
 {
 	int ret;
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index 49f42b50627a..b2699a7c9997 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -99,6 +99,7 @@ struct m_can_classdev {
 };
 
 struct m_can_classdev *m_can_class_allocate_dev(struct device *dev);
+void m_can_class_free_dev(struct net_device *net);
 int m_can_class_register(struct m_can_classdev *cdev);
 void m_can_class_unregister(struct m_can_classdev *cdev);
 int m_can_class_get_clocks(struct m_can_classdev *cdev);

From 85816aba460ceebed0047381395615891df68c8f Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Thu, 27 Feb 2020 12:38:29 -0600
Subject: [PATCH 046/115] can: m_can: Fix freeing of can device from
 peripherials

Fix leaking netdev device from peripherial devices. The call to allocate the
netdev device is made from and managed by the peripherial.

Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework")
Reported-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Dan Murphy <dmurphy@ti.com>
Link: http://lore.kernel.org/r/20200227183829.21854-2-dmurphy@ti.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c          |  3 ---
 drivers/net/can/m_can/m_can_platform.c | 23 +++++++++++++++--------
 drivers/net/can/m_can/tcan4x5x.c       | 26 ++++++++++++++++++--------
 3 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index f2c87b76e569..645101d19989 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1856,7 +1856,6 @@ int m_can_class_register(struct m_can_classdev *m_can_dev)
 	if (ret) {
 		if (m_can_dev->pm_clock_support)
 			pm_runtime_disable(m_can_dev->dev);
-		free_candev(m_can_dev->net);
 	}
 
 	return ret;
@@ -1914,8 +1913,6 @@ void m_can_class_unregister(struct m_can_classdev *m_can_dev)
 	unregister_candev(m_can_dev->net);
 
 	m_can_clk_stop(m_can_dev);
-
-	free_candev(m_can_dev->net);
 }
 EXPORT_SYMBOL_GPL(m_can_class_unregister);
 
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index e6d0cb9ee02f..161cb9be018c 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -67,32 +67,36 @@ static int m_can_plat_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		ret = -ENOMEM;
+		goto probe_fail;
+	}
 
 	mcan_class->device_data = priv;
 
-	m_can_class_get_clocks(mcan_class);
+	ret = m_can_class_get_clocks(mcan_class);
+	if (ret)
+		goto probe_fail;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "m_can");
 	addr = devm_ioremap_resource(&pdev->dev, res);
 	irq = platform_get_irq_byname(pdev, "int0");
 	if (IS_ERR(addr) || irq < 0) {
 		ret = -EINVAL;
-		goto failed_ret;
+		goto probe_fail;
 	}
 
 	/* message ram could be shared */
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "message_ram");
 	if (!res) {
 		ret = -ENODEV;
-		goto failed_ret;
+		goto probe_fail;
 	}
 
 	mram_addr = devm_ioremap(&pdev->dev, res->start, resource_size(res));
 	if (!mram_addr) {
 		ret = -ENOMEM;
-		goto failed_ret;
+		goto probe_fail;
 	}
 
 	priv->base = addr;
@@ -111,9 +115,10 @@ static int m_can_plat_probe(struct platform_device *pdev)
 
 	m_can_init_ram(mcan_class);
 
-	ret = m_can_class_register(mcan_class);
+	return m_can_class_register(mcan_class);
 
-failed_ret:
+probe_fail:
+	m_can_class_free_dev(mcan_class->net);
 	return ret;
 }
 
@@ -134,6 +139,8 @@ static int m_can_plat_remove(struct platform_device *pdev)
 
 	m_can_class_unregister(mcan_class);
 
+	m_can_class_free_dev(mcan_class->net);
+
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c
index 4fdb7121403a..e5d7d85e0b6d 100644
--- a/drivers/net/can/m_can/tcan4x5x.c
+++ b/drivers/net/can/m_can/tcan4x5x.c
@@ -440,14 +440,18 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
 		return -ENOMEM;
 
 	priv = devm_kzalloc(&spi->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		ret = -ENOMEM;
+		goto out_m_can_class_free_dev;
+	}
 
 	priv->power = devm_regulator_get_optional(&spi->dev, "vsup");
-	if (PTR_ERR(priv->power) == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
-	else
+	if (PTR_ERR(priv->power) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		goto out_m_can_class_free_dev;
+	} else {
 		priv->power = NULL;
+	}
 
 	mcan_class->device_data = priv;
 
@@ -460,8 +464,10 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
 	}
 
 	/* Sanity check */
-	if (freq < 20000000 || freq > TCAN4X5X_EXT_CLK_DEF)
-		return -ERANGE;
+	if (freq < 20000000 || freq > TCAN4X5X_EXT_CLK_DEF) {
+		ret = -ERANGE;
+		goto out_m_can_class_free_dev;
+	}
 
 	priv->reg_offset = TCAN4X5X_MCAN_OFFSET;
 	priv->mram_start = TCAN4X5X_MRAM_START;
@@ -518,8 +524,10 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
 		clk_disable_unprepare(mcan_class->cclk);
 		clk_disable_unprepare(mcan_class->hclk);
 	}
-
+ out_m_can_class_free_dev:
+	m_can_class_free_dev(mcan_class->net);
 	dev_err(&spi->dev, "Probe failed, err=%d\n", ret);
+
 	return ret;
 }
 
@@ -531,6 +539,8 @@ static int tcan4x5x_can_remove(struct spi_device *spi)
 
 	tcan4x5x_power_enable(priv->power, 0);
 
+	m_can_class_free_dev(priv->mcan_dev->net);
+
 	return 0;
 }
 

From a584e9bc1b7e88f24f8504886eafbe6c73d8a97c Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas@ti.com>
Date: Tue, 25 Aug 2020 11:24:42 +0530
Subject: [PATCH 047/115] can: m_can: m_can_stop(): set device to software init
 mode before closing

There might be some requests pending in the buffer when the interface close
sequence occurs. In some devices, these pending requests might lead to the
module not shutting down properly when m_can_clk_stop() is called.

Therefore, move the device to init state before potentially powering it down.

Fixes: e0d1f4816f2a ("can: m_can: add Bosch M_CAN controller support")
Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
Acked-by: Dan Murphy <dmurphy@ti.com>
Link: https://lore.kernel.org/r/20200825055442.16994-1-faiz_abbas@ti.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 645101d19989..e7264043f79a 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1414,6 +1414,9 @@ static void m_can_stop(struct net_device *dev)
 	/* disable all interrupts */
 	m_can_disable_all_interrupts(cdev);
 
+	/* Set init mode to disengage from the network */
+	m_can_config_endisable(cdev, true);
+
 	/* set the state as STOPPED */
 	cdev->can.state = CAN_STATE_STOPPED;
 }

From dd8088d5a8969dc2b42f71d7bc01c25c61a78066 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Tue, 10 Nov 2020 17:29:32 +0800
Subject: [PATCH 048/115] PM: runtime: Add pm_runtime_resume_and_get to deal
 with usage counter

In many case, we need to check return value of pm_runtime_get_sync, but
it brings a trouble to the usage counter processing. Many callers forget
to decrease the usage counter when it failed, which could resulted in
reference leak. It has been discussed a lot[0][1]. So we add a function
to deal with the usage counter for better coding.

[0]https://lkml.org/lkml/2020/6/14/88
[1]https://patchwork.ozlabs.org/project/linux-tegra/list/?series=178139
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Acked-by: Rafael J. Wysocki  <rafael.j.wysocki@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/pm_runtime.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 4b708f4e8eed..b492ae00cc90 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -386,6 +386,27 @@ static inline int pm_runtime_get_sync(struct device *dev)
 	return __pm_runtime_resume(dev, RPM_GET_PUT);
 }
 
+/**
+ * pm_runtime_resume_and_get - Bump up usage counter of a device and resume it.
+ * @dev: Target device.
+ *
+ * Resume @dev synchronously and if that is successful, increment its runtime
+ * PM usage counter. Return 0 if the runtime PM usage counter of @dev has been
+ * incremented or a negative error code otherwise.
+ */
+static inline int pm_runtime_resume_and_get(struct device *dev)
+{
+	int ret;
+
+	ret = __pm_runtime_resume(dev, RPM_GET_PUT);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
 /**
  * pm_runtime_put - Drop device usage counter and queue up "idle check" if 0.
  * @dev: Target device.

From da875fa5040b0f951cb4bf7efbf59f6dcff44d3c Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Tue, 10 Nov 2020 17:29:33 +0800
Subject: [PATCH 049/115] net: fec: Fix reference count leak in fec series ops

pm_runtime_get_sync() will increment pm usage at first and it will
resume the device later. If runtime of the device has error or
device is in inaccessible state(or other error state), resume
operation will fail. If we do not call put operation to decrease
the reference, it will result in reference count leak. Moreover,
this device cannot enter the idle state and always stay busy or other
non-idle state later. So we fixed it by replacing it with
pm_runtime_resume_and_get.

Fixes: 8fff755e9f8d0 ("net: fec: Ensure clocks are enabled while using mdio bus")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec_main.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d7919555250d..04f24c66cf36 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1808,7 +1808,7 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 	int ret = 0, frame_start, frame_addr, frame_op;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
 		return ret;
 
@@ -1867,11 +1867,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	int ret, frame_start, frame_addr;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
 		return ret;
-	else
-		ret = 0;
 
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
@@ -2275,7 +2273,7 @@ static void fec_enet_get_regs(struct net_device *ndev,
 	u32 i, off;
 	int ret;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
 		return;
 
@@ -2976,7 +2974,7 @@ fec_enet_open(struct net_device *ndev)
 	int ret;
 	bool reset_again;
 
-	ret = pm_runtime_get_sync(&fep->pdev->dev);
+	ret = pm_runtime_resume_and_get(&fep->pdev->dev);
 	if (ret < 0)
 		return ret;
 
@@ -3770,7 +3768,7 @@ fec_drv_remove(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	int ret;
 
-	ret = pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret < 0)
 		return ret;
 

From 9d9e937b1c8be97b424e3e11938e183fcde905c0 Mon Sep 17 00:00:00 2001
From: Georg Kohmann <geokohma@cisco.com>
Date: Wed, 11 Nov 2020 12:50:25 +0100
Subject: [PATCH 050/115] ipv6/netfilter: Discard first fragment not including
 all headers

Packets are processed even though the first fragment don't include all
headers through the upper layer header. This breaks TAHI IPv6 Core
Conformance Test v6LC.1.3.6.

Referring to RFC8200 SECTION 4.5: "If the first fragment does not include
all headers through an Upper-Layer header, then that fragment should be
discarded and an ICMP Parameter Problem, Code 3, message should be sent to
the source of the fragment, with the Pointer field set to zero."

The fragment needs to be validated the same way it is done in
commit 2efdaaaf883a ("IPv6: reply ICMP error if the first fragment don't
include all headers") for ipv6. Wrap the validation into a common function,
ipv6_frag_thdr_truncated() to check for truncation in the upper layer
header. This validation does not fullfill all aspects of RFC 8200,
section 4.5, but is at the moment sufficient to pass mentioned TAHI test.

In netfilter, utilize the fragment offset returned by find_prev_fhdr() to
let ipv6_frag_thdr_truncated() start it's traverse from the fragment
header.

Return 0 to drop the fragment in the netfilter. This is the same behaviour
as used on other protocol errors in this function, e.g. when
nf_ct_frag6_queue() returns -EPROTO. The Fragment will later be picked up
by ipv6_frag_rcv() in reassembly.c. ipv6_frag_rcv() will then send an
appropriate ICMP Parameter Problem message back to the source.

References commit 2efdaaaf883a ("IPv6: reply ICMP error if the first
fragment don't include all headers")

Signed-off-by: Georg Kohmann <geokohma@cisco.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Link: https://lore.kernel.org/r/20201111115025.28879-1-geokohma@cisco.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ipv6.h                      |  2 +
 net/ipv6/netfilter/nf_conntrack_reasm.c |  9 ++++
 net/ipv6/reassembly.c                   | 55 ++++++++++++++++---------
 3 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index bd1f396cc9c7..637cc6dd12b7 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1064,6 +1064,8 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp,
 
 bool ipv6_ext_hdr(u8 nexthdr);
 
+bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp);
+
 enum {
 	IP6_FH_F_FRAG		= (1 << 0),
 	IP6_FH_F_AUTH		= (1 << 1),
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 054d287eb13d..b9cc0b330dbe 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -440,6 +440,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
 int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 {
 	u16 savethdr = skb->transport_header;
+	u8 nexthdr = NEXTHDR_FRAGMENT;
 	int fhoff, nhoff, ret;
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
@@ -455,6 +456,14 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
 		return 0;
 
+	/* Discard the first fragment if it does not include all headers
+	 * RFC 8200, Section 4.5
+	 */
+	if (ipv6_frag_thdr_truncated(skb, fhoff, &nexthdr)) {
+		pr_debug("Drop incomplete fragment\n");
+		return 0;
+	}
+
 	if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
 		return -ENOMEM;
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c8cf1bbad74a..e3869bac9c88 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -318,15 +318,43 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 	return -1;
 }
 
+/* Check if the upper layer header is truncated in the first fragment. */
+bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp)
+{
+	u8 nexthdr = *nexthdrp;
+	__be16 frag_off;
+	int offset;
+
+	offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off);
+	if (offset < 0 || (frag_off & htons(IP6_OFFSET)))
+		return false;
+	switch (nexthdr) {
+	case NEXTHDR_TCP:
+		offset += sizeof(struct tcphdr);
+		break;
+	case NEXTHDR_UDP:
+		offset += sizeof(struct udphdr);
+		break;
+	case NEXTHDR_ICMP:
+		offset += sizeof(struct icmp6hdr);
+		break;
+	default:
+		offset += 1;
+	}
+	if (offset > skb->len)
+		return true;
+	return false;
+}
+EXPORT_SYMBOL(ipv6_frag_thdr_truncated);
+
 static int ipv6_frag_rcv(struct sk_buff *skb)
 {
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct net *net = dev_net(skb_dst(skb)->dev);
-	__be16 frag_off;
-	int iif, offset;
 	u8 nexthdr;
+	int iif;
 
 	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
 		goto fail_hdr;
@@ -362,24 +390,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	 * the source of the fragment, with the Pointer field set to zero.
 	 */
 	nexthdr = hdr->nexthdr;
-	offset = ipv6_skip_exthdr(skb, skb_transport_offset(skb), &nexthdr, &frag_off);
-	if (offset >= 0) {
-		/* Check some common protocols' header */
-		if (nexthdr == IPPROTO_TCP)
-			offset += sizeof(struct tcphdr);
-		else if (nexthdr == IPPROTO_UDP)
-			offset += sizeof(struct udphdr);
-		else if (nexthdr == IPPROTO_ICMPV6)
-			offset += sizeof(struct icmp6hdr);
-		else
-			offset += 1;
-
-		if (!(frag_off & htons(IP6_OFFSET)) && offset > skb->len) {
-			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
-					IPSTATS_MIB_INHDRERRORS);
-			icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
-			return -1;
-		}
+	if (ipv6_frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
+		__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
+				IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);
+		return -1;
 	}
 
 	iif = skb->dev ? skb->dev->ifindex : 0;

From 857524564eae8aefc3006a3d35139bb69ca53210 Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Sat, 14 Nov 2020 03:10:29 -0800
Subject: [PATCH 051/115] MAINTAINERS: Add Martin Schiller as a maintainer for
 the X.25 stack

Martin Schiller is an active developer and reviewer for the X.25 code.
His company is providing products based on the Linux X.25 stack.
So he is a good candidate for maintainers of the X.25 code.

The original maintainer of the X.25 network layer (Andrew Hendry) has
not sent any email to the netdev mail list since 2013. So he is probably
inactive now.

Cc: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Acked-by: Martin Schiller <ms@dev.tdt.de>
Link: https://lore.kernel.org/r/20201114111029.326972-1-xie.he.0141@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4a34b25ecc1f..e0144f396462 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9843,13 +9843,6 @@ S:	Maintained
 F:	arch/mips/lantiq
 F:	drivers/soc/lantiq
 
-LAPB module
-L:	linux-x25@vger.kernel.org
-S:	Orphan
-F:	Documentation/networking/lapb-module.rst
-F:	include/*/lapb.h
-F:	net/lapb/
-
 LASI 53c700 driver for PARISC
 M:	"James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
 L:	linux-scsi@vger.kernel.org
@@ -18998,12 +18991,18 @@ L:	linux-kernel@vger.kernel.org
 S:	Maintained
 N:	axp[128]
 
-X.25 NETWORK LAYER
-M:	Andrew Hendry <andrew.hendry@gmail.com>
+X.25 STACK
+M:	Martin Schiller <ms@dev.tdt.de>
 L:	linux-x25@vger.kernel.org
-S:	Odd Fixes
+S:	Maintained
+F:	Documentation/networking/lapb-module.rst
 F:	Documentation/networking/x25*
+F:	drivers/net/wan/hdlc_x25.c
+F:	drivers/net/wan/lapbether.c
+F:	include/*/lapb.h
 F:	include/net/x25*
+F:	include/uapi/linux/x25.h
+F:	net/lapb/
 F:	net/x25/
 
 X86 ARCHITECTURE (32-BIT AND 64-BIT)

From 2acc3c1bc8e98bc66b1badec42e9ea205b4fcdaa Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Mon, 16 Nov 2020 18:16:33 +0800
Subject: [PATCH 052/115] selftests/bpf: Fix error return code in
 run_getsockopt_test()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 65b4414a05eb ("selftests/bpf: add sockopt test that exercises BPF_F_ALLOW_MULTI")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20201116101633.64627-1-wanghai38@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 29188d6f5c8d..51fac975b316 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -138,7 +138,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
 	 */
 
 	buf = 0x40;
-	if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
+	err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+	if (err < 0) {
 		log_err("Failed to call setsockopt(IP_TOS)");
 		goto detach;
 	}

From 2a1828e378c1b5ba1ff283ed8f8c5cc37bb391dc Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sun, 15 Nov 2020 17:57:57 +0100
Subject: [PATCH 053/115] net: lantiq: Wait for the GPHY firmware to be ready

A user reports (slightly shortened from the original message):
  libphy: lantiq,xrx200-mdio: probed
  mdio_bus 1e108000.switch-mii: MDIO device at address 17 is missing.
  gswip 1e108000.switch lan: no phy at 2
  gswip 1e108000.switch lan: failed to connect to port 2: -19
  lantiq,xrx200-net 1e10b308.eth eth0: error -19 setting up slave phy

This is a single-port board using the internal Fast Ethernet PHY. The
user reports that switching to PHY scanning instead of configuring the
PHY within device-tree works around this issue.

The documentation for the standalone variant of the PHY11G (which is
probably very similar to what is used inside the xRX200 SoCs but having
the firmware burnt onto that standalone chip in the factory) states that
the PHY needs 300ms to be ready for MDIO communication after releasing
the reset.

Add a 300ms delay after initializing all GPHYs to ensure that the GPHY
firmware had enough time to initialize and to appear on the MDIO bus.
Unfortunately there is no (known) documentation on what the minimum time
to wait after releasing the reset on an internal PHY so play safe and
take the one for the external variant. Only wait after the last GPHY
firmware is loaded to not slow down the initialization too much (
xRX200 has two GPHYs but newer SoCs have at least three GPHYs).

Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20201115165757.552641-1-martin.blumenstingl@googlemail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/lantiq_gswip.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 74db81dafee3..09701c17f3f6 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -26,6 +26,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/etherdevice.h>
 #include <linux/firmware.h>
 #include <linux/if_bridge.h>
@@ -1837,6 +1838,16 @@ static int gswip_gphy_fw_list(struct gswip_priv *priv,
 		i++;
 	}
 
+	/* The standalone PHY11G requires 300ms to be fully
+	 * initialized and ready for any MDIO communication after being
+	 * taken out of reset. For the SoC-internal GPHY variant there
+	 * is no (known) documentation for the minimum time after a
+	 * reset. Use the same value as for the standalone variant as
+	 * some users have reported internal PHYs not being detected
+	 * without any delay.
+	 */
+	msleep(300);
+
 	return 0;
 
 remove_gphy:

From 794e442ca39e6c8d46003c430559bdb67a73690c Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Mon, 16 Nov 2020 16:13:22 +0530
Subject: [PATCH 054/115] MAINTAINERS: update cxgb4 and cxgb3 maintainer

Update cxgb4 and cxgb3 driver maintainer

Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Link: https://lore.kernel.org/r/20201116104322.3959-1-rajur@chelsio.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e0144f396462..8cb1aae96bdf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4710,7 +4710,7 @@ T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/dvb-frontends/cxd2820r*
 
 CXGB3 ETHERNET DRIVER (CXGB3)
-M:	Vishal Kulkarni <vishal@chelsio.com>
+M:	Raju Rangoju <rajur@chelsio.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.chelsio.com
@@ -4742,7 +4742,7 @@ W:	http://www.chelsio.com
 F:	drivers/net/ethernet/chelsio/inline_crypto/
 
 CXGB4 ETHERNET DRIVER (CXGB4)
-M:	Vishal Kulkarni <vishal@chelsio.com>
+M:	Raju Rangoju <rajur@chelsio.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.chelsio.com
@@ -4764,7 +4764,7 @@ F:	drivers/infiniband/hw/cxgb4/
 F:	include/uapi/rdma/cxgb4-abi.h
 
 CXGB4VF ETHERNET DRIVER (CXGB4VF)
-M:	Vishal Kulkarni <vishal@gmail.com>
+M:	Raju Rangoju <rajur@chelsio.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.chelsio.com

From cfbaa8b33e022aca62a3f2815ffbc02874d4cb8b Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Fri, 13 Nov 2020 14:07:07 +0800
Subject: [PATCH 055/115] cx82310_eth: fix error return code in cx82310_bind()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: ca139d76b0d9 ("cx82310_eth: re-enable ethernet mode after router reboot")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605247627-15385-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/cx82310_eth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c
index ca89d8258dd3..c4568a491dc4 100644
--- a/drivers/net/usb/cx82310_eth.c
+++ b/drivers/net/usb/cx82310_eth.c
@@ -197,7 +197,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
 	}
 
 	/* enable ethernet mode (?) */
-	if (cx82310_enable_ethernet(dev))
+	ret = cx82310_enable_ethernet(dev);
+	if (ret)
 		goto err;
 
 	/* get the MAC address */

From 3beb9be165083c2964eba1923601c3bfac0b02d4 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Fri, 13 Nov 2020 14:16:26 +0800
Subject: [PATCH 056/115] qlcnic: fix error return code in
 qlcnic_83xx_restart_hw()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 3ced0a88cd4c ("qlcnic: Add support to run firmware POST")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605248186-16013-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index b8af59fc1aa4..d2c190732d3e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -2231,7 +2231,8 @@ static int qlcnic_83xx_restart_hw(struct qlcnic_adapter *adapter)
 
 	/* Boot either flash image or firmware image from host file system */
 	if (qlcnic_load_fw_file == 1) {
-		if (qlcnic_83xx_load_fw_image_from_host(adapter))
+		err = qlcnic_83xx_load_fw_image_from_host(adapter);
+		if (err)
 			return err;
 	} else {
 		QLC_SHARED_REG_WR32(adapter, QLCNIC_FW_IMG_VALID,

From 661710bfd5039267f911e42675ab743760b6449d Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Fri, 13 Nov 2020 14:34:03 +0800
Subject: [PATCH 057/115] net: stmmac: dwmac-intel-plat: fix error return code
 in intel_eth_plat_probe()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 9efc9b2b04c7 ("net: stmmac: Add dwmac-intel-plat for GBE driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605249243-17262-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index f61cb997a8f6..82b1c7a5a7a9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -113,8 +113,10 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
 		/* Enable TX clock */
 		if (dwmac->data->tx_clk_en) {
 			dwmac->tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
-			if (IS_ERR(dwmac->tx_clk))
+			if (IS_ERR(dwmac->tx_clk)) {
+				ret = PTR_ERR(dwmac->tx_clk);
 				goto err_remove_config_dt;
+			}
 
 			clk_prepare_enable(dwmac->tx_clk);
 

From 35f735c665114840dcd3142f41148d07870f51f7 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Fri, 13 Nov 2020 14:49:33 +0800
Subject: [PATCH 058/115] net: ethernet: ti: cpsw: fix error return code in
 cpsw_probe()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 83a8471ba255 ("net: ethernet: ti: cpsw: refactor probe to group common hw initialization")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605250173-18438-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/cpsw.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index fa2d1025cbb2..b0f00b4edd94 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1634,6 +1634,7 @@ static int cpsw_probe(struct platform_device *pdev)
 				       CPSW_MAX_QUEUES, CPSW_MAX_QUEUES);
 	if (!ndev) {
 		dev_err(dev, "error allocating net_device\n");
+		ret = -ENOMEM;
 		goto clean_cpts;
 	}
 

From 7a30ecc9237681bb125cbd30eee92bef7e86293d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 13 Nov 2020 10:27:27 +0100
Subject: [PATCH 059/115] net: bridge: add missing counters to ndo_get_stats64
 callback

In br_forward.c and br_input.c fields dev->stats.tx_dropped and
dev->stats.multicast are populated, but they are ignored in
ndo_get_stats64.

Fixes: 28172739f0a2 ("net: fix 64 bit counters on 32 bit arches")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Link: https://lore.kernel.org/r/58ea9963-77ad-a7cf-8dfd-fc95ab95f606@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/bridge/br_device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 6f742fee874a..7730c8f3cb53 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -207,6 +207,7 @@ static void br_get_stats64(struct net_device *dev,
 {
 	struct net_bridge *br = netdev_priv(dev);
 
+	netdev_stats_to_stats64(stats, &dev->stats);
 	dev_fetch_sw_netstats(stats, br->stats);
 }
 

From 8e5debed39017836a850c6c7bfacc93299d19bad Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@intel.com>
Date: Sun, 15 Nov 2020 15:42:10 +0800
Subject: [PATCH 060/115] net: stmmac: Use rtnl_lock/unlock on
 netif_set_real_num_rx_queues() call

Fix an issue where dump stack is printed on suspend resume flow due to
netif_set_real_num_rx_queues() is not called with rtnl_lock held().

Fixes: 686cff3d7022 ("net: stmmac: Fix incorrect location to set real_num_rx|tx_queues")
Reported-by: Christophe ROULLIER <christophe.roullier@st.com>
Tested-by: Christophe ROULLIER <christophe.roullier@st.com>
Cc: Alexandre TORGUE <alexandre.torgue@st.com>
Reviewed-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@intel.com>
Link: https://lore.kernel.org/r/20201115074210.23605-1-vee.khee.wong@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d833908b660a..ba45fe237512 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5247,6 +5247,7 @@ int stmmac_resume(struct device *dev)
 			return ret;
 	}
 
+	rtnl_lock();
 	mutex_lock(&priv->lock);
 
 	stmmac_reset_queues_param(priv);
@@ -5262,6 +5263,7 @@ int stmmac_resume(struct device *dev)
 	stmmac_enable_all_queues(priv);
 
 	mutex_unlock(&priv->lock);
+	rtnl_unlock();
 
 	if (!device_may_wakeup(priv->device) || !priv->plat->pmt) {
 		rtnl_lock();

From 9c79a8ab5f124db01eb1d7287454a702f0d4252f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 13 Nov 2020 19:16:57 +0100
Subject: [PATCH 061/115] net: mvneta: fix possible memory leak in
 mvneta_swbm_add_rx_fragment

Recycle the page running page_pool_put_full_page() in
mvneta_swbm_add_rx_fragment routine when the last descriptor
contains just the FCS or if the received packet contains more than
MAX_SKB_FRAGS fragments

Fixes: ca0e014609f0 ("net: mvneta: move skb build after descriptors processing")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/df6a2bad70323ee58d3901491ada31c1ca2a40b9.1605291228.git.lorenzo@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 54b0bf574c05..4a9041ee1b39 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2287,6 +2287,7 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
 	dma_sync_single_for_cpu(dev->dev.parent,
 				rx_desc->buf_phys_addr,
 				len, dma_dir);
+	rx_desc->buf_phys_addr = 0;
 
 	if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
 		skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags];
@@ -2295,8 +2296,8 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
 		skb_frag_size_set(frag, data_len);
 		__skb_frag_set_page(frag, page);
 		sinfo->nr_frags++;
-
-		rx_desc->buf_phys_addr = 0;
+	} else {
+		page_pool_put_full_page(rxq->page_pool, page, true);
 	}
 	*size -= len;
 }

From fc70f5bf5e525dde81565f0a30d5e39168062eba Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Fri, 13 Nov 2020 13:12:05 -0700
Subject: [PATCH 062/115] net: qualcomm: rmnet: Fix incorrect receive packet
 handling during cleanup

During rmnet unregistration, the real device rx_handler is first cleared
followed by the removal of rx_handler_data after the rcu synchronization.

Any packets in the receive path may observe that the rx_handler is NULL.
However, there is no check when dereferencing this value to use the
rmnet_port information.

This fixes following splat by adding the NULL check.

Unable to handle kernel NULL pointer dereference at virtual
address 000000000000000d
pc : rmnet_rx_handler+0x124/0x284
lr : rmnet_rx_handler+0x124/0x284
 rmnet_rx_handler+0x124/0x284
 __netif_receive_skb_core+0x758/0xd74
 __netif_receive_skb+0x50/0x17c
 process_backlog+0x15c/0x1b8
 napi_poll+0x88/0x284
 net_rx_action+0xbc/0x23c
 __do_softirq+0x20c/0x48c

Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation")
Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Link: https://lore.kernel.org/r/1605298325-3705-1-git-send-email-subashab@codeaurora.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 29a7bfa2584d..3d7d3ab383f8 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -188,6 +188,11 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
 
 	dev = skb->dev;
 	port = rmnet_get_port_rcu(dev);
+	if (unlikely(!port)) {
+		atomic_long_inc(&skb->dev->rx_nohandler);
+		kfree_skb(skb);
+		goto done;
+	}
 
 	switch (port->rmnet_mode) {
 	case RMNET_EPMODE_VND:

From 3fe16edf6767decd640fa2654308bc64f8d656dc Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Sun, 15 Nov 2020 07:16:00 +0300
Subject: [PATCH 063/115] net/tls: fix corrupted data in recvmsg

If tcp socket has more data than Encrypted Handshake Message then
tls_sw_recvmsg will try to decrypt next record instead of returning
full control message to userspace as mentioned in comment. The next
message - usually Application Data - gets corrupted because it uses
zero copy for decryption that's why the data is not stored in skb
for next iteration. Revert check to not decrypt next record if
current is not Application Data.

Fixes: 692d7b5d1f91 ("tls: Fix recvmsg() to be able to peek across multiple records")
Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Link: https://lore.kernel.org/r/1605413760-21153-1-git-send-email-vfedorenko@novek.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/tls/tls_sw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 95ab5545a931..2fe9e2cf8659 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1913,7 +1913,7 @@ int tls_sw_recvmsg(struct sock *sk,
 			 * another message type
 			 */
 			msg->msg_flags |= MSG_EOR;
-			if (ctx->control != TLS_RECORD_TYPE_DATA)
+			if (control != TLS_RECORD_TYPE_DATA)
 				goto recv_end;
 		} else {
 			break;

From 064c9c32b17ca9b36f95eba32ee790dbbebd9a5f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Sat, 14 Nov 2020 12:20:17 -0600
Subject: [PATCH 064/115] net: ipa: lock when freeing transaction

Transactions sit on one of several lists, depending on their state
(allocated, pending, complete, or polled).  A spinlock protects
against concurrent access when transactions are moved between these
lists.

Transactions are also reference counted.  A newly-allocated
transaction has an initial count of 1; a transaction is released in
gsi_trans_free() only if its decremented reference count reaches 0.
Releasing a transaction includes removing it from the polled (or if
unused, allocated) list, so the spinlock is acquired when we release
a transaction.

The reference count is used to allow a caller to synchronously wait
for a committed transaction to complete.  In this case, the waiter
takes an extra reference to the transaction *before* committing it
(so it won't be freed), and releases its reference (calls
gsi_trans_free()) when it is done with it.

Similarly, gsi_channel_update() takes an extra reference to ensure a
transaction isn't released before the function is done operating on
it.  Until the transaction is moved to the completed list (by this
function) it won't be freed, so this reference is taken "safely."

But in the quiesce path, we want to wait for the "last" transaction,
which we find in the completed or polled list.  Transactions on
these lists can be freed at any time, so we (try to) prevent that
by taking the reference while holding the spinlock.

Currently gsi_trans_free() decrements a transaction's reference
count unconditionally, acquiring the lock to remove the transaction
from its list *only* when the count reaches 0.  This does not
protect the quiesce path, which depends on the lock to ensure its
extra reference prevents release of the transaction.

Fix this by only dropping the last reference to a transaction
in gsi_trans_free() while holding the spinlock.

Fixes: 9dd441e4ed575 ("soc: qcom: ipa: GSI transactions")
Reported-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Alex Elder <elder@linaro.org>
Link: https://lore.kernel.org/r/20201114182017.28270-1-elder@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi_trans.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c
index 92642030e735..e8599bb948c0 100644
--- a/drivers/net/ipa/gsi_trans.c
+++ b/drivers/net/ipa/gsi_trans.c
@@ -362,22 +362,31 @@ struct gsi_trans *gsi_channel_trans_alloc(struct gsi *gsi, u32 channel_id,
 	return trans;
 }
 
-/* Free a previously-allocated transaction (used only in case of error) */
+/* Free a previously-allocated transaction */
 void gsi_trans_free(struct gsi_trans *trans)
 {
+	refcount_t *refcount = &trans->refcount;
 	struct gsi_trans_info *trans_info;
+	bool last;
 
-	if (!refcount_dec_and_test(&trans->refcount))
+	/* We must hold the lock to release the last reference */
+	if (refcount_dec_not_one(refcount))
 		return;
 
 	trans_info = &trans->gsi->channel[trans->channel_id].trans_info;
 
 	spin_lock_bh(&trans_info->spinlock);
 
-	list_del(&trans->links);
+	/* Reference might have been added before we got the lock */
+	last = refcount_dec_and_test(refcount);
+	if (last)
+		list_del(&trans->links);
 
 	spin_unlock_bh(&trans_info->spinlock);
 
+	if (!last)
+		return;
+
 	ipa_gsi_trans_release(trans);
 
 	/* Releasing the reserved TREs implicitly frees the sgl[] and

From 4260330b32b14330cfe427d568ac5f5b29b5be3d Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Sun, 15 Nov 2020 19:27:49 -0500
Subject: [PATCH 065/115] bnxt_en: read EEPROM A2h address using page 0

The module eeprom address range returned by bnxt_get_module_eeprom()
should be 256 bytes of A0h address space, the lower half of the A2h
address space, and page 0 for the upper half of the A2h address space.

Fix the firmware call by passing page_number 0 for the A2h slave address
space.

Fixes: 42ee18fe4ca2 ("bnxt_en: Add Support for ETHTOOL_GMODULEINFO and ETHTOOL_GMODULEEEPRO")
Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 53687bc7fcf5..f133ea5674cb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2997,7 +2997,7 @@ static int bnxt_get_module_eeprom(struct net_device *dev,
 	/* Read A2 portion of the EEPROM */
 	if (length) {
 		start -= ETH_MODULE_SFF_8436_LEN;
-		rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 1,
+		rc = bnxt_read_sfp_module_eeprom_info(bp, I2C_DEV_ADDR_A2, 0,
 						      start, length, data);
 	}
 	return rc;

From eba93de6d31c1734dee59909020a162de612e41e Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 15 Nov 2020 19:27:50 -0500
Subject: [PATCH 066/115] bnxt_en: Free port stats during firmware reset.

Firmware is unable to retain the port counters during any kind of
fatal or non-fatal resets, so we must clear the port counters to
avoid false detection of port counter overflow.

Fixes: fea6b3335527 ("bnxt_en: Accumulate all counters.")
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7975f59735d6..448e1ba762ee 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4099,7 +4099,8 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init)
 	bnxt_free_ntp_fltrs(bp, irq_re_init);
 	if (irq_re_init) {
 		bnxt_free_ring_stats(bp);
-		if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET))
+		if (!(bp->fw_cap & BNXT_FW_CAP_PORT_STATS_NO_RESET) ||
+		    test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
 			bnxt_free_port_stats(bp);
 		bnxt_free_ring_grps(bp);
 		bnxt_free_vnics(bp);

From fa97f303fa4cf8469fd3d1ef29da69c0a3f6ddc8 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 15 Nov 2020 19:27:51 -0500
Subject: [PATCH 067/115] bnxt_en: Fix counter overflow logic.

bnxt_add_one_ctr() adds a hardware counter to a software counter and
adjusts for the hardware counter wraparound against the mask.  The logic
assumes that the hardware counter is always smaller than or equal to
the mask.

This assumption is mostly correct.  But in some cases if the firmware
is older and does not provide the accurate mask, the driver can use
a mask that is smaller than the actual hardware mask.  This can cause
some extra carry bits to be added to the software counter, resulting in
counters that far exceed the actual value.  Fix it by masking the
hardware counter with the mask passed into bnxt_add_one_ctr().

Fixes: fea6b3335527 ("bnxt_en: Accumulate all counters.")
Reviewed-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 448e1ba762ee..7c21aaa8b9af 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7758,6 +7758,7 @@ static void bnxt_add_one_ctr(u64 hw, u64 *sw, u64 mask)
 {
 	u64 sw_tmp;
 
+	hw &= mask;
 	sw_tmp = (*sw & ~mask) | hw;
 	if (hw < (*sw & mask))
 		sw_tmp += mask + 1;

From 0ae0a779efb8840a0cdb2d6bd9a5d07663ac3ee2 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Sun, 15 Nov 2020 19:27:52 -0500
Subject: [PATCH 068/115] bnxt_en: Avoid unnecessary NVM_GET_DEV_INFO cmd error
 log on VFs.

VFs do not have access permissions to issue NVM_GET_DEV_INFO
firmware command.

Fixes: 4933f6753b50 ("bnxt_en: Add bnxt_hwrm_nvm_get_dev_info() to query NVM info.")
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index f133ea5674cb..1471c9a36238 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2079,6 +2079,9 @@ int bnxt_hwrm_nvm_get_dev_info(struct bnxt *bp,
 	struct hwrm_nvm_get_dev_info_input req = {0};
 	int rc;
 
+	if (BNXT_VF(bp))
+		return -EOPNOTSUPP;
+
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_NVM_GET_DEV_INFO, -1, -1);
 	mutex_lock(&bp->hwrm_cmd_lock);
 	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);

From cb47d16ea21045c66eebbf5ed792e74a8537e27a Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Mon, 16 Nov 2020 21:07:13 +0800
Subject: [PATCH 069/115] qed: fix error return code in qed_iwarp_ll2_start()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 469981b17a4f ("qed: Add unaligned and packed packet processing")
Fixes: fcb39f6c10b2 ("qed: Add mpa buffer descriptors for storing and processing mpa fpdus")
Fixes: 1e28eaad07ea ("qed: Add iWARP support for fpdu spanned over more than two tcp packets")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Acked-by: Michal Kalderon <michal.kalderon@marvell.com>
Link: https://lore.kernel.org/r/1605532033-27373-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 512cbef24097..a99861124630 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -2754,14 +2754,18 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 	iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps,
 					    sizeof(*iwarp_info->partial_fpdus),
 					    GFP_KERNEL);
-	if (!iwarp_info->partial_fpdus)
+	if (!iwarp_info->partial_fpdus) {
+		rc = -ENOMEM;
 		goto err;
+	}
 
 	iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps;
 
 	iwarp_info->mpa_intermediate_buf = kzalloc(buff_size, GFP_KERNEL);
-	if (!iwarp_info->mpa_intermediate_buf)
+	if (!iwarp_info->mpa_intermediate_buf) {
+		rc = -ENOMEM;
 		goto err;
+	}
 
 	/* The mpa_bufs array serves for pending RX packets received on the
 	 * mpa ll2 that don't have place on the tx ring and require later
@@ -2771,8 +2775,10 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 	iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc,
 				       sizeof(*iwarp_info->mpa_bufs),
 				       GFP_KERNEL);
-	if (!iwarp_info->mpa_bufs)
+	if (!iwarp_info->mpa_bufs) {
+		rc = -ENOMEM;
 		goto err;
+	}
 
 	INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list);
 	INIT_LIST_HEAD(&iwarp_info->mpa_buf_list);

From 7b027c249da54f492699c43e26cba486cfd48035 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Tue, 17 Nov 2020 11:02:11 +0800
Subject: [PATCH 070/115] net: b44: fix error return code in b44_init_one()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 39a6f4bce6b4 ("b44: replace the ssb_dma API with the generic DMA API")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Link: https://lore.kernel.org/r/1605582131-36735-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/b44.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 74c1778d841e..b455b60a5434 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2383,7 +2383,8 @@ static int b44_init_one(struct ssb_device *sdev,
 		goto err_out_free_dev;
 	}
 
-	if (dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30))) {
+	err = dma_set_mask_and_coherent(sdev->dma_dev, DMA_BIT_MASK(30));
+	if (err) {
 		dev_err(sdev->dev,
 			"Required 30BIT DMA mask unsupported by the system\n");
 		goto err_out_powerdown;

From 3d5179458d22dc0b4fdc724e4bed4231a655112a Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Tue, 17 Nov 2020 13:14:48 +1030
Subject: [PATCH 071/115] net: ftgmac100: Fix crash when removing driver

When removing the driver we would hit BUG_ON(!list_empty(&dev->ptype_specific))
in net/core/dev.c due to still having the NC-SI packet handler
registered.

 # echo 1e660000.ethernet > /sys/bus/platform/drivers/ftgmac100/unbind
  ------------[ cut here ]------------
  kernel BUG at net/core/dev.c:10254!
  Internal error: Oops - BUG: 0 [#1] SMP ARM
  CPU: 0 PID: 115 Comm: sh Not tainted 5.10.0-rc3-next-20201111-00007-g02e0365710c4 #46
  Hardware name: Generic DT based system
  PC is at netdev_run_todo+0x314/0x394
  LR is at cpumask_next+0x20/0x24
  pc : [<806f5830>]    lr : [<80863cb0>]    psr: 80000153
  sp : 855bbd58  ip : 00000001  fp : 855bbdac
  r10: 80c03d00  r9 : 80c06228  r8 : 81158c54
  r7 : 00000000  r6 : 80c05dec  r5 : 80c05d18  r4 : 813b9280
  r3 : 813b9054  r2 : 8122c470  r1 : 00000002  r0 : 00000002
  Flags: Nzcv  IRQs on  FIQs off  Mode SVC_32  ISA ARM  Segment none
  Control: 00c5387d  Table: 85514008  DAC: 00000051
  Process sh (pid: 115, stack limit = 0x7cb5703d)
 ...
  Backtrace:
  [<806f551c>] (netdev_run_todo) from [<80707eec>] (rtnl_unlock+0x18/0x1c)
   r10:00000051 r9:854ed710 r8:81158c54 r7:80c76bb0 r6:81158c10 r5:8115b410
   r4:813b9000
  [<80707ed4>] (rtnl_unlock) from [<806f5db8>] (unregister_netdev+0x2c/0x30)
  [<806f5d8c>] (unregister_netdev) from [<805a8180>] (ftgmac100_remove+0x20/0xa8)
   r5:8115b410 r4:813b9000
  [<805a8160>] (ftgmac100_remove) from [<805355e4>] (platform_drv_remove+0x34/0x4c)

Fixes: bd466c3fb5a4 ("net/faraday: Support NCSI mode")
Signed-off-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20201117024448.1170761-1-joel@jms.id.au
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 00024dd41147..80fb1f537bb3 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1907,6 +1907,8 @@ static int ftgmac100_probe(struct platform_device *pdev)
 	clk_disable_unprepare(priv->rclk);
 	clk_disable_unprepare(priv->clk);
 err_ncsi_dev:
+	if (priv->ndev)
+		ncsi_unregister_dev(priv->ndev);
 	ftgmac100_destroy_mdio(netdev);
 err_setup_mdio:
 	iounmap(priv->base);
@@ -1926,6 +1928,8 @@ static int ftgmac100_remove(struct platform_device *pdev)
 	netdev = platform_get_drvdata(pdev);
 	priv = netdev_priv(netdev);
 
+	if (priv->ndev)
+		ncsi_unregister_dev(priv->ndev);
 	unregister_netdev(netdev);
 
 	clk_disable_unprepare(priv->rclk);

From 1b9e2a8c99a5c021041bfb2d512dc3ed92a94ffd Mon Sep 17 00:00:00 2001
From: Ryan Sharpelletti <sharpelletti@google.com>
Date: Mon, 16 Nov 2020 17:44:13 +0000
Subject: [PATCH 072/115] tcp: only postpone PROBE_RTT if RTT is < current
 min_rtt estimate

During loss recovery, retransmitted packets are forced to use TCP
timestamps to calculate the RTT samples, which have a millisecond
granularity. BBR is designed using a microsecond granularity. As a
result, multiple RTT samples could be truncated to the same RTT value
during loss recovery. This is problematic, as BBR will not enter
PROBE_RTT if the RTT sample is <= the current min_rtt sample, meaning
that if there are persistent losses, PROBE_RTT will constantly be
pushed off and potentially never re-entered. This patch makes sure
that BBR enters PROBE_RTT by checking if RTT sample is < the current
min_rtt sample, rather than <=.

The Netflix transport/TCP team discovered this bug in the Linux TCP
BBR code during lab tests.

Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control")
Signed-off-by: Ryan Sharpelletti <sharpelletti@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Link: https://lore.kernel.org/r/20201116174412.1433277-1-sharpelletti.kdev@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/tcp_bbr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6c4d79baff26..6ea3dc2e4219 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -945,7 +945,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
 	filter_expired = after(tcp_jiffies32,
 			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
 	if (rs->rtt_us >= 0 &&
-	    (rs->rtt_us <= bbr->min_rtt_us ||
+	    (rs->rtt_us < bbr->min_rtt_us ||
 	     (filter_expired && !rs->is_ack_delayed))) {
 		bbr->min_rtt_us = rs->rtt_us;
 		bbr->min_rtt_stamp = tcp_jiffies32;

From ea63609857321c38fd4ad096388b413b66001c6c Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 8 Oct 2020 12:34:10 +0300
Subject: [PATCH 073/115] net/mlx5e: Fix refcount leak on kTLS RX resync

On resync, the driver calls inet_lookup_established
(__inet6_lookup_established) that increases sk_refcnt of the socket. To
decrease it, the driver set skb->destructor to sock_edemux. However, it
didn't work well, because the TCP stack also sets this destructor for
early demux, and the refcount gets decreased only once, while increased
two times (in mlx5e and in the TCP stack). It leads to a socket leak, a
TLS context leak, which in the end leads to calling tls_dev_del twice:
on socket close and on driver unload, which in turn leads to a crash.

This commit fixes the refcount leak by calling sock_gen_put right away
after using the socket, thus fixing all the subsequent issues.

Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support")
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c  | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 7f6221b8b1f7..6a1d82503ef8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -476,19 +476,22 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb)
 
 	depth += sizeof(struct tcphdr);
 
-	if (unlikely(!sk || sk->sk_state == TCP_TIME_WAIT))
+	if (unlikely(!sk))
 		return;
 
+	if (unlikely(sk->sk_state == TCP_TIME_WAIT))
+		goto unref;
+
 	if (unlikely(!resync_queue_get_psv(sk)))
-		return;
-
-	skb->sk = sk;
-	skb->destructor = sock_edemux;
+		goto unref;
 
 	seq = th->seq;
 	datalen = skb->len - depth;
 	tls_offload_rx_resync_async_request_start(sk, seq, datalen);
 	rq->stats->tls_resync_req_start++;
+
+unref:
+	sock_gen_put(sk);
 }
 
 void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk,

From 5cfb540ef27b5b763a3b181d142847ef0411728e Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Thu, 22 Oct 2020 11:22:56 -0500
Subject: [PATCH 074/115] net/mlx5e: Set IPsec WAs only in IP's non checksum
 partial case.

The IP's checksum partial still requires L4 csum flag on Ethernet WQE.
Make the IPsec WAs only for the IP's non checksum partial case
(for example icmd packet)

Fixes: 5be019040cb7 ("net/mlx5e: IPsec: Add Connect-X IPsec Tx data path offload")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Raed Salem <raeds@nvidia.com>
Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 82b4419af9d4..6dd3ea3cbbed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -144,7 +144,9 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
 	memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
 }
 
-/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */
+/* If packet is not IP's CHECKSUM_PARTIAL (e.g. icmd packet),
+ * need to set L3 checksum flag for IPsec
+ */
 static void
 ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 			    struct mlx5_wqe_eth_seg *eseg)
@@ -154,7 +156,6 @@ ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
 		sq->stats->csum_partial_inner++;
 	} else {
-		eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
 		sq->stats->csum_partial++;
 	}
 }
@@ -162,11 +163,6 @@ ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 static inline void
 mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
 {
-	if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
-		ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
-		return;
-	}
-
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
 		if (skb->encapsulation) {
@@ -177,6 +173,9 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct
 			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
 			sq->stats->csum_partial++;
 		}
+	} else if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
+		ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
+
 	} else
 		sq->stats->csum_none++;
 }

From 6248ce991f8eed4f2f0fdec694f5749156105629 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Tue, 3 Nov 2020 12:56:18 -0600
Subject: [PATCH 075/115] net/mlx5e: Fix IPsec packet drop by
 mlx5e_tc_update_skb

Both TC and IPsec crypto offload use metadata_regB to store
private information. Since TC does not use bit 31 of regB, IPsec
will use bit 31 as the IPsec packet marker. The IPsec's regB usage
is changed to:
Bit31: IPsec marker
Bit30-24: IPsec syndrome
Bit23-0: IPsec obj id

Fixes: b2ac7541e377 ("net/mlx5e: IPsec: Add Connect-X IPsec Rx data path offload")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Raed Salem <raeds@nvidia.com>
Reviewed-by: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ipsec_fs.c         | 14 +++++++-------
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c       |  3 +--
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h       |  9 +++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    |  3 +++
 4 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 0e45590662a8..381a9c8c9da9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -64,13 +64,13 @@ static int rx_err_add_rule(struct mlx5e_priv *priv,
 	if (!spec)
 		return -ENOMEM;
 
-	/* Action to copy 7 bit ipsec_syndrome to regB[0:6] */
+	/* Action to copy 7 bit ipsec_syndrome to regB[24:30] */
 	MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY);
 	MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME);
 	MLX5_SET(copy_action_in, action, src_offset, 0);
 	MLX5_SET(copy_action_in, action, length, 7);
 	MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
-	MLX5_SET(copy_action_in, action, dst_offset, 0);
+	MLX5_SET(copy_action_in, action, dst_offset, 24);
 
 	modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL,
 					      1, action);
@@ -488,13 +488,13 @@ static int rx_add_rule(struct mlx5e_priv *priv,
 
 	setup_fte_common(attrs, ipsec_obj_id, spec, &flow_act);
 
-	/* Set 1  bit ipsec marker */
-	/* Set 24 bit ipsec_obj_id */
+	/* Set bit[31] ipsec marker */
+	/* Set bit[23-0] ipsec_obj_id */
 	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
 	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B);
-	MLX5_SET(set_action_in, action, data, (ipsec_obj_id << 1) | 0x1);
-	MLX5_SET(set_action_in, action, offset, 7);
-	MLX5_SET(set_action_in, action, length, 25);
+	MLX5_SET(set_action_in, action, data, (ipsec_obj_id | BIT(31)));
+	MLX5_SET(set_action_in, action, offset, 0);
+	MLX5_SET(set_action_in, action, length, 32);
 
 	modify_hdr = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL,
 					      1, action);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 11e31a3db2be..a9b45606dbdb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -453,7 +453,6 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
 				       struct mlx5_cqe64 *cqe)
 {
 	u32 ipsec_meta_data = be32_to_cpu(cqe->ft_metadata);
-	u8 ipsec_syndrome = ipsec_meta_data & 0xFF;
 	struct mlx5e_priv *priv;
 	struct xfrm_offload *xo;
 	struct xfrm_state *xs;
@@ -481,7 +480,7 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
 	xo = xfrm_offload(skb);
 	xo->flags = CRYPTO_DONE;
 
-	switch (ipsec_syndrome & MLX5_IPSEC_METADATA_SYNDROM_MASK) {
+	switch (MLX5_IPSEC_METADATA_SYNDROM(ipsec_meta_data)) {
 	case MLX5E_IPSEC_OFFLOAD_RX_SYNDROME_DECRYPTED:
 		xo->status = CRYPTO_SUCCESS;
 		if (WARN_ON_ONCE(priv->ipsec->no_trailer))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index 056dacb612b0..9df9b9a8e09b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -39,9 +39,10 @@
 #include "en.h"
 #include "en/txrx.h"
 
-#define MLX5_IPSEC_METADATA_MARKER_MASK      (0x80)
-#define MLX5_IPSEC_METADATA_SYNDROM_MASK     (0x7F)
-#define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF)
+/* Bit31: IPsec marker, Bit30-24: IPsec syndrome, Bit23-0: IPsec obj id */
+#define MLX5_IPSEC_METADATA_MARKER(metadata)  (((metadata) >> 31) & 0x1)
+#define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(6, 0))
+#define MLX5_IPSEC_METADATA_HANDLE(metadata)  ((metadata) & GENMASK(23, 0))
 
 struct mlx5e_accel_tx_ipsec_state {
 	struct xfrm_offload *xo;
@@ -78,7 +79,7 @@ static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_st
 
 static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
 {
-	return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata));
+	return MLX5_IPSEC_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata));
 }
 
 static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 3b979008143d..4a2ce241522e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -283,6 +283,9 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe)
 
 	reg_b = be32_to_cpu(cqe->ft_metadata);
 
+	if (reg_b >> (MLX5E_TC_TABLE_CHAIN_TAG_BITS + ZONE_RESTORE_BITS))
+		return false;
+
 	chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
 	if (chain)
 		return true;

From 219b3267ca102a35092f5998921a9e6f99074af2 Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Wed, 4 Nov 2020 14:10:30 +0200
Subject: [PATCH 076/115] net/mlx5e: Fix check if netdev is bond slave

Bond events handler uses bond_slave_get_rtnl to check if net device
is bond slave. bond_slave_get_rtnl return the rcu rx_handler pointer
from the netdev which exists for bond slaves but also exists for
devices that are attached to linux bridge so using it as indication
for bond slave is wrong.

Fix by using netif_is_lag_port instead.

Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Raed Salem <raeds@nvidia.com>
Reviewed-by: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 3e44e4d820c5..95f2b26a3ee3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -187,7 +187,7 @@ static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
 	struct mlx5e_priv *priv;
 
 	/* A given netdev is not a representor or not a slave of LAG configuration */
-	if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
+	if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
 		return false;
 
 	priv = netdev_priv(netdev);

From 8cbcc5ef2a281f6bb10099f4572a08cb765ffbf4 Mon Sep 17 00:00:00 2001
From: Michael Guralnik <michaelgur@nvidia.com>
Date: Mon, 2 Nov 2020 17:34:44 +0200
Subject: [PATCH 077/115] net/mlx5: Add handling of port type in rule deletion

Handle destruction of rules with port destination type to enable
full destruction of flow.

Without this handling of TX rules the deletion of these rules fails.
Dmesg of flow destruction failure:

[  203.714146] mlx5_core 0000:00:0b.0: mlx5_cmd_check:753:(pid 342): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x144b7a)
[  210.547387] ------------[ cut here ]------------
[  210.548663] refcount_t: decrement hit 0; leaking memory.
[  210.550651] WARNING: CPU: 4 PID: 342 at lib/refcount.c:31 refcount_warn_saturate+0x5c/0x110
[  210.550654] Modules linked in: mlx5_ib mlx5_core ib_ipoib rdma_ucm rdma_cm iw_cm ib_cm ib_umad ib_uverbs ib_core
[  210.550675] CPU: 4 PID: 342 Comm: test Not tainted 5.8.0-rc2+ #116
[  210.550678] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[  210.550680] RIP: 0010:refcount_warn_saturate+0x5c/0x110
[  210.550685] Code: c6 d1 1b 01 00 0f 84 ad 00 00 00 5b 5d c3 80 3d b5 d1 1b 01 00 75 f4 48 c7 c7 20 d1 15 82 c6 05 a5 d1 1b 01 01 e8 a7 eb af ff <0f> 0b eb dd 80 3d 99 d1 1b 01 00 75 d4 48 c7 c7 c0 cf 15 82 c6 05
[  210.550687] RSP: 0018:ffff8881642e77e8 EFLAGS: 00010282
[  210.550691] RAX: 0000000000000000 RBX: 0000000000000004 RCX: 0000000000000000
[  210.550694] RDX: 0000000000000027 RSI: 0000000000000004 RDI: ffffed102c85ceef
[  210.550696] RBP: ffff888161720428 R08: ffffffff8124c10e R09: ffffed103243beae
[  210.550698] R10: ffff8881921df56b R11: ffffed103243bead R12: ffff8881841b4180
[  210.550701] R13: ffff888161720428 R14: ffff8881616d0000 R15: ffff888161720380
[  210.550704] FS:  00007fc27f025740(0000) GS:ffff888192000000(0000) knlGS:0000000000000000
[  210.550706] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  210.550708] CR2: 0000557e4b41a6a0 CR3: 0000000002415004 CR4: 0000000000360ea0
[  210.550711] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  210.550713] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  210.550715] Call Trace:
[  210.550717]  mlx5_del_flow_rules+0x484/0x490 [mlx5_core]
[  210.550720]  ? mlx5_cmd_set_fte+0xa80/0xa80 [mlx5_core]
[  210.550722]  mlx5_ib_destroy_flow+0x17f/0x280 [mlx5_ib]
[  210.550724]  uverbs_free_flow+0x4c/0x90 [ib_uverbs]
[  210.550726]  destroy_hw_idr_uobject+0x41/0xb0 [ib_uverbs]
[  210.550728]  uverbs_destroy_uobject+0xaa/0x390 [ib_uverbs]
[  210.550731]  __uverbs_cleanup_ufile+0x129/0x1b0 [ib_uverbs]
[  210.550733]  ? uverbs_destroy_uobject+0x390/0x390 [ib_uverbs]
[  210.550735]  uverbs_destroy_ufile_hw+0x78/0x190 [ib_uverbs]
[  210.550737]  ib_uverbs_close+0x36/0x140 [ib_uverbs]
[  210.550739]  __fput+0x181/0x380
[  210.550741]  task_work_run+0x88/0xd0
[  210.550743]  do_exit+0x5f6/0x13b0
[  210.550745]  ? sched_clock_cpu+0x30/0x140
[  210.550747]  ? is_current_pgrp_orphaned+0x70/0x70
[  210.550750]  ? lock_downgrade+0x360/0x360
[  210.550752]  ? mark_held_locks+0x1d/0x90
[  210.550754]  do_group_exit+0x8a/0x140
[  210.550756]  get_signal+0x20a/0xf50
[  210.550758]  do_signal+0x8c/0xbe0
[  210.550760]  ? hrtimer_nanosleep+0x1d8/0x200
[  210.550762]  ? nanosleep_copyout+0x50/0x50
[  210.550764]  ? restore_sigcontext+0x320/0x320
[  210.550766]  ? __hrtimer_init+0xf0/0xf0
[  210.550768]  ? timespec64_add_safe+0x150/0x150
[  210.550770]  ? mark_held_locks+0x1d/0x90
[  210.550772]  ? lockdep_hardirqs_on_prepare+0x14c/0x240
[  210.550774]  __prepare_exit_to_usermode+0x119/0x170
[  210.550776]  do_syscall_64+0x65/0x300
[  210.550778]  ? trace_hardirqs_off+0x10/0x120
[  210.550781]  ? mark_held_locks+0x1d/0x90
[  210.550783]  ? asm_sysvec_apic_timer_interrupt+0xa/0x20
[  210.550785]  ? lockdep_hardirqs_on+0x112/0x190
[  210.550787]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  210.550789] RIP: 0033:0x7fc27f1cd157
[  210.550791] Code: Bad RIP value.
[  210.550793] RSP: 002b:00007ffd4db27ea8 EFLAGS: 00000246 ORIG_RAX: 0000000000000023
[  210.550798] RAX: fffffffffffffdfc RBX: ffffffffffffff80 RCX: 00007fc27f1cd157
[  210.550800] RDX: 00007fc27f025740 RSI: 00007ffd4db27eb0 RDI: 00007ffd4db27eb0
[  210.550803] RBP: 0000000000000016 R08: 0000000000000000 R09: 000000000000000e
[  210.550805] R10: 00007ffd4db27dc7 R11: 0000000000000246 R12: 0000000000400c00
[  210.550808] R13: 00007ffd4db285f0 R14: 0000000000000000 R15: 0000000000000000
[  210.550809] irq event stamp: 49399
[  210.550812] hardirqs last  enabled at (49399): [<ffffffff81172d36>] console_unlock+0x556/0x6f0
[  210.550815] hardirqs last disabled at (49398): [<ffffffff81172897>] console_unlock+0xb7/0x6f0
[  210.550818] softirqs last  enabled at (48706): [<ffffffff81e0037b>] __do_softirq+0x37b/0x60c
[  210.550820] softirqs last disabled at (48697): [<ffffffff81c00e2f>] asm_call_on_stack+0xf/0x20
[  210.550822] ---[ end trace ad18c0e6fa846454 ]---
[  210.581862] mlx5_core 0000:00:0c.0: mlx5_destroy_flow_table:2132:(pid 342): Flow table 262150 wasn't destroyed, refcount > 1

Fixes: a7ee18bdee83 ("RDMA/mlx5: Allow creating a matcher for a NIC TX flow table")
Signed-off-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 325a5b0d6829..9fdd99272e31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -534,6 +534,13 @@ static void del_sw_hw_rule(struct fs_node *node)
 		goto out;
 	}
 
+	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_PORT &&
+	    --fte->dests_size) {
+		fte->modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+		goto out;
+	}
+
 	if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
 	    --fte->dests_size) {
 		fte->modify_mask |=

From 1ce5fc724a26e0b476e42c5d588bdb80caea003b Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Mon, 2 Nov 2020 13:45:24 +0200
Subject: [PATCH 078/115] net/mlx5: Clear bw_share upon VF disable

Currently, if user disables VFs with some min and max rates configured,
they are cleared. But QoS data is not cleared and restored upon next VF
enable placing limits on minimal rate for given VF, when user expects
none.

To match cleared vport->info struct with QoS-related min and max rates
upon VF disable, clear vport->qos struct too.

Fixes: 556b9d16d3f5 ("net/mlx5: Clear VF's configuration on disabling SRIOV")
Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index e8e6294c7cca..6562f4d484e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1408,6 +1408,7 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw)
 	int i;
 
 	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) {
+		memset(&vport->qos, 0, sizeof(vport->qos));
 		memset(&vport->info, 0, sizeof(vport->info));
 		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
 	}

From 470b74758260e4abc2508cf1614573c00a00465c Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Wed, 21 Oct 2020 11:05:41 +0300
Subject: [PATCH 079/115] net/mlx5: Disable QoS when min_rates on all VFs are
 zero
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently when QoS is enabled for VF and any min_rate is configured,
the driver sets bw_share value to at least 1 and doesn’t allow to set
it to 0 to make minimal rate unlimited. It means there is always a
minimal rate configured for every VF, even if user tries to remove it.

In order to make QoS disable possible, check whether all vports have
configured min_rate = 0. If this is true, set their bw_share to 0 to
disable min_rate limitations.

Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate")
Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6562f4d484e6..5ad2308a2a6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2222,12 +2222,15 @@ static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
 		max_guarantee = evport->info.min_rate;
 	}
 
-	return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+	if (max_guarantee)
+		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+	return 0;
 }
 
-static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
+static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
 {
 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+	u32 divider = calculate_vports_min_rate_divider(esw);
 	struct mlx5_vport *evport;
 	u32 vport_max_rate;
 	u32 vport_min_rate;
@@ -2240,9 +2243,9 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider)
 			continue;
 		vport_min_rate = evport->info.min_rate;
 		vport_max_rate = evport->info.max_rate;
-		bw_share = MLX5_MIN_BW_SHARE;
+		bw_share = 0;
 
-		if (vport_min_rate)
+		if (divider)
 			bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
 							 divider,
 							 fw_max_bw_share);
@@ -2267,7 +2270,6 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
 	u32 fw_max_bw_share;
 	u32 previous_min_rate;
-	u32 divider;
 	bool min_rate_supported;
 	bool max_rate_supported;
 	int err = 0;
@@ -2292,8 +2294,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 
 	previous_min_rate = evport->info.min_rate;
 	evport->info.min_rate = min_rate;
-	divider = calculate_vports_min_rate_divider(esw);
-	err = normalize_vports_min_rate(esw, divider);
+	err = normalize_vports_min_rate(esw);
 	if (err) {
 		evport->info.min_rate = previous_min_rate;
 		goto unlock;

From 5b8631c7b21ca8bc039f0bc030048973b039e0d2 Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Mon, 9 Nov 2020 11:35:52 +0200
Subject: [PATCH 080/115] net/mlx5: E-Switch, Fail mlx5_esw_modify_vport_rate
 if qos disabled

Avoid calling mlx5_esw_modify_vport_rate() if qos is not enabled and
avoid unnecessary syndrome messages from firmware.

Fixes: fcb64c0f5640 ("net/mlx5: E-Switch, add ingress rate support")
Signed-off-by: Eli Cohen <elic@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5ad2308a2a6b..d4ee0a9c03db 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1142,6 +1142,10 @@ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 	struct mlx5_vport *vport;
 
 	vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+	if (!vport->qos.enabled)
+		return -EOPNOTSUPP;
+
 	MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
 
 	return mlx5_modify_scheduling_element_cmd(esw->dev,

From 68ec32daf7d50a9f7425f8607a7402c13aa0c587 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Sat, 14 Nov 2020 19:52:23 +0800
Subject: [PATCH 081/115] net/mlx5: fix error return code in
 mlx5e_tc_nic_init()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: aedd133d17bc ("net/mlx5e: Support CT offload for tc nic flows")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 2e2fa0440032..ce710f22b1ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5229,8 +5229,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 
 	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
 				 MLX5_FLOW_NAMESPACE_KERNEL);
-	if (IS_ERR(tc->ct))
+	if (IS_ERR(tc->ct)) {
+		err = PTR_ERR(tc->ct);
 		goto err_ct;
+	}
 
 	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
 	err = register_netdevice_notifier_dev_net(priv->netdev,

From fd5736bf9f235d26c83cac8a16c70bbdafa55abe Mon Sep 17 00:00:00 2001
From: Alex Marginean <alexandru.marginean@nxp.com>
Date: Thu, 12 Nov 2020 20:26:08 +0200
Subject: [PATCH 082/115] enetc: Workaround for MDIO register access issue

Due to a hardware issue, an access to MDIO registers
that is concurrent with other ENETC register accesses
may lead to the MDIO access being dropped or corrupted.
The workaround introduces locking for all register accesses
to the ENETC register space.  To reduce performance impact,
a readers-writers locking scheme has been implemented.
The writer in this case is the MDIO access code (irrelevant
whether that MDIO access is a register read or write), and
the reader is any access code to non-MDIO ENETC registers.
Also, the datapath functions acquire the read lock fewer times
and use _hot accessors.  All the rest of the code uses the _wa
accessors which lock every register access.
The commit introducing MDIO support is -
commit ebfcb23d62ab ("enetc: Add ENETC PF level external MDIO support")
but due to subsequent refactoring this patch is applicable on
top of a later commit.

Fixes: 6517798dd343 ("enetc: Make MDIO accessors more generic and export to include/linux/fsl")
Signed-off-by: Alex Marginean <alexandru.marginean@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Link: https://lore.kernel.org/r/20201112182608.26177-1-claudiu.manoil@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/enetc/Kconfig  |   1 +
 drivers/net/ethernet/freescale/enetc/enetc.c  |  62 +++++++---
 .../net/ethernet/freescale/enetc/enetc_hw.h   | 115 +++++++++++++++++-
 .../net/ethernet/freescale/enetc/enetc_mdio.c |   8 +-
 4 files changed, 161 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 0fa18b00c49b..d99ea0f4e4a6 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -16,6 +16,7 @@ config FSL_ENETC
 config FSL_ENETC_VF
 	tristate "ENETC VF driver"
 	depends on PCI && PCI_MSI
+	select FSL_ENETC_MDIO
 	select PHYLINK
 	select DIMLIB
 	help
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 52be6e315752..fc2075ea57fe 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -33,7 +33,10 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
 		return NETDEV_TX_BUSY;
 	}
 
+	enetc_lock_mdio();
 	count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
+	enetc_unlock_mdio();
+
 	if (unlikely(!count))
 		goto drop_packet_err;
 
@@ -239,7 +242,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
 	skb_tx_timestamp(skb);
 
 	/* let H/W know BD ring has been updated */
-	enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */
+	enetc_wr_reg_hot(tx_ring->tpir, i); /* includes wmb() */
 
 	return count;
 
@@ -262,12 +265,16 @@ static irqreturn_t enetc_msix(int irq, void *data)
 	struct enetc_int_vector	*v = data;
 	int i;
 
+	enetc_lock_mdio();
+
 	/* disable interrupts */
-	enetc_wr_reg(v->rbier, 0);
-	enetc_wr_reg(v->ricr1, v->rx_ictt);
+	enetc_wr_reg_hot(v->rbier, 0);
+	enetc_wr_reg_hot(v->ricr1, v->rx_ictt);
 
 	for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
-		enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0);
+		enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i), 0);
+
+	enetc_unlock_mdio();
 
 	napi_schedule(&v->napi);
 
@@ -334,19 +341,23 @@ static int enetc_poll(struct napi_struct *napi, int budget)
 
 	v->rx_napi_work = false;
 
+	enetc_lock_mdio();
+
 	/* enable interrupts */
-	enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE);
+	enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
 
 	for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
-		enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i),
-			     ENETC_TBIER_TXTIE);
+		enetc_wr_reg_hot(v->tbier_base + ENETC_BDR_OFF(i),
+				 ENETC_TBIER_TXTIE);
+
+	enetc_unlock_mdio();
 
 	return work_done;
 }
 
 static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
 {
-	int pi = enetc_rd_reg(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK;
+	int pi = enetc_rd_reg_hot(tx_ring->tcir) & ENETC_TBCIR_IDX_MASK;
 
 	return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi;
 }
@@ -386,7 +397,10 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 
 	i = tx_ring->next_to_clean;
 	tx_swbd = &tx_ring->tx_swbd[i];
+
+	enetc_lock_mdio();
 	bds_to_clean = enetc_bd_ready_count(tx_ring, i);
+	enetc_unlock_mdio();
 
 	do_tstamp = false;
 
@@ -429,16 +443,20 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 			tx_swbd = tx_ring->tx_swbd;
 		}
 
+		enetc_lock_mdio();
+
 		/* BD iteration loop end */
 		if (is_eof) {
 			tx_frm_cnt++;
 			/* re-arm interrupt source */
-			enetc_wr_reg(tx_ring->idr, BIT(tx_ring->index) |
-				     BIT(16 + tx_ring->index));
+			enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) |
+					 BIT(16 + tx_ring->index));
 		}
 
 		if (unlikely(!bds_to_clean))
 			bds_to_clean = enetc_bd_ready_count(tx_ring, i);
+
+		enetc_unlock_mdio();
 	}
 
 	tx_ring->next_to_clean = i;
@@ -515,8 +533,6 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
 	if (likely(j)) {
 		rx_ring->next_to_alloc = i; /* keep track from page reuse */
 		rx_ring->next_to_use = i;
-		/* update ENETC's consumer index */
-		enetc_wr_reg(rx_ring->rcir, i);
 	}
 
 	return j;
@@ -534,8 +550,8 @@ static void enetc_get_rx_tstamp(struct net_device *ndev,
 	u64 tstamp;
 
 	if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) {
-		lo = enetc_rd(hw, ENETC_SICTR0);
-		hi = enetc_rd(hw, ENETC_SICTR1);
+		lo = enetc_rd_reg_hot(hw->reg + ENETC_SICTR0);
+		hi = enetc_rd_reg_hot(hw->reg + ENETC_SICTR1);
 		rxbd = enetc_rxbd_ext(rxbd);
 		tstamp_lo = le32_to_cpu(rxbd->ext.tstamp);
 		if (lo <= tstamp_lo)
@@ -684,23 +700,31 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 		u32 bd_status;
 		u16 size;
 
+		enetc_lock_mdio();
+
 		if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
 			int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
 
+			/* update ENETC's consumer index */
+			enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
 			cleaned_cnt -= count;
 		}
 
 		rxbd = enetc_rxbd(rx_ring, i);
 		bd_status = le32_to_cpu(rxbd->r.lstatus);
-		if (!bd_status)
+		if (!bd_status) {
+			enetc_unlock_mdio();
 			break;
+		}
 
-		enetc_wr_reg(rx_ring->idr, BIT(rx_ring->index));
+		enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
 		dma_rmb(); /* for reading other rxbd fields */
 		size = le16_to_cpu(rxbd->r.buf_len);
 		skb = enetc_map_rx_buff_to_skb(rx_ring, i, size);
-		if (!skb)
+		if (!skb) {
+			enetc_unlock_mdio();
 			break;
+		}
 
 		enetc_get_offloads(rx_ring, rxbd, skb);
 
@@ -712,6 +736,7 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		if (unlikely(bd_status &
 			     ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
+			enetc_unlock_mdio();
 			dev_kfree_skb(skb);
 			while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
 				dma_rmb();
@@ -751,6 +776,8 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
 		enetc_process_skb(rx_ring, skb);
 
+		enetc_unlock_mdio();
+
 		napi_gro_receive(napi, skb);
 
 		rx_frm_cnt++;
@@ -1225,6 +1252,7 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	rx_ring->idr = hw->reg + ENETC_SIRXIDR;
 
 	enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
+	enetc_wr(hw, ENETC_SIRXIDR, rx_ring->next_to_use);
 
 	/* enable ring */
 	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 17cf7c94fdb5..eb6bbf1113c7 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -324,14 +324,100 @@ struct enetc_hw {
 	void __iomem *global;
 };
 
-/* general register accessors */
-#define enetc_rd_reg(reg)	ioread32((reg))
-#define enetc_wr_reg(reg, val)	iowrite32((val), (reg))
+/* ENETC register accessors */
+
+/* MDIO issue workaround (on LS1028A) -
+ * Due to a hardware issue, an access to MDIO registers
+ * that is concurrent with other ENETC register accesses
+ * may lead to the MDIO access being dropped or corrupted.
+ * To protect the MDIO accesses a readers-writers locking
+ * scheme is used, where the MDIO register accesses are
+ * protected by write locks to insure exclusivity, while
+ * the remaining ENETC registers are accessed under read
+ * locks since they only compete with MDIO accesses.
+ */
+extern rwlock_t enetc_mdio_lock;
+
+/* use this locking primitive only on the fast datapath to
+ * group together multiple non-MDIO register accesses to
+ * minimize the overhead of the lock
+ */
+static inline void enetc_lock_mdio(void)
+{
+	read_lock(&enetc_mdio_lock);
+}
+
+static inline void enetc_unlock_mdio(void)
+{
+	read_unlock(&enetc_mdio_lock);
+}
+
+/* use these accessors only on the fast datapath under
+ * the enetc_lock_mdio() locking primitive to minimize
+ * the overhead of the lock
+ */
+static inline u32 enetc_rd_reg_hot(void __iomem *reg)
+{
+	lockdep_assert_held(&enetc_mdio_lock);
+
+	return ioread32(reg);
+}
+
+static inline void enetc_wr_reg_hot(void __iomem *reg, u32 val)
+{
+	lockdep_assert_held(&enetc_mdio_lock);
+
+	iowrite32(val, reg);
+}
+
+/* internal helpers for the MDIO w/a */
+static inline u32 _enetc_rd_reg_wa(void __iomem *reg)
+{
+	u32 val;
+
+	enetc_lock_mdio();
+	val = ioread32(reg);
+	enetc_unlock_mdio();
+
+	return val;
+}
+
+static inline void _enetc_wr_reg_wa(void __iomem *reg, u32 val)
+{
+	enetc_lock_mdio();
+	iowrite32(val, reg);
+	enetc_unlock_mdio();
+}
+
+static inline u32 _enetc_rd_mdio_reg_wa(void __iomem *reg)
+{
+	unsigned long flags;
+	u32 val;
+
+	write_lock_irqsave(&enetc_mdio_lock, flags);
+	val = ioread32(reg);
+	write_unlock_irqrestore(&enetc_mdio_lock, flags);
+
+	return val;
+}
+
+static inline void _enetc_wr_mdio_reg_wa(void __iomem *reg, u32 val)
+{
+	unsigned long flags;
+
+	write_lock_irqsave(&enetc_mdio_lock, flags);
+	iowrite32(val, reg);
+	write_unlock_irqrestore(&enetc_mdio_lock, flags);
+}
+
 #ifdef ioread64
-#define enetc_rd_reg64(reg)	ioread64((reg))
+static inline u64 _enetc_rd_reg64(void __iomem *reg)
+{
+	return ioread64(reg);
+}
 #else
 /* using this to read out stats on 32b systems */
-static inline u64 enetc_rd_reg64(void __iomem *reg)
+static inline u64 _enetc_rd_reg64(void __iomem *reg)
 {
 	u32 low, high, tmp;
 
@@ -345,12 +431,29 @@ static inline u64 enetc_rd_reg64(void __iomem *reg)
 }
 #endif
 
+static inline u64 _enetc_rd_reg64_wa(void __iomem *reg)
+{
+	u64 val;
+
+	enetc_lock_mdio();
+	val = _enetc_rd_reg64(reg);
+	enetc_unlock_mdio();
+
+	return val;
+}
+
+/* general register accessors */
+#define enetc_rd_reg(reg)		_enetc_rd_reg_wa((reg))
+#define enetc_wr_reg(reg, val)		_enetc_wr_reg_wa((reg), (val))
 #define enetc_rd(hw, off)		enetc_rd_reg((hw)->reg + (off))
 #define enetc_wr(hw, off, val)		enetc_wr_reg((hw)->reg + (off), val)
-#define enetc_rd64(hw, off)		enetc_rd_reg64((hw)->reg + (off))
+#define enetc_rd64(hw, off)		_enetc_rd_reg64_wa((hw)->reg + (off))
 /* port register accessors - PF only */
 #define enetc_port_rd(hw, off)		enetc_rd_reg((hw)->port + (off))
 #define enetc_port_wr(hw, off, val)	enetc_wr_reg((hw)->port + (off), val)
+#define enetc_port_rd_mdio(hw, off)	_enetc_rd_mdio_reg_wa((hw)->port + (off))
+#define enetc_port_wr_mdio(hw, off, val)	_enetc_wr_mdio_reg_wa(\
+							(hw)->port + (off), val)
 /* global register accessors - PF only */
 #define enetc_global_rd(hw, off)	enetc_rd_reg((hw)->global + (off))
 #define enetc_global_wr(hw, off, val)	enetc_wr_reg((hw)->global + (off), val)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
index 48c32a171afa..ee0116ed4738 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
@@ -16,13 +16,13 @@
 
 static inline u32 _enetc_mdio_rd(struct enetc_mdio_priv *mdio_priv, int off)
 {
-	return enetc_port_rd(mdio_priv->hw, mdio_priv->mdio_base + off);
+	return enetc_port_rd_mdio(mdio_priv->hw, mdio_priv->mdio_base + off);
 }
 
 static inline void _enetc_mdio_wr(struct enetc_mdio_priv *mdio_priv, int off,
 				  u32 val)
 {
-	enetc_port_wr(mdio_priv->hw, mdio_priv->mdio_base + off, val);
+	enetc_port_wr_mdio(mdio_priv->hw, mdio_priv->mdio_base + off, val);
 }
 
 #define enetc_mdio_rd(mdio_priv, off) \
@@ -174,3 +174,7 @@ struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs)
 	return hw;
 }
 EXPORT_SYMBOL_GPL(enetc_hw_alloc);
+
+/* Lock for MDIO access errata on LS1028A */
+DEFINE_RWLOCK(enetc_mdio_lock);
+EXPORT_SYMBOL_GPL(enetc_mdio_lock);

From a5bbcbf29089a1252c201b1a7fd38151de355db9 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 15 Nov 2020 10:30:41 +0000
Subject: [PATCH 083/115] netdevsim: set .owner to THIS_MODULE

If THIS_MODULE is not set, the module would be removed while debugfs is
being used.
It eventually makes kernel panic.

Fixes: 82c93a87bf8b ("netdevsim: implement couple of testing devlink health reporters")
Fixes: 424be63ad831 ("netdevsim: add UDP tunnel port offload support")
Fixes: 4418f862d675 ("netdevsim: implement support for devlink region and snapshots")
Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Link: https://lore.kernel.org/r/20201115103041.30701-1-ap420073@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/netdevsim/dev.c         | 2 ++
 drivers/net/netdevsim/health.c      | 1 +
 drivers/net/netdevsim/udp_tunnels.c | 1 +
 3 files changed, 4 insertions(+)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index d07061417675..e7972e88ffe0 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -96,6 +96,7 @@ static const struct file_operations nsim_dev_take_snapshot_fops = {
 	.open = simple_open,
 	.write = nsim_dev_take_snapshot_write,
 	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
 };
 
 static ssize_t nsim_dev_trap_fa_cookie_read(struct file *file,
@@ -188,6 +189,7 @@ static const struct file_operations nsim_dev_trap_fa_cookie_fops = {
 	.read = nsim_dev_trap_fa_cookie_read,
 	.write = nsim_dev_trap_fa_cookie_write,
 	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
 };
 
 static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev)
diff --git a/drivers/net/netdevsim/health.c b/drivers/net/netdevsim/health.c
index 62958b238d50..21e2974660e7 100644
--- a/drivers/net/netdevsim/health.c
+++ b/drivers/net/netdevsim/health.c
@@ -261,6 +261,7 @@ static const struct file_operations nsim_dev_health_break_fops = {
 	.open = simple_open,
 	.write = nsim_dev_health_break_write,
 	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
 };
 
 int nsim_dev_health_init(struct nsim_dev *nsim_dev, struct devlink *devlink)
diff --git a/drivers/net/netdevsim/udp_tunnels.c b/drivers/net/netdevsim/udp_tunnels.c
index 6ab023acefd6..02dc3123eb6c 100644
--- a/drivers/net/netdevsim/udp_tunnels.c
+++ b/drivers/net/netdevsim/udp_tunnels.c
@@ -124,6 +124,7 @@ static const struct file_operations nsim_udp_tunnels_info_reset_fops = {
 	.open = simple_open,
 	.write = nsim_udp_tunnels_info_reset_write,
 	.llseek = generic_file_llseek,
+	.owner = THIS_MODULE,
 };
 
 int nsim_udp_tunnels_info_create(struct nsim_dev *nsim_dev,

From 138559b9f99d3b6b1d5e75c78facc067a23871c6 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Sun, 15 Nov 2020 15:14:48 +0200
Subject: [PATCH 084/115] net/tls: Fix wrong record sn in async mode of device
 resync

In async_resync mode, we log the TCP seq of records until the async request
is completed.  Later, in case one of the logged seqs matches the resync
request, we return it, together with its record serial number.  Before this
fix, we mistakenly returned the serial number of the current record
instead.

Fixes: ed9b7646b06a ("net/tls: Add asynchronous resync")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Boris Pismenny <borisp@nvidia.com>
Link: https://lore.kernel.org/r/20201115131448.2702-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tls.h    | 16 +++++++++++++++-
 net/tls/tls_device.c | 37 +++++++++++++++++++++++++++----------
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index baf1e99d8193..cf1473099453 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -300,7 +300,8 @@ enum tls_offload_sync_type {
 #define TLS_DEVICE_RESYNC_ASYNC_LOGMAX		13
 struct tls_offload_resync_async {
 	atomic64_t req;
-	u32 loglen;
+	u16 loglen;
+	u16 rcd_delta;
 	u32 log[TLS_DEVICE_RESYNC_ASYNC_LOGMAX];
 };
 
@@ -471,6 +472,18 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
 	return (i == -1);
 }
 
+static inline void tls_bigint_subtract(unsigned char *seq, int  n)
+{
+	u64 rcd_sn;
+	__be64 *p;
+
+	BUILD_BUG_ON(TLS_MAX_REC_SEQ_SIZE != 8);
+
+	p = (__be64 *)seq;
+	rcd_sn = be64_to_cpu(*p);
+	*p = cpu_to_be64(rcd_sn - n);
+}
+
 static inline struct tls_context *tls_get_ctx(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -639,6 +652,7 @@ tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len)
 	atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) |
 		     ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC);
 	rx_ctx->resync_async->loglen = 0;
+	rx_ctx->resync_async->rcd_delta = 0;
 }
 
 static inline void
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index cec86229a6a0..54d3e161d198 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -694,36 +694,51 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
 
 static bool
 tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async,
-			   s64 resync_req, u32 *seq)
+			   s64 resync_req, u32 *seq, u16 *rcd_delta)
 {
 	u32 is_async = resync_req & RESYNC_REQ_ASYNC;
 	u32 req_seq = resync_req >> 32;
 	u32 req_end = req_seq + ((resync_req >> 16) & 0xffff);
+	u16 i;
+
+	*rcd_delta = 0;
 
 	if (is_async) {
+		/* shouldn't get to wraparound:
+		 * too long in async stage, something bad happened
+		 */
+		if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX))
+			return false;
+
 		/* asynchronous stage: log all headers seq such that
 		 * req_seq <= seq <= end_seq, and wait for real resync request
 		 */
-		if (between(*seq, req_seq, req_end) &&
+		if (before(*seq, req_seq))
+			return false;
+		if (!after(*seq, req_end) &&
 		    resync_async->loglen < TLS_DEVICE_RESYNC_ASYNC_LOGMAX)
 			resync_async->log[resync_async->loglen++] = *seq;
 
+		resync_async->rcd_delta++;
+
 		return false;
 	}
 
 	/* synchronous stage: check against the logged entries and
 	 * proceed to check the next entries if no match was found
 	 */
-	while (resync_async->loglen) {
-		if (req_seq == resync_async->log[resync_async->loglen - 1] &&
-		    atomic64_try_cmpxchg(&resync_async->req,
-					 &resync_req, 0)) {
-			resync_async->loglen = 0;
+	for (i = 0; i < resync_async->loglen; i++)
+		if (req_seq == resync_async->log[i] &&
+		    atomic64_try_cmpxchg(&resync_async->req, &resync_req, 0)) {
+			*rcd_delta = resync_async->rcd_delta - i;
 			*seq = req_seq;
+			resync_async->loglen = 0;
+			resync_async->rcd_delta = 0;
 			return true;
 		}
-		resync_async->loglen--;
-	}
+
+	resync_async->loglen = 0;
+	resync_async->rcd_delta = 0;
 
 	if (req_seq == *seq &&
 	    atomic64_try_cmpxchg(&resync_async->req,
@@ -741,6 +756,7 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 	u32 sock_data, is_req_pending;
 	struct tls_prot_info *prot;
 	s64 resync_req;
+	u16 rcd_delta;
 	u32 req_seq;
 
 	if (tls_ctx->rx_conf != TLS_HW)
@@ -786,8 +802,9 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 			return;
 
 		if (!tls_device_rx_resync_async(rx_ctx->resync_async,
-						resync_req, &seq))
+						resync_req, &seq, &rcd_delta))
 			return;
+		tls_bigint_subtract(rcd_sn, rcd_delta);
 		break;
 	}
 

From c9c89dcd872ea33327673fcb97398993a1f22736 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 16 Nov 2020 14:27:46 -0800
Subject: [PATCH 085/115] bpf, sockmap: Fix partial copy_page_to_iter so
 progress can still be made

If copy_page_to_iter() fails or even partially completes, but with fewer
bytes copied than expected we currently reset sg.start and return EFAULT.
This proves problematic if we already copied data into the user buffer
before we return an error. Because we leave the copied data in the user
buffer and fail to unwind the scatterlist so kernel side believes data
has been copied and user side believes data has _not_ been received.

Expected behavior should be to return number of bytes copied and then
on the next read we need to return the error assuming its still there. This
can happen if we have a copy length spanning multiple scatterlist elements
and one or more complete before the error is hit.

The error is rare enough though that my normal testing with server side
programs, such as nginx, httpd, envoy, etc., I have never seen this. The
only reliable way to reproduce that I've found is to stream movies over
my browser for a day or so and wait for it to hang. Not very scientific,
but with a few extra WARN_ON()s in the code the bug was obvious.

When we review the errors from copy_page_to_iter() it seems we are hitting
a page fault from copy_page_to_iter_iovec() where the code checks
fault_in_pages_writeable(buf, copy) where buf is the user buffer. It
also seems typical server applications don't hit this case.

The other way to try and reproduce this is run the sockmap selftest tool
test_sockmap with data verification enabled, but it doesn't reproduce the
fault. Perhaps we can trigger this case artificially somehow from the
test tools. I haven't sorted out a way to do that yet though.

Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556566659.73229.15694973114605301063.stgit@john-XPS-13-9370
---
 net/ipv4/tcp_bpf.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 37f4cb2bba5c..8e950b0bfabc 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -15,8 +15,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 {
 	struct iov_iter *iter = &msg->msg_iter;
 	int peek = flags & MSG_PEEK;
-	int i, ret, copied = 0;
 	struct sk_msg *msg_rx;
+	int i, copied = 0;
 
 	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
 					  struct sk_msg, list);
@@ -37,11 +37,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 			page = sg_page(sge);
 			if (copied + copy > len)
 				copy = len - copied;
-			ret = copy_page_to_iter(page, sge->offset, copy, iter);
-			if (ret != copy) {
-				msg_rx->sg.start = i;
-				return -EFAULT;
-			}
+			copy = copy_page_to_iter(page, sge->offset, copy, iter);
+			if (!copy)
+				return copied ? copied : -EFAULT;
 
 			copied += copy;
 			if (likely(!peek)) {
@@ -56,6 +54,11 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 						put_page(page);
 				}
 			} else {
+				/* Lets not optimize peek case if copy_page_to_iter
+				 * didn't copy the entire length lets just break.
+				 */
+				if (copy != sge->length)
+					return copied;
 				sk_msg_iter_var_next(i);
 			}
 

From 36cd0e696a832a00247fca522034703566ac8885 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 16 Nov 2020 14:28:06 -0800
Subject: [PATCH 086/115] bpf, sockmap: Ensure SO_RCVBUF memory is observed on
 ingress redirect

Fix sockmap sk_skb programs so that they observe sk_rcvbuf limits. This
allows users to tune SO_RCVBUF and sockmap will honor them.

We can refactor the if(charge) case out in later patches. But, keep this
fix to the point.

Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path")
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556568657.73229.8404601585878439060.stgit@john-XPS-13-9370
---
 net/core/skmsg.c   | 20 ++++++++++++++++----
 net/ipv4/tcp_bpf.c |  3 ++-
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 654182ecf87b..fe44280c033e 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -170,10 +170,12 @@ static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
 	struct scatterlist *sge = sk_msg_elem(msg, i);
 	u32 len = sge->length;
 
-	if (charge)
-		sk_mem_uncharge(sk, len);
-	if (!msg->skb)
+	/* When the skb owns the memory we free it from consume_skb path. */
+	if (!msg->skb) {
+		if (charge)
+			sk_mem_uncharge(sk, len);
 		put_page(sg_page(sge));
+	}
 	memset(sge, 0, sizeof(*sge));
 	return len;
 }
@@ -403,6 +405,9 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	int copied = 0, num_sge;
 	struct sk_msg *msg;
 
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+		return -EAGAIN;
+
 	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	if (unlikely(!msg))
 		return -EAGAIN;
@@ -418,7 +423,14 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 		return num_sge;
 	}
 
-	sk_mem_charge(sk, skb->len);
+	/* This will transition ownership of the data from the socket where
+	 * the BPF program was run initiating the redirect to the socket
+	 * we will eventually receive this data on. The data will be released
+	 * from skb_consume found in __tcp_bpf_recvmsg() after its been copied
+	 * into user buffers.
+	 */
+	skb_set_owner_r(skb, sk);
+
 	copied = skb->len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 8e950b0bfabc..bc7d2a586e18 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -45,7 +45,8 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 			if (likely(!peek)) {
 				sge->offset += copy;
 				sge->length -= copy;
-				sk_mem_uncharge(sk, copy);
+				if (!msg_rx->skb)
+					sk_mem_uncharge(sk, copy);
 				msg_rx->sg.size -= copy;
 
 				if (!sge->length) {

From 70796fb751f1d34cc650e640572a174faf009cd4 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 16 Nov 2020 14:28:26 -0800
Subject: [PATCH 087/115] bpf, sockmap: Use truesize with sk_rmem_schedule()

We use skb->size with sk_rmem_scheduled() which is not correct. Instead
use truesize to align with socket and tcp stack usage of sk_rmem_schedule.

Suggested-by: Daniel Borkman <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556570616.73229.17003722112077507863.stgit@john-XPS-13-9370
---
 net/core/skmsg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index fe44280c033e..d09426ce4af3 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -411,7 +411,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	if (unlikely(!msg))
 		return -EAGAIN;
-	if (!sk_rmem_schedule(sk, skb, skb->len)) {
+	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
 		kfree(msg);
 		return -EAGAIN;
 	}

From 6fa9201a898983da731fca068bb4b5c941537588 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 16 Nov 2020 14:28:46 -0800
Subject: [PATCH 088/115] bpf, sockmap: Avoid returning unneeded EAGAIN when
 redirecting to self

If a socket redirects to itself and it is under memory pressure it is
possible to get a socket stuck so that recv() returns EAGAIN and the
socket can not advance for some time. This happens because when
redirecting a skb to the same socket we received the skb on we first
check if it is OK to enqueue the skb on the receiving socket by checking
memory limits. But, if the skb is itself the object holding the memory
needed to enqueue the skb we will keep retrying from kernel side
and always fail with EAGAIN. Then userspace will get a recv() EAGAIN
error if there are no skbs in the psock ingress queue. This will continue
until either some skbs get kfree'd causing the memory pressure to
reduce far enough that we can enqueue the pending packet or the
socket is destroyed. In some cases its possible to get a socket
stuck for a noticeable amount of time if the socket is only receiving
skbs from sk_skb verdict programs. To reproduce I make the socket
memory limits ridiculously low so sockets are always under memory
pressure. More often though if under memory pressure it looks like
a spurious EAGAIN error on user space side causing userspace to retry
and typically enough has moved on the memory side that it works.

To fix skip memory checks and skb_orphan if receiving on the same
sock as already assigned.

For SK_PASS cases this is easy, its always the same socket so we
can just omit the orphan/set_owner pair.

For backlog cases we need to check skb->sk and decide if the orphan
and set_owner pair are needed.

Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556572660.73229.12566203819812939627.stgit@john-XPS-13-9370
---
 net/core/skmsg.c | 72 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 53 insertions(+), 19 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index d09426ce4af3..9aed5a2c7c5b 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,38 +399,38 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 }
 EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
+						  struct sk_buff *skb)
 {
-	struct sock *sk = psock->sk;
-	int copied = 0, num_sge;
 	struct sk_msg *msg;
 
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
-		return -EAGAIN;
+		return NULL;
+
+	if (!sk_rmem_schedule(sk, skb, skb->truesize))
+		return NULL;
 
 	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	if (unlikely(!msg))
-		return -EAGAIN;
-	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-		kfree(msg);
-		return -EAGAIN;
-	}
+		return NULL;
 
 	sk_msg_init(msg);
-	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	return msg;
+}
+
+static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					struct sk_psock *psock,
+					struct sock *sk,
+					struct sk_msg *msg)
+{
+	int num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	int copied;
+
 	if (unlikely(num_sge < 0)) {
 		kfree(msg);
 		return num_sge;
 	}
 
-	/* This will transition ownership of the data from the socket where
-	 * the BPF program was run initiating the redirect to the socket
-	 * we will eventually receive this data on. The data will be released
-	 * from skb_consume found in __tcp_bpf_recvmsg() after its been copied
-	 * into user buffers.
-	 */
-	skb_set_owner_r(skb, sk);
-
 	copied = skb->len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
@@ -442,6 +442,40 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	return copied;
 }
 
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sk;
+	struct sk_msg *msg;
+
+	msg = sk_psock_create_ingress_msg(sk, skb);
+	if (!msg)
+		return -EAGAIN;
+
+	/* This will transition ownership of the data from the socket where
+	 * the BPF program was run initiating the redirect to the socket
+	 * we will eventually receive this data on. The data will be released
+	 * from skb_consume found in __tcp_bpf_recvmsg() after its been copied
+	 * into user buffers.
+	 */
+	skb_set_owner_r(skb, sk);
+	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
+/* Puts an skb on the ingress queue of the socket already assigned to the
+ * skb. In this case we do not need to check memory limits or skb_set_owner_r
+ * because the skb is already accounted for here.
+ */
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	struct sock *sk = psock->sk;
+
+	if (unlikely(!msg))
+		return -EAGAIN;
+	sk_msg_init(msg);
+	return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+}
+
 static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			       u32 off, u32 len, bool ingress)
 {
@@ -801,7 +835,7 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 		 * retrying later from workqueue.
 		 */
 		if (skb_queue_empty(&psock->ingress_skb)) {
-			err = sk_psock_skb_ingress(psock, skb);
+			err = sk_psock_skb_ingress_self(psock, skb);
 		}
 		if (err < 0) {
 			skb_queue_tail(&psock->ingress_skb, skb);

From 2443ca66676d50a4eb3305c236bccd84a9828ce2 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 16 Nov 2020 14:29:08 -0800
Subject: [PATCH 089/115] bpf, sockmap: Handle memory acct if skb_verdict prog
 redirects to self

If the skb_verdict_prog redirects an skb knowingly to itself, fix your
BPF program this is not optimal and an abuse of the API please use
SK_PASS. That said there may be cases, such as socket load balancing,
where picking the socket is hashed based or otherwise picks the same
socket it was received on in some rare cases. If this happens we don't
want to confuse userspace giving them an EAGAIN error if we can avoid
it.

To avoid double accounting in these cases. At the moment even if the
skb has already been charged against the sockets rcvbuf and forward
alloc we check it again and do set_owner_r() causing it to be orphaned
and recharged. For one this is useless work, but more importantly we
can have a case where the skb could be put on the ingress queue, but
because we are under memory pressure we return EAGAIN. The trouble
here is the skb has already been accounted for so any rcvbuf checks
include the memory associated with the packet already. This rolls
up and can result in unnecessary EAGAIN errors in userspace read()
calls.

Fix by doing an unlikely check and skipping checks if skb->sk == sk.

Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556574804.73229.11328201020039674147.stgit@john-XPS-13-9370
---
 net/core/skmsg.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9aed5a2c7c5b..514bc9f6f8ae 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -442,11 +442,19 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	return copied;
 }
 
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+
 static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 {
 	struct sock *sk = psock->sk;
 	struct sk_msg *msg;
 
+	/* If we are receiving on the same sock skb->sk is already assigned,
+	 * skip memory accounting and owner transition seeing it already set
+	 * correctly.
+	 */
+	if (unlikely(skb->sk == sk))
+		return sk_psock_skb_ingress_self(psock, skb);
 	msg = sk_psock_create_ingress_msg(sk, skb);
 	if (!msg)
 		return -EAGAIN;

From 4363023d2668e621b0743db351a9555d6e6ea57e Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 16 Nov 2020 14:29:28 -0800
Subject: [PATCH 090/115] bpf, sockmap: Avoid failures from skb_to_sgvec when
 skb has frag_list

When skb has a frag_list its possible for skb_to_sgvec() to fail. This
happens when the scatterlist has fewer elements to store pages than would
be needed for the initial skb plus any of its frags.

This case appears rare, but is possible when running an RX parser/verdict
programs exposed to the internet. Currently, when this happens we throw
an error, break the pipe, and kfree the msg. This effectively breaks the
application or forces it to do a retry.

Lets catch this case and handle it by doing an skb_linearize() on any
skb we receive with frags. At this point skb_to_sgvec should not fail
because the failing conditions would require frags to be in place.

Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/160556576837.73229.14800682790808797635.stgit@john-XPS-13-9370
---
 net/core/skmsg.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 514bc9f6f8ae..25cdbb20f3a0 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -423,9 +423,16 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 					struct sock *sk,
 					struct sk_msg *msg)
 {
-	int num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
-	int copied;
+	int num_sge, copied;
 
+	/* skb linearize may fail with ENOMEM, but lets simply try again
+	 * later if this happens. Under memory pressure we don't want to
+	 * drop the skb. We need to linearize the skb so that the mapping
+	 * in skb_to_sgvec can not error.
+	 */
+	if (skb_linearize(skb))
+		return -EAGAIN;
+	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
 	if (unlikely(num_sge < 0)) {
 		kfree(msg);
 		return num_sge;

From e33de7c5317e2827b2ba6fd120a505e9eb727b05 Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Mon, 16 Nov 2020 16:20:18 +0800
Subject: [PATCH 091/115] inet_diag: Fix error path to cancel the meseage in
 inet_req_diag_fill()

nlmsg_cancel() needs to be called in the error path of
inet_req_diag_fill to cancel the message.

Fixes: d545caca827b ("net: inet: diag: expose the socket mark to privileged processes.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Link: https://lore.kernel.org/r/20201116082018.16496-1-wanghai38@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/inet_diag.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 366a4507b5a3..93474b1bea4e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -479,8 +479,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
 	r->idiag_inode	= 0;
 
 	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
-				     inet_rsk(reqsk)->ir_mark))
+				     inet_rsk(reqsk)->ir_mark)) {
+		nlmsg_cancel(skb, nlh);
 		return -EMSGSIZE;
+	}
 
 	nlmsg_end(skb, nlh);
 	return 0;

From 93be52612431e71ee8cb980ef11468997857e4c4 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dbogdanov@marvell.com>
Date: Mon, 16 Nov 2020 16:29:44 +0300
Subject: [PATCH 092/115] qed: fix ILT configuration of SRC block

The code refactoring of ILT configuration was not complete, the old
unused variables were used for the SRC block. That could lead to the memory
corruption by HW when rx filters are configured.
This patch completes that refactoring.

Fixes: 8a52bbab39c9 (qed: Debug feature: ilt and mdump)
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Link: https://lore.kernel.org/r/20201116132944.2055-1-dbogdanov@marvell.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c | 4 ++--
 drivers/net/ethernet/qlogic/qed/qed_cxt.h | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 0e4cd8890cff..0a22f8ce9a2c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -1647,9 +1647,9 @@ static void qed_src_init_pf(struct qed_hwfn *p_hwfn)
 		     ilog2(rounded_conn_num));
 
 	STORE_RT_REG_AGG(p_hwfn, SRC_REG_FIRSTFREE_RT_OFFSET,
-			 p_hwfn->p_cxt_mngr->first_free);
+			 p_hwfn->p_cxt_mngr->src_t2.first_free);
 	STORE_RT_REG_AGG(p_hwfn, SRC_REG_LASTFREE_RT_OFFSET,
-			 p_hwfn->p_cxt_mngr->last_free);
+			 p_hwfn->p_cxt_mngr->src_t2.last_free);
 }
 
 /* Timers PF */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
index 8b64495f8745..056e79620a0e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
@@ -326,9 +326,6 @@ struct qed_cxt_mngr {
 
 	/* SRC T2 */
 	struct qed_src_t2 src_t2;
-	u32 t2_num_pages;
-	u64 first_free;
-	u64 last_free;
 
 	/* total number of SRQ's for this hwfn */
 	u32 srq_count;

From c09c8a27b9baa417864b9adc3228b10ae5eeec93 Mon Sep 17 00:00:00 2001
From: Florian Klink <flokli@flokli.de>
Date: Sun, 15 Nov 2020 23:45:09 +0100
Subject: [PATCH 093/115] ipv4: use IS_ENABLED instead of ifdef

Checking for ifdef CONFIG_x fails if CONFIG_x=m.

Use IS_ENABLED instead, which is true for both built-ins and modules.

Otherwise, a
> ip -4 route add 1.2.3.4/32 via inet6 fe80::2 dev eth1
fails with the message "Error: IPv6 support not enabled in kernel." if
CONFIG_IPV6 is `m`.

In the spirit of b8127113d01e53adba15b41aefd37b90ed83d631.

Fixes: d15662682db2 ("ipv4: Allow ipv6 gateway with ipv4 routes")
Cc: Kim Phillips <kim.phillips@arm.com>
Signed-off-by: Florian Klink <flokli@flokli.de>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20201115224509.2020651-1-flokli@flokli.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/fib_frontend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 86a23e4a6a50..b87140a1fa28 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -696,7 +696,7 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
 		cfg->fc_gw4 = *((__be32 *)via->rtvia_addr);
 		break;
 	case AF_INET6:
-#ifdef CONFIG_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
 		if (alen != sizeof(struct in6_addr)) {
 			NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_VIA");
 			return -EINVAL;

From 470e14c00c63752466ac44de392f584dfdddd82e Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <extja@kvaser.com>
Date: Sun, 15 Nov 2020 17:30:22 +0100
Subject: [PATCH 094/115] can: kvaser_pciefd: Fix KCAN bittiming limits

Use correct bittiming limits for the KCAN CAN controller.

Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices")
Signed-off-by: Jimmy Assarsson <extja@kvaser.com>
Link: https://lore.kernel.org/r/20201115163027.16851-1-jimmyassarsson@gmail.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/kvaser_pciefd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index 6f766918211a..72acd1ba162d 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -287,12 +287,12 @@ struct kvaser_pciefd_tx_packet {
 static const struct can_bittiming_const kvaser_pciefd_bittiming_const = {
 	.name = KVASER_PCIEFD_DRV_NAME,
 	.tseg1_min = 1,
-	.tseg1_max = 255,
+	.tseg1_max = 512,
 	.tseg2_min = 1,
 	.tseg2_max = 32,
 	.sjw_max = 16,
 	.brp_min = 1,
-	.brp_max = 4096,
+	.brp_max = 8192,
 	.brp_inc = 1,
 };
 

From d003868d7f8579838ed58b6429af91844039b6f8 Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <extja@kvaser.com>
Date: Sun, 15 Nov 2020 17:30:23 +0100
Subject: [PATCH 095/115] can: kvaser_usb: kvaser_usb_hydra: Fix KCAN bittiming
 limits

Use correct bittiming limits for the KCAN CAN controller.

Fixes: aec5fb2268b7 ("can: kvaser_usb: Add support for Kvaser USB hydra family")
Signed-off-by: Jimmy Assarsson <extja@kvaser.com>
Link: https://lore.kernel.org/r/20201115163027.16851-2-jimmyassarsson@gmail.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
index 7ab87a758754..218fadc91155 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
@@ -367,7 +367,7 @@ static const struct can_bittiming_const kvaser_usb_hydra_kcan_bittiming_c = {
 	.tseg2_max = 32,
 	.sjw_max = 16,
 	.brp_min = 1,
-	.brp_max = 4096,
+	.brp_max = 8192,
 	.brp_inc = 1,
 };
 

From cd9f13c59461351d7a5fd07924264fb49b287359 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 18 Nov 2020 16:01:48 +0100
Subject: [PATCH 096/115] can: flexcan: flexcan_chip_start(): fix erroneous
 flexcan_transceiver_enable() during bus-off recovery

If the CAN controller goes into bus off, the do_set_mode() callback with
CAN_MODE_START can be used to recover the controller, which then calls
flexcan_chip_start(). If configured, this is done automatically by the
framework or manually by the user.

In flexcan_chip_start() there is an explicit call to
flexcan_transceiver_enable(), which does a regulator_enable() on the
transceiver regulator. This results in a net usage counter increase, as there
is no corresponding flexcan_transceiver_disable() in the bus off code path.
This further leads to the transceiver stuck enabled, even if the CAN interface
is shut down.

To fix this problem the
flexcan_transceiver_enable()/flexcan_transceiver_disable() are moved out of
flexcan_chip_start()/flexcan_chip_stop() into flexcan_open()/flexcan_close().

Fixes: e955cead0311 ("CAN: Add Flexcan CAN controller driver")
Link: https://lore.kernel.org/r/20201118150148.2664024-1-mkl@pengutronix.de
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index d6a9cf0e9b60..99e5f272205d 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1567,14 +1567,10 @@ static int flexcan_chip_start(struct net_device *dev)
 		priv->write(reg_ctrl2, &regs->ctrl2);
 	}
 
-	err = flexcan_transceiver_enable(priv);
-	if (err)
-		goto out_chip_disable;
-
 	/* synchronize with the can bus */
 	err = flexcan_chip_unfreeze(priv);
 	if (err)
-		goto out_transceiver_disable;
+		goto out_chip_disable;
 
 	priv->can.state = CAN_STATE_ERROR_ACTIVE;
 
@@ -1592,8 +1588,6 @@ static int flexcan_chip_start(struct net_device *dev)
 
 	return 0;
 
- out_transceiver_disable:
-	flexcan_transceiver_disable(priv);
  out_chip_disable:
 	flexcan_chip_disable(priv);
 	return err;
@@ -1623,7 +1617,6 @@ static int __flexcan_chip_stop(struct net_device *dev, bool disable_on_error)
 	priv->write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL,
 		    &regs->ctrl);
 
-	flexcan_transceiver_disable(priv);
 	priv->can.state = CAN_STATE_STOPPED;
 
 	return 0;
@@ -1665,10 +1658,14 @@ static int flexcan_open(struct net_device *dev)
 	if (err)
 		goto out_runtime_put;
 
-	err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev);
+	err = flexcan_transceiver_enable(priv);
 	if (err)
 		goto out_close;
 
+	err = request_irq(dev->irq, flexcan_irq, IRQF_SHARED, dev->name, dev);
+	if (err)
+		goto out_transceiver_disable;
+
 	if (priv->can.ctrlmode & CAN_CTRLMODE_FD)
 		priv->mb_size = sizeof(struct flexcan_mb) + CANFD_MAX_DLEN;
 	else
@@ -1720,6 +1717,8 @@ static int flexcan_open(struct net_device *dev)
 	can_rx_offload_del(&priv->offload);
  out_free_irq:
 	free_irq(dev->irq, dev);
+ out_transceiver_disable:
+	flexcan_transceiver_disable(priv);
  out_close:
 	close_candev(dev);
  out_runtime_put:
@@ -1738,6 +1737,7 @@ static int flexcan_close(struct net_device *dev)
 
 	can_rx_offload_del(&priv->offload);
 	free_irq(dev->irq, dev);
+	flexcan_transceiver_disable(priv);
 
 	close_candev(dev);
 	pm_runtime_put(priv->dev);

From a1f634463aaf2c94dfa13001dbdea011303124cc Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Tue, 15 Sep 2020 16:47:15 +0300
Subject: [PATCH 097/115] can: m_can: process interrupt only when not runtime
 suspended

Avoid processing bogus interrupt statuses when the HW is runtime suspended and
the M_CAN_IR register read may get all bits 1's. Handler can be called if the
interrupt request is shared with other peripherals or at the end of free_irq().

Therefore check the runtime suspended status before processing.

Fixes: cdf8259d6573 ("can: m_can: Add PM Support")
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Link: https://lore.kernel.org/r/20200915134715.696303-1-jarkko.nikula@linux.intel.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index e7264043f79a..f3fc37e96b08 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -956,6 +956,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id)
 	struct net_device_stats *stats = &dev->stats;
 	u32 ir;
 
+	if (pm_runtime_suspended(cdev->dev))
+		return IRQ_NONE;
 	ir = m_can_read(cdev, M_CAN_IR);
 	if (!ir)
 		return IRQ_NONE;

From df8d85d8c69d6837817e54dcb73c84a8b5a13877 Mon Sep 17 00:00:00 2001
From: Filip Moc <dev@moc6.cz>
Date: Tue, 17 Nov 2020 18:36:31 +0100
Subject: [PATCH 098/115] net: usb: qmi_wwan: Set DTR quirk for MR400
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LTE module MR400 embedded in TL-MR6400 v4 requires DTR to be set.

Signed-off-by: Filip Moc <dev@moc6.cz>
Acked-by: Bjørn Mork <bjorn@mork.no>
Link: https://lore.kernel.org/r/20201117173631.GA550981@moc6.cz
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/qmi_wwan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 581ed51abb53..fc378ff56775 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1070,7 +1070,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x05c6, 0x9011, 4)},
 	{QMI_FIXED_INTF(0x05c6, 0x9021, 1)},
 	{QMI_FIXED_INTF(0x05c6, 0x9022, 2)},
-	{QMI_FIXED_INTF(0x05c6, 0x9025, 4)},	/* Alcatel-sbell ASB TL131 TDD LTE  (China Mobile) */
+	{QMI_QUIRK_SET_DTR(0x05c6, 0x9025, 4)},	/* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */
 	{QMI_FIXED_INTF(0x05c6, 0x9026, 3)},
 	{QMI_FIXED_INTF(0x05c6, 0x902e, 5)},
 	{QMI_FIXED_INTF(0x05c6, 0x9031, 5)},

From a5ebcbdf34b65fcc07f38eaf2d60563b42619a59 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Tue, 17 Nov 2020 10:45:05 +0800
Subject: [PATCH 099/115] ah6: fix error return code in ah6_input()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605581105-35295-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/ah6.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index d88d97617f7e..440080da805b 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -588,7 +588,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 	memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
 	memset(ah->auth_data, 0, ahp->icv_trunc_len);
 
-	if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
+	err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN);
+	if (err)
 		goto out_free;
 
 	ip6h->priority    = 0;

From 537a14726582c4e7bfe4dff9cb7fca19dc912cf6 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Tue, 17 Nov 2020 10:55:21 +0800
Subject: [PATCH 100/115] atl1c: fix error return code in atl1c_probe()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 43250ddd75a3 ("atl1c: Atheros L1C Gigabit Ethernet driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605581721-36028-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 0c12cf7bda50..3f65f2b370c5 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2543,8 +2543,8 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * various kernel subsystems to support the mechanics required by a
 	 * fixed-high-32-bit system.
 	 */
-	if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) ||
-	    (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) {
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (err) {
 		dev_err(&pdev->dev, "No usable DMA configuration,aborting\n");
 		goto err_dma;
 	}

From 3a36060bf294e7b7e33c5dddcc4f5d2c1c834e56 Mon Sep 17 00:00:00 2001
From: Zhang Changzhong <zhangchangzhong@huawei.com>
Date: Tue, 17 Nov 2020 10:57:55 +0800
Subject: [PATCH 101/115] atl1e: fix error return code in atl1e_probe()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: a6a5325239c2 ("atl1e: Atheros L1E Gigabit Ethernet driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhang Changzhong <zhangchangzhong@huawei.com>
Link: https://lore.kernel.org/r/1605581875-36281-1-git-send-email-zhangchangzhong@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 098b0328e3cb..ff9f96de74b8 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -2312,8 +2312,8 @@ static int atl1e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * various kernel subsystems to support the mechanics required by a
 	 * fixed-high-32-bit system.
 	 */
-	if ((dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0) ||
-	    (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)) != 0)) {
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (err) {
 		dev_err(&pdev->dev, "No usable DMA configuration,aborting\n");
 		goto err_dma;
 	}

From 1532b9778478577152201adbafa7738b1e844868 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 16 Nov 2020 19:52:34 -0800
Subject: [PATCH 102/115] net: Have netpoll bring-up DSA management interface

DSA network devices rely on having their DSA management interface up and
running otherwise their ndo_open() will return -ENETDOWN. Without doing
this it would not be possible to use DSA devices as netconsole when
configured on the command line. These devices also do not utilize the
upper/lower linking so the check about the netpoll device having upper
is not going to be a problem.

The solution adopted here is identical to the one done for
net/ipv4/ipconfig.c with 728c02089a0e ("net: ipv4: handle DSA enabled
master network devices"), with the network namespace scope being
restricted to that of the process configuring netpoll.

Fixes: 04ff53f96a93 ("net: dsa: Add netconsole support")
Tested-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20201117035236.22658-1-f.fainelli@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/netpoll.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c310c7c1cef7..960948290001 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/if_vlan.h>
+#include <net/dsa.h>
 #include <net/tcp.h>
 #include <net/udp.h>
 #include <net/addrconf.h>
@@ -657,15 +658,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
 
 int netpoll_setup(struct netpoll *np)
 {
-	struct net_device *ndev = NULL;
+	struct net_device *ndev = NULL, *dev = NULL;
+	struct net *net = current->nsproxy->net_ns;
 	struct in_device *in_dev;
 	int err;
 
 	rtnl_lock();
-	if (np->dev_name[0]) {
-		struct net *net = current->nsproxy->net_ns;
+	if (np->dev_name[0])
 		ndev = __dev_get_by_name(net, np->dev_name);
-	}
+
 	if (!ndev) {
 		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
 		err = -ENODEV;
@@ -673,6 +674,19 @@ int netpoll_setup(struct netpoll *np)
 	}
 	dev_hold(ndev);
 
+	/* bring up DSA management network devices up first */
+	for_each_netdev(net, dev) {
+		if (!netdev_uses_dsa(dev))
+			continue;
+
+		err = dev_change_flags(dev, dev->flags | IFF_UP, NULL);
+		if (err < 0) {
+			np_err(np, "%s failed to open %s\n",
+			       np->dev_name, dev->name);
+			goto put;
+		}
+	}
+
 	if (netdev_master_upper_dev_get(ndev)) {
 		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
 		err = -EBUSY;

From fb738b99ef229bd3d25f1b3e5503925dba9b1a7c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 17 Nov 2020 19:33:51 +0200
Subject: [PATCH 103/115] mlxsw: Fix firmware flashing

The commit cited below moved firmware flashing functionality from
mlxsw_spectrum to mlxsw_core, but did not adjust the Kconfig
dependencies. This makes it possible to have mlxsw_core as built-in and
mlxfw as a module. The mlxfw code is therefore not reachable from
mlxsw_core and firmware flashing fails:

# devlink dev flash pci/0000:01:00.0 file mellanox/mlxsw_spectrum-13.2008.1310.mfa2
devlink answers: Operation not supported

Fix by having mlxsw_core select mlxfw.

Fixes: b79cb787ac70 ("mlxsw: Move fw flashing code into core.c")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reported-by: Vadim Pasternak <vadimp@nvidia.com>
Tested-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 872e9910bb7c..a619d90559f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -6,6 +6,7 @@
 config MLXSW_CORE
 	tristate "Mellanox Technologies Switch ASICs support"
 	select NET_DEVLINK
+	select MLXFW
 	help
 	  This driver supports Mellanox Technologies Switch ASICs family.
 
@@ -82,7 +83,6 @@ config MLXSW_SPECTRUM
 	select GENERIC_ALLOCATOR
 	select PARMAN
 	select OBJAGG
-	select MLXFW
 	imply PTP_1588_CLOCK
 	select NET_PTP_CLASSIFY if PTP_1588_CLOCK
 	default m

From 1f492eab67bced119a0ac7db75ef2047e29a30c6 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Tue, 17 Nov 2020 19:33:52 +0200
Subject: [PATCH 104/115] mlxsw: core: Use variable timeout for EMAD retries

The driver sends Ethernet Management Datagram (EMAD) packets to the
device for configuration purposes and waits for up to 200ms for a reply.
A request is retried up to 5 times.

When the system is under heavy load, replies are not always processed in
time and EMAD transactions fail.

Make the process more robust to such delays by using exponential
backoff. First wait for up to 200ms, then retransmit and wait for up to
400ms and so on.

Fixes: caf7297e7ab5 ("mlxsw: core: Introduce support for asynchronous EMAD register access")
Reported-by: Denis Yulevich <denisyu@nvidia.com>
Tested-by: Denis Yulevich <denisyu@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 937b8e46f8c7..1a86535c4968 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -571,7 +571,8 @@ static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans)
 	if (trans->core->fw_flash_in_progress)
 		timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_DURING_FW_FLASH_MS);
 
-	queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw, timeout);
+	queue_delayed_work(trans->core->emad_wq, &trans->timeout_dw,
+			   timeout << trans->retries);
 }
 
 static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core,

From a3dcb3e7e70c72a68a79b30fc3a3adad5612731c Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 16 Nov 2020 08:43:01 -0800
Subject: [PATCH 105/115] net: dsa: mv88e6xxx: Wait for EEPROM done after HW
 reset

When the switch is hardware reset, it reads the contents of the
EEPROM. This can contain instructions for programming values into
registers and to perform waits between such programming. Reading the
EEPROM can take longer than the 100ms mv88e6xxx_hardware_reset() waits
after deasserting the reset GPIO. So poll the EEPROM done bit to
ensure it is complete.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Ruslan Sushko <rus@sushko.dev>
Link: https://lore.kernel.org/r/20201116164301.977661-1-rus@sushko.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mv88e6xxx/chip.c    |  2 ++
 drivers/net/dsa/mv88e6xxx/global1.c | 31 +++++++++++++++++++++++++++++
 drivers/net/dsa/mv88e6xxx/global1.h |  1 +
 3 files changed, 34 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index bd297ae7cf9e..34cca0a4b31c 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2297,6 +2297,8 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip)
 		usleep_range(10000, 20000);
 		gpiod_set_value_cansleep(gpiod, 0);
 		usleep_range(10000, 20000);
+
+		mv88e6xxx_g1_wait_eeprom_done(chip);
 	}
 }
 
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index f62aa83ca08d..33d443a37efc 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -75,6 +75,37 @@ static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip)
 	return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1);
 }
 
+void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip)
+{
+	const unsigned long timeout = jiffies + 1 * HZ;
+	u16 val;
+	int err;
+
+	/* Wait up to 1 second for the switch to finish reading the
+	 * EEPROM.
+	 */
+	while (time_before(jiffies, timeout)) {
+		err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val);
+		if (err) {
+			dev_err(chip->dev, "Error reading status");
+			return;
+		}
+
+		/* If the switch is still resetting, it may not
+		 * respond on the bus, and so MDIO read returns
+		 * 0xffff. Differentiate between that, and waiting for
+		 * the EEPROM to be done by bit 0 being set.
+		 */
+		if (val != 0xffff &&
+		    val & BIT(MV88E6XXX_G1_STS_IRQ_EEPROM_DONE))
+			return;
+
+		usleep_range(1000, 2000);
+	}
+
+	dev_err(chip->dev, "Timeout waiting for EEPROM done");
+}
+
 /* Offset 0x01: Switch MAC Address Register Bytes 0 & 1
  * Offset 0x02: Switch MAC Address Register Bytes 2 & 3
  * Offset 0x03: Switch MAC Address Register Bytes 4 & 5
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 1e3546f8b072..e05abe61fa11 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -278,6 +278,7 @@ int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr);
 int mv88e6185_g1_reset(struct mv88e6xxx_chip *chip);
 int mv88e6352_g1_reset(struct mv88e6xxx_chip *chip);
 int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip);
 
 int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip);
 int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip);

From d8c19014bba8f565d8a2f1f46b4e38d1d97bf1a7 Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang@oracle.com>
Date: Sun, 15 Nov 2020 12:10:29 -0800
Subject: [PATCH 106/115] page_frag: Recover from memory pressure

The ethernet driver may allocate skb (and skb->data) via napi_alloc_skb().
This ends up to page_frag_alloc() to allocate skb->data from
page_frag_cache->va.

During the memory pressure, page_frag_cache->va may be allocated as
pfmemalloc page. As a result, the skb->pfmemalloc is always true as
skb->data is from page_frag_cache->va. The skb will be dropped if the
sock (receiver) does not have SOCK_MEMALLOC. This is expected behaviour
under memory pressure.

However, once kernel is not under memory pressure any longer (suppose large
amount of memory pages are just reclaimed), the page_frag_alloc() may still
re-use the prior pfmemalloc page_frag_cache->va to allocate skb->data. As a
result, the skb->pfmemalloc is always true unless page_frag_cache->va is
re-allocated, even if the kernel is not under memory pressure any longer.

Here is how kernel runs into issue.

1. The kernel is under memory pressure and allocation of
PAGE_FRAG_CACHE_MAX_ORDER in __page_frag_cache_refill() will fail. Instead,
the pfmemalloc page is allocated for page_frag_cache->va.

2: All skb->data from page_frag_cache->va (pfmemalloc) will have
skb->pfmemalloc=true. The skb will always be dropped by sock without
SOCK_MEMALLOC. This is an expected behaviour.

3. Suppose a large amount of pages are reclaimed and kernel is not under
memory pressure any longer. We expect skb->pfmemalloc drop will not happen.

4. Unfortunately, page_frag_alloc() does not proactively re-allocate
page_frag_alloc->va and will always re-use the prior pfmemalloc page. The
skb->pfmemalloc is always true even kernel is not under memory pressure any
longer.

Fix this by freeing and re-allocating the page instead of recycling it.

References: https://lore.kernel.org/lkml/20201103193239.1807-1-dongli.zhang@oracle.com/
References: https://lore.kernel.org/linux-mm/20201105042140.5253-1-willy@infradead.org/
Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Aruna Ramakrishna <aruna.ramakrishna@oracle.com>
Cc: Bert Barbe <bert.barbe@oracle.com>
Cc: Rama Nichanamatlu <rama.nichanamatlu@oracle.com>
Cc: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Cc: Manjunath Patil <manjunath.b.patil@oracle.com>
Cc: Joe Jin <joe.jin@oracle.com>
Cc: SRINIVAS <srinivas.eeda@oracle.com>
Fixes: 79930f5892e1 ("net: do not deplete pfmemalloc reserve")
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20201115201029.11903-1-dongli.zhang@oracle.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 mm/page_alloc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 23f5066bd4a5..eaa227a479e4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5103,6 +5103,11 @@ void *page_frag_alloc(struct page_frag_cache *nc,
 		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
 			goto refill;
 
+		if (unlikely(nc->pfmemalloc)) {
+			free_the_page(page, compound_order(page));
+			goto refill;
+		}
+
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 		/* if size can vary use size else just use PAGE_SIZE */
 		size = nc->size;

From 6dceaa9f56e22d0f9b4c4ad2ed9e04e315ce7fe5 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 16 Nov 2020 17:21:14 +0100
Subject: [PATCH 107/115] atm: nicstar: Unmap DMA on send error

The `skb' is mapped for DMA in ns_send() but does not unmap DMA in case
push_scqe() fails to submit the `skb'. The memory of the `skb' is
released so only the DMA mapping is leaking.

Unmap the DMA mapping in case push_scqe() failed.

Fixes: 864a3ff635fa7 ("atm: [nicstar] remove virt_to_bus() and support 64-bit platforms")
Cc: Chas Williams <3chas3@gmail.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/atm/nicstar.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 7af74fb450a0..09ad73361879 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -1706,6 +1706,8 @@ static int ns_send(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	if (push_scqe(card, vc, scq, &scqe, skb) != 0) {
 		atomic_inc(&vcc->stats->tx_err);
+		dma_unmap_single(&card->pcidev->dev, NS_PRV_DMA(skb), skb->len,
+				 DMA_TO_DEVICE);
 		dev_kfree_skb_any(skb);
 		return -EIO;
 	}

From 6d9c8d15af0ef20a66a0b432cac0d08319920602 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 18 Nov 2020 10:19:22 +0200
Subject: [PATCH 108/115] net/mlx4_core: Fix init_hca fields offset

Slave function read the following capabilities from the wrong offset:
1. log_mc_entry_sz
2. fs_log_entry_sz
3. log_mc_hash_sz

Fix that by adjusting these capabilities offset to match firmware
layout.

Due to the wrong offset read, the following issues might occur:
1+2. Negative value reported at max_mcast_qp_attach.
3. Driver to init FW with multicast hash size of zero.

Fixes: a40ded604365 ("net/mlx4_core: Add masking for a few queries on HCA caps")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Eran Ben Elisha <eranbe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20201118081922.553-1-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c | 6 +++---
 drivers/net/ethernet/mellanox/mlx4/fw.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f6ff9620a137..f6cfec81ccc3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -1864,8 +1864,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define	 INIT_HCA_LOG_RD_OFFSET		 (INIT_HCA_QPC_OFFSET + 0x77)
 #define INIT_HCA_MCAST_OFFSET		 0x0c0
 #define	 INIT_HCA_MC_BASE_OFFSET	 (INIT_HCA_MCAST_OFFSET + 0x00)
-#define	 INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x12)
-#define	 INIT_HCA_LOG_MC_HASH_SZ_OFFSET	 (INIT_HCA_MCAST_OFFSET + 0x16)
+#define	 INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x13)
+#define	 INIT_HCA_LOG_MC_HASH_SZ_OFFSET	 (INIT_HCA_MCAST_OFFSET + 0x17)
 #define  INIT_HCA_UC_STEERING_OFFSET	 (INIT_HCA_MCAST_OFFSET + 0x18)
 #define	 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
 #define  INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN	0x6
@@ -1873,7 +1873,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 #define  INIT_HCA_DRIVER_VERSION_SZ       0x40
 #define  INIT_HCA_FS_PARAM_OFFSET         0x1d0
 #define  INIT_HCA_FS_BASE_OFFSET          (INIT_HCA_FS_PARAM_OFFSET + 0x00)
-#define  INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET  (INIT_HCA_FS_PARAM_OFFSET + 0x12)
+#define  INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET  (INIT_HCA_FS_PARAM_OFFSET + 0x13)
 #define  INIT_HCA_FS_A0_OFFSET		  (INIT_HCA_FS_PARAM_OFFSET + 0x18)
 #define  INIT_HCA_FS_LOG_TABLE_SZ_OFFSET  (INIT_HCA_FS_PARAM_OFFSET + 0x1b)
 #define  INIT_HCA_FS_ETH_BITS_OFFSET      (INIT_HCA_FS_PARAM_OFFSET + 0x21)
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 650ae08c71de..8f020f26ebf5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -182,8 +182,8 @@ struct mlx4_init_hca_param {
 	u64 cmpt_base;
 	u64 mtt_base;
 	u64 global_caps;
-	u16 log_mc_entry_sz;
-	u16 log_mc_hash_sz;
+	u8 log_mc_entry_sz;
+	u8 log_mc_hash_sz;
 	u16 hca_core_clock; /* Internal Clock Frequency (in MHz) */
 	u8  log_num_qps;
 	u8  log_num_srqs;

From 1fd6cee127e2ddff36d648573d7566aafb0d0b77 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 18 Nov 2020 22:13:50 +0100
Subject: [PATCH 109/115] libbpf: Fix VERSIONED_SYM_COUNT number parsing

We remove "other info" from "readelf -s --wide" output when
parsing GLOBAL_SYM_COUNT variable, which was added in [1].
But we don't do that for VERSIONED_SYM_COUNT and it's failing
the check_abi target on powerpc Fedora 33.

The extra "other info" wasn't problem for VERSIONED_SYM_COUNT
parsing until commit [2] added awk in the pipe, which assumes
that the last column is symbol, but it can be "other info".

Adding "other info" removal for VERSIONED_SYM_COUNT the same
way as we did for GLOBAL_SYM_COUNT parsing.

[1] aa915931ac3e ("libbpf: Fix readelf output parsing for Fedora")
[2] 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check")

Fixes: 746f534a4809 ("tools/libbpf: Avoid counting local symbols in ABI check")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20201118211350.1493421-1-jolsa@kernel.org
---
 tools/lib/bpf/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 5f9abed3e226..55bd78b3496f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -146,6 +146,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			   sort -u | wc -l)
 VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+			      sed 's/\[.*\]//' | \
 			      awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
@@ -214,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
 		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
 		readelf --dyn-syms --wide $(OUTPUT)libbpf.so |		 \
+		    sed 's/\[.*\]//' |					 \
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
 		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
 		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \

From 2d8f6481c17db9fa5238b277cdbc392084060b09 Mon Sep 17 00:00:00 2001
From: Georg Kohmann <geokohma@cisco.com>
Date: Thu, 19 Nov 2020 10:58:33 +0100
Subject: [PATCH 110/115] ipv6: Remove dependency of ipv6_frag_thdr_truncated
 on ipv6 module

IPV6=m
NF_DEFRAG_IPV6=y

ld: net/ipv6/netfilter/nf_conntrack_reasm.o: in function
`nf_ct_frag6_gather':
net/ipv6/netfilter/nf_conntrack_reasm.c:462: undefined reference to
`ipv6_frag_thdr_truncated'

Netfilter is depending on ipv6 symbol ipv6_frag_thdr_truncated. This
dependency is forcing IPV6=y.

Remove this dependency by moving ipv6_frag_thdr_truncated out of ipv6. This
is the same solution as used with a similar issues: Referring to
commit 70b095c843266 ("ipv6: remove dependency of nf_defrag_ipv6 on ipv6
module")

Fixes: 9d9e937b1c8b ("ipv6/netfilter: Discard first fragment not including all headers")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Georg Kohmann <geokohma@cisco.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Link: https://lore.kernel.org/r/20201119095833.8409-1-geokohma@cisco.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/ipv6.h                      |  2 --
 include/net/ipv6_frag.h                 | 30 ++++++++++++++++++++++++
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/reassembly.c                   | 31 +------------------------
 4 files changed, 32 insertions(+), 33 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 637cc6dd12b7..bd1f396cc9c7 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1064,8 +1064,6 @@ int ipv6_skip_exthdr(const struct sk_buff *, int start, u8 *nexthdrp,
 
 bool ipv6_ext_hdr(u8 nexthdr);
 
-bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp);
-
 enum {
 	IP6_FH_F_FRAG		= (1 << 0),
 	IP6_FH_F_AUTH		= (1 << 1),
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index a21e8b1381a1..851029ecff13 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -108,5 +108,35 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
 	rcu_read_unlock();
 	inet_frag_put(&fq->q);
 }
+
+/* Check if the upper layer header is truncated in the first fragment. */
+static inline bool
+ipv6frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp)
+{
+	u8 nexthdr = *nexthdrp;
+	__be16 frag_off;
+	int offset;
+
+	offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off);
+	if (offset < 0 || (frag_off & htons(IP6_OFFSET)))
+		return false;
+	switch (nexthdr) {
+	case NEXTHDR_TCP:
+		offset += sizeof(struct tcphdr);
+		break;
+	case NEXTHDR_UDP:
+		offset += sizeof(struct udphdr);
+		break;
+	case NEXTHDR_ICMP:
+		offset += sizeof(struct icmp6hdr);
+		break;
+	default:
+		offset += 1;
+	}
+	if (offset > skb->len)
+		return true;
+	return false;
+}
+
 #endif
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b9cc0b330dbe..c129ad334eb3 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -459,7 +459,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	/* Discard the first fragment if it does not include all headers
 	 * RFC 8200, Section 4.5
 	 */
-	if (ipv6_frag_thdr_truncated(skb, fhoff, &nexthdr)) {
+	if (ipv6frag_thdr_truncated(skb, fhoff, &nexthdr)) {
 		pr_debug("Drop incomplete fragment\n");
 		return 0;
 	}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e3869bac9c88..47a0dc46cbdb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -318,35 +318,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 	return -1;
 }
 
-/* Check if the upper layer header is truncated in the first fragment. */
-bool ipv6_frag_thdr_truncated(struct sk_buff *skb, int start, u8 *nexthdrp)
-{
-	u8 nexthdr = *nexthdrp;
-	__be16 frag_off;
-	int offset;
-
-	offset = ipv6_skip_exthdr(skb, start, &nexthdr, &frag_off);
-	if (offset < 0 || (frag_off & htons(IP6_OFFSET)))
-		return false;
-	switch (nexthdr) {
-	case NEXTHDR_TCP:
-		offset += sizeof(struct tcphdr);
-		break;
-	case NEXTHDR_UDP:
-		offset += sizeof(struct udphdr);
-		break;
-	case NEXTHDR_ICMP:
-		offset += sizeof(struct icmp6hdr);
-		break;
-	default:
-		offset += 1;
-	}
-	if (offset > skb->len)
-		return true;
-	return false;
-}
-EXPORT_SYMBOL(ipv6_frag_thdr_truncated);
-
 static int ipv6_frag_rcv(struct sk_buff *skb)
 {
 	struct frag_hdr *fhdr;
@@ -390,7 +361,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	 * the source of the fragment, with the Pointer field set to zero.
 	 */
 	nexthdr = hdr->nexthdr;
-	if (ipv6_frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
+	if (ipv6frag_thdr_truncated(skb, skb_transport_offset(skb), &nexthdr)) {
 		__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 				IPSTATS_MIB_INHDRERRORS);
 		icmpv6_param_prob(skb, ICMPV6_HDR_INCOMP, 0);

From 0530bd6e6a3d5a0292a1a5f33ea980ae7e8b56ca Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 18 Nov 2020 22:40:37 +0100
Subject: [PATCH 111/115] net/smc: fix matching of existing link groups

With the multi-subnet support of SMC-Dv2 the match for existing link
groups should not include the vlanid of the network device.
Set ini->smcd_version accordingly before the call to smc_conn_create()
and use this value in smc_conn_create() to skip the vlanid check.

Fixes: 5c21c4ccafe8 ("net/smc: determine accepted ISM devices")
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/smc/af_smc.c   | 3 ++-
 net/smc/smc_core.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e9f487c8c6d5..5dd4faaf7d6e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -979,7 +979,8 @@ static int __smc_connect(struct smc_sock *smc)
 
 	/* check if smc modes and versions of CLC proposal and accept match */
 	rc = smc_connect_check_aclc(ini, aclc);
-	version = aclc->hdr.version == SMC_V1 ? SMC_V1 : version;
+	version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
+	ini->smcd_version = version;
 	if (rc)
 		goto vlan_cleanup;
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2b19863f7171..af96f813c075 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1309,7 +1309,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 				    ini->ism_peer_gid[ini->ism_selected]) :
 		     smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
 		    !lgr->sync_err &&
-		    lgr->vlan_id == ini->vlan_id &&
+		    (ini->smcd_version == SMC_V2 ||
+		     lgr->vlan_id == ini->vlan_id) &&
 		    (role == SMC_CLNT || ini->is_smcd ||
 		     lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
 			/* link group found */

From 41a0be3f8f6be893860b991eb10c47fc3ee09d7f Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 18 Nov 2020 22:40:38 +0100
Subject: [PATCH 112/115] net/smc: fix direct access to ib_gid_addr->ndev in
 smc_ib_determine_gid()

Sparse complaints 3 times about:
net/smc/smc_ib.c:203:52: warning: incorrect type in argument 1 (different address spaces)
net/smc/smc_ib.c:203:52:    expected struct net_device const *dev
net/smc/smc_ib.c:203:52:    got struct net_device [noderef] __rcu *const ndev

Fix that by using the existing and validated ndev variable instead of
accessing attr->ndev directly.

Fixes: 5102eca9039b ("net/smc: Use rdma_read_gid_l2_fields to L2 fields")
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/smc/smc_ib.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 1c314dbdc7fa..fc766b537ac7 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -198,9 +198,9 @@ int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
 		rcu_read_lock();
 		ndev = rdma_read_gid_attr_ndev_rcu(attr);
 		if (!IS_ERR(ndev) &&
-		    ((!vlan_id && !is_vlan_dev(attr->ndev)) ||
-		     (vlan_id && is_vlan_dev(attr->ndev) &&
-		      vlan_dev_vlan_id(attr->ndev) == vlan_id)) &&
+		    ((!vlan_id && !is_vlan_dev(ndev)) ||
+		     (vlan_id && is_vlan_dev(ndev) &&
+		      vlan_dev_vlan_id(ndev) == vlan_id)) &&
 		    attr->gid_type == IB_GID_TYPE_ROCE) {
 			rcu_read_unlock();
 			if (gid)

From 6fa6d28051e9fcaa1570e69648ea13a353a5d218 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Tue, 17 Nov 2020 12:05:45 -0800
Subject: [PATCH 113/115] lib/strncpy_from_user.c: Mask out bytes after NUL
 terminator.

do_strncpy_from_user() may copy some extra bytes after the NUL
terminator into the destination buffer. This usually does not matter for
normal string operations. However, when BPF programs key BPF maps with
strings, this matters a lot.

A BPF program may read strings from user memory by calling the
bpf_probe_read_user_str() helper which eventually calls
do_strncpy_from_user(). The program can then key a map with the
destination buffer. BPF map keys are fixed-width and string-agnostic,
meaning that map keys are treated as a set of bytes.

The issue is when do_strncpy_from_user() overcopies bytes after the NUL
terminator, it can result in seemingly identical strings occupying
multiple slots in a BPF map. This behavior is subtle and totally
unexpected by the user.

This commit masks out the bytes following the NUL while preserving
long-sized stride in the fast path.

Fixes: 6ae08ae3dea2 ("bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers")
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/21efc982b3e9f2f7b0379eed642294caaa0c27a7.1605642949.git.dxu@dxuuu.xyz
---
 kernel/trace/bpf_trace.c | 10 ++++++++++
 lib/strncpy_from_user.c  | 19 +++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 5113fd423cdf..048c655315f1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size,
 {
 	int ret;
 
+	/*
+	 * NB: We rely on strncpy_from_user() not copying junk past the NUL
+	 * terminator into `dst`.
+	 *
+	 * strncpy_from_user() does long-sized strides in the fast path. If the
+	 * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
+	 * then there could be junk after the NUL in `dst`. If user takes `dst`
+	 * and keys a hash map with it, then semantically identical strings can
+	 * occupy multiple entries in the map.
+	 */
 	ret = strncpy_from_user_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index e6d5fcc2cdf3..122d8d0e253c 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -35,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src,
 		goto byte_at_a_time;
 
 	while (max >= sizeof(unsigned long)) {
-		unsigned long c, data;
+		unsigned long c, data, mask;
 
 		/* Fall back to byte-at-a-time if we get a page fault */
 		unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time);
 
-		*(unsigned long *)(dst+res) = c;
+		/*
+		 * Note that we mask out the bytes following the NUL. This is
+		 * important to do because string oblivious code may read past
+		 * the NUL. For those routines, we don't want to give them
+		 * potentially random bytes after the NUL in `src`.
+		 *
+		 * One example of such code is BPF map keys. BPF treats map keys
+		 * as an opaque set of bytes. Without the post-NUL mask, any BPF
+		 * maps keyed by strings returned from strncpy_from_user() may
+		 * have multiple entries for semantically identical strings.
+		 */
 		if (has_zero(c, &data, &constants)) {
 			data = prep_zero_mask(c, data, &constants);
 			data = create_zero_mask(data);
+			mask = zero_bytemask(data);
+			*(unsigned long *)(dst+res) = c & mask;
 			return res + find_zero(data);
 		}
+
+		*(unsigned long *)(dst+res) = c;
+
 		res += sizeof(unsigned long);
 		max -= sizeof(unsigned long);
 	}

From c8a36aedf3e24768e94d87fdcdd37684bd241c44 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Tue, 17 Nov 2020 12:05:46 -0800
Subject: [PATCH 114/115] selftest/bpf: Test bpf_probe_read_user_str() strips
 trailing bytes after NUL

Previously, bpf_probe_read_user_str() could potentially overcopy the
trailing bytes after the NUL due to how do_strncpy_from_user() does the
copy in long-sized strides. The issue has been fixed in the previous
commit.

This commit adds a selftest that ensures we don't regress
bpf_probe_read_user_str() again.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/4d977508fab4ec5b7b574b85bdf8b398868b6ee9.1605642949.git.dxu@dxuuu.xyz
---
 .../bpf/prog_tests/probe_read_user_str.c      | 71 +++++++++++++++++++
 .../bpf/progs/test_probe_read_user_str.c      | 25 +++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_probe_read_user_str.c

diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
new file mode 100644
index 000000000000..e419298132b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_probe_read_user_str.skel.h"
+
+static const char str1[] = "mestring";
+static const char str2[] = "mestringalittlebigger";
+static const char str3[] = "mestringblubblubblubblubblub";
+
+static int test_one_str(struct test_probe_read_user_str *skel, const char *str,
+			size_t len)
+{
+	int err, duration = 0;
+	char buf[256];
+
+	/* Ensure bytes after string are ones */
+	memset(buf, 1, sizeof(buf));
+	memcpy(buf, str, len);
+
+	/* Give prog our userspace pointer */
+	skel->bss->user_ptr = buf;
+
+	/* Trigger tracepoint */
+	usleep(1);
+
+	/* Did helper fail? */
+	if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n",
+		  skel->bss->ret))
+		return 1;
+
+	/* Check that string was copied correctly */
+	err = memcmp(skel->bss->buf, str, len);
+	if (CHECK(err, "memcmp", "prog copied wrong string"))
+		return 1;
+
+	/* Now check that no extra trailing bytes were copied */
+	memset(buf, 0, sizeof(buf));
+	err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len);
+	if (CHECK(err, "memcmp", "trailing bytes were not stripped"))
+		return 1;
+
+	return 0;
+}
+
+void test_probe_read_user_str(void)
+{
+	struct test_probe_read_user_str *skel;
+	int err, duration = 0;
+
+	skel = test_probe_read_user_str__open_and_load();
+	if (CHECK(!skel, "test_probe_read_user_str__open_and_load",
+		  "skeleton open and load failed\n"))
+		return;
+
+	/* Give pid to bpf prog so it doesn't read from anyone else */
+	skel->bss->pid = getpid();
+
+	err = test_probe_read_user_str__attach(skel);
+	if (CHECK(err, "test_probe_read_user_str__attach",
+		  "skeleton attach failed: %d\n", err))
+		goto out;
+
+	if (test_one_str(skel, str1, sizeof(str1)))
+		goto out;
+	if (test_one_str(skel, str2, sizeof(str2)))
+		goto out;
+	if (test_one_str(skel, str3, sizeof(str3)))
+		goto out;
+
+out:
+	test_probe_read_user_str__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c
new file mode 100644
index 000000000000..3ae398b75dcd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include <sys/types.h>
+
+pid_t pid = 0;
+long ret = 0;
+void *user_ptr = 0;
+char buf[256] = {};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int on_write(void *ctx)
+{
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";

From 2801a5da5b25b7af9dd2addd19b2315c02d17b64 Mon Sep 17 00:00:00 2001
From: Luo Meng <luomeng12@huawei.com>
Date: Wed, 18 Nov 2020 22:49:31 +0900
Subject: [PATCH 115/115] fail_function: Remove a redundant mutex unlock

Fix a mutex_unlock() issue where before copy_from_user() is
not called mutex_locked.

Fixes: 4b1a29a7f542 ("error-injection: Support fault injection framework")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Luo Meng <luomeng12@huawei.com>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/bpf/160570737118.263807.8358435412898356284.stgit@devnote2
---
 kernel/fail_function.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/fail_function.c b/kernel/fail_function.c
index 63b349168da7..b0b1ad93fa95 100644
--- a/kernel/fail_function.c
+++ b/kernel/fail_function.c
@@ -253,7 +253,7 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
 
 	if (copy_from_user(buf, buffer, count)) {
 		ret = -EFAULT;
-		goto out;
+		goto out_free;
 	}
 	buf[count] = '\0';
 	sym = strstrip(buf);
@@ -307,8 +307,9 @@ static ssize_t fei_write(struct file *file, const char __user *buffer,
 		ret = count;
 	}
 out:
-	kfree(buf);
 	mutex_unlock(&fei_lock);
+out_free:
+	kfree(buf);
 	return ret;
 }