From 327eaf738ff97d19491362e30497954105d60414 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 12 Jun 2018 23:34:57 -0400
Subject: [PATCH 001/382] ext4: add warn_on_error mount option

This is very handy when debugging bugs handling maliciously corrupted
file systems.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h  |  1 +
 fs/ext4/super.c | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fa52b7dd4542..856b6a54d82b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1108,6 +1108,7 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
+#define EXT4_MOUNT_WARN_ON_ERROR	0x2000000 /* Trigger WARN_ON on error */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c1c5c8775ae7..c8b7b8302e90 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -405,6 +405,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 
 static void ext4_handle_error(struct super_block *sb)
 {
+	if (test_opt(sb, WARN_ON_ERROR))
+		WARN_ON_ONCE(1);
+
 	if (sb_rdonly(sb))
 		return;
 
@@ -740,6 +743,9 @@ __acquires(bitlock)
 		va_end(args);
 	}
 
+	if (test_opt(sb, WARN_ON_ERROR))
+		WARN_ON_ONCE(1);
+
 	if (test_opt(sb, ERRORS_CONT)) {
 		ext4_commit_super(sb, 0);
 		return;
@@ -1377,7 +1383,8 @@ enum {
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
 	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
 	Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
-	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
+	Opt_nowarn_on_error, Opt_mblk_io_submit,
 	Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
 	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio,
@@ -1444,6 +1451,8 @@ static const match_table_t tokens = {
 	{Opt_dax, "dax"},
 	{Opt_stripe, "stripe=%u"},
 	{Opt_delalloc, "delalloc"},
+	{Opt_warn_on_error, "warn_on_error"},
+	{Opt_nowarn_on_error, "nowarn_on_error"},
 	{Opt_lazytime, "lazytime"},
 	{Opt_nolazytime, "nolazytime"},
 	{Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"},
@@ -1608,6 +1617,8 @@ static const struct mount_opts {
 	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
 	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
 	 MOPT_EXT4_ONLY | MOPT_CLEAR},
+	{Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
+	{Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
 	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
 	 MOPT_EXT4_ONLY | MOPT_CLEAR},
 	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,

From 5369a762c882c0b6e9599e4ebbb3a9ba9eee7e2d Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 00:23:11 -0400
Subject: [PATCH 002/382] ext4: add corruption check in ext4_xattr_set_entry()

In theory this should have been caught earlier when the xattr list was
verified, but in case it got missed, it's simple enough to add check
to make sure we don't overrun the xattr buffer.

This addresses CVE-2018-10879.

https://bugzilla.kernel.org/show_bug.cgi?id=200001

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Cc: stable@kernel.org
---
 fs/ext4/xattr.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fc4ced59c565..230ba79715f6 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1560,7 +1560,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
 				handle_t *handle, struct inode *inode,
 				bool is_block)
 {
-	struct ext4_xattr_entry *last;
+	struct ext4_xattr_entry *last, *next;
 	struct ext4_xattr_entry *here = s->here;
 	size_t min_offs = s->end - s->base, name_len = strlen(i->name);
 	int in_inode = i->in_inode;
@@ -1595,7 +1595,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
 
 	/* Compute min_offs and last. */
 	last = s->first;
-	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+	for (; !IS_LAST_ENTRY(last); last = next) {
+		next = EXT4_XATTR_NEXT(last);
+		if ((void *)next >= s->end) {
+			EXT4_ERROR_INODE(inode, "corrupted xattr entries");
+			ret = -EFSCORRUPTED;
+			goto out;
+		}
 		if (!last->e_value_inum && last->e_value_size) {
 			size_t offs = le16_to_cpu(last->e_value_offs);
 			if (offs < min_offs)

From 513f86d73855ce556ea9522b6bfd79f87356dc3a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 00:51:28 -0400
Subject: [PATCH 003/382] ext4: always verify the magic number in xattr blocks

If there an inode points to a block which is also some other type of
metadata block (such as a block allocation bitmap), the
buffer_verified flag can be set when it was validated as that other
metadata block type; however, it would make a really terrible external
attribute block.  The reason why we use the verified flag is to avoid
constantly reverifying the block.  However, it doesn't take much
overhead to make sure the magic number of the xattr block is correct,
and this will avoid potential crashes.

This addresses CVE-2018-10879.

https://bugzilla.kernel.org/show_bug.cgi?id=200001

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Cc: stable@kernel.org
---
 fs/ext4/xattr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 230ba79715f6..0263692979ec 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -230,12 +230,12 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
 {
 	int error = -EFSCORRUPTED;
 
-	if (buffer_verified(bh))
-		return 0;
-
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1))
 		goto errout;
+	if (buffer_verified(bh))
+		return 0;
+
 	error = -EFSBADCRC;
 	if (!ext4_xattr_block_csum_verify(inode, bh))
 		goto errout;

From 819b23f1c501b17b9694325471789e6b5cc2d0d2 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 23:00:48 -0400
Subject: [PATCH 004/382] ext4: always check block group bounds in
 ext4_init_block_bitmap()

Regardless of whether the flex_bg feature is set, we should always
check to make sure the bits we are setting in the block bitmap are
within the block group bounds.

https://bugzilla.kernel.org/show_bug.cgi?id=199865

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/balloc.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b00481c475cb..8a2e202ade8a 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -184,7 +184,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 	unsigned int bit, bit_max;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t start, tmp;
-	int flex_bg = 0;
 
 	J_ASSERT_BH(bh, buffer_locked(bh));
 
@@ -207,22 +206,19 @@ static int ext4_init_block_bitmap(struct super_block *sb,
 
 	start = ext4_group_first_block_no(sb, block_group);
 
-	if (ext4_has_feature_flex_bg(sb))
-		flex_bg = 1;
-
 	/* Set bits for block and inode bitmaps, and inode table */
 	tmp = ext4_block_bitmap(sb, gdp);
-	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+	if (ext4_block_in_group(sb, tmp, block_group))
 		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
 	tmp = ext4_inode_bitmap(sb, gdp);
-	if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+	if (ext4_block_in_group(sb, tmp, block_group))
 		ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 
 	tmp = ext4_inode_table(sb, gdp);
 	for (; tmp < ext4_inode_table(sb, gdp) +
 		     sbi->s_itb_per_group; tmp++) {
-		if (!flex_bg || ext4_block_in_group(sb, tmp, block_group))
+		if (ext4_block_in_group(sb, tmp, block_group))
 			ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
 	}
 

From 77260807d1170a8cf35dbb06e07461a655f67eee Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 23:08:26 -0400
Subject: [PATCH 005/382] ext4: make sure bitmaps and the inode table don't
 overlap with bg descriptors

It's really bad when the allocation bitmaps and the inode table
overlap with the block group descriptors, since it causes random
corruption of the bg descriptors.  So we really want to head those off
at the pass.

https://bugzilla.kernel.org/show_bug.cgi?id=199865

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/super.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c8b7b8302e90..c61675d62195 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2348,6 +2348,7 @@ static int ext4_check_descriptors(struct super_block *sb,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
 	ext4_fsblk_t last_block;
+	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1;
 	ext4_fsblk_t block_bitmap;
 	ext4_fsblk_t inode_bitmap;
 	ext4_fsblk_t inode_table;
@@ -2380,6 +2381,14 @@ static int ext4_check_descriptors(struct super_block *sb,
 			if (!sb_rdonly(sb))
 				return 0;
 		}
+		if (block_bitmap >= sb_block + 1 &&
+		    block_bitmap <= last_bg_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Block bitmap for group %u overlaps "
+				 "block group descriptors", i);
+			if (!sb_rdonly(sb))
+				return 0;
+		}
 		if (block_bitmap < first_block || block_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Block bitmap for group %u not in group "
@@ -2394,6 +2403,14 @@ static int ext4_check_descriptors(struct super_block *sb,
 			if (!sb_rdonly(sb))
 				return 0;
 		}
+		if (inode_bitmap >= sb_block + 1 &&
+		    inode_bitmap <= last_bg_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Inode bitmap for group %u overlaps "
+				 "block group descriptors", i);
+			if (!sb_rdonly(sb))
+				return 0;
+		}
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode bitmap for group %u not in group "
@@ -2408,6 +2425,14 @@ static int ext4_check_descriptors(struct super_block *sb,
 			if (!sb_rdonly(sb))
 				return 0;
 		}
+		if (inode_table >= sb_block + 1 &&
+		    inode_table <= last_bg_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Inode table for group %u overlaps "
+				 "block group descriptors", i);
+			if (!sb_rdonly(sb))
+				return 0;
+		}
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "

From 8844618d8aa7a9973e7b527d038a2a589665002c Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 Jun 2018 00:58:00 -0400
Subject: [PATCH 006/382] ext4: only look at the bg_flags field if it is valid

The bg_flags field in the block group descripts is only valid if the
uninit_bg or metadata_csum feature is enabled.  We were not
consistently looking at this field; fix this.

Also block group #0 must never have uninitialized allocation bitmaps,
or need to be zeroed, since that's where the root inode, and other
special inodes are set up.  Check for these conditions and mark the
file system as corrupted if they are detected.

This addresses CVE-2018-10876.

https://bugzilla.kernel.org/show_bug.cgi?id=199403

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/balloc.c  | 11 ++++++++++-
 fs/ext4/ialloc.c  | 14 ++++++++++++--
 fs/ext4/mballoc.c |  6 ++++--
 fs/ext4/super.c   | 11 ++++++++++-
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8a2e202ade8a..e68cefe08261 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -438,7 +438,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 		goto verify;
 	}
 	ext4_lock_group(sb, block_group);
-	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+		if (block_group == 0) {
+			ext4_unlock_group(sb, block_group);
+			unlock_buffer(bh);
+			ext4_error(sb, "Block bitmap for bg 0 marked "
+				   "uninitialized");
+			err = -EFSCORRUPTED;
+			goto out;
+		}
 		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4d6e007f3569..da6c10c1e37a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -150,7 +150,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	}
 
 	ext4_lock_group(sb, block_group);
-	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) {
+		if (block_group == 0) {
+			ext4_unlock_group(sb, block_group);
+			unlock_buffer(bh);
+			ext4_error(sb, "Inode bitmap for bg 0 marked "
+				   "uninitialized");
+			err = -EFSCORRUPTED;
+			goto out;
+		}
 		memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
 		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
 				     sb->s_blocksize * 8, bh->b_data);
@@ -994,7 +1003,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 
 		/* recheck and clear flag under lock if we still need to */
 		ext4_lock_group(sb, group);
-		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+		if (ext4_has_group_desc_csum(sb) &&
+		    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
 			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 			ext4_free_group_clusters_set(sb, gdp,
 				ext4_free_clusters_after_init(sb, group, gdp));
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 243c42fdc155..402c769c51ea 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2444,7 +2444,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	 * initialize bb_free to be able to skip
 	 * empty groups without initialization
 	 */
-	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
 		meta_group_info[i]->bb_free =
 			ext4_free_clusters_after_init(sb, group, desc);
 	} else {
@@ -3010,7 +3011,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 #endif
 	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
 		      ac->ac_b_ex.fe_len);
-	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+	if (ext4_has_group_desc_csum(sb) &&
+	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_group_clusters_set(sb, gdp,
 					     ext4_free_clusters_after_init(sb,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c61675d62195..4d34430d75f6 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3139,13 +3139,22 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
 	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
 	struct ext4_group_desc *gdp = NULL;
 
+	if (!ext4_has_group_desc_csum(sb))
+		return ngroups;
+
 	for (group = 0; group < ngroups; group++) {
 		gdp = ext4_get_group_desc(sb, group, NULL);
 		if (!gdp)
 			continue;
 
-		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+			continue;
+		if (group != 0)
 			break;
+		ext4_error(sb, "Inode table for bg 0 marked as "
+			   "needing zeroing");
+		if (sb_rdonly(sb))
+			return ngroups;
 	}
 
 	return group;

From bc890a60247171294acc0bd67d211fa4b88d40ba Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 14 Jun 2018 12:55:10 -0400
Subject: [PATCH 007/382] ext4: verify the depth of extent tree in
 ext4_find_extent()

If there is a corupted file system where the claimed depth of the
extent tree is -1, this can cause a massive buffer overrun leading to
sadness.

This addresses CVE-2018-10877.

https://bugzilla.kernel.org/show_bug.cgi?id=199417

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/ext4_extents.h | 1 +
 fs/ext4/extents.c      | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 98fb0c119c68..adf6668b596f 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -91,6 +91,7 @@ struct ext4_extent_header {
 };
 
 #define EXT4_EXT_MAGIC		cpu_to_le16(0xf30a)
+#define EXT4_MAX_EXTENT_DEPTH 5
 
 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \
 	(sizeof(struct ext4_extent_header) + \
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index c969275ce3ee..08226f72b7ee 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -869,6 +869,12 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
 
 	eh = ext_inode_hdr(inode);
 	depth = ext_depth(inode);
+	if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
+		EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
+				 depth);
+		ret = -EFSCORRUPTED;
+		goto err;
+	}
 
 	if (path) {
 		ext4_ext_drop_refs(path);

From bdbd6ce01a70f02e9373a584d0ae9538dcf0a121 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 15 Jun 2018 12:27:16 -0400
Subject: [PATCH 008/382] ext4: include the illegal physical block in the bad
 map ext4_error msg

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2ea07efbe016..c2f4ccb880c4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -402,9 +402,9 @@ static int __check_block_validity(struct inode *inode, const char *func,
 	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
 				   map->m_len)) {
 		ext4_error_inode(inode, func, line, map->m_pblk,
-				 "lblock %lu mapped to illegal pblock "
+				 "lblock %lu mapped to illegal pblock %llu "
 				 "(length %d)", (unsigned long) map->m_lblk,
-				 map->m_len);
+				 map->m_pblk, map->m_len);
 		return -EFSCORRUPTED;
 	}
 	return 0;

From 6e8ab72a812396996035a37e5ca4b3b99b5d214b Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 15 Jun 2018 12:28:16 -0400
Subject: [PATCH 009/382] ext4: clear i_data in ext4_inode_info when removing
 inline data

When converting from an inode from storing the data in-line to a data
block, ext4_destroy_inline_data_nolock() was only clearing the on-disk
copy of the i_blocks[] array.  It was not clearing copy of the
i_blocks[] in ext4_inode_info, in i_data[], which is the copy actually
used by ext4_map_blocks().

This didn't matter much if we are using extents, since the extents
header would be invalid and thus the extents could would re-initialize
the extents tree.  But if we are using indirect blocks, the previous
contents of the i_blocks array will be treated as block numbers, with
potentially catastrophic results to the file system integrity and/or
user data.

This gets worse if the file system is using a 1k block size and
s_first_data is zero, but even without this, the file system can get
quite badly corrupted.

This addresses CVE-2018-10881.

https://bugzilla.kernel.org/show_bug.cgi?id=200015

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/inline.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 44b4fcdc3755..d79115d8d716 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -437,6 +437,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
 
 	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
 		0, EXT4_MIN_INLINE_DATA_SIZE);
+	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);
 
 	if (ext4_has_feature_extents(inode->i_sb)) {
 		if (S_ISDIR(inode->i_mode) ||

From 8cdb5240ec5928b20490a2bb34cb87e9a5f40226 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 16 Jun 2018 15:40:48 -0400
Subject: [PATCH 010/382] ext4: never move the system.data xattr out of the
 inode body

When expanding the extra isize space, we must never move the
system.data xattr out of the inode body.  For performance reasons, it
doesn't make any sense, and the inline data implementation assumes
that system.data xattr is never in the external xattr block.

This addresses CVE-2018-10880

https://bugzilla.kernel.org/show_bug.cgi?id=200005

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/xattr.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 0263692979ec..72377b77fbd7 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2657,6 +2657,11 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
 		last = IFIRST(header);
 		/* Find the entry best suited to be pushed into EA block */
 		for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+			/* never move system.data out of the inode */
+			if ((last->e_name_len == 4) &&
+			    (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) &&
+			    !memcmp(last->e_name, "data", 4))
+				continue;
 			total_size = EXT4_XATTR_LEN(last->e_name_len);
 			if (!last->e_value_inum)
 				total_size += EXT4_XATTR_SIZE(

From e09463f220ca9a1a1ecfda84fcda658f99a1f12a Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 16 Jun 2018 20:21:45 -0400
Subject: [PATCH 011/382] jbd2: don't mark block as modified if the handle is
 out of credits

Do not set the b_modified flag in block's journal head should not
until after we're sure that jbd2_journal_dirty_metadat() will not
abort with an error due to there not being enough space reserved in
the jbd2 handle.

Otherwise, future attempts to modify the buffer may lead a large
number of spurious errors and warnings.

This addresses CVE-2018-10883.

https://bugzilla.kernel.org/show_bug.cgi?id=200071

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/jbd2/transaction.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 51dd68e67b0f..c0b66a7a795b 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1361,6 +1361,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		if (jh->b_transaction == transaction &&
 		    jh->b_jlist != BJ_Metadata) {
 			jbd_lock_bh_state(bh);
+			if (jh->b_transaction == transaction &&
+			    jh->b_jlist != BJ_Metadata)
+				pr_err("JBD2: assertion failure: h_type=%u "
+				       "h_line_no=%u block_no=%llu jlist=%u\n",
+				       handle->h_type, handle->h_line_no,
+				       (unsigned long long) bh->b_blocknr,
+				       jh->b_jlist);
 			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
 					jh->b_jlist == BJ_Metadata);
 			jbd_unlock_bh_state(bh);
@@ -1380,11 +1387,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		 * of the transaction. This needs to be done
 		 * once a transaction -bzzz
 		 */
-		jh->b_modified = 1;
 		if (handle->h_buffer_credits <= 0) {
 			ret = -ENOSPC;
 			goto out_unlock_bh;
 		}
+		jh->b_modified = 1;
 		handle->h_buffer_credits--;
 	}
 

From 8bc1379b82b8e809eef77a9fedbb75c6c297be19 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 16 Jun 2018 23:41:59 -0400
Subject: [PATCH 012/382] ext4: avoid running out of journal credits when
 appending to an inline file

Use a separate journal transaction if it turns out that we need to
convert an inline file to use an data block.  Otherwise we could end
up failing due to not having journal credits.

This addresses CVE-2018-10883.

https://bugzilla.kernel.org/show_bug.cgi?id=200071

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/ext4.h   |  3 ---
 fs/ext4/inline.c | 38 +-------------------------------------
 fs/ext4/xattr.c  | 19 ++-----------------
 3 files changed, 3 insertions(+), 57 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 856b6a54d82b..859d6433dcc1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3013,9 +3013,6 @@ extern int ext4_inline_data_fiemap(struct inode *inode,
 struct iomap;
 extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
 
-extern int ext4_try_to_evict_inline_data(handle_t *handle,
-					 struct inode *inode,
-					 int needed);
 extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline);
 
 extern int ext4_convert_inline_data(struct inode *inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d79115d8d716..851bc552d849 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -887,11 +887,11 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 	flags |= AOP_FLAG_NOFS;
 
 	if (ret == -ENOSPC) {
+		ext4_journal_stop(handle);
 		ret = ext4_da_convert_inline_data_to_extent(mapping,
 							    inode,
 							    flags,
 							    fsdata);
-		ext4_journal_stop(handle);
 		if (ret == -ENOSPC &&
 		    ext4_should_retry_alloc(inode->i_sb, &retries))
 			goto retry_journal;
@@ -1891,42 +1891,6 @@ int ext4_inline_data_fiemap(struct inode *inode,
 	return (error < 0 ? error : 0);
 }
 
-/*
- * Called during xattr set, and if we can sparse space 'needed',
- * just create the extent tree evict the data to the outer block.
- *
- * We use jbd2 instead of page cache to move data to the 1st block
- * so that the whole transaction can be committed as a whole and
- * the data isn't lost because of the delayed page cache write.
- */
-int ext4_try_to_evict_inline_data(handle_t *handle,
-				  struct inode *inode,
-				  int needed)
-{
-	int error;
-	struct ext4_xattr_entry *entry;
-	struct ext4_inode *raw_inode;
-	struct ext4_iloc iloc;
-
-	error = ext4_get_inode_loc(inode, &iloc);
-	if (error)
-		return error;
-
-	raw_inode = ext4_raw_inode(&iloc);
-	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
-					    EXT4_I(inode)->i_inline_off);
-	if (EXT4_XATTR_LEN(entry->e_name_len) +
-	    EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) {
-		error = -ENOSPC;
-		goto out;
-	}
-
-	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
-out:
-	brelse(iloc.bh);
-	return error;
-}
-
 int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
 {
 	handle_t *handle;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 72377b77fbd7..723df14f4084 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2212,23 +2212,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
 	if (EXT4_I(inode)->i_extra_isize == 0)
 		return -ENOSPC;
 	error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
-	if (error) {
-		if (error == -ENOSPC &&
-		    ext4_has_inline_data(inode)) {
-			error = ext4_try_to_evict_inline_data(handle, inode,
-					EXT4_XATTR_LEN(strlen(i->name) +
-					EXT4_XATTR_SIZE(i->value_len)));
-			if (error)
-				return error;
-			error = ext4_xattr_ibody_find(inode, i, is);
-			if (error)
-				return error;
-			error = ext4_xattr_set_entry(i, s, handle, inode,
-						     false /* is_block */);
-		}
-		if (error)
-			return error;
-	}
+	if (error)
+		return error;
 	header = IHDR(inode, ext4_raw_inode(&is->iloc));
 	if (!IS_LAST_ENTRY(s->first)) {
 		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);

From c37e9e013469521d9adb932d17a1795c139b36db Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 17 Jun 2018 00:41:14 -0400
Subject: [PATCH 013/382] ext4: add more inode number paranoia checks

If there is a directory entry pointing to a system inode (such as a
journal inode), complain and declare the file system to be corrupted.

Also, if the superblock's first inode number field is too small,
refuse to mount the file system.

This addresses CVE-2018-10882.

https://bugzilla.kernel.org/show_bug.cgi?id=200069

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/ext4.h  | 5 -----
 fs/ext4/inode.c | 3 ++-
 fs/ext4/super.c | 5 +++++
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 859d6433dcc1..4bd69649a048 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1502,11 +1502,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
 static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 {
 	return ino == EXT4_ROOT_INO ||
-		ino == EXT4_USR_QUOTA_INO ||
-		ino == EXT4_GRP_QUOTA_INO ||
-		ino == EXT4_BOOT_LOADER_INO ||
-		ino == EXT4_JOURNAL_INO ||
-		ino == EXT4_RESIZE_INO ||
 		(ino >= EXT4_FIRST_INO(sb) &&
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2f4ccb880c4..7d6c10017bdf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4506,7 +4506,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 	int			inodes_per_block, inode_offset;
 
 	iloc->bh = NULL;
-	if (!ext4_valid_inum(sb, inode->i_ino))
+	if (inode->i_ino < EXT4_ROOT_INO ||
+	    inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
 		return -EFSCORRUPTED;
 
 	iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4d34430d75f6..1f955c128e0d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3858,6 +3858,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	} else {
 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+		if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+			ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+				 sbi->s_first_ino);
+			goto failed_mount;
+		}
 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
 		    (!is_power_of_2(sbi->s_inode_size)) ||
 		    (sbi->s_inode_size > blocksize)) {

From bfe0a5f47ada40d7984de67e59a7d3390b9b9ecc Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 17 Jun 2018 18:11:20 -0400
Subject: [PATCH 014/382] ext4: add more mount time checks of the superblock

The kernel's ext4 mount-time checks were more permissive than
e2fsprogs's libext2fs checks when opening a file system.  The
superblock is considered too insane for debugfs or e2fsck to operate
on it, the kernel has no business trying to mount it.

This will make file system fuzzing tools work harder, but the failure
cases that they find will be more useful and be easier to evaluate.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/super.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1f955c128e0d..b37b00befd65 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3793,6 +3793,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			 le32_to_cpu(es->s_log_block_size));
 		goto failed_mount;
 	}
+	if (le32_to_cpu(es->s_log_cluster_size) >
+	    (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+		ext4_msg(sb, KERN_ERR,
+			 "Invalid log cluster size: %u",
+			 le32_to_cpu(es->s_log_cluster_size));
+		goto failed_mount;
+	}
 
 	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
 		ext4_msg(sb, KERN_ERR,
@@ -3939,13 +3946,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				 "block size (%d)", clustersize, blocksize);
 			goto failed_mount;
 		}
-		if (le32_to_cpu(es->s_log_cluster_size) >
-		    (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
-			ext4_msg(sb, KERN_ERR,
-				 "Invalid log cluster size: %u",
-				 le32_to_cpu(es->s_log_cluster_size));
-			goto failed_mount;
-		}
 		sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
 			le32_to_cpu(es->s_log_block_size);
 		sbi->s_clusters_per_group =
@@ -3966,10 +3966,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	} else {
 		if (clustersize != blocksize) {
-			ext4_warning(sb, "fragment/cluster size (%d) != "
-				     "block size (%d)", clustersize,
-				     blocksize);
-			clustersize = blocksize;
+			ext4_msg(sb, KERN_ERR,
+				 "fragment/cluster size (%d) != "
+				 "block size (%d)", clustersize, blocksize);
+			goto failed_mount;
 		}
 		if (sbi->s_blocks_per_group > blocksize * 8) {
 			ext4_msg(sb, KERN_ERR,
@@ -4023,6 +4023,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			 ext4_blocks_count(es));
 		goto failed_mount;
 	}
+	if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
+	    (sbi->s_cluster_ratio == 1)) {
+		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
+			 "block is 0 with a 1k block and cluster size");
+		goto failed_mount;
+	}
+
 	blocks_count = (ext4_blocks_count(es) -
 			le32_to_cpu(es->s_first_data_block) +
 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
@@ -4058,6 +4065,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		ret = -ENOMEM;
 		goto failed_mount;
 	}
+	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+	    le32_to_cpu(es->s_inodes_count)) {
+		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+			 le32_to_cpu(es->s_inodes_count),
+			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+		ret = -EINVAL;
+		goto failed_mount;
+	}
 
 	bgl_lock_init(sbi->s_blockgroup_lock);
 

From ba062ebb2cd561d404e0fba8ee4b3f5ebce7cbfc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 13 Jun 2018 09:13:39 -0700
Subject: [PATCH 015/382] netfilter: nf_queue: augment nfqa_cfg_policy

Three attributes are currently not verified, thus can trigger KMSAN
warnings such as :

BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline]
BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline]
BUG: KMSAN: uninit-value in nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268
CPU: 1 PID: 4521 Comm: syz-executor120 Not tainted 4.17.0+ #5
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x185/0x1d0 lib/dump_stack.c:113
 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117
 __msan_warning_32+0x70/0xc0 mm/kmsan/kmsan_instr.c:620
 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline]
 __fswab32 include/uapi/linux/swab.h:59 [inline]
 nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268
 nfnetlink_rcv_msg+0xb2e/0xc80 net/netfilter/nfnetlink.c:212
 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448
 nfnetlink_rcv+0x2fe/0x680 net/netfilter/nfnetlink.c:513
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x43fd59
RSP: 002b:00007ffde0e30d28 EFLAGS: 00000213 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fd59
RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003
RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8
R10: 00000000004002c8 R11: 0000000000000213 R12: 0000000000401680
R13: 0000000000401710 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline]
 kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189
 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315
 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322
 slab_post_alloc_hook mm/slab.h:446 [inline]
 slab_alloc_node mm/slub.c:2753 [inline]
 __kmalloc_node_track_caller+0xb35/0x11b0 mm/slub.c:4395
 __kmalloc_reserve net/core/skbuff.c:138 [inline]
 __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206
 alloc_skb include/linux/skbuff.h:988 [inline]
 netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline]
 netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876
 sock_sendmsg_nosec net/socket.c:629 [inline]
 sock_sendmsg net/socket.c:639 [inline]
 ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117
 __sys_sendmsg net/socket.c:2155 [inline]
 __do_sys_sendmsg net/socket.c:2164 [inline]
 __se_sys_sendmsg net/socket.c:2162 [inline]
 __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162
 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: fdb694a01f1f ("netfilter: Add fail-open support")
Fixes: 829e17a1a602 ("[NETFILTER]: nfnetlink_queue: allow changing queue length through netlink")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_queue.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 4ccd2988f9db..ea4ba551abb2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1243,6 +1243,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
 static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
 	[NFQA_CFG_CMD]		= { .len = sizeof(struct nfqnl_msg_config_cmd) },
 	[NFQA_CFG_PARAMS]	= { .len = sizeof(struct nfqnl_msg_config_params) },
+	[NFQA_CFG_QUEUE_MAXLEN]	= { .type = NLA_U32 },
+	[NFQA_CFG_MASK]		= { .type = NLA_U32 },
+	[NFQA_CFG_FLAGS]	= { .type = NLA_U32 },
 };
 
 static const struct nf_queue_handler nfqh = {

From 9ce7bc036ae4cfe3393232c86e9e1fea2153c237 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 13 Jun 2018 10:11:56 -0700
Subject: [PATCH 016/382] netfilter: ipv6: nf_defrag: reduce struct net memory
 waste

It is a waste of memory to use a full "struct netns_sysctl_ipv6"
while only one pointer is really used, considering netns_sysctl_ipv6
keeps growing.

Also, since "struct netns_frags" has cache line alignment,
it is better to move the frags_hdr pointer outside, otherwise
we spend a full cache line for this pointer.

This saves 192 bytes of memory per netns.

Fixes: c038a767cd69 ("ipv6: add a new namespace for nf_conntrack_reasm")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/net_namespace.h             | 1 +
 include/net/netns/ipv6.h                | 1 -
 net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++---
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 47e35cce3b64..a71264d75d7f 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -128,6 +128,7 @@ struct net {
 #endif
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 	struct netns_nf_frag	nf_frag;
+	struct ctl_table_header *nf_frag_frags_hdr;
 #endif
 	struct sock		*nfnl;
 	struct sock		*nfnl_stash;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c978a31b0f84..762ac9931b62 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -109,7 +109,6 @@ struct netns_ipv6 {
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 struct netns_nf_frag {
-	struct netns_sysctl_ipv6 sysctl;
 	struct netns_frags	frags;
 };
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5e0332014c17..a452d99c9f52 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 	if (hdr == NULL)
 		goto err_reg;
 
-	net->nf_frag.sysctl.frags_hdr = hdr;
+	net->nf_frag_frags_hdr = hdr;
 	return 0;
 
 err_reg:
@@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
 {
 	struct ctl_table *table;
 
-	table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
-	unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+	table = net->nf_frag_frags_hdr->ctl_table_arg;
+	unregister_net_sysctl_table(net->nf_frag_frags_hdr);
 	if (!net_eq(net, &init_net))
 		kfree(table);
 }

From ad9852af97587b8abe8102f9ddcb05c9769656f6 Mon Sep 17 00:00:00 2001
From: Gao Feng <gfree.wind@vip.163.com>
Date: Wed, 13 Jun 2018 12:26:13 +0800
Subject: [PATCH 017/382] netfilter: nf_ct_helper: Fix possible panic after
 nf_conntrack_helper_unregister

The helper module would be unloaded after nf_conntrack_helper_unregister,
so it may cause a possible panic caused by race.

nf_ct_iterate_destroy(unhelp, me) reset the helper of conntrack as NULL,
but maybe someone has gotten the helper pointer during this period. Then
it would panic, when it accesses the helper and the module was unloaded.

Take an example as following:
CPU0                                                   CPU1
ctnetlink_dump_helpinfo
helper = rcu_dereference(help->helper);
                                                       unhelp
                                                       set helper as NULL
                                                       unload helper module
helper->to_nlattr(skb, ct);

As above, the cpu0 tries to access the helper and its module is unloaded,
then the panic happens.

Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_helper.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 551a1eddf0fa..a75b11c39312 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
 
 	nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
 	nf_ct_iterate_destroy(unhelp, me);
+
+	/* Maybe someone has gotten the helper already when unhelp above.
+	 * So need to wait it.
+	 */
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
 

From bfc9dfdcb6e9493de5d4fe0d3ed3ce57672f8d07 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 13 Jun 2018 08:39:49 -0700
Subject: [PATCH 018/382] MD: cleanup resources in failure

We need destroy the memory pool in failure

Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 29b0cd9ec951..994aed2f9dff 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5547,7 +5547,8 @@ int md_run(struct mddev *mddev)
 		else
 			pr_warn("md: personality for level %s is not loaded!\n",
 				mddev->clevel);
-		return -EINVAL;
+		err = -EINVAL;
+		goto abort;
 	}
 	spin_unlock(&pers_lock);
 	if (mddev->level != pers->level) {
@@ -5560,7 +5561,8 @@ int md_run(struct mddev *mddev)
 	    pers->start_reshape == NULL) {
 		/* This personality cannot handle reshaping... */
 		module_put(pers->owner);
-		return -EINVAL;
+		err = -EINVAL;
+		goto abort;
 	}
 
 	if (pers->sync_request) {
@@ -5629,7 +5631,7 @@ int md_run(struct mddev *mddev)
 		mddev->private = NULL;
 		module_put(pers->owner);
 		bitmap_destroy(mddev);
-		return err;
+		goto abort;
 	}
 	if (mddev->queue) {
 		bool nonrot = true;

From fae2a63737e5973f1426bc139935a0f42e232844 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 8 Jun 2018 18:26:33 +0800
Subject: [PATCH 019/382] libahci: Fix possible Spectre-v1 pmp indexing in
 ahci_led_store()

Currently smatch warns of possible Spectre-V1 issue in ahci_led_store():
drivers/ata/libahci.c:1150 ahci_led_store() warn: potential spectre issue 'pp->em_priv' (local cap)

Userspace controls @pmp from following callchain:
em_message->store()
->ata_scsi_em_message_store()
-->ap->ops->em_store()
--->ahci_led_store()

After the mask+shift @pmp is effectively an 8b value, which is used to
index into an array of length 8, so sanitize the array index.

Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libahci.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 965842a08743..09620c2ffa0f 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -35,6 +35,7 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/nospec.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
@@ -1146,10 +1147,12 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
 
 	/* get the slot number from the message */
 	pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8;
-	if (pmp < EM_MAX_SLOTS)
+	if (pmp < EM_MAX_SLOTS) {
+		pmp = array_index_nospec(pmp, EM_MAX_SLOTS);
 		emp = &pp->em_priv[pmp];
-	else
+	} else {
 		return -EINVAL;
+	}
 
 	/* mask off the activity bits if we are in sw_activity
 	 * mode, user should turn off sw_activity before setting

From 95ffcf471d05ec7c91993c91dea912f99dccfc26 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 6 Jun 2018 06:56:34 +0000
Subject: [PATCH 020/382] ata: ahci_mvebu: ahci_mvebu_stop_engine() can be
 static

Fixes the following sparse warning:

drivers/ata/ahci_mvebu.c:85:5: warning:
 symbol 'ahci_mvebu_stop_engine' was not declared. Should it be static?

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_mvebu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 0045dacd814b..72d90b4c3aae 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -82,7 +82,7 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv)
  *
  * Return: 0 on success; Error code otherwise.
  */
-int ahci_mvebu_stop_engine(struct ata_port *ap)
+static int ahci_mvebu_stop_engine(struct ata_port *ap)
 {
 	void __iomem *port_mmio = ahci_port_base(ap);
 	u32 tmp, port_fbs;

From 08ca1b52f69b4dfa8703d54e26e2c6e11aa453eb Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Mon, 18 Jun 2018 16:39:50 -0600
Subject: [PATCH 021/382] vfio/pci: Make IGD support a configurable option

Allow the code which provides extensions to support direct assignment
of Intel IGD (GVT-d) to be compiled out of the kernel if desired.  The
config option for this was previously automatically enabled on X86,
therefore the default remains Y.  This simply provides the option to
disable it even for X86.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/Kconfig | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 24ee2605b9f0..42dc1d3d71cf 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -28,5 +28,13 @@ config VFIO_PCI_INTX
 	def_bool y if !S390
 
 config VFIO_PCI_IGD
-	depends on VFIO_PCI
-	def_bool y if X86
+	bool "VFIO PCI extensions for Intel graphics (GVT-d)"
+	depends on VFIO_PCI && X86
+	default y
+	help
+	  Support for Intel IGD specific extensions to enable direct
+	  assignment to virtual machines.  This includes exposing an IGD
+	  specific firmware table and read-only copies of the host bridge
+	  and LPC bridge config space.
+
+	  To enable Intel IGD assignment through vfio-pci, say Y.

From c9bd0946da243a8eb86b44ff613e2c813f9b683b Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Tue, 5 Jun 2018 18:59:57 +0200
Subject: [PATCH 022/382] dmaengine: ti: omap-dma: Fix OMAP1510 incorrect
 residue_granularity

Commit 0198d7bb8a0c ("ASoC: omap-mcbsp: Convert to use the sdma-pcm
instead of omap-pcm") resulted in broken audio playback on OMAP1510
(discovered on Amstrad Delta).

When running on OMAP1510, omap-pcm used to obtain DMA offset from
snd_dmaengine_pcm_pointer_no_residue() based on DMA interrupt triggered
software calculations instead of snd_dmaengine_pcm_pointer() which
depended on residue value calculated from omap_dma_get_src_pos().
Similar code path is still available in now used
sound/soc/soc-generic-dmaengine-pcm.c but it is not triggered.

It was verified already before that omap_get_dma_src_pos() from
arch/arm/plat-omap/dma.c didn't work correctly for OMAP1510 - see
commit 1bdd7419910c ("ASoC: OMAP: fix OMAP1510 broken PCM pointer
callback") for details.  Apparently the same applies to its successor,
omap_dma_get_src_pos() from drivers/dma/ti/omap-dma.c.

On the other hand, snd_dmaengine_pcm_pointer_no_residue() is described
as depreciated and discouraged for use in new drivers because of its
unreliable accuracy.  However, it seems the only working option for
OPAM1510 now, as long as a software calculated residue is not
implemented as OMAP1510 fallback in omap-dma.

Using snd_dmaengine_pcm_pointer_no_residue() code path instead of
snd_dmaengine_pcm_pointer() in sound/soc/soc-generic-dmaengine-pcm.c
can be triggered in two ways:
- by passing pcm->flags |= SND_DMAENGINE_PCM_FLAG_NO_RESIDUE from
  sound/soc/omap/sdma-pcm.c,
- by passing dma_caps.residue_granularity =
  DMA_RESIDUE_GRANULARITY_DESCRIPTOR from DMA engine.

Let's do the latter.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/omap-dma.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c
index 9b5ca8691f27..a4a931ddf6f6 100644
--- a/drivers/dma/ti/omap-dma.c
+++ b/drivers/dma/ti/omap-dma.c
@@ -1485,7 +1485,11 @@ static int omap_dma_probe(struct platform_device *pdev)
 	od->ddev.src_addr_widths = OMAP_DMA_BUSWIDTHS;
 	od->ddev.dst_addr_widths = OMAP_DMA_BUSWIDTHS;
 	od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-	od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	if (__dma_omap15xx(od->plat->dma_attr))
+		od->ddev.residue_granularity =
+				DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	else
+		od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	od->ddev.max_burst = SZ_16M - 1; /* CCEN: 24bit unsigned */
 	od->ddev.dev = &pdev->dev;
 	INIT_LIST_HEAD(&od->ddev.channels);

From 356073bcf6dbcc15cd8fb22d10cf8f35f4525271 Mon Sep 17 00:00:00 2001
From: Sinan Kaya <okaya@codeaurora.org>
Date: Thu, 14 Jun 2018 09:37:46 -0400
Subject: [PATCH 023/382] MAINTAINERS: Update email-id of Sinan Kaya

I'm no longer with QCOM. I am still interested in maintaining or reviewing
PCI/DMA engine patches. Update email-id to an active one.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9d5eeff51b5f..209ddcdc5b7a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11821,7 +11821,7 @@ S:	Supported
 F:	arch/hexagon/
 
 QUALCOMM HIDMA DRIVER
-M:	Sinan Kaya <okaya@codeaurora.org>
+M:	Sinan Kaya <okaya@kernel.org>
 L:	linux-arm-kernel@lists.infradead.org
 L:	linux-arm-msm@vger.kernel.org
 L:	dmaengine@vger.kernel.org

From 6362f0a290023bafd7f991089e81dd9278f154b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Jun 2018 10:12:48 -0600
Subject: [PATCH 024/382] libata: add command iterator helpers

Now that we have the internal tag as a special (higher) value tag,
it gets a bit tricky to iterate the internal commands as some loops
will exceed ATA_MAX_QUEUE. Add explicit helpers for iterating pending
commands, both inflight and internal.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 8b8946dd63b9..a2257e380789 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1495,6 +1495,29 @@ static inline bool ata_tag_valid(unsigned int tag)
 	return tag < ATA_MAX_QUEUE || ata_tag_internal(tag);
 }
 
+#define __ata_qc_for_each(ap, qc, tag, max_tag, fn)		\
+	for ((tag) = 0; (tag) < (max_tag) &&			\
+	     ({ qc = fn((ap), (tag)); 1; }); (tag)++)		\
+
+/*
+ * Internal use only, iterate commands ignoring error handling and
+ * status of 'qc'.
+ */
+#define ata_qc_for_each_raw(ap, qc, tag)					\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, __ata_qc_from_tag)
+
+/*
+ * Iterate all potential commands that can be queued
+ */
+#define ata_qc_for_each(ap, qc, tag)					\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, ata_qc_from_tag)
+
+/*
+ * Like ata_qc_for_each, but with the internal tag included
+ */
+#define ata_qc_for_each_with_internal(ap, qc, tag)			\
+	__ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE + 1, ata_qc_from_tag)
+
 /*
  * device helpers
  */

From 258c4e5c65b21bdbe9735f49ea584b3059c810e4 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Jun 2018 10:12:49 -0600
Subject: [PATCH 025/382] libata: convert eh to command iterators

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-eh.c | 41 ++++++++++++++++-------------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index d5412145d76d..01306c018398 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -614,8 +614,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
 		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
 			struct ata_queued_cmd *qc;
 
-			for (i = 0; i < ATA_MAX_QUEUE; i++) {
-				qc = __ata_qc_from_tag(ap, i);
+			ata_qc_for_each_raw(ap, qc, i) {
 				if (qc->flags & ATA_QCFLAG_ACTIVE &&
 				    qc->scsicmd == scmd)
 					break;
@@ -818,14 +817,13 @@ EXPORT_SYMBOL_GPL(ata_port_wait_eh);
 
 static int ata_eh_nr_in_flight(struct ata_port *ap)
 {
+	struct ata_queued_cmd *qc;
 	unsigned int tag;
 	int nr = 0;
 
 	/* count only non-internal commands */
-	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-		if (ata_tag_internal(tag))
-			continue;
-		if (ata_qc_from_tag(ap, tag))
+	ata_qc_for_each(ap, qc, tag) {
+		if (qc)
 			nr++;
 	}
 
@@ -847,13 +845,13 @@ void ata_eh_fastdrain_timerfn(struct timer_list *t)
 		goto out_unlock;
 
 	if (cnt == ap->fastdrain_cnt) {
+		struct ata_queued_cmd *qc;
 		unsigned int tag;
 
 		/* No progress during the last interval, tag all
 		 * in-flight qcs as timed out and freeze the port.
 		 */
-		for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-			struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
+		ata_qc_for_each(ap, qc, tag) {
 			if (qc)
 				qc->err_mask |= AC_ERR_TIMEOUT;
 		}
@@ -999,6 +997,7 @@ void ata_port_schedule_eh(struct ata_port *ap)
 
 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
 {
+	struct ata_queued_cmd *qc;
 	int tag, nr_aborted = 0;
 
 	WARN_ON(!ap->ops->error_handler);
@@ -1007,9 +1006,7 @@ static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
 	ata_eh_set_pending(ap, 0);
 
 	/* include internal tag in iteration */
-	for (tag = 0; tag <= ATA_MAX_QUEUE; tag++) {
-		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
-
+	ata_qc_for_each_with_internal(ap, qc, tag) {
 		if (qc && (!link || qc->dev->link == link)) {
 			qc->flags |= ATA_QCFLAG_FAILED;
 			ata_qc_complete(qc);
@@ -1712,9 +1709,7 @@ void ata_eh_analyze_ncq_error(struct ata_link *link)
 		return;
 
 	/* has LLDD analyzed already? */
-	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-		qc = __ata_qc_from_tag(ap, tag);
-
+	ata_qc_for_each_raw(ap, qc, tag) {
 		if (!(qc->flags & ATA_QCFLAG_FAILED))
 			continue;
 
@@ -2136,6 +2131,7 @@ static void ata_eh_link_autopsy(struct ata_link *link)
 {
 	struct ata_port *ap = link->ap;
 	struct ata_eh_context *ehc = &link->eh_context;
+	struct ata_queued_cmd *qc;
 	struct ata_device *dev;
 	unsigned int all_err_mask = 0, eflags = 0;
 	int tag, nr_failed = 0, nr_quiet = 0;
@@ -2168,9 +2164,7 @@ static void ata_eh_link_autopsy(struct ata_link *link)
 
 	all_err_mask |= ehc->i.err_mask;
 
-	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
-
+	ata_qc_for_each_raw(ap, qc, tag) {
 		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
 		    ata_dev_phys_link(qc->dev) != link)
 			continue;
@@ -2436,6 +2430,7 @@ static void ata_eh_link_report(struct ata_link *link)
 {
 	struct ata_port *ap = link->ap;
 	struct ata_eh_context *ehc = &link->eh_context;
+	struct ata_queued_cmd *qc;
 	const char *frozen, *desc;
 	char tries_buf[6] = "";
 	int tag, nr_failed = 0;
@@ -2447,9 +2442,7 @@ static void ata_eh_link_report(struct ata_link *link)
 	if (ehc->i.desc[0] != '\0')
 		desc = ehc->i.desc;
 
-	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
-
+	ata_qc_for_each_raw(ap, qc, tag) {
 		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
 		    ata_dev_phys_link(qc->dev) != link ||
 		    ((qc->flags & ATA_QCFLAG_QUIET) &&
@@ -2511,8 +2504,7 @@ static void ata_eh_link_report(struct ata_link *link)
 		  ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
 #endif
 
-	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
+	ata_qc_for_each_raw(ap, qc, tag) {
 		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
 		char data_buf[20] = "";
 		char cdb_buf[70] = "";
@@ -3992,12 +3984,11 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
  */
 void ata_eh_finish(struct ata_port *ap)
 {
+	struct ata_queued_cmd *qc;
 	int tag;
 
 	/* retry or finish qcs */
-	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
-
+	ata_qc_for_each_raw(ap, qc, tag) {
 		if (!(qc->flags & ATA_QCFLAG_FAILED))
 			continue;
 

From d3543b4d1b48afd931ed4afb6f861e6122657b6f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Jun 2018 10:12:50 -0600
Subject: [PATCH 026/382] sata_fsl: convert to command iterator

We need to iterate all commands, including the internal one,
for ATAPI error handling.

Fixes: 28361c403683 ("libata: add extra internal command")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/sata_fsl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index b8d9cfc60374..bb5ec5f71e73 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -1229,8 +1229,7 @@ static void sata_fsl_host_intr(struct ata_port *ap)
 
 	/* Workaround for data length mismatch errata */
 	if (unlikely(hstatus & INT_ON_DATA_LENGTH_MISMATCH)) {
-		for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
-			qc = ata_qc_from_tag(ap, tag);
+		ata_qc_for_each_with_internal(ap, qc, tag) {
 			if (qc && ata_is_atapi(qc->tf.protocol)) {
 				u32 hcontrol;
 				/* Set HControl[27] to clear error registers */

From eb36333de4bfba57fa6f8f88052e53180d54708e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Jun 2018 10:12:51 -0600
Subject: [PATCH 027/382] sata_fsl: remove dead code in tag retrieval

We can never pass in the internal tag to this helper, it'll
always be the hardware tag. So there's no need to check and
do an internal translation of that tag.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/sata_fsl.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index bb5ec5f71e73..4dc528bf8e85 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -395,12 +395,6 @@ static inline unsigned int sata_fsl_tag(unsigned int tag,
 {
 	/* We let libATA core do actual (queue) tag allocation */
 
-	/* all non NCQ/queued commands should have tag#0 */
-	if (ata_tag_internal(tag)) {
-		DPRINTK("mapping internal cmds to tag#0\n");
-		return 0;
-	}
-
 	if (unlikely(tag >= SATA_FSL_QUEUE_DEPTH)) {
 		DPRINTK("tag %d invalid : out of range\n", tag);
 		return 0;

From 63ce3c384db26494615e3c8972bcd419ed71f4c4 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Tue, 19 Jun 2018 17:58:24 +0200
Subject: [PATCH 028/382] scsi: target: Fix truncated PR-in ReadKeys response

SPC5r17 states that the contents of the ADDITIONAL LENGTH field are not
altered based on the allocation length, so always calculate and pack the
full key list length even if the list itself is truncated.

According to Maged:

  Yes it fixes the "Storage Spaces Persistent Reservation" test in the
  Windows 2016 Server Failover Cluster validation suites when having
  many connections that result in more than 8 registrations. I tested
  your patch on 4.17 with iblock.

This behaviour can be tested using the libiscsi PrinReadKeys.Truncate test.

Cc: stable@vger.kernel.org
Signed-off-by: David Disseldorp <ddiss@suse.de>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Tested-by: Maged Mokhtar <mmokhtar@petasan.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_pr.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 01ac306131c1..10db5656fd5d 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -3727,11 +3727,16 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
 		 * Check for overflow of 8byte PRI READ_KEYS payload and
 		 * next reservation key list descriptor.
 		 */
-		if ((add_len + 8) > (cmd->data_length - 8))
-			break;
-
-		put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
-		off += 8;
+		if (off + 8 <= cmd->data_length) {
+			put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
+			off += 8;
+		}
+		/*
+		 * SPC5r17: 6.16.2 READ KEYS service action
+		 * The ADDITIONAL LENGTH field indicates the number of bytes in
+		 * the Reservation key list. The contents of the ADDITIONAL
+		 * LENGTH field are not altered based on the allocation length
+		 */
 		add_len += 8;
 	}
 	spin_unlock(&dev->t10_pr.registration_lock);

From ca95ef7c98674a8eb5d411fc0835783051c0662b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 12 Jun 2018 15:03:02 +0300
Subject: [PATCH 029/382] rtc: mrst: fix error code in probe()

We should be returning "retval".  The "mrst_rtc.rtc" variable is a valid
pointer.

Fixes: 32b41f93dcaf ("rtc: mrst: switch to devm functions")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-mrst.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c
index 097a4d4e2aba..1925aaf09093 100644
--- a/drivers/rtc/rtc-mrst.c
+++ b/drivers/rtc/rtc-mrst.c
@@ -367,10 +367,8 @@ static int vrtc_mrst_do_probe(struct device *dev, struct resource *iomem,
 	}
 
 	retval = rtc_register_device(mrst_rtc.rtc);
-	if (retval) {
-		retval = PTR_ERR(mrst_rtc.rtc);
+	if (retval)
 		goto cleanup0;
-	}
 
 	dev_dbg(dev, "initialised\n");
 	return 0;

From 12f8c553a503d98b519cca650b188bf51ebdbdbf Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 18 Apr 2018 20:52:31 +0900
Subject: [PATCH 030/382] clk: sunxi-ng: replace lib-y with obj-y

We had commit 06e226c7fb23 ("clk: sunxi-ng: Move all clock types to a
library") and commit 799c43415442 ("kbuild: thin archives make default
for all archs") in the same development cycle, from different trees.

With migration to the thin archive, the entire drivers/clk/sunxi-ng/lib.a
is linked to the vmlinux.  This does not break build, but we do not get
any size saving.

However, we do not need to go back to the individual Kconfig options.
The default configuration pulls in all (or most) of the CCU parts anyway.
Also, once we enable CONFIG_LD_DEAD_CODE_DATA_ELIMINATION, we can simply
list all files with obj-y, and the linker will drop all unused functions
by itself.

After the long discussion [1], people there agreed to fix this, but
nobody sent a patch after all.  I am doing it now.

I lifted up CONFIG_SUNXI_CCU to drivers/clk/Makefile because everything
in drivers/clk/sunxi-ng/ depends on SUNXI_CCU.

[1] https://patchwork.kernel.org/patch/9796521/

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/Makefile          |  2 +-
 drivers/clk/sunxi-ng/Makefile | 39 ++++++++++++++---------------------
 2 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index ae40cbe770f0..0bb25dd009d1 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -96,7 +96,7 @@ obj-$(CONFIG_ARCH_SPRD)			+= sprd/
 obj-$(CONFIG_ARCH_STI)			+= st/
 obj-$(CONFIG_ARCH_STRATIX10)		+= socfpga/
 obj-$(CONFIG_ARCH_SUNXI)		+= sunxi/
-obj-$(CONFIG_ARCH_SUNXI)		+= sunxi-ng/
+obj-$(CONFIG_SUNXI_CCU)			+= sunxi-ng/
 obj-$(CONFIG_ARCH_TEGRA)		+= tegra/
 obj-y					+= ti/
 obj-$(CONFIG_CLK_UNIPHIER)		+= uniphier/
diff --git a/drivers/clk/sunxi-ng/Makefile b/drivers/clk/sunxi-ng/Makefile
index acaa14cfa25c..49454700f2e5 100644
--- a/drivers/clk/sunxi-ng/Makefile
+++ b/drivers/clk/sunxi-ng/Makefile
@@ -1,24 +1,24 @@
 # SPDX-License-Identifier: GPL-2.0
 # Common objects
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_common.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_mmc_timing.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_reset.o
+obj-y				+= ccu_common.o
+obj-y				+= ccu_mmc_timing.o
+obj-y				+= ccu_reset.o
 
 # Base clock types
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_div.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_frac.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_gate.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_mux.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_mult.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_phase.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_sdm.o
+obj-y				+= ccu_div.o
+obj-y				+= ccu_frac.o
+obj-y				+= ccu_gate.o
+obj-y				+= ccu_mux.o
+obj-y				+= ccu_mult.o
+obj-y				+= ccu_phase.o
+obj-y				+= ccu_sdm.o
 
 # Multi-factor clocks
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_nk.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_nkm.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_nkmp.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_nm.o
-lib-$(CONFIG_SUNXI_CCU)		+= ccu_mp.o
+obj-y				+= ccu_nk.o
+obj-y				+= ccu_nkm.o
+obj-y				+= ccu_nkmp.o
+obj-y				+= ccu_nm.o
+obj-y				+= ccu_mp.o
 
 # SoC support
 obj-$(CONFIG_SUN50I_A64_CCU)	+= ccu-sun50i-a64.o
@@ -38,12 +38,3 @@ obj-$(CONFIG_SUN8I_R40_CCU)	+= ccu-sun8i-r40.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80-de.o
 obj-$(CONFIG_SUN9I_A80_CCU)	+= ccu-sun9i-a80-usb.o
-
-# The lib-y file goals is supposed to work only in arch/*/lib or lib/. In our
-# case, we want to use that goal, but even though lib.a will be properly
-# generated, it will not be linked in, eventually resulting in a linker error
-# for missing symbols.
-#
-# We can work around that by explicitly adding lib.a to the obj-y goal. This is
-# an undocumented behaviour, but works well for now.
-obj-$(CONFIG_SUNXI_CCU)		+= lib.a

From 957f9a13df6c70aac31a1dade5e417c286d6d258 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 20 Jun 2018 11:42:45 -0700
Subject: [PATCH 031/382] tools: bpftool: remove duplicated error message on
 prog load

do_pin_fd() will already print out an error message if something
goes wrong.  Printing another error is unnecessary and will break
JSON output, since error messages are full objects:

$ bpftool -jp prog load tracex1_kern.o /sys/fs/bpf/a
{
    "error": "can't pin the object (/sys/fs/bpf/a): File exists"
},{
    "error": "failed to pin program"
}

Fixes: 49a086c201a9 ("bpftool: implement prog load command")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/prog.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 05f42a46d6ed..12b694fe0404 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -694,10 +694,8 @@ static int do_load(int argc, char **argv)
 		return -1;
 	}
 
-	if (do_pin_fd(prog_fd, argv[1])) {
-		p_err("failed to pin program");
+	if (do_pin_fd(prog_fd, argv[1]))
 		return -1;
-	}
 
 	if (json_output)
 		jsonw_null(json_wtr);

From bfee71fb7376081349117fdc89f685a9e14a58c2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 20 Jun 2018 11:42:46 -0700
Subject: [PATCH 032/382] tools: bpftool: remember to close the libbpf object
 after prog load

Remembering to close all descriptors and free memory may not seem
important in a user space tool like bpftool, but if we were to run
in batch mode the consumed resources start to add up quickly.  Make
sure program load closes the libbpf object (which unloads and frees
it).

Fixes: 49a086c201a9 ("bpftool: implement prog load command")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/prog.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 12b694fe0404..959aa53ab678 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -695,12 +695,18 @@ static int do_load(int argc, char **argv)
 	}
 
 	if (do_pin_fd(prog_fd, argv[1]))
-		return -1;
+		goto err_close_obj;
 
 	if (json_output)
 		jsonw_null(json_wtr);
 
+	bpf_object__close(obj);
+
 	return 0;
+
+err_close_obj:
+	bpf_object__close(obj);
+	return -1;
 }
 
 static int do_help(int argc, char **argv)

From 81e167c2a216e7b54e6add9d2badcda267fe33b1 Mon Sep 17 00:00:00 2001
From: Jeffrin Jose T <ahiliation@gmail.com>
Date: Thu, 21 Jun 2018 22:30:20 +0530
Subject: [PATCH 033/382] selftests: bpf: notification about privilege required
 to run test_kmod.sh testing script

The test_kmod.sh script require root privilege for the successful
execution of the test.

This patch is to notify the user about the privilege the script
demands for the successful execution of the test.

Signed-off-by: Jeffrin Jose T (Rajagiri SET) <ahiliation@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_kmod.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 35669ccd4d23..9df0d2ac45f8 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -1,6 +1,15 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ "$(id -u)" != "0" ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
 SRC_TREE=../../../../
 
 test_run()

From 3e1a61b30c6a3a31314570c0c418a6bf84b056b1 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 21 Jun 2018 10:02:20 -0700
Subject: [PATCH 034/382] tools/bpf: fix test_sockmap failure

On one of our production test machine, when running
bpf selftest test_sockmap, I got the following error:
  # sudo ./test_sockmap
  libbpf: failed to create map (name: 'sock_map'): Operation not permitted
  libbpf: failed to load object 'test_sockmap_kern.o'
  libbpf: Can't get the 0th fd from program sk_skb1: only -1 instances
  ......
  load_bpf_file: (-1) Operation not permitted
  ERROR: (-1) load bpf failed

The error is due to not-big-enough rlimit
  struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};

The test already includes "bpf_rlimit.h", which sets current
and max rlimit to RLIM_INFINITY. Let us just use it.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_sockmap.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 05c8cb71724a..9e78df207919 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1413,18 +1413,12 @@ static int test_suite(void)
 
 int main(int argc, char **argv)
 {
-	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
 	int iov_count = 1, length = 1024, rate = 1;
 	struct sockmap_options options = {0};
 	int opt, longindex, err, cg_fd = 0;
 	char *bpf_file = BPF_SOCKMAP_FILENAME;
 	int test = PING_PONG;
 
-	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
-		perror("setrlimit(RLIMIT_MEMLOCK)");
-		return 1;
-	}
-
 	if (argc < 2)
 		return test_suite();
 

From 2068db53b6337f8c6c7bcd07dea0b5e82b1a1a02 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 20 Jun 2018 07:19:01 -0500
Subject: [PATCH 035/382] ipmi: Cleanup oops on initialization failure

Commit 93c303d2045b3 "ipmi_si: Clean up shutdown a bit" didn't
copy the behavior of the cleanup in one spot, it needed to
check for a non-NULL interface before cleaning it up.

Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Meelis Roos <mroos@linux.ee>
---
 drivers/char/ipmi/ipmi_si_intf.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index ad353be871bf..90ec010bffbd 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2088,8 +2088,10 @@ static int try_smi_init(struct smi_info *new_smi)
 	return 0;
 
 out_err:
-	ipmi_unregister_smi(new_smi->intf);
-	new_smi->intf = NULL;
+	if (new_smi->intf) {
+		ipmi_unregister_smi(new_smi->intf);
+		new_smi->intf = NULL;
+	}
 
 	kfree(init_name);
 

From dc0f0a026d33819bb82d5c26ab2fca838e2004be Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@linux.intel.com>
Date: Sat, 23 Jun 2018 21:51:13 +0800
Subject: [PATCH 036/382] ipmi: kcs_bmc: fix IRQ exception if the channel is
 not open

When kcs_bmc_handle_event calls kcs_force_abort function to handle the
not open (no user running) KCS channel transaction, the returned status
value -ENODEV causes the low level IRQ handler indicating that the irq
was not for him by returning IRQ_NONE. After some time, this IRQ will
be treated to be spurious one, and the exception dump happens.

   irq 30: nobody cared (try booting with the "irqpoll" option)
   CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.15-npcm750 #1
   Hardware name: NPCMX50 Chip family
   [<c010b264>] (unwind_backtrace) from [<c0106930>] (show_stack+0x20/0x24)
   [<c0106930>] (show_stack) from [<c03dad38>] (dump_stack+0x8c/0xa0)
   [<c03dad38>] (dump_stack) from [<c0168810>] (__report_bad_irq+0x3c/0xdc)
   [<c0168810>] (__report_bad_irq) from [<c0168c34>] (note_interrupt+0x29c/0x2ec)
   [<c0168c34>] (note_interrupt) from [<c0165c80>] (handle_irq_event_percpu+0x5c/0x68)
   [<c0165c80>] (handle_irq_event_percpu) from [<c0165cd4>] (handle_irq_event+0x48/0x6c)
   [<c0165cd4>] (handle_irq_event) from [<c0169664>] (handle_fasteoi_irq+0xc8/0x198)
   [<c0169664>] (handle_fasteoi_irq) from [<c016529c>] (__handle_domain_irq+0x90/0xe8)
   [<c016529c>] (__handle_domain_irq) from [<c01014bc>] (gic_handle_irq+0x58/0x9c)
   [<c01014bc>] (gic_handle_irq) from [<c010752c>] (__irq_svc+0x6c/0x90)
   Exception stack(0xc0a01de8 to 0xc0a01e30)
   1de0:                   00002080 c0a6fbc0 00000000 00000000 00000000 c096d294
   1e00: 00000000 00000001 dc406400 f03ff100 00000082 c0a01e94 c0a6fbc0 c0a01e38
   1e20: 00200102 c01015bc 60000113 ffffffff
   [<c010752c>] (__irq_svc) from [<c01015bc>] (__do_softirq+0xbc/0x358)
   [<c01015bc>] (__do_softirq) from [<c011c798>] (irq_exit+0xb8/0xec)
   [<c011c798>] (irq_exit) from [<c01652a0>] (__handle_domain_irq+0x94/0xe8)
   [<c01652a0>] (__handle_domain_irq) from [<c01014bc>] (gic_handle_irq+0x58/0x9c)
   [<c01014bc>] (gic_handle_irq) from [<c010752c>] (__irq_svc+0x6c/0x90)
   Exception stack(0xc0a01ef8 to 0xc0a01f40)
   1ee0:                                                       00000000 000003ae
   1f00: dcc0f338 c0111060 c0a00000 c0a0cc44 c0a0cbe4 c0a1c22b c07bc218 00000001
   1f20: dcffca40 c0a01f54 c0a01f58 c0a01f48 c0103524 c0103528 60000013 ffffffff
   [<c010752c>] (__irq_svc) from [<c0103528>] (arch_cpu_idle+0x48/0x4c)
   [<c0103528>] (arch_cpu_idle) from [<c0681390>] (default_idle_call+0x30/0x3c)
   [<c0681390>] (default_idle_call) from [<c0156f24>] (do_idle+0xc8/0x134)
   [<c0156f24>] (do_idle) from [<c015722c>] (cpu_startup_entry+0x28/0x2c)
   [<c015722c>] (cpu_startup_entry) from [<c067ad74>] (rest_init+0x84/0x88)
   [<c067ad74>] (rest_init) from [<c0900d44>] (start_kernel+0x388/0x394)
   [<c0900d44>] (start_kernel) from [<0000807c>] (0x807c)
   handlers:
   [<c041c5dc>] npcm7xx_kcs_irq
   Disabling IRQ #30

It needs to change the returned status from -ENODEV to 0. The -ENODEV
was originally used to tell the low level IRQ handler that no user was
running, but not consider the IRQ handling desgin.

And multiple KCS channels share one IRQ handler, it needs to check the
IBF flag before doing force abort. If the IBF is set, after handling,
return 0 to low level IRQ handler to indicate that the IRQ is handled.

Signed-off-by: Haiyue Wang <haiyue.wang@linux.intel.com>
Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 drivers/char/ipmi/kcs_bmc.c | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c
index fbfc05e3f3d1..bb882ab161fe 100644
--- a/drivers/char/ipmi/kcs_bmc.c
+++ b/drivers/char/ipmi/kcs_bmc.c
@@ -210,34 +210,23 @@ static void kcs_bmc_handle_cmd(struct kcs_bmc *kcs_bmc)
 int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc)
 {
 	unsigned long flags;
-	int ret = 0;
+	int ret = -ENODATA;
 	u8 status;
 
 	spin_lock_irqsave(&kcs_bmc->lock, flags);
 
-	if (!kcs_bmc->running) {
-		kcs_force_abort(kcs_bmc);
-		ret = -ENODEV;
-		goto out_unlock;
+	status = read_status(kcs_bmc);
+	if (status & KCS_STATUS_IBF) {
+		if (!kcs_bmc->running)
+			kcs_force_abort(kcs_bmc);
+		else if (status & KCS_STATUS_CMD_DAT)
+			kcs_bmc_handle_cmd(kcs_bmc);
+		else
+			kcs_bmc_handle_data(kcs_bmc);
+
+		ret = 0;
 	}
 
-	status = read_status(kcs_bmc) & (KCS_STATUS_IBF | KCS_STATUS_CMD_DAT);
-
-	switch (status) {
-	case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT:
-		kcs_bmc_handle_cmd(kcs_bmc);
-		break;
-
-	case KCS_STATUS_IBF:
-		kcs_bmc_handle_data(kcs_bmc);
-		break;
-
-	default:
-		ret = -ENODATA;
-		break;
-	}
-
-out_unlock:
 	spin_unlock_irqrestore(&kcs_bmc->lock, flags);
 
 	return ret;

From 49a6ec5b807ea4ad7ebe1f58080ebb8497cb2d2c Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Sun, 17 Jun 2018 13:53:09 +0200
Subject: [PATCH 037/382] ARM: dts: am437x: make edt-ft5x06 a wakeup source

The touchscreen driver no longer configures the device as wakeup source by
default. A "wakeup-source" property is needed.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am437x-sk-evm.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts
index 440351ad0b80..d4be3fd0b6f4 100644
--- a/arch/arm/boot/dts/am437x-sk-evm.dts
+++ b/arch/arm/boot/dts/am437x-sk-evm.dts
@@ -610,6 +610,8 @@ edt-ft5306@38 {
 
 		touchscreen-size-x = <480>;
 		touchscreen-size-y = <272>;
+
+		wakeup-source;
 	};
 
 	tlv320aic3106: tlv320aic3106@1b {

From 891f6a726cacbb87e5b06076693ffab53bd378d7 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 21 Jun 2018 14:49:38 +0200
Subject: [PATCH 038/382] s390: Correct register corruption in critical section
 cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the critical section cleanup we must not mess with r1.  For march=z9
or older, larl + ex (instead of exrl) are used with r1 as a temporary
register. This can clobber r1 in several interrupt handlers. Fix this by
using r11 as a temp register.  r11 is being saved by all callers of
cleanup_critical.

Fixes: 6dd85fbb87 ("s390: move expoline assembler macros to a header")
Cc: stable@vger.kernel.org #v4.16
Reported-by: Oliver Kurz <okurz@suse.com>
Reported-by: Petr Tesařík <ptesarik@suse.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/entry.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index f03402efab4b..3891805bfcdd 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -1265,7 +1265,7 @@ cleanup_critical:
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
 	jl	.Lcleanup_load_fpu_regs
-0:	BR_EX	%r14
+0:	BR_EX	%r14,%r11
 
 	.align	8
 .Lcleanup_table:
@@ -1301,7 +1301,7 @@ cleanup_critical:
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
-	BR_EX	%r14
+	BR_EX	%r14,%r11
 #endif
 
 .Lcleanup_system_call:

From 68d676a089625daed9d61df56ac63b5cd7189efe Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 22 Jun 2018 11:56:56 -0700
Subject: [PATCH 039/382] nfp: bpf: don't stop offload if replace failed

Stopping offload completely if replace of program failed dates
back to days of transparent offload.  Back then we wanted to
silently fall back to the in-driver processing.  Today we mark
programs for offload when they are loaded into the kernel, so
the transparent offload is no longer a reality.

Flags check in the driver will only allow replace of a driver
program with another driver program or an offload program with
another offload program.

When driver program is replaced stopping offload is a no-op,
because driver program isn't offloaded.  When replacing
offloaded program if the offload fails the entire operation
will fail all the way back to user space and we should continue
using the old program.  IOW when replacing a driver program
stopping offload is unnecessary and when replacing offloaded
program - it's a bug, old program should continue to run.

In practice this bug would mean that if offload operation was to
fail (either due to FW communication error, kernel OOM or new
program being offloaded but for a different netdev) driver
would continue reporting that previous XDP program is offloaded
but in fact no program will be loaded in hardware.  The failure
is fairly unlikely (found by inspection, when working on the code)
but it's unpleasant.

Backport note: even though the bug was introduced in commit
cafa92ac2553 ("nfp: bpf: add support for XDP_FLAGS_HW_MODE"),
this fix depends on commit 441a33031fe5 ("net: xdp: don't allow
device-bound programs in driver mode"), so this fix is sufficient
only in v4.15 or newer.  Kernels v4.13.x and v4.14.x do need to
stop offload if it was transparent/opportunistic, i.e. if
XDP_FLAGS_HW_MODE was not set on running program.

Fixes: cafa92ac2553 ("nfp: bpf: add support for XDP_FLAGS_HW_MODE")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index fcdfb8e7fdea..7184af94aa53 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -81,10 +81,10 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
 
 	ret = nfp_net_bpf_offload(nn, prog, running, extack);
 	/* Stop offload if replace not possible */
-	if (ret && prog)
-		nfp_bpf_xdp_offload(app, nn, NULL, extack);
+	if (ret)
+		return ret;
 
-	nn->dp.bpf_offload_xdp = prog && !ret;
+	nn->dp.bpf_offload_xdp = !!prog;
 	return ret;
 }
 

From 560b423dd9af4272a1f3685c2d6b073fdc4af7c7 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Mon, 18 Jun 2018 06:52:58 +0900
Subject: [PATCH 040/382] openrisc: Call destructor during __pte_free_tlb

This fixes an issue uncovered when a recent change to add the "page
table" flag was merged.  During bootup we see many errors like the
following:

    BUG: Bad page state in process mkdir  pfn:00bae
    page:c1ff15c0 count:0 mapcount:-1024 mapping:00000000 index:0x0
    flags: 0x0()
    raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000
    page dumped because: nonzero mapcount
    Modules linked in:
    CPU: 0 PID: 46 Comm: mkdir Tainted: G    B             4.17.0-simple-smp-07461-g1d40a5ea01d5-dirty #993
    Call trace:
    [<(ptrval)>] show_stack+0x44/0x54
    [<(ptrval)>] dump_stack+0xb0/0xe8
    [<(ptrval)>] bad_page+0x138/0x174
    [<(ptrval)>] ? cpumask_next+0x24/0x34
    [<(ptrval)>] free_pages_check_bad+0x6c/0xd0
    [<(ptrval)>] free_pcppages_bulk+0x174/0x42c
    [<(ptrval)>] free_unref_page_commit.isra.17+0xb8/0xc8
    [<(ptrval)>] free_unref_page_list+0x10c/0x190
    [<(ptrval)>] ? set_reset_devices+0x0/0x2c
    [<(ptrval)>] release_pages+0x3a0/0x414
    [<(ptrval)>] tlb_flush_mmu_free+0x5c/0x90
    [<(ptrval)>] tlb_flush_mmu+0x90/0xa4
    [<(ptrval)>] arch_tlb_finish_mmu+0x50/0x94
    [<(ptrval)>] tlb_finish_mmu+0x30/0x64
    [<(ptrval)>] exit_mmap+0x110/0x1e0
    [<(ptrval)>] mmput+0x50/0xf0
    [<(ptrval)>] do_exit+0x274/0xa94
    [<(ptrval)>] do_group_exit+0x50/0x110
    [<(ptrval)>] __wake_up_parent+0x0/0x38
    [<(ptrval)>] _syscall_return+0x0/0x4

During the __pte_free_tlb path openrisc fails to call the page
destructor which would clear the new bits that were introduced.
To fix this we are calling the destructor.

It seem openrisc was the only architecture missing this,  all other
architectures either call the destructor like we are doing here or use
pte_free.

Note: failing to call the destructor was also messing up the zone stats
(and will be cause other problems if you were using SPLIT_PTE_PTLOCKS,
which we are not yet).

Fixes: 1d40a5ea01d53 ("mm: mark pages in use for page tables")
Acked-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/pgalloc.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 3e1a46615120..8999b9226512 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -98,8 +98,12 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte)
 	__free_page(pte);
 }
 
+#define __pte_free_tlb(tlb, pte, addr)	\
+do {					\
+	pgtable_page_dtor(pte);		\
+	tlb_remove_page((tlb), (pte));	\
+} while (0)
 
-#define __pte_free_tlb(tlb, pte, addr) tlb_remove_page((tlb), (pte))
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 #define check_pgt_cache()          do { } while (0)

From 8f732850df1b2b4d8d719f7e606dfb3050e7ea11 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Thu, 31 May 2018 13:49:29 +0200
Subject: [PATCH 041/382] HID: core: allow concurrent registration of drivers

Detected on the Dell XPS 9365.

The laptop has 2 devices that benefit from the hid-generic auto-unbinding.
When those 2 devices are presented to the userspace, udev loads both wacom and
hid-multitouch. When this happens, the code in __hid_bus_reprobe_drivers() is
called concurrently and the second device gets reprobed twice.

An other bug in the power_supply subsystem prevent to remove the wacom driver
if it just finished its initialization, which basically kills the wacom node.

[jkosina@suse.cz: reformat changelog a bit]
Fixes c17a7476e4c4 ("HID: core: rewrite the hid-generic automatic unbind")
Cc: stable@vger.kernel.org # v4.17
Tested-by: Mario Limonciello <mario.limonciello@dell.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 5 ++++-
 include/linux/hid.h    | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 355dc7e49562..a460ec147aee 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1949,6 +1949,8 @@ static int hid_device_probe(struct device *dev)
 	}
 	hdev->io_started = false;
 
+	clear_bit(ffs(HID_STAT_REPROBED), &hdev->status);
+
 	if (!hdev->driver) {
 		id = hid_match_device(hdev, hdrv);
 		if (id == NULL) {
@@ -2212,7 +2214,8 @@ static int __hid_bus_reprobe_drivers(struct device *dev, void *data)
 	struct hid_device *hdev = to_hid_device(dev);
 
 	if (hdev->driver == hdrv &&
-	    !hdrv->match(hdev, hid_ignore_special_drivers))
+	    !hdrv->match(hdev, hid_ignore_special_drivers) &&
+	    !test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status))
 		return device_reprobe(dev);
 
 	return 0;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 41a3d5775394..773bcb1d4044 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -511,6 +511,7 @@ struct hid_output_fifo {
 #define HID_STAT_ADDED		BIT(0)
 #define HID_STAT_PARSED		BIT(1)
 #define HID_STAT_DUP_DETECTED	BIT(2)
+#define HID_STAT_REPROBED	BIT(3)
 
 struct hid_input {
 	struct list_head list;
@@ -579,7 +580,7 @@ struct hid_device {							/* device report descriptor */
 	bool battery_avoid_query;
 #endif
 
-	unsigned int status;						/* see STAT flags above */
+	unsigned long status;						/* see STAT flags above */
 	unsigned claimed;						/* Claimed by hidinput, hiddev? */
 	unsigned quirks;						/* Various quirks the device can pull on us */
 	bool io_started;						/* If IO has started */

From 7b72717a20bba8bdd01b14c0460be7d15061cd6b Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Thu, 21 Jun 2018 07:43:21 -0700
Subject: [PATCH 042/382] iw_cxgb4: correctly enforce the max reg_mr depth

The code was mistakenly using the length of the page array memory instead
of the depth of the page array.

This would cause MR creation to fail in some cases.

Fixes: 8376b86de7d3 ("iw_cxgb4: Support the new memory registration API")
Cc: stable@vger.kernel.org
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/cxgb4/mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 1445918e3239..7b76e6f81aeb 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -774,7 +774,7 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
 {
 	struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
 
-	if (unlikely(mhp->mpl_len == mhp->max_mpl_len))
+	if (unlikely(mhp->mpl_len == mhp->attr.pbl_size))
 		return -ENOMEM;
 
 	mhp->mpl[mhp->mpl_len++] = addr;

From 940efcc8889f0d15567eb07fc9fd69b06e366aa5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 24 Jun 2018 11:23:42 +0300
Subject: [PATCH 043/382] RDMA/uverbs: Protect from attempts to create flows on
 unsupported QP

Flows can be created on UD and RAW_PACKET QP types. Attempts to provide
other QP types as an input causes to various unpredictable failures.

The reason is that in order to support all various types (e.g. XRC), we
are supposed to use real_qp handle and not qp handle and expect to
driver/FW to fail such (XRC) flows. The simpler and safer variant is to
ban all QP types except UD and RAW_PACKET, instead of relying on
driver/FW.

Cc: <stable@vger.kernel.org> # 3.11
Fixes: 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs")
Cc: syzkaller <syzkaller@googlegroups.com>
Reported-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3e90b6a1d9d2..89c4ce2da78b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3559,6 +3559,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		goto err_uobj;
 	}
 
+	if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	flow_attr = kzalloc(struct_size(flow_attr, flows,
 				cmd.flow_attr.num_of_specs), GFP_KERNEL);
 	if (!flow_attr) {

From 4fae7f170416f970e5655f7e945ce69286b1c4ff Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 24 Jun 2018 11:23:53 +0300
Subject: [PATCH 044/382] RDMA/uverbs: Fix slab-out-of-bounds in
 ib_uverbs_ex_create_flow

The check of cmd.flow_attr.size should check into account the size of the
reserved field (2 bytes), otherwise user can provide a size which will
cause a slab-out-of-bounds warning below.

==================================================================
BUG: KASAN: slab-out-of-bounds in ib_uverbs_ex_create_flow+0x1740/0x1d00
Read of size 2 at addr ffff880068dff1a6 by task syz-executor775/269

CPU: 0 PID: 269 Comm: syz-executor775 Not tainted 4.18.0-rc1+ #245
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0xef/0x17e
 print_address_description+0x83/0x3b0
 kasan_report+0x18d/0x4d0
 ib_uverbs_ex_create_flow+0x1740/0x1d00
 ib_uverbs_write+0x923/0x1010
 __vfs_write+0x10d/0x720
 vfs_write+0x1b0/0x550
 ksys_write+0xc6/0x1a0
 do_syscall_64+0xa7/0x590
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x433899
Code: fd ff 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89
f7 48 89 d6 48 89 ca 4d 89 c2 4d
89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b 91 fd ff c3 66
2e 0f 1f 84 00 00 00 00
RSP: 002b:00007ffc2724db58 EFLAGS: 00000217 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000020006880 RCX: 0000000000433899
RDX: 00000000000000e0 RSI: 0000000020002480 RDI: 0000000000000003
RBP: 00000000006d7018 R08: 00000000004002f8 R09: 00000000004002f8
R10: 00000000004002f8 R11: 0000000000000217 R12: 0000000000000000

R13: 000000000040cd20 R14: 000000000040cdb0 R15: 0000000000000006

Allocated by task 269:
 kasan_kmalloc+0xa0/0xd0
 __kmalloc+0x1a9/0x510
 ib_uverbs_ex_create_flow+0x26c/0x1d00
 ib_uverbs_write+0x923/0x1010
 __vfs_write+0x10d/0x720
 vfs_write+0x1b0/0x550
 ksys_write+0xc6/0x1a0
 do_syscall_64+0xa7/0x590
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 0:
 __kasan_slab_free+0x12e/0x180
 kfree+0x159/0x630
 detach_buf+0x559/0x7a0
 virtqueue_get_buf_ctx+0x3cc/0xab0
 virtblk_done+0x1eb/0x3d0
 vring_interrupt+0x16d/0x2b0
 __handle_irq_event_percpu+0x10a/0x980
 handle_irq_event_percpu+0x77/0x190
 handle_irq_event+0xc6/0x1a0
 handle_edge_irq+0x211/0xd80
 handle_irq+0x3d/0x60
 do_IRQ+0x9b/0x220

The buggy address belongs to the object at ffff880068dff180
 which belongs to the cache kmalloc-64 of size 64
The buggy address is located 38 bytes inside of
 64-byte region [ffff880068dff180, ffff880068dff1c0)
The buggy address belongs to the page:
page:ffffea0001a37fc0 count:1 mapcount:0 mapping:ffff88006c401780
index:0x0
flags: 0x4000000000000100(slab)
raw: 4000000000000100 ffffea0001a31100 0000001100000011 ffff88006c401780
raw: 0000000000000000 00000000802a002a 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff880068dff080: fb fb fb fb fc fc fc fc fb fb fb fb fb fb fb fb
 ffff880068dff100: fc fc fc fc fb fb fb fb fb fb fb fb fc fc fc fc
>ffff880068dff180: 00 00 00 00 07 fc fc fc fc fc fc fc fb fb fb fb
                               ^
 ffff880068dff200: fb fb fb fb fc fc fc fc 00 00 00 00 00 00 fc fc
 ffff880068dff280: fc fc fc fc 00 00 00 00 00 00 00 00 fc fc fc fc
==================================================================

Cc: <stable@vger.kernel.org> # 3.12
Fixes: f88482743872 ("IB/core: clarify overflow/underflow checks on ib_create/destroy_flow")
Cc: syzkaller <syzkaller@googlegroups.com>
Reported-by: Noa Osherovich <noaos@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 89c4ce2da78b..87ffeebc0b28 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3488,8 +3488,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	struct ib_flow_attr		  *flow_attr;
 	struct ib_qp			  *qp;
 	struct ib_uflow_resources	  *uflow_res;
+	struct ib_uverbs_flow_spec_hdr	  *kern_spec;
 	int err = 0;
-	void *kern_spec;
 	void *ib_spec;
 	int i;
 
@@ -3538,8 +3538,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		if (!kern_flow_attr)
 			return -ENOMEM;
 
-		memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
-		err = ib_copy_from_udata(kern_flow_attr + 1, ucore,
+		*kern_flow_attr = cmd.flow_attr;
+		err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore,
 					 cmd.flow_attr.size);
 		if (err)
 			goto err_free_attr;
@@ -3583,21 +3583,22 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	flow_attr->flags = kern_flow_attr->flags;
 	flow_attr->size = sizeof(*flow_attr);
 
-	kern_spec = kern_flow_attr + 1;
+	kern_spec = kern_flow_attr->flow_specs;
 	ib_spec = flow_attr + 1;
 	for (i = 0; i < flow_attr->num_of_specs &&
-	     cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
-	     cmd.flow_attr.size >=
-	     ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
-		err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
-					   uflow_res);
+			cmd.flow_attr.size > sizeof(*kern_spec) &&
+			cmd.flow_attr.size >= kern_spec->size;
+	     i++) {
+		err = kern_spec_to_ib_spec(
+				file->ucontext, (struct ib_uverbs_flow_spec *)kern_spec,
+				ib_spec, uflow_res);
 		if (err)
 			goto err_free;
 
 		flow_attr->size +=
 			((union ib_flow_spec *) ib_spec)->size;
-		cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
-		kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;
+		cmd.flow_attr.size -= kern_spec->size;
+		kern_spec = ((void *)kern_spec) + kern_spec->size;
 		ib_spec += ((union ib_flow_spec *) ib_spec)->size;
 	}
 	if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) {

From 0613de37373bbbc747d434f643620472bd13303b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 2 Jun 2018 10:52:56 +0300
Subject: [PATCH 045/382] clk: davinci: cfgchip: testing the wrong variable

There is a copy and paste bug here.  We should be testing "usb1" instead
of "usb0".

Fixes: 58e1e2d2cd89 ("clk: davinci: cfgchip: Add TI DA8XX USB PHY clocks")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David Lechner <david@lechnology.com>
---
 drivers/clk/davinci/da8xx-cfgchip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/davinci/da8xx-cfgchip.c b/drivers/clk/davinci/da8xx-cfgchip.c
index aae62a5b8734..d1bbee19ed0f 100644
--- a/drivers/clk/davinci/da8xx-cfgchip.c
+++ b/drivers/clk/davinci/da8xx-cfgchip.c
@@ -672,7 +672,7 @@ static int of_da8xx_usb_phy_clk_init(struct device *dev, struct regmap *regmap)
 
 	usb1 = da8xx_cfgchip_register_usb1_clk48(dev, regmap);
 	if (IS_ERR(usb1)) {
-		if (PTR_ERR(usb0) == -EPROBE_DEFER)
+		if (PTR_ERR(usb1) == -EPROBE_DEFER)
 			return -EPROBE_DEFER;
 
 		dev_warn(dev, "Failed to register usb1_clk48 (%ld)\n",

From 176cbf5f62e7a0aa0180af4826da9e7da1359988 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Mon, 25 Jun 2018 19:25:50 +0200
Subject: [PATCH 046/382] clk: davinci: fix a typo (which leads to build
 failures)

This should have been DM365, not DM356.

Fixes: 4eff0bebf4ed5 ("clk: davinci: Fix link errors when not all SoCs are enabled")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David Lechner <david@lechnology.com>
---
 drivers/clk/davinci/psc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/davinci/psc.h b/drivers/clk/davinci/psc.h
index 6a42529d31a9..cc5614567a70 100644
--- a/drivers/clk/davinci/psc.h
+++ b/drivers/clk/davinci/psc.h
@@ -107,7 +107,7 @@ extern const struct davinci_psc_init_data of_da850_psc1_init_data;
 #ifdef CONFIG_ARCH_DAVINCI_DM355
 extern const struct davinci_psc_init_data dm355_psc_init_data;
 #endif
-#ifdef CONFIG_ARCH_DAVINCI_DM356
+#ifdef CONFIG_ARCH_DAVINCI_DM365
 extern const struct davinci_psc_init_data dm365_psc_init_data;
 #endif
 #ifdef CONFIG_ARCH_DAVINCI_DM644x

From 2203d8a76ea3b962559acfe3bb80eddfdfbc9cee Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Mon, 25 Jun 2018 08:44:51 +0200
Subject: [PATCH 047/382] sample/mdev/mbochs: remove mbochs_kmap_atomic_dmabuf

Atomic mapping interface for dmabufs will be removed.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mbochs.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 2960e26c6ea4..aa25cda21d22 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -803,15 +803,6 @@ static void mbochs_release_dmabuf(struct dma_buf *buf)
 	mutex_unlock(&mdev_state->ops_lock);
 }
 
-static void *mbochs_kmap_atomic_dmabuf(struct dma_buf *buf,
-				       unsigned long page_num)
-{
-	struct mbochs_dmabuf *dmabuf = buf->priv;
-	struct page *page = dmabuf->pages[page_num];
-
-	return kmap_atomic(page);
-}
-
 static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num)
 {
 	struct mbochs_dmabuf *dmabuf = buf->priv;
@@ -824,7 +815,6 @@ static struct dma_buf_ops mbochs_dmabuf_ops = {
 	.map_dma_buf	  = mbochs_map_dmabuf,
 	.unmap_dma_buf	  = mbochs_unmap_dmabuf,
 	.release	  = mbochs_release_dmabuf,
-	.map_atomic	  = mbochs_kmap_atomic_dmabuf,
 	.map		  = mbochs_kmap_dmabuf,
 	.mmap		  = mbochs_mmap_dmabuf,
 };

From 7733e05b34f5d855c5590d816b41ddfee972d188 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Mon, 25 Jun 2018 08:44:52 +0200
Subject: [PATCH 048/382] sample/mdev/mbochs: add mbochs_kunmap_dmabuf

There is no default implementation for dma_buf_ops->unmap.
So add a function unmapping the page, otherwise we'll leak them.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mbochs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index aa25cda21d22..85ac6037696f 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -811,11 +811,18 @@ static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num)
 	return kmap(page);
 }
 
+static void mbochs_kunmap_dmabuf(struct dma_buf *buf, unsigned long page_num,
+				 void *vaddr)
+{
+	kunmap(vaddr);
+}
+
 static struct dma_buf_ops mbochs_dmabuf_ops = {
 	.map_dma_buf	  = mbochs_map_dmabuf,
 	.unmap_dma_buf	  = mbochs_unmap_dmabuf,
 	.release	  = mbochs_release_dmabuf,
 	.map		  = mbochs_kmap_dmabuf,
+	.unmap		  = mbochs_kunmap_dmabuf,
 	.mmap		  = mbochs_mmap_dmabuf,
 };
 

From 0ee1f4734967af8321ecebaf9c74221ace34f2d5 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 25 Jun 2018 09:26:27 +0200
Subject: [PATCH 049/382] r8152: napi hangup fix after disconnect

When unplugging an r8152 adapter while the interface is UP, the NIC
becomes unusable.  usb->disconnect (aka rtl8152_disconnect) deletes
napi. Then, rtl8152_disconnect calls unregister_netdev and that invokes
netdev->ndo_stop (aka rtl8152_close). rtl8152_close tries to
napi_disable, but the napi is already deleted by disconnect above. So
the first while loop in napi_disable never finishes. This results in
complete deadlock of the network layer as there is rtnl_mutex held by
unregister_netdev.

So avoid the call to napi_disable in rtl8152_close when the device is
already gone.

The other calls to usb_kill_urb, cancel_delayed_work_sync,
netif_stop_queue etc. seem to be fine. The urb and netdev is not
destroyed yet.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: linux-usb@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 86f7196f9d91..2a58607a6aea 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3962,7 +3962,8 @@ static int rtl8152_close(struct net_device *netdev)
 #ifdef CONFIG_PM_SLEEP
 	unregister_pm_notifier(&tp->pm_notifier);
 #endif
-	napi_disable(&tp->napi);
+	if (!test_bit(RTL8152_UNPLUG, &tp->flags))
+		napi_disable(&tp->napi);
 	clear_bit(WORK_ENABLE, &tp->flags);
 	usb_kill_urb(tp->intr_urb);
 	cancel_delayed_work_sync(&tp->schedule);

From fdb5c4531c1e0e50e609df83f736b6f3a02896e2 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 19 Jun 2018 00:04:24 +0100
Subject: [PATCH 050/382] bpf: fix attach type BPF_LIRC_MODE2 dependency wrt
 CONFIG_CGROUP_BPF

If the kernel is compiled with CONFIG_CGROUP_BPF not enabled, it is not
possible to attach, detach or query IR BPF programs to /dev/lircN devices,
making them impossible to use. For embedded devices, it should be possible
to use IR decoding without cgroups or CONFIG_CGROUP_BPF enabled.

This change requires some refactoring, since bpf_prog_{attach,detach,query}
functions are now always compiled, but their code paths for cgroups need
moving out. Rather than a #ifdef CONFIG_CGROUP_BPF in kernel/bpf/syscall.c,
moving them to kernel/bpf/cgroup.c and kernel/bpf/sockmap.c does not
require #ifdefs since that is already conditionally compiled.

Fixes: f4364dcfc86d ("media: rc: introduce BPF_PROG_LIRC_MODE2")
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/media/rc/bpf-lirc.c | 14 +-----
 include/linux/bpf-cgroup.h  | 26 ++++++++++
 include/linux/bpf.h         |  8 +++
 include/linux/bpf_lirc.h    |  5 +-
 kernel/bpf/cgroup.c         | 54 ++++++++++++++++++++
 kernel/bpf/sockmap.c        | 18 +++++++
 kernel/bpf/syscall.c        | 99 ++++++++-----------------------------
 7 files changed, 132 insertions(+), 92 deletions(-)

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 40826bba06b6..fcfab6635f9c 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -207,29 +207,19 @@ void lirc_bpf_free(struct rc_dev *rcdev)
 	bpf_prog_array_free(rcdev->raw->progs);
 }
 
-int lirc_prog_attach(const union bpf_attr *attr)
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
-	struct bpf_prog *prog;
 	struct rc_dev *rcdev;
 	int ret;
 
 	if (attr->attach_flags)
 		return -EINVAL;
 
-	prog = bpf_prog_get_type(attr->attach_bpf_fd,
-				 BPF_PROG_TYPE_LIRC_MODE2);
-	if (IS_ERR(prog))
-		return PTR_ERR(prog);
-
 	rcdev = rc_dev_get_from_fd(attr->target_fd);
-	if (IS_ERR(rcdev)) {
-		bpf_prog_put(prog);
+	if (IS_ERR(rcdev))
 		return PTR_ERR(rcdev);
-	}
 
 	ret = lirc_bpf_attach(rcdev, prog);
-	if (ret)
-		bpf_prog_put(prog);
 
 	put_device(&rcdev->dev);
 
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 975fb4cf1bb7..79795c5fa7c3 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -188,12 +188,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 									      \
 	__ret;								      \
 })
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype, struct bpf_prog *prog);
+int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype);
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr);
 #else
 
+struct bpf_prog;
 struct cgroup_bpf {};
 static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 
+static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype,
+					 struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
+					 enum bpf_prog_type ptype)
+{
+	return -EINVAL;
+}
+
+static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
+					union bpf_attr __user *uattr)
+{
+	return -EINVAL;
+}
+
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7df32a3200f7..8827e797ff97 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -696,6 +696,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
 struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+			struct bpf_prog *prog);
 #else
 static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -714,6 +716,12 @@ static inline int sock_map_prog(struct bpf_map *map,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+				      struct bpf_prog *prog)
+{
+	return -EINVAL;
+}
 #endif
 
 #if defined(CONFIG_XDP_SOCKETS)
diff --git a/include/linux/bpf_lirc.h b/include/linux/bpf_lirc.h
index 5f8a4283092d..9d9ff755ec29 100644
--- a/include/linux/bpf_lirc.h
+++ b/include/linux/bpf_lirc.h
@@ -5,11 +5,12 @@
 #include <uapi/linux/bpf.h>
 
 #ifdef CONFIG_BPF_LIRC_MODE2
-int lirc_prog_attach(const union bpf_attr *attr);
+int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int lirc_prog_detach(const union bpf_attr *attr);
 int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
 #else
-static inline int lirc_prog_attach(const union bpf_attr *attr)
+static inline int lirc_prog_attach(const union bpf_attr *attr,
+				   struct bpf_prog *prog)
 {
 	return -EINVAL;
 }
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f7c00bd6f8e4..3d83ee7df381 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	return ret;
 }
 
+int cgroup_bpf_prog_attach(const union bpf_attr *attr,
+			   enum bpf_prog_type ptype, struct bpf_prog *prog)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+				attr->attach_flags);
+	cgroup_put(cgrp);
+	return ret;
+}
+
+int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
+{
+	struct bpf_prog *prog;
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+	if (IS_ERR(prog))
+		prog = NULL;
+
+	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
+	if (prog)
+		bpf_prog_put(prog);
+
+	cgroup_put(cgrp);
+	return ret;
+}
+
+int cgroup_bpf_prog_query(const union bpf_attr *attr,
+			  union bpf_attr __user *uattr)
+{
+	struct cgroup *cgrp;
+	int ret;
+
+	cgrp = cgroup_get_from_fd(attr->query.target_fd);
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	ret = cgroup_bpf_query(cgrp, attr, uattr);
+
+	cgroup_put(cgrp);
+	return ret;
+}
+
 /**
  * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
  * @sk: The socket sending or receiving traffic
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d816c0e..81d0c55a77aa 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1915,6 +1915,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
 	return 0;
 }
 
+int sockmap_get_from_fd(const union bpf_attr *attr, int type,
+			struct bpf_prog *prog)
+{
+	int ufd = attr->target_fd;
+	struct bpf_map *map;
+	struct fd f;
+	int err;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	err = sock_map_prog(map, prog, attr->attach_type);
+	fdput(f);
+	return err;
+}
+
 static void *sock_map_lookup(struct bpf_map *map, void *key)
 {
 	return NULL;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35dc466641f2..d10ecd78105f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1483,8 +1483,6 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 	return err;
 }
 
-#ifdef CONFIG_CGROUP_BPF
-
 static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 					     enum bpf_attach_type attach_type)
 {
@@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 
 #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
 
-static int sockmap_get_from_fd(const union bpf_attr *attr,
-			       int type, bool attach)
-{
-	struct bpf_prog *prog = NULL;
-	int ufd = attr->target_fd;
-	struct bpf_map *map;
-	struct fd f;
-	int err;
-
-	f = fdget(ufd);
-	map = __bpf_map_get(f);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
-
-	if (attach) {
-		prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
-		if (IS_ERR(prog)) {
-			fdput(f);
-			return PTR_ERR(prog);
-		}
-	}
-
-	err = sock_map_prog(map, prog, attr->attach_type);
-	if (err) {
-		fdput(f);
-		if (prog)
-			bpf_prog_put(prog);
-		return err;
-	}
-
-	fdput(f);
-	return 0;
-}
-
 #define BPF_F_ATTACH_MASK \
 	(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
 
@@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
 	struct bpf_prog *prog;
-	struct cgroup *cgrp;
 	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
@@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true);
+		ptype = BPF_PROG_TYPE_SK_MSG;
+		break;
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true);
+		ptype = BPF_PROG_TYPE_SK_SKB;
+		break;
 	case BPF_LIRC_MODE2:
-		return lirc_prog_attach(attr);
+		ptype = BPF_PROG_TYPE_LIRC_MODE2;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp)) {
-		bpf_prog_put(prog);
-		return PTR_ERR(cgrp);
+	switch (ptype) {
+	case BPF_PROG_TYPE_SK_SKB:
+	case BPF_PROG_TYPE_SK_MSG:
+		ret = sockmap_get_from_fd(attr, ptype, prog);
+		break;
+	case BPF_PROG_TYPE_LIRC_MODE2:
+		ret = lirc_prog_attach(attr, prog);
+		break;
+	default:
+		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 	}
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
-				attr->attach_flags);
 	if (ret)
 		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-
 	return ret;
 }
 
@@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 static int bpf_prog_detach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
-	struct bpf_prog *prog;
-	struct cgroup *cgrp;
-	int ret;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false);
+		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_detach(attr);
 	default:
 		return -EINVAL;
 	}
 
-	cgrp = cgroup_get_from_fd(attr->target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-
-	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
-	if (IS_ERR(prog))
-		prog = NULL;
-
-	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
-	if (prog)
-		bpf_prog_put(prog);
-	cgroup_put(cgrp);
-	return ret;
+	return cgroup_bpf_prog_detach(attr, ptype);
 }
 
 #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 static int bpf_prog_query(const union bpf_attr *attr,
 			  union bpf_attr __user *uattr)
 {
-	struct cgroup *cgrp;
-	int ret;
-
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_QUERY))
@@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	default:
 		return -EINVAL;
 	}
-	cgrp = cgroup_get_from_fd(attr->query.target_fd);
-	if (IS_ERR(cgrp))
-		return PTR_ERR(cgrp);
-	ret = cgroup_bpf_query(cgrp, attr, uattr);
-	cgroup_put(cgrp);
-	return ret;
+
+	return cgroup_bpf_prog_query(attr, uattr);
 }
-#endif /* CONFIG_CGROUP_BPF */
 
 #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
 
@@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_OBJ_GET:
 		err = bpf_obj_get(&attr);
 		break;
-#ifdef CONFIG_CGROUP_BPF
 	case BPF_PROG_ATTACH:
 		err = bpf_prog_attach(&attr);
 		break;
@@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_PROG_QUERY:
 		err = bpf_prog_query(&attr, uattr);
 		break;
-#endif
 	case BPF_PROG_TEST_RUN:
 		err = bpf_prog_test_run(&attr, uattr);
 		break;

From 5fadfc61934a33d362ca47c7f70786105d5681c1 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Mon, 25 Jun 2018 16:56:05 +0200
Subject: [PATCH 051/382] selftests: bpf: add missing NET_SCHED to config

CONFIG_NET_SCHED wasn't enabled in arm64's defconfig only for x86.

So bpf/test_tunnel.sh tests fails with:

  RTNETLINK answers: Operation not supported
  RTNETLINK answers: Operation not supported
  We have an error talking to the kernel, -1

Enable NET_SCHED and more tests pass.

Fixes: 3bce593ac06b ("selftests: bpf: config: add config fragments")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 7eb613ffef55..b4994a94968b 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -6,6 +6,7 @@ CONFIG_TEST_BPF=m
 CONFIG_CGROUP_BPF=y
 CONFIG_NETDEVSIM=m
 CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_INGRESS=y
 CONFIG_NET_IPIP=y
 CONFIG_IPV6=y

From b0e4b8bcd48bde31690f066044767475b2e4852b Mon Sep 17 00:00:00 2001
From: Jeffrin Jose T <ahiliation@gmail.com>
Date: Sat, 23 Jun 2018 00:24:17 +0530
Subject: [PATCH 052/382] selftests: bpf: notification about privilege required
 to run test_lirc_mode2.sh testing script

The test_lirc_mode2.sh script require root privilege for the successful
execution of the test.

This patch is to notify the user about the privilege the script
demands for the successful execution of the test.

Signed-off-by: Jeffrin Jose T (Rajagiri SET) <ahiliation@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_lirc_mode2.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
index ce2e15e4f976..677686198df3 100755
--- a/tools/testing/selftests/bpf/test_lirc_mode2.sh
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -1,6 +1,15 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
 GREEN='\033[0;92m'
 RED='\033[0;31m'
 NC='\033[0m' # No Color

From dd349c3ffd93d15d01f4f5ace73870ca45ea249d Mon Sep 17 00:00:00 2001
From: Jeffrin Jose T <ahiliation@gmail.com>
Date: Sat, 23 Jun 2018 03:10:32 +0530
Subject: [PATCH 053/382] selftests: bpf: notification about privilege required
 to run test_lwt_seg6local.sh testing script

This test needs root privilege for it's successful execution.

This patch is atleast used to notify the user about the privilege
the script demands for the  smooth execution of the test.

Signed-off-by: Jeffrin Jose T (Rajagiri SET) <ahiliation@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_lwt_seg6local.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 1c77994b5e71..270fa8f49573 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -21,6 +21,15 @@
 # An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
 # datagram can be read on NS6 when binding to fb00::6.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
 TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
 
 cleanup()

From 2259b7a64d71f27311a19fd7a5bed47413d75985 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.org>
Date: Mon, 25 Jun 2018 15:07:12 +0100
Subject: [PATCH 054/382] net: lan78xx: Allow for VLAN headers in timeout calcs

The frame abort timeout being set by lan78xx_set_rx_max_frame_length
didn't account for any VLAN headers, resulting in very low
throughput if used with tagged VLANs.

Use VLAN_ETH_HLEN instead of ETH_HLEN to correct for this.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 8dff87ec6d99..1a1d3cc8e308 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2298,7 +2298,7 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
 	if ((ll_mtu % dev->maxpacket) == 0)
 		return -EDOM;
 
-	ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + ETH_HLEN);
+	ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
 
 	netdev->mtu = new_mtu;
 
@@ -2587,7 +2587,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
 	buf |= FCT_TX_CTL_EN_;
 	ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf);
 
-	ret = lan78xx_set_rx_max_frame_length(dev, dev->net->mtu + ETH_HLEN);
+	ret = lan78xx_set_rx_max_frame_length(dev,
+					      dev->net->mtu + VLAN_ETH_HLEN);
 
 	ret = lan78xx_read_reg(dev, MAC_RX, &buf);
 	buf |= MAC_RX_RXEN_;

From 4a27327b156e1e543c839a9358ba885564729ae7 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.org>
Date: Mon, 25 Jun 2018 15:07:13 +0100
Subject: [PATCH 055/382] net: lan78xx: Add support for VLAN filtering.

HW_VLAN_CTAG_FILTER was partially implemented, but not advertised
to Linux.

Complete the implementation of this.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 1a1d3cc8e308..bd41a36924db 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2363,7 +2363,7 @@ static int lan78xx_set_features(struct net_device *netdev,
 		pdata->rfe_ctl &= ~(RFE_CTL_ICMP_COE_ | RFE_CTL_IGMP_COE_);
 	}
 
-	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+	if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
 		pdata->rfe_ctl |= RFE_CTL_VLAN_FILTER_;
 	else
 		pdata->rfe_ctl &= ~RFE_CTL_VLAN_FILTER_;
@@ -2976,6 +2976,9 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 	if (DEFAULT_TSO_CSUM_ENABLE)
 		dev->net->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG;
 
+	if (DEFAULT_VLAN_FILTER_ENABLE)
+		dev->net->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
 	dev->net->hw_features = dev->net->features;
 
 	ret = lan78xx_setup_irq_domain(dev);

From ec21ecf0aad27956dc64475e5acd78f3575df462 Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.org>
Date: Mon, 25 Jun 2018 15:07:14 +0100
Subject: [PATCH 056/382] net: lan78xx: Add support for VLAN tag stripping.

The chip supports stripping the VLAN tag and reporting it
in metadata.
Complete the support for this.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index bd41a36924db..28da296f8cf7 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -64,6 +64,7 @@
 #define DEFAULT_RX_CSUM_ENABLE		(true)
 #define DEFAULT_TSO_CSUM_ENABLE		(true)
 #define DEFAULT_VLAN_FILTER_ENABLE	(true)
+#define DEFAULT_VLAN_RX_OFFLOAD		(true)
 #define TX_OVERHEAD			(8)
 #define RXW_PADDING			2
 
@@ -2363,6 +2364,11 @@ static int lan78xx_set_features(struct net_device *netdev,
 		pdata->rfe_ctl &= ~(RFE_CTL_ICMP_COE_ | RFE_CTL_IGMP_COE_);
 	}
 
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		pdata->rfe_ctl |= RFE_CTL_VLAN_STRIP_;
+	else
+		pdata->rfe_ctl &= ~RFE_CTL_VLAN_STRIP_;
+
 	if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
 		pdata->rfe_ctl |= RFE_CTL_VLAN_FILTER_;
 	else
@@ -2976,6 +2982,9 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 	if (DEFAULT_TSO_CSUM_ENABLE)
 		dev->net->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG;
 
+	if (DEFAULT_VLAN_RX_OFFLOAD)
+		dev->net->features |= NETIF_F_HW_VLAN_CTAG_RX;
+
 	if (DEFAULT_VLAN_FILTER_ENABLE)
 		dev->net->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
@@ -3052,6 +3061,16 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev,
 	}
 }
 
+static void lan78xx_rx_vlan_offload(struct lan78xx_net *dev,
+				    struct sk_buff *skb,
+				    u32 rx_cmd_a, u32 rx_cmd_b)
+{
+	if ((dev->net->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    (rx_cmd_a & RX_CMD_A_FVTG_))
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+				       (rx_cmd_b & 0xffff));
+}
+
 static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
 {
 	int		status;
@@ -3116,6 +3135,8 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb)
 			if (skb->len == size) {
 				lan78xx_rx_csum_offload(dev, skb,
 							rx_cmd_a, rx_cmd_b);
+				lan78xx_rx_vlan_offload(dev, skb,
+							rx_cmd_a, rx_cmd_b);
 
 				skb_trim(skb, skb->len - 4); /* remove fcs */
 				skb->truesize = size + sizeof(struct sk_buff);
@@ -3134,6 +3155,7 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb)
 			skb_set_tail_pointer(skb2, size);
 
 			lan78xx_rx_csum_offload(dev, skb2, rx_cmd_a, rx_cmd_b);
+			lan78xx_rx_vlan_offload(dev, skb2, rx_cmd_a, rx_cmd_b);
 
 			skb_trim(skb2, skb2->len - 4); /* remove fcs */
 			skb2->truesize = size + sizeof(struct sk_buff);

From 9343ac87f2a4e09bf6e27b5f31e72e9e3a82abff Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson@raspberrypi.org>
Date: Mon, 25 Jun 2018 15:07:15 +0100
Subject: [PATCH 057/382] net: lan78xx: Use s/w csum check on VLANs without tag
 stripping

Observations of VLANs dropping packets due to invalid
checksums when not offloading VLAN tag receive.
With VLAN tag stripping enabled no issue is observed.

Drop back to s/w checksums if VLAN offload is disabled.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 28da296f8cf7..2e4130746c40 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -3052,8 +3052,13 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev,
 				    struct sk_buff *skb,
 				    u32 rx_cmd_a, u32 rx_cmd_b)
 {
+	/* HW Checksum offload appears to be flawed if used when not stripping
+	 * VLAN headers. Drop back to S/W checksums under these conditions.
+	 */
 	if (!(dev->net->features & NETIF_F_RXCSUM) ||
-	    unlikely(rx_cmd_a & RX_CMD_A_ICSM_)) {
+	    unlikely(rx_cmd_a & RX_CMD_A_ICSM_) ||
+	    ((rx_cmd_a & RX_CMD_A_FVTG_) &&
+	     !(dev->net->features & NETIF_F_HW_VLAN_CTAG_RX))) {
 		skb->ip_summed = CHECKSUM_NONE;
 	} else {
 		skb->csum = ntohs((u16)(rx_cmd_b >> RX_CMD_B_CSUM_SHIFT_));

From dffd22aed2aa1e804bccf19b30a421e89ee2ae61 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Wed, 20 Jun 2018 18:33:45 +0200
Subject: [PATCH 058/382] netfilter: nf_log: fix uninit read in
 nf_log_proc_dostring

When proc_dostring() is called with a non-zero offset in strict mode, it
doesn't just write to the ->data buffer, it also reads. Make sure it
doesn't read uninitialized data.

Fixes: c6ac37d8d884 ("netfilter: nf_log: fix error on write NONE to [...]")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_log.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 426457047578..2c47f9ec3511 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -424,6 +424,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
 	if (write) {
 		struct ctl_table tmp = *table;
 
+		/* proc_dostring() can append to existing strings, so we need to
+		 * initialize it as an empty string.
+		 */
+		buf[0] = '\0';
 		tmp.data = buf;
 		r = proc_dostring(&tmp, write, buffer, lenp, ppos);
 		if (r)

From ce00bf07cc95a57cd20b208e02b3c2604e532ae8 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 25 Jun 2018 17:22:00 +0200
Subject: [PATCH 059/382] netfilter: nf_log: don't hold nf_log_mutex during
 user access

The old code would indefinitely block other users of nf_log_mutex if
a userspace access in proc_dostring() blocked e.g. due to a userfaultfd
region. Fix it by moving proc_dostring() out of the locked region.

This is a followup to commit 266d07cb1c9a ("netfilter: nf_log: fix
sleeping function called from invalid context"), which changed this code
from using rcu_read_lock() to taking nf_log_mutex.

Fixes: 266d07cb1c9a ("netfilter: nf_log: fix sleeping function calle[...]")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_log.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 2c47f9ec3511..a61d6df6e5f6 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -446,14 +446,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
 		rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
 		mutex_unlock(&nf_log_mutex);
 	} else {
+		struct ctl_table tmp = *table;
+
+		tmp.data = buf;
 		mutex_lock(&nf_log_mutex);
 		logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
 		if (!logger)
-			table->data = "NONE";
+			strlcpy(buf, "NONE", sizeof(buf));
 		else
-			table->data = logger->name;
-		r = proc_dostring(table, write, buffer, lenp, ppos);
+			strlcpy(buf, logger->name, sizeof(buf));
 		mutex_unlock(&nf_log_mutex);
+		r = proc_dostring(&tmp, write, buffer, lenp, ppos);
 	}
 
 	return r;

From 59b433c825569ce251371485f0e29fca888b549d Mon Sep 17 00:00:00 2001
From: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
Date: Fri, 22 Jun 2018 06:55:47 -0700
Subject: [PATCH 060/382] scsi: aacraid: Fix PD performance regression over
 incorrect qd being set

The driver fails to set the correct queue depth for native devices, due to
failing to set the device type prior to calling aac_set_safw_target_qd().
This results in slave configure setting the queue depth to 1.

This causes around 30% performance degradation. Fixed by setting the dev
type before trying to set queue depth.

Reported-by: Steve Best <sbest@redhat.com>
Fixes: 0bcb45fb20c21 ("scsi: aacraid: Add helper function to set queue depth")
cc: stable@vger.kernel.org
Signed-off-by: Raghava Aditya Renukunta <RaghavaAditya.Renukunta@microsemi.com>
Reviewed-by: David Carroll <David.Carroll@microsemi.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/aachba.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index a9831bd37a73..a57f3a7d4748 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -1974,7 +1974,6 @@ static void aac_set_safw_attr_all_targets(struct aac_dev *dev)
 	u32 lun_count, nexus;
 	u32 i, bus, target;
 	u8 expose_flag, attribs;
-	u8 devtype;
 
 	lun_count = aac_get_safw_phys_lun_count(dev);
 
@@ -1992,23 +1991,23 @@ static void aac_set_safw_attr_all_targets(struct aac_dev *dev)
 			continue;
 
 		if (expose_flag != 0) {
-			devtype = AAC_DEVTYPE_RAID_MEMBER;
-			goto update_devtype;
+			dev->hba_map[bus][target].devtype =
+				AAC_DEVTYPE_RAID_MEMBER;
+			continue;
 		}
 
 		if (nexus != 0 && (attribs & 8)) {
-			devtype = AAC_DEVTYPE_NATIVE_RAW;
+			dev->hba_map[bus][target].devtype =
+				AAC_DEVTYPE_NATIVE_RAW;
 			dev->hba_map[bus][target].rmw_nexus =
 					nexus;
 		} else
-			devtype = AAC_DEVTYPE_ARC_RAW;
+			dev->hba_map[bus][target].devtype =
+				AAC_DEVTYPE_ARC_RAW;
 
 		dev->hba_map[bus][target].scan_counter = dev->scan_counter;
 
 		aac_set_safw_target_qd(dev, bus, target);
-
-update_devtype:
-		dev->hba_map[bus][target].devtype = devtype;
 	}
 }
 

From b36e4523d4d56e2595e28f16f6ccf1cd6a9fc452 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 20 Jun 2018 23:32:26 +0200
Subject: [PATCH 061/382] netfilter: nf_conncount: fix garbage collection
 confirm race

Yi-Hung Wei and Justin Pettit found a race in the garbage collection scheme
used by nf_conncount.

When doing list walk, we lookup the tuple in the conntrack table.
If the lookup fails we remove this tuple from our list because
the conntrack entry is gone.

This is the common cause, but turns out its not the only one.
The list entry could have been created just before by another cpu, i.e. the
conntrack entry might not yet have been inserted into the global hash.

The avoid this, we introduce a timestamp and the owning cpu.
If the entry appears to be stale, evict only if:
 1. The current cpu is the one that added the entry, or,
 2. The timestamp is older than two jiffies

The second constraint allows GC to be taken over by other
cpu too (e.g. because a cpu was offlined or napi got moved to another
cpu).

We can't pretend the 'doubtful' entry wasn't in our list.
Instead, when we don't find an entry indicate via IS_ERR
that entry was removed ('did not exist' or withheld
('might-be-unconfirmed').

This most likely also fixes a xt_connlimit imbalance earlier reported by
Dmitry Andrianov.

Cc: Dmitry Andrianov <dmitry.andrianov@alertme.com>
Reported-by: Justin Pettit <jpettit@vmware.com>
Reported-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Yi-Hung Wei <yihung.wei@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conncount.c | 52 ++++++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index d8383609fe28..510039862aa9 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -47,6 +47,8 @@ struct nf_conncount_tuple {
 	struct hlist_node		node;
 	struct nf_conntrack_tuple	tuple;
 	struct nf_conntrack_zone	zone;
+	int				cpu;
+	u32				jiffies32;
 };
 
 struct nf_conncount_rb {
@@ -91,11 +93,42 @@ bool nf_conncount_add(struct hlist_head *head,
 		return false;
 	conn->tuple = *tuple;
 	conn->zone = *zone;
+	conn->cpu = raw_smp_processor_id();
+	conn->jiffies32 = (u32)jiffies;
 	hlist_add_head(&conn->node, head);
 	return true;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
 
+static const struct nf_conntrack_tuple_hash *
+find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
+{
+	const struct nf_conntrack_tuple_hash *found;
+	unsigned long a, b;
+	int cpu = raw_smp_processor_id();
+	__s32 age;
+
+	found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
+	if (found)
+		return found;
+	b = conn->jiffies32;
+	a = (u32)jiffies;
+
+	/* conn might have been added just before by another cpu and
+	 * might still be unconfirmed.  In this case, nf_conntrack_find()
+	 * returns no result.  Thus only evict if this cpu added the
+	 * stale entry or if the entry is older than two jiffies.
+	 */
+	age = a - b;
+	if (conn->cpu == cpu || age >= 2) {
+		hlist_del(&conn->node);
+		kmem_cache_free(conncount_conn_cachep, conn);
+		return ERR_PTR(-ENOENT);
+	}
+
+	return ERR_PTR(-EAGAIN);
+}
+
 unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 				 const struct nf_conntrack_tuple *tuple,
 				 const struct nf_conntrack_zone *zone,
@@ -103,18 +136,27 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
 {
 	const struct nf_conntrack_tuple_hash *found;
 	struct nf_conncount_tuple *conn;
-	struct hlist_node *n;
 	struct nf_conn *found_ct;
+	struct hlist_node *n;
 	unsigned int length = 0;
 
 	*addit = tuple ? true : false;
 
 	/* check the saved connections */
 	hlist_for_each_entry_safe(conn, n, head, node) {
-		found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
-		if (found == NULL) {
-			hlist_del(&conn->node);
-			kmem_cache_free(conncount_conn_cachep, conn);
+		found = find_or_evict(net, conn);
+		if (IS_ERR(found)) {
+			/* Not found, but might be about to be confirmed */
+			if (PTR_ERR(found) == -EAGAIN) {
+				length++;
+				if (!tuple)
+					continue;
+
+				if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+				    nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
+				    nf_ct_zone_id(zone, zone->dir))
+					*addit = false;
+			}
 			continue;
 		}
 

From 26b5b874aff5659a7e26e5b1997e3df2c41fa7fd Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 25 Jun 2018 16:25:44 +0200
Subject: [PATCH 062/382] scsi: sg: mitigate read/write abuse

As Al Viro noted in commit 128394eff343 ("sg_write()/bsg_write() is not fit
to be called under KERNEL_DS"), sg improperly accesses userspace memory
outside the provided buffer, permitting kernel memory corruption via
splice().  But it doesn't just do it on ->write(), also on ->read().

As a band-aid, make sure that the ->read() and ->write() handlers can not
be called in weird contexts (kernel context or credentials different from
file opener), like for ib_safe_file_access().

If someone needs to use these interfaces from different security contexts,
a new interface should be written that goes through the ->ioctl() handler.

I've mostly copypasted ib_safe_file_access() over as sg_safe_file_access()
because I couldn't find a good common header - please tell me if you know a
better way.

[mkp: s/_safe_/_check_/]

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: <stable@vger.kernel.org>
Signed-off-by: Jann Horn <jannh@google.com>
Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sg.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 53ae52dbff84..cd2fdac000c9 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -51,6 +51,7 @@ static int sg_version_num = 30536;	/* 2 digits for each component */
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
 #include <linux/uio.h>
+#include <linux/cred.h> /* for sg_check_file_access() */
 
 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
@@ -209,6 +210,33 @@ static void sg_device_destroy(struct kref *kref);
 	sdev_prefix_printk(prefix, (sdp)->device,		\
 			   (sdp)->disk->disk_name, fmt, ##a)
 
+/*
+ * The SCSI interfaces that use read() and write() as an asynchronous variant of
+ * ioctl(..., SG_IO, ...) are fundamentally unsafe, since there are lots of ways
+ * to trigger read() and write() calls from various contexts with elevated
+ * privileges. This can lead to kernel memory corruption (e.g. if these
+ * interfaces are called through splice()) and privilege escalation inside
+ * userspace (e.g. if a process with access to such a device passes a file
+ * descriptor to a SUID binary as stdin/stdout/stderr).
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static int sg_check_file_access(struct file *filp, const char *caller)
+{
+	if (filp->f_cred != current_real_cred()) {
+		pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+			caller, task_tgid_vnr(current), current->comm);
+		return -EPERM;
+	}
+	if (uaccess_kernel()) {
+		pr_err_once("%s: process %d (%s) called from kernel context, this is not allowed.\n",
+			caller, task_tgid_vnr(current), current->comm);
+		return -EACCES;
+	}
+	return 0;
+}
+
 static int sg_allow_access(struct file *filp, unsigned char *cmd)
 {
 	struct sg_fd *sfp = filp->private_data;
@@ -393,6 +421,14 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
 	struct sg_header *old_hdr = NULL;
 	int retval = 0;
 
+	/*
+	 * This could cause a response to be stranded. Close the associated
+	 * file descriptor to free up any resources being held.
+	 */
+	retval = sg_check_file_access(filp, __func__);
+	if (retval)
+		return retval;
+
 	if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
 		return -ENXIO;
 	SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp,
@@ -580,9 +616,11 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
 	struct sg_header old_hdr;
 	sg_io_hdr_t *hp;
 	unsigned char cmnd[SG_MAX_CDB_SIZE];
+	int retval;
 
-	if (unlikely(uaccess_kernel()))
-		return -EINVAL;
+	retval = sg_check_file_access(filp, __func__);
+	if (retval)
+		return retval;
 
 	if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp)))
 		return -ENXIO;

From b697d7d8c741f27b728a878fc55852b06d0f6f5e Mon Sep 17 00:00:00 2001
From: "Michael J. Ruhl" <michael.j.ruhl@intel.com>
Date: Wed, 20 Jun 2018 09:29:08 -0700
Subject: [PATCH 063/382] IB/hfi1: Fix incorrect mixing of ERR_PTR and NULL
 return values

The __get_txreq() function can return a pointer, ERR_PTR(-EBUSY), or NULL.
All of the relevant call sites look for IS_ERR, so the NULL return would
lead to a NULL pointer exception.

Do not use the ERR_PTR mechanism for this function.

Update all call sites to handle the return value correctly.

Clean up error paths to reflect return value.

Fixes: 45842abbb292 ("staging/rdma/hfi1: move txreq header code")
Cc: <stable@vger.kernel.org> # 4.9.x+
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Kamenee Arumugam <kamenee.arumugam@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hfi1/rc.c          | 2 +-
 drivers/infiniband/hw/hfi1/uc.c          | 4 ++--
 drivers/infiniband/hw/hfi1/ud.c          | 4 ++--
 drivers/infiniband/hw/hfi1/verbs_txreq.c | 4 ++--
 drivers/infiniband/hw/hfi1/verbs_txreq.h | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1a1a47ac53c6..f15c93102081 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -271,7 +271,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 
 	lockdep_assert_held(&qp->s_lock);
 	ps->s_txreq = get_txreq(ps->dev, qp);
-	if (IS_ERR(ps->s_txreq))
+	if (!ps->s_txreq)
 		goto bail_no_tx;
 
 	if (priv->hdr_type == HFI1_PKT_TYPE_9B) {
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index b7b671017e59..e254dcec6f64 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -72,7 +72,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	int middle = 0;
 
 	ps->s_txreq = get_txreq(ps->dev, qp);
-	if (IS_ERR(ps->s_txreq))
+	if (!ps->s_txreq)
 		goto bail_no_tx;
 
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 1ab332f1866e..70d39fc450a1 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -503,7 +503,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	u32 lid;
 
 	ps->s_txreq = get_txreq(ps->dev, qp);
-	if (IS_ERR(ps->s_txreq))
+	if (!ps->s_txreq)
 		goto bail_no_tx;
 
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 873e48ea923f..c4ab2d5b4502 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 - 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -94,7 +94,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 				struct rvt_qp *qp)
 	__must_hold(&qp->s_lock)
 {
-	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
+	struct verbs_txreq *tx = NULL;
 
 	write_seqlock(&dev->txwait_lock);
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 729244c3086c..1c19bbc764b2 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -83,7 +83,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
 	if (unlikely(!tx)) {
 		/* call slow path to get the lock */
 		tx = __get_txreq(dev, qp);
-		if (IS_ERR(tx))
+		if (!tx)
 			return tx;
 	}
 	tx->qp = qp;

From 8ffd569aaa818f2624ca821d9a246342fa8b8c50 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Mon, 4 Jun 2018 19:46:53 +0300
Subject: [PATCH 064/382] net/mlx5e: Don't attempt to dereference the ppriv
 struct if not being eswitch manager

The check for cpu hit statistics was not returning immediate false for
any non vport rep netdev and hence we crashed (say on mlx5 probed VFs) if
user-space tool was calling into any possible netdev in the system.

Fix that by doing a proper check before dereferencing.

Fixes: 1d447a39142e ('net/mlx5e: Extendable vport representor netdev private data')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Eli Cohen <eli@melloanox.com>
Reviewed-by: Eli Cohen <eli@melloanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 57987f6546e8..378ad74518ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -837,8 +837,12 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5_eswitch_rep *rep = rpriv->rep;
+	struct mlx5_eswitch_rep *rep;
 
+	if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table))
+		return false;
+
+	rep = rpriv->rep;
 	if (rep && rep->vport != FDB_UPLINK_VPORT)
 		return true;
 

From 0efc8562491b7d36f6bbc4fbc8f3348cb6641e9c Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 31 May 2018 11:16:18 +0300
Subject: [PATCH 065/382] net/mlx5: E-Switch, Avoid setup attempt if not being
 e-switch manager

In smartnic env, the host (PF) driver might not be an e-switch
manager, hence the FW will err on driver attempts to deal with
setting/unsetting the eswitch and as a result the overall setup
of sriov will fail.

Fix that by avoiding the operation if e-switch management is not
allowed for this driver instance. While here, move to use the
correct name for the esw manager capability name.

Fixes: 81848731ff40 ('net/mlx5: E-Switch, Add SR-IOV (FDB) support')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Guy Kushnir <guyk@mellanox.com>
Reviewed-by: Eli Cohen <eli@melloanox.com>
Tested-by: Eli Cohen <eli@melloanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/fw.c      | 5 +++--
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c   | 7 ++++++-
 include/linux/mlx5/eswitch.h                      | 2 ++
 include/linux/mlx5/mlx5_ifc.h                     | 2 +-
 7 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 378ad74518ec..60236f73373c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -839,7 +839,7 @@ static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep;
 
-	if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table))
+	if (!MLX5_ESWITCH_MANAGER(priv->mdev))
 		return false;
 
 	rep = rpriv->rep;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index f63dfbcd29fe..103fd6a0cc65 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1604,7 +1604,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	if (!ESW_ALLOWED(esw))
 		return 0;
 
-	if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
+	if (!MLX5_ESWITCH_MANAGER(esw->dev) ||
 	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
 		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 49a75d31185e..f1a86cea86a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/mutex.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/eswitch.h>
 
 #include "mlx5_core.h"
 #include "fs_core.h"
@@ -2652,7 +2653,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
-	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	if (MLX5_ESWITCH_MANAGER(dev)) {
 		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
 			err = init_fdb_root_ns(steering);
 			if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index afd9f4fa22f4..41ad24f0de2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -32,6 +32,7 @@
 
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/cmd.h>
+#include <linux/mlx5/eswitch.h>
 #include <linux/module.h>
 #include "mlx5_core.h"
 #include "../../mlxfw/mlxfw.h"
@@ -159,13 +160,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 	}
 
 	if (MLX5_CAP_GEN(dev, vport_group_manager) &&
-	    MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	    MLX5_ESWITCH_MANAGER(dev)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
 		if (err)
 			return err;
 	}
 
-	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
+	if (MLX5_ESWITCH_MANAGER(dev)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
 		if (err)
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 2a8b529ce6dd..a0674962f02c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -88,6 +88,9 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 		return -EBUSY;
 	}
 
+	if (!MLX5_ESWITCH_MANAGER(dev))
+		goto enable_vfs_hca;
+
 	err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
 	if (err) {
 		mlx5_core_warn(dev,
@@ -95,6 +98,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 		return err;
 	}
 
+enable_vfs_hca:
 	for (vf = 0; vf < num_vfs; vf++) {
 		err = mlx5_core_enable_hca(dev, vf + 1);
 		if (err) {
@@ -140,7 +144,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
 	}
 
 out:
-	mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+	if (MLX5_ESWITCH_MANAGER(dev))
+		mlx5_eswitch_disable_sriov(dev->priv.eswitch);
 
 	if (mlx5_wait_for_vf_pages(dev))
 		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index d3c9db492b30..fab5121ffb8f 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -8,6 +8,8 @@
 
 #include <linux/mlx5/driver.h>
 
+#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
+
 enum {
 	SRIOV_NONE,
 	SRIOV_LEGACY,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 27134c4fcb76..ac281f5ec9b8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -922,7 +922,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         vnic_env_queue_counters[0x1];
 	u8         ets[0x1];
 	u8         nic_flow_table[0x1];
-	u8         eswitch_flow_table[0x1];
+	u8         eswitch_manager[0x1];
 	u8         device_memory[0x1];
 	u8         mcam_reg[0x1];
 	u8         pcam_reg[0x1];

From 733d3e5497070d05971352ca5087bac83c197c3d Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 31 May 2018 11:32:56 +0300
Subject: [PATCH 066/382] net/mlx5e: Avoid dealing with vport representors if
 not being e-switch manager

In smartnic env, the host (PF) driver might not be an e-switch
manager, hence the switchdev mode representors are running on
the embedded cpu (EC) and not at the host.

As such, we should avoid dealing with vport representors if
not being esw manager.

While here, make sure to disallow eswitch switchdev related
setups through devlink if we are not esw managers.

Fixes: cb67b832921c ('net/mlx5e: Introduce SRIOV VF representors')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c    | 12 ++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c     |  2 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c   |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 56c1b6f5593e..dae4156a710d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2846,7 +2846,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
 	mlx5e_activate_channels(&priv->channels);
 	netif_tx_start_all_queues(priv->netdev);
 
-	if (MLX5_VPORT_MANAGER(priv->mdev))
+	if (MLX5_ESWITCH_MANAGER(priv->mdev))
 		mlx5e_add_sqs_fwd_rules(priv);
 
 	mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@@ -2857,7 +2857,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
 {
 	mlx5e_redirect_rqts_to_drop(priv);
 
-	if (MLX5_VPORT_MANAGER(priv->mdev))
+	if (MLX5_ESWITCH_MANAGER(priv->mdev))
 		mlx5e_remove_sqs_fwd_rules(priv);
 
 	/* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@@ -4597,7 +4597,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	mlx5e_set_netdev_dev_addr(netdev);
 
 #if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-	if (MLX5_VPORT_MANAGER(mdev))
+	if (MLX5_ESWITCH_MANAGER(mdev))
 		netdev->switchdev_ops = &mlx5e_switchdev_ops;
 #endif
 
@@ -4753,7 +4753,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 
 	mlx5e_enable_async_events(priv);
 
-	if (MLX5_VPORT_MANAGER(priv->mdev))
+	if (MLX5_ESWITCH_MANAGER(priv->mdev))
 		mlx5e_register_vport_reps(priv);
 
 	if (netdev->reg_state != NETREG_REGISTERED)
@@ -4788,7 +4788,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 
-	if (MLX5_VPORT_MANAGER(priv->mdev))
+	if (MLX5_ESWITCH_MANAGER(priv->mdev))
 		mlx5e_unregister_vport_reps(priv);
 
 	mlx5e_disable_async_events(priv);
@@ -4972,7 +4972,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
 		return NULL;
 
 #ifdef CONFIG_MLX5_ESWITCH
-	if (MLX5_VPORT_MANAGER(mdev)) {
+	if (MLX5_ESWITCH_MANAGER(mdev)) {
 		rpriv = mlx5e_alloc_nic_rep_priv(mdev);
 		if (!rpriv) {
 			mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 60236f73373c..2b8040a3cdbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -823,7 +823,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep;
 
-	if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager))
+	if (!MLX5_ESWITCH_MANAGER(priv->mdev))
 		return false;
 
 	rep = rpriv->rep;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index cecd201f0b73..91f1209886ff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1079,8 +1079,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink)
 	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
 		return -EOPNOTSUPP;
 
-	if (!MLX5_CAP_GEN(dev, vport_group_manager))
-		return -EOPNOTSUPP;
+	if(!MLX5_ESWITCH_MANAGER(dev))
+		return -EPERM;
 
 	if (dev->priv.eswitch->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;

From aff2252a2ad3844ca47bf2f18af071101baace40 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 31 May 2018 11:40:17 +0300
Subject: [PATCH 067/382] IB/mlx5: Avoid dealing with vport representors if not
 being e-switch manager

In smartnic env, the host (PF) driver might not be an e-switch
manager, hence the switchdev mode representors are running on
the embedded cpu (EC) and not at the host.

As such, we should avoid dealing with vport representors if
not being esw manager.

Fixes: b5ca15ad7e61 ('IB/mlx5: Add proper representors support')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e52dd21519b4..634be96dcb86 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -6107,7 +6107,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
 			     MLX5_CAP_GEN(mdev, num_vhca_ports));
 
-	if (MLX5_VPORT_MANAGER(mdev) &&
+	if (MLX5_ESWITCH_MANAGER(mdev) &&
 	    mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
 		dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
 

From a8d70a054a718b63058b3d3ac58b6181815e4289 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Thu, 31 May 2018 12:24:48 +0300
Subject: [PATCH 068/382] net/mlx5: E-Switch, Disallow vlan/spoofcheck setup if
 not being esw manager

In smartnic env, if the host (PF) driver is not an e-switch manager, we
are not allowed to apply eswitch ports setups such as vlan (VST),
spoof-checks, min/max rate or state.

Make sure we are eswitch manager when coming to issue these callbacks
and err otherwise.

Also fix the definition of ESW_ALLOWED to rely on eswitch_manager
capability and on the vport_group_manger.

Operations on the VF nic vport context, such as setting a mac or reading
the vport counters are allowed to the PF in this scheme.

The modify nic vport guid code was modified to omit checking the
nic_vport_node_guid_modify eswitch capability.
The reason for doing so is that modifying node guid requires vport group
manager capability, and there's no need to check further capabilities.

1. set_vf_vlan     - disallowed
2. set_vf_spoofchk - disallowed
3. set_vf_mac      - allowed
4. get_vf_config   - allowed
5. set_vf_trust    - disallowed
6. set_vf_rate     - disallowed
7. get_vf_stat     - allowed
8. set_vf_link_state - disallowed

Fixes: f942380c1239 ('net/mlx5: E-Switch, Vport ingress/egress ACLs rules for spoofchk')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Tested-by: Or Gerlitz <ogerlitz@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +++++-------
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   |  2 --
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 103fd6a0cc65..b79d74860a30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1594,17 +1594,15 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
 }
 
 /* Public E-Switch API */
-#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev))
+#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
+
 
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 {
 	int err;
 	int i, enabled_events;
 
-	if (!ESW_ALLOWED(esw))
-		return 0;
-
-	if (!MLX5_ESWITCH_MANAGER(esw->dev) ||
+	if (!ESW_ALLOWED(esw) ||
 	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
 		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
 		return -EOPNOTSUPP;
@@ -1806,7 +1804,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 	u64 node_guid;
 	int err = 0;
 
-	if (!ESW_ALLOWED(esw))
+	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
 		return -EPERM;
 	if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
 		return -EINVAL;
@@ -1883,7 +1881,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 {
 	struct mlx5_vport *evport;
 
-	if (!ESW_ALLOWED(esw))
+	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
 		return -EPERM;
 	if (!LEGAL_VPORT(esw, vport))
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 719cecb182c6..7eecd5b07bb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -549,8 +549,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 		return -EINVAL;
 	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
 		return -EACCES;
-	if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify))
-		return -EOPNOTSUPP;
 
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)

From f811980444ec59ad62f9e041adbb576a821132c7 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 13 Jun 2018 10:27:34 +0300
Subject: [PATCH 069/382] net/mlx5: Fix required capability for manipulating
 MPFS

Manipulating of the MPFS requires eswitch manager capabilities.

Fixes: eeb66cdb6826 ('net/mlx5: Separate between E-Switch and MPFS')
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index 7cb67122e8b5..98359559c77e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -33,6 +33,7 @@
 #include <linux/etherdevice.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/eswitch.h>
 #include "mlx5_core.h"
 #include "lib/mpfs.h"
 
@@ -98,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev)
 	int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
 	struct mlx5_mpfs *mpfs;
 
-	if (!MLX5_VPORT_MANAGER(dev))
+	if (!MLX5_ESWITCH_MANAGER(dev))
 		return 0;
 
 	mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
@@ -122,7 +123,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
 {
 	struct mlx5_mpfs *mpfs = dev->priv.mpfs;
 
-	if (!MLX5_VPORT_MANAGER(dev))
+	if (!MLX5_ESWITCH_MANAGER(dev))
 		return;
 
 	WARN_ON(!hlist_empty(mpfs->hash));
@@ -137,7 +138,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
 	u32 index;
 	int err;
 
-	if (!MLX5_VPORT_MANAGER(dev))
+	if (!MLX5_ESWITCH_MANAGER(dev))
 		return 0;
 
 	mutex_lock(&mpfs->lock);
@@ -179,7 +180,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
 	int err = 0;
 	u32 index;
 
-	if (!MLX5_VPORT_MANAGER(dev))
+	if (!MLX5_ESWITCH_MANAGER(dev))
 		return 0;
 
 	mutex_lock(&mpfs->lock);

From d14fcb8d877caf1b8d6bd65d444bf62b21f2070c Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Tue, 22 May 2018 14:14:02 +0300
Subject: [PATCH 070/382] net/mlx5: Fix wrong size allocation for QoS ETC TC
 regitster

The driver allocates wrong size (due to wrong struct name) when issuing
a query/set request to NIC's register.

Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate")
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index fa9d0760dd36..31a9cbd85689 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -701,7 +701,7 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
 static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
 				   int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(qtct_reg)];
+	u32 out[MLX5_ST_SZ_DW(qetc_reg)];
 
 	if (!MLX5_CAP_GEN(mdev, ets))
 		return -EOPNOTSUPP;
@@ -713,7 +713,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
 static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
 				     int outlen)
 {
-	u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+	u32 in[MLX5_ST_SZ_DW(qetc_reg)];
 
 	if (!MLX5_CAP_GEN(mdev, ets))
 		return -EOPNOTSUPP;

From 603b7bcff824740500ddfa001d7a7168b0b38542 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Fri, 25 May 2018 20:25:59 +0300
Subject: [PATCH 071/382] net/mlx5: Fix incorrect raw command length parsing

The NULL character was not set correctly for the string containing
the command length, this caused failures reading the output of the
command due to a random length. The fix is to initialize the output
length string.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 487388aed98f..2db5fe571594 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1276,7 +1276,7 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
 {
 	struct mlx5_core_dev *dev = filp->private_data;
 	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-	char outlen_str[8];
+	char outlen_str[8] = {0};
 	int outlen;
 	void *ptr;
 	int err;
@@ -1291,8 +1291,6 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
 	if (copy_from_user(outlen_str, buf, count))
 		return -EFAULT;
 
-	outlen_str[7] = 0;
-
 	err = sscanf(outlen_str, "%d", &outlen);
 	if (err < 0)
 		return err;

From d412c31dae053bf30a1bc15582a9990df297a660 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Tue, 12 Jun 2018 16:14:31 +0300
Subject: [PATCH 072/382] net/mlx5: Fix command interface race in polling mode

The command interface can work in two modes: Events and Polling.
In the general case, each time we invoke a command, a work is
queued to handle it.

When working in events, the interrupt handler completes the
command execution. On the other hand, when working in polling
mode, the work itself completes it.

Due to a bug in the work handler, a command could have been
completed by the interrupt handler, while the work handler
hasn't finished yet, causing the it to complete once again
if the command interface mode was changed from Events to
polling after the interrupt handler was called.

mlx5_unload_one()
        mlx5_stop_eqs()
                // Destroy the EQ before cmd EQ
                ...cmd_work_handler()
                        write_doorbell()
                        --> EVENT_TYPE_CMD
                                mlx5_cmd_comp_handler() // First free
                                        free_ent(cmd, ent->idx)
                                        complete(&ent->done)

        <-- mlx5_stop_eqs //cmd was complete
                // move to polling before destroying the last cmd EQ
                mlx5_cmd_use_polling()
                        cmd->mode = POLL;

                --> cmd_work_handler (continues)
                        if (cmd->mode == POLL)
                                mlx5_cmd_comp_handler() // Double free

The solution is to store the cmd->mode before writing the doorbell.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 2db5fe571594..384c1fa49081 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -807,6 +807,7 @@ static void cmd_work_handler(struct work_struct *work)
 	unsigned long flags;
 	bool poll_cmd = ent->polling;
 	int alloc_ret;
+	int cmd_mode;
 
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
@@ -853,6 +854,7 @@ static void cmd_work_handler(struct work_struct *work)
 	set_signature(ent, !cmd->checksum_disabled);
 	dump_command(dev, ent, 1);
 	ent->ts1 = ktime_get_ns();
+	cmd_mode = cmd->mode;
 
 	if (ent->callback)
 		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
@@ -877,7 +879,7 @@ static void cmd_work_handler(struct work_struct *work)
 	iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
 	mmiowb();
 	/* if not in polling don't use ent after this point */
-	if (cmd->mode == CMD_MODE_POLLING || poll_cmd) {
+	if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
 		poll_timeout(ent);
 		/* make sure we read the descriptor after ownership is SW */
 		rmb();

From c809195f5523dd4d09403bbb1c9732d548aa0d1e Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Mon, 25 Jun 2018 06:41:25 -0700
Subject: [PATCH 073/382] rds: clean up loopback rds_connections on netns
 deletion

The RDS core module creates rds_connections based on callbacks
from rds_loop_transport when sending/receiving packets to local
addresses.

These connections will need to be cleaned up when they are
created from a netns that is not init_net, and that netns is deleted.

Add the changes aligned with the changes from
commit ebeeb1ad9b8a ("rds: tcp: use rds_destroy_pending() to synchronize
netns/module teardown and rds connection/workq management") for
rds_loop_transport

Reported-and-tested-by: syzbot+4c20b3866171ce8441d2@syzkaller.appspotmail.com
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 11 ++++++++-
 net/rds/loop.c       | 56 ++++++++++++++++++++++++++++++++++++++++++++
 net/rds/loop.h       |  2 ++
 3 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index abef75da89a7..cfb05953b0e5 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -659,11 +659,19 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
 
 int rds_conn_init(void)
 {
+	int ret;
+
+	ret = rds_loop_net_init(); /* register pernet callback */
+	if (ret)
+		return ret;
+
 	rds_conn_slab = kmem_cache_create("rds_connection",
 					  sizeof(struct rds_connection),
 					  0, 0, NULL);
-	if (!rds_conn_slab)
+	if (!rds_conn_slab) {
+		rds_loop_net_exit();
 		return -ENOMEM;
+	}
 
 	rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
 	rds_info_register_func(RDS_INFO_SEND_MESSAGES,
@@ -676,6 +684,7 @@ int rds_conn_init(void)
 
 void rds_conn_exit(void)
 {
+	rds_loop_net_exit(); /* unregister pernet callback */
 	rds_loop_exit();
 
 	WARN_ON(!hlist_empty(rds_conn_hash));
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dac6218a460e..feea1f96ee2a 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -33,6 +33,8 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/in.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include "rds_single_path.h"
 #include "rds.h"
@@ -40,6 +42,17 @@
 
 static DEFINE_SPINLOCK(loop_conns_lock);
 static LIST_HEAD(loop_conns);
+static atomic_t rds_loop_unloading = ATOMIC_INIT(0);
+
+static void rds_loop_set_unloading(void)
+{
+	atomic_set(&rds_loop_unloading, 1);
+}
+
+static bool rds_loop_is_unloading(struct rds_connection *conn)
+{
+	return atomic_read(&rds_loop_unloading) != 0;
+}
 
 /*
  * This 'loopback' transport is a special case for flows that originate
@@ -165,6 +178,8 @@ void rds_loop_exit(void)
 	struct rds_loop_connection *lc, *_lc;
 	LIST_HEAD(tmp_list);
 
+	rds_loop_set_unloading();
+	synchronize_rcu();
 	/* avoid calling conn_destroy with irqs off */
 	spin_lock_irq(&loop_conns_lock);
 	list_splice(&loop_conns, &tmp_list);
@@ -177,6 +192,46 @@ void rds_loop_exit(void)
 	}
 }
 
+static void rds_loop_kill_conns(struct net *net)
+{
+	struct rds_loop_connection *lc, *_lc;
+	LIST_HEAD(tmp_list);
+
+	spin_lock_irq(&loop_conns_lock);
+	list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node)  {
+		struct net *c_net = read_pnet(&lc->conn->c_net);
+
+		if (net != c_net)
+			continue;
+		list_move_tail(&lc->loop_node, &tmp_list);
+	}
+	spin_unlock_irq(&loop_conns_lock);
+
+	list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) {
+		WARN_ON(lc->conn->c_passive);
+		rds_conn_destroy(lc->conn);
+	}
+}
+
+static void __net_exit rds_loop_exit_net(struct net *net)
+{
+	rds_loop_kill_conns(net);
+}
+
+static struct pernet_operations rds_loop_net_ops = {
+	.exit = rds_loop_exit_net,
+};
+
+int rds_loop_net_init(void)
+{
+	return register_pernet_device(&rds_loop_net_ops);
+}
+
+void rds_loop_net_exit(void)
+{
+	unregister_pernet_device(&rds_loop_net_ops);
+}
+
 /*
  * This is missing .xmit_* because loop doesn't go through generic
  * rds_send_xmit() and doesn't call rds_recv_incoming().  .listen_stop and
@@ -194,4 +249,5 @@ struct rds_transport rds_loop_transport = {
 	.inc_free		= rds_loop_inc_free,
 	.t_name			= "loopback",
 	.t_type			= RDS_TRANS_LOOP,
+	.t_unloading		= rds_loop_is_unloading,
 };
diff --git a/net/rds/loop.h b/net/rds/loop.h
index 469fa4b2da4f..bbc8cdd030df 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -5,6 +5,8 @@
 /* loop.c */
 extern struct rds_transport rds_loop_transport;
 
+int rds_loop_net_init(void);
+void rds_loop_net_exit(void);
 void rds_loop_exit(void);
 
 #endif

From 7c8f4e6dc30996bff806285730a0bb4e714d3d52 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 26 Jun 2018 01:39:32 +0200
Subject: [PATCH 074/382] fib_rules: match rules based on suppress_* properties
 too

Two rules with different values of suppress_prefix or suppress_ifgroup
are not the same. This fixes an -EEXIST when running:

   $ ip -4 rule add table main suppress_prefixlength 0

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Fixes: f9d4b0c1e969 ("fib_rules: move common handling of newrule delrule msgs into fib_nl2rule")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 126ffc5bc630..bc8425d81022 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -416,6 +416,14 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
 		if (rule->mark && r->mark != rule->mark)
 			continue;
 
+		if (rule->suppress_ifgroup != -1 &&
+		    r->suppress_ifgroup != rule->suppress_ifgroup)
+			continue;
+
+		if (rule->suppress_prefixlen != -1 &&
+		    r->suppress_prefixlen != rule->suppress_prefixlen)
+			continue;
+
 		if (rule->mark_mask && r->mark_mask != rule->mark_mask)
 			continue;
 

From a64119415ff248efa61301783bc26551df5dabf6 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Mon, 25 Jun 2018 20:36:27 -0700
Subject: [PATCH 075/382] nfp: flower: fix mpls ether type detection

Previously it was not possible to distinguish between mpls ether types and
other ether types. This leads to incorrect classification of offloaded
filters that match on mpls ether type. For example the following two
filters overlap:

 # tc filter add dev eth0 parent ffff: \
    protocol 0x8847 flower \
    action mirred egress redirect dev eth1

 # tc filter add dev eth0 parent ffff: \
    protocol 0x0800 flower \
    action mirred egress redirect dev eth2

The driver now correctly includes the mac_mpls layer where HW stores mpls
fields, when it detects an mpls ether type. It also sets the MPLS_Q bit to
indicate that the filter should match mpls packets.

Fixes: bb055c198d9b ("nfp: add mpls match offloading support")
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/match.c  | 14 ++++++++++++++
 .../net/ethernet/netronome/nfp/flower/offload.c    |  8 ++++++++
 2 files changed, 22 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 91935405f586..84f7a5dbea9d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -123,6 +123,20 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
 			 NFP_FLOWER_MASK_MPLS_Q;
 
 		frame->mpls_lse = cpu_to_be32(t_mpls);
+	} else if (dissector_uses_key(flow->dissector,
+				      FLOW_DISSECTOR_KEY_BASIC)) {
+		/* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q
+		 * bit, which indicates an mpls ether type but without any
+		 * mpls fields.
+		 */
+		struct flow_dissector_key_basic *key_basic;
+
+		key_basic = skb_flow_dissector_target(flow->dissector,
+						      FLOW_DISSECTOR_KEY_BASIC,
+						      flow->key);
+		if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
+		    key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC))
+			frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
 	}
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index c42e64f32333..477f584f6d28 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -264,6 +264,14 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		case cpu_to_be16(ETH_P_ARP):
 			return -EOPNOTSUPP;
 
+		case cpu_to_be16(ETH_P_MPLS_UC):
+		case cpu_to_be16(ETH_P_MPLS_MC):
+			if (!(key_layer & NFP_FLOWER_LAYER_MAC)) {
+				key_layer |= NFP_FLOWER_LAYER_MAC;
+				key_size += sizeof(struct nfp_flower_mac_mpls);
+			}
+			break;
+
 		/* Will be included in layer 2. */
 		case cpu_to_be16(ETH_P_8021Q):
 			break;

From 951a8ee6def39e25d0e60b9394e5a249ba8b2390 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Mon, 25 Jun 2018 20:36:28 -0700
Subject: [PATCH 076/382] nfp: reject binding to shared blocks

TC shared blocks allow multiple qdiscs to be grouped together and filters
shared between them. Currently the chains of filters attached to a block
are only flushed when the block is removed. If a qdisc is removed from a
block but the block still exists, flow del messages are not passed to the
callback registered for that qdisc. For the NFP, this presents the
possibility of rules still existing in hw when they should be removed.

Prevent binding to shared blocks until the kernel can send per qdisc del
messages when block unbinds occur.

tcf_block_shared() was not used outside of the core until now, so also
add an empty implementation for builds with CONFIG_NET_CLS=n.

Fixes: 4861738775d7 ("net: sched: introduce shared filter blocks infrastructure")
Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c       | 3 +++
 drivers/net/ethernet/netronome/nfp/flower/offload.c | 3 +++
 include/net/pkt_cls.h                               | 5 +++++
 3 files changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index fcdfb8e7fdea..6b15e3b11956 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -202,6 +202,9 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev,
 	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
+	if (tcf_block_shared(f->block))
+		return -EOPNOTSUPP;
+
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 477f584f6d28..525057bee0ed 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -631,6 +631,9 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 	if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
+	if (tcf_block_shared(f->block))
+		return -EOPNOTSUPP;
+
 	switch (f->command) {
 	case TC_BLOCK_BIND:
 		return tcf_block_cb_register(f->block,
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a3c1a2c47cd4..20b059574e60 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -111,6 +111,11 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 {
 }
 
+static inline bool tcf_block_shared(struct tcf_block *block)
+{
+	return false;
+}
+
 static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 {
 	return NULL;

From 2d2595719a97c876f35b1e60e5768e58753b268c Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Tue, 26 Jun 2018 09:16:31 +0800
Subject: [PATCH 077/382] nfp: cast sizeof() to int when comparing with error
 code

sizeof() will return unsigned value so in the error check
negative error code will be always larger than sizeof().

Fixes: a0d8e02c35ff ("nfp: add support for reading nffw info")

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
index cd34097b79f1..37a6d7822a38 100644
--- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
+++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c
@@ -232,7 +232,7 @@ struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp)
 	err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res),
 			   nfp_resource_address(state->res),
 			   fwinf, sizeof(*fwinf));
-	if (err < sizeof(*fwinf))
+	if (err < (int)sizeof(*fwinf))
 		goto err_release;
 
 	if (!nffw_res_flg_init_get(fwinf))

From 45a0faaba9c8c5ba1e31a08a391aed0bad327167 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Wed, 27 Jun 2018 10:07:45 -0300
Subject: [PATCH 078/382] drm/etnaviv: Check for
 platform_device_register_simple() failure

platform_device_register_simple() may fail, so we should better
check its return value and propagate it in the case of error.

Cc: <stable@vger.kernel.org>
Fixes: 246774d17fc0 ("drm/etnaviv: remove the need for a gpu-subsystem DT node")
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_drv.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index e5013a999147..19abd3ca6618 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -631,8 +631,11 @@ static struct platform_driver etnaviv_platform_driver = {
 	},
 };
 
+static struct platform_device *etnaviv_drm;
+
 static int __init etnaviv_init(void)
 {
+	struct platform_device *pdev;
 	int ret;
 	struct device_node *np;
 
@@ -644,7 +647,7 @@ static int __init etnaviv_init(void)
 
 	ret = platform_driver_register(&etnaviv_platform_driver);
 	if (ret != 0)
-		platform_driver_unregister(&etnaviv_gpu_driver);
+		goto unregister_gpu_driver;
 
 	/*
 	 * If the DT contains at least one available GPU device, instantiate
@@ -653,12 +656,24 @@ static int __init etnaviv_init(void)
 	for_each_compatible_node(np, NULL, "vivante,gc") {
 		if (!of_device_is_available(np))
 			continue;
-
-		platform_device_register_simple("etnaviv", -1, NULL, 0);
+		pdev = platform_device_register_simple("etnaviv", -1,
+						       NULL, 0);
+		if (IS_ERR(pdev)) {
+			ret = PTR_ERR(pdev);
+			of_node_put(np);
+			goto unregister_platform_driver;
+		}
+		etnaviv_drm = pdev;
 		of_node_put(np);
 		break;
 	}
 
+	return 0;
+
+unregister_platform_driver:
+	platform_driver_unregister(&etnaviv_platform_driver);
+unregister_gpu_driver:
+	platform_driver_unregister(&etnaviv_gpu_driver);
 	return ret;
 }
 module_init(etnaviv_init);

From bf6ba3aeb2962e5ee4a78e7535af579ecba630bb Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Wed, 27 Jun 2018 10:07:46 -0300
Subject: [PATCH 079/382] drm/etnaviv: Fix driver unregistering

Russell King reported:

"When removing and reloading the etnaviv module, the following splat
occurs:

sysfs: cannot create duplicate filename '/devices/platform/etnaviv'
CPU: 0 PID: 1471 Comm: modprobe Not tainted 4.17.0+ #1608
Hardware name: Marvell Dove (Cubox)
Backtrace:
[<c00157d4>] (dump_backtrace) from [<c0015b8c>] (show_stack+0x18/0x1c)
 r6:ef033e38 r5:ee07b340 r4:edb9d000 r3:00000000
[<c0015b74>] (show_stack) from [<c0620784>] (dump_stack+0x20/0x28)
[<c0620764>] (dump_stack) from [<c01bcd24>] (sysfs_warn_dup+0x5c/0x70)
[<c01bccc8>] (sysfs_warn_dup) from [<c01bce14>] (sysfs_create_dir_ns+0x90/0x98)
..."

Commit 246774d17fc0 ("drm/etnaviv: remove the need for a gpu-subsystem
DT node") introduced DRM registration via
platform_device_register_simple(), but missed to call
platform_device_unregister() inside etnaviv_exit().

Fix the problem by calling platform_device_unregister() inside
etnaviv_exit(). While at it, also rearrange the function calls
in the exit path to make them happen in the opposite order of
registration.

Tested on a imx6-sabresd board.

Cc: <stable@vger.kernel.org>
Fixes: 246774d17fc0 ("drm/etnaviv: remove the need for a gpu-subsystem DT node")
Reported-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 19abd3ca6618..540b59fb4103 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -680,8 +680,9 @@ module_init(etnaviv_init);
 
 static void __exit etnaviv_exit(void)
 {
-	platform_driver_unregister(&etnaviv_gpu_driver);
+	platform_device_unregister(etnaviv_drm);
 	platform_driver_unregister(&etnaviv_platform_driver);
+	platform_driver_unregister(&etnaviv_gpu_driver);
 }
 module_exit(etnaviv_exit);
 

From 82be2ab159a3a0ae4024b946a31f12b221f6c8ff Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 27 Jun 2018 17:47:37 +0800
Subject: [PATCH 080/382] hinic: reset irq affinity before freeing irq

Following warning is seen when rmmod hinic. This is because affinity
value is not reset before calling free_irq(). This patch fixes it.

[   55.181232] WARNING: CPU: 38 PID: 19589 at kernel/irq/manage.c:1608
__free_irq+0x2aa/0x2c0

Fixes: 352f58b0d9f2 ("net-next/hinic: Set Rxq irq to specific cpu for NUMA")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_rx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index e2e5cdc7119c..4c0f7eda1166 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -439,6 +439,7 @@ static void rx_free_irq(struct hinic_rxq *rxq)
 {
 	struct hinic_rq *rq = rxq->rq;
 
+	irq_set_affinity_hint(rq->irq, NULL);
 	free_irq(rq->irq, rxq);
 	rx_del_napi(rxq);
 }

From 88e85a7daf8e21f2d6cb054374d480c540725cde Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 26 Jun 2018 12:55:35 +0900
Subject: [PATCH 081/382] bpfilter: check compiler capability in Kconfig

With the brand-new syntax extension of Kconfig, we can directly
check the compiler capability in the configuration phase.

If the cc-can-link.sh fails, the BPFILTER_UMH is automatically
hidden by the dependency.

I also deleted 'default n', which is no-op.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Makefile               | 5 -----
 net/Makefile           | 4 ----
 net/bpfilter/Kconfig   | 2 +-
 scripts/cc-can-link.sh | 2 +-
 4 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/Makefile b/Makefile
index c9132594860b..e1bd4c3627bc 100644
--- a/Makefile
+++ b/Makefile
@@ -507,11 +507,6 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLA
   KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/cc-can-link.sh $(CC)), y)
-  CC_CAN_LINK := y
-  export CC_CAN_LINK
-endif
-
 # The expansion should be delayed until arch/$(SRCARCH)/Makefile is included.
 # Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile.
 # CC_VERSION_TEXT is referenced from Kconfig (so it needs export),
diff --git a/net/Makefile b/net/Makefile
index 13ec0d5415c7..bdaf53925acd 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -20,11 +20,7 @@ obj-$(CONFIG_TLS)		+= tls/
 obj-$(CONFIG_XFRM)		+= xfrm/
 obj-$(CONFIG_UNIX)		+= unix/
 obj-$(CONFIG_NET)		+= ipv6/
-ifneq ($(CC_CAN_LINK),y)
-$(warning CC cannot link executables. Skipping bpfilter.)
-else
 obj-$(CONFIG_BPFILTER)		+= bpfilter/
-endif
 obj-$(CONFIG_PACKET)		+= packet/
 obj-$(CONFIG_NET_KEY)		+= key/
 obj-$(CONFIG_BRIDGE)		+= bridge/
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index a948b072c28f..76deb6615883 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -1,6 +1,5 @@
 menuconfig BPFILTER
 	bool "BPF based packet filtering framework (BPFILTER)"
-	default n
 	depends on NET && BPF && INET
 	help
 	  This builds experimental bpfilter framework that is aiming to
@@ -9,6 +8,7 @@ menuconfig BPFILTER
 if BPFILTER
 config BPFILTER_UMH
 	tristate "bpfilter kernel module with user mode helper"
+	depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
 	default m
 	help
 	  This builds bpfilter kernel module with embedded user mode helper
diff --git a/scripts/cc-can-link.sh b/scripts/cc-can-link.sh
index 208eb2825dab..6efcead31989 100755
--- a/scripts/cc-can-link.sh
+++ b/scripts/cc-can-link.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
-cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1 && echo "y"
+cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1
 #include <stdio.h>
 int main(void)
 {

From fec9d3b1dc4c481f20f5d2f5aef3ad1cb7504186 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Tue, 26 Jun 2018 10:44:01 +0200
Subject: [PATCH 082/382] net: macb: initialize bp->queues[0].bp for at91rm9200

The macb driver currently crashes on at91rm9200 with the following trace:

Unable to handle kernel NULL pointer dereference at virtual address 00000014
[...]
[<c031da44>] (macb_rx_desc) from [<c031f2bc>] (at91ether_open+0x2e8/0x3f8)
[<c031f2bc>] (at91ether_open) from [<c041e8d8>] (__dev_open+0x120/0x13c)
[<c041e8d8>] (__dev_open) from [<c041ec08>] (__dev_change_flags+0x17c/0x1a8)
[<c041ec08>] (__dev_change_flags) from [<c041ec4c>] (dev_change_flags+0x18/0x4c)
[<c041ec4c>] (dev_change_flags) from [<c07a5f4c>] (ip_auto_config+0x220/0x10b0)
[<c07a5f4c>] (ip_auto_config) from [<c000a4fc>] (do_one_initcall+0x78/0x18c)
[<c000a4fc>] (do_one_initcall) from [<c0783e50>] (kernel_init_freeable+0x184/0x1c4)
[<c0783e50>] (kernel_init_freeable) from [<c0574d70>] (kernel_init+0x8/0xe8)
[<c0574d70>] (kernel_init) from [<c00090e0>] (ret_from_fork+0x14/0x34)

Solve that by initializing bp->queues[0].bp in at91ether_init (as is done
in macb_init).

Fixes: ae1f2a56d273 ("net: macb: Added support for many RX queues")
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 3e93df5d4e3b..96cc03a6d942 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3726,6 +3726,8 @@ static int at91ether_init(struct platform_device *pdev)
 	int err;
 	u32 reg;
 
+	bp->queues[0].bp = bp;
+
 	dev->netdev_ops = &at91ether_netdev_ops;
 	dev->ethtool_ops = &macb_ethtool_ops;
 

From ad088ec480768850db019a5cc543685e868a513d Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 26 Jun 2018 17:39:48 +0200
Subject: [PATCH 083/382] ixgbe: split XDP_TX tail and XDP_REDIRECT map
 flushing

The driver was combining the XDP_TX tail flush and XDP_REDIRECT
map flushing (xdp_do_flush_map).  This is suboptimal, these two
flush operations should be kept separate.

Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 3e87dbbc9024..62e57b05a0ae 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2186,9 +2186,10 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 	return skb;
 }
 
-#define IXGBE_XDP_PASS 0
-#define IXGBE_XDP_CONSUMED 1
-#define IXGBE_XDP_TX 2
+#define IXGBE_XDP_PASS		0
+#define IXGBE_XDP_CONSUMED	BIT(0)
+#define IXGBE_XDP_TX		BIT(1)
+#define IXGBE_XDP_REDIR		BIT(2)
 
 static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 			       struct xdp_frame *xdpf);
@@ -2225,7 +2226,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
 		if (!err)
-			result = IXGBE_XDP_TX;
+			result = IXGBE_XDP_REDIR;
 		else
 			result = IXGBE_XDP_CONSUMED;
 		break;
@@ -2285,7 +2286,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 	unsigned int mss = 0;
 #endif /* IXGBE_FCOE */
 	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
-	bool xdp_xmit = false;
+	unsigned int xdp_xmit = 0;
 	struct xdp_buff xdp;
 
 	xdp.rxq = &rx_ring->xdp_rxq;
@@ -2328,8 +2329,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		}
 
 		if (IS_ERR(skb)) {
-			if (PTR_ERR(skb) == -IXGBE_XDP_TX) {
-				xdp_xmit = true;
+			unsigned int xdp_res = -PTR_ERR(skb);
+
+			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
 				ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
 			} else {
 				rx_buffer->pagecnt_bias++;
@@ -2401,7 +2404,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		total_rx_packets++;
 	}
 
-	if (xdp_xmit) {
+	if (xdp_xmit & IXGBE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_xmit & IXGBE_XDP_TX) {
 		struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
 
 		/* Force memory writes to complete before letting h/w
@@ -2409,8 +2415,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		 */
 		wmb();
 		writel(ring->next_to_use, ring->tail);
-
-		xdp_do_flush_map();
 	}
 
 	u64_stats_update_begin(&rx_ring->syncp);

From 2e6893123830d04edc366e0ce59d46e622e140c1 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 26 Jun 2018 17:39:53 +0200
Subject: [PATCH 084/382] i40e: split XDP_TX tail and XDP_REDIRECT map flushing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver was combining the XDP_TX tail flush and XDP_REDIRECT
map flushing (xdp_do_flush_map).  This is suboptimal, these two
flush operations should be kept separate.

It looks like the mistake was copy-pasted from ixgbe.

Fixes: d9314c474d4f ("i40e: add support for XDP_REDIRECT")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++--------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ed6dbcfd4e96..b151ae316546 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2199,9 +2199,10 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
 	return true;
 }
 
-#define I40E_XDP_PASS 0
-#define I40E_XDP_CONSUMED 1
-#define I40E_XDP_TX 2
+#define I40E_XDP_PASS		0
+#define I40E_XDP_CONSUMED	BIT(0)
+#define I40E_XDP_TX		BIT(1)
+#define I40E_XDP_REDIR		BIT(2)
 
 static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
 			      struct i40e_ring *xdp_ring);
@@ -2248,7 +2249,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED;
+		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
@@ -2311,7 +2312,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	struct sk_buff *skb = rx_ring->skb;
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
-	bool failure = false, xdp_xmit = false;
+	unsigned int xdp_xmit = 0;
+	bool failure = false;
 	struct xdp_buff xdp;
 
 	xdp.rxq = &rx_ring->xdp_rxq;
@@ -2372,8 +2374,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		}
 
 		if (IS_ERR(skb)) {
-			if (PTR_ERR(skb) == -I40E_XDP_TX) {
-				xdp_xmit = true;
+			unsigned int xdp_res = -PTR_ERR(skb);
+
+			if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
 				i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
 			} else {
 				rx_buffer->pagecnt_bias++;
@@ -2427,12 +2431,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		total_rx_packets++;
 	}
 
-	if (xdp_xmit) {
+	if (xdp_xmit & I40E_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_xmit & I40E_XDP_TX) {
 		struct i40e_ring *xdp_ring =
 			rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 
 		i40e_xdp_ring_update_tail(xdp_ring);
-		xdp_do_flush_map();
 	}
 
 	rx_ring->skb = skb;

From 2471c75efed32529698c26da499954f0253cb401 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 26 Jun 2018 17:39:58 +0200
Subject: [PATCH 085/382] virtio_net: split XDP_TX kick and XDP_REDIRECT map
 flushing

The driver was combining XDP_TX virtqueue_kick and XDP_REDIRECT
map flushing (xdp_do_flush_map).  This is suboptimal, these two
flush operations should be kept separate.

The suboptimal behavior was introduced in commit 9267c430c6b6
("virtio-net: add missing virtqueue kick when flushing packets").

Fixes: 9267c430c6b6 ("virtio-net: add missing virtqueue kick when flushing packets")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b6c9a2af3732..53085c63277b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -53,6 +53,10 @@ module_param(napi_tx, bool, 0644);
 /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
 #define VIRTIO_XDP_HEADROOM 256
 
+/* Separating two types of XDP xmit */
+#define VIRTIO_XDP_TX		BIT(0)
+#define VIRTIO_XDP_REDIR	BIT(1)
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -582,7 +586,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 				     struct receive_queue *rq,
 				     void *buf, void *ctx,
 				     unsigned int len,
-				     bool *xdp_xmit)
+				     unsigned int *xdp_xmit)
 {
 	struct sk_buff *skb;
 	struct bpf_prog *xdp_prog;
@@ -654,14 +658,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;
 			}
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_TX;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		case XDP_REDIRECT:
 			err = xdp_do_redirect(dev, &xdp, xdp_prog);
 			if (err)
 				goto err_xdp;
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_REDIR;
 			rcu_read_unlock();
 			goto xdp_xmit;
 		default:
@@ -723,7 +727,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 					 void *buf,
 					 void *ctx,
 					 unsigned int len,
-					 bool *xdp_xmit)
+					 unsigned int *xdp_xmit)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
@@ -818,7 +822,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 					put_page(xdp_page);
 				goto err_xdp;
 			}
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_TX;
 			if (unlikely(xdp_page != page))
 				put_page(page);
 			rcu_read_unlock();
@@ -830,7 +834,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 					put_page(xdp_page);
 				goto err_xdp;
 			}
-			*xdp_xmit = true;
+			*xdp_xmit |= VIRTIO_XDP_REDIR;
 			if (unlikely(xdp_page != page))
 				put_page(page);
 			rcu_read_unlock();
@@ -939,7 +943,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 }
 
 static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
-		       void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
+		       void *buf, unsigned int len, void **ctx,
+		       unsigned int *xdp_xmit)
 {
 	struct net_device *dev = vi->dev;
 	struct sk_buff *skb;
@@ -1232,7 +1237,8 @@ static void refill_work(struct work_struct *work)
 	}
 }
 
-static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
+static int virtnet_receive(struct receive_queue *rq, int budget,
+			   unsigned int *xdp_xmit)
 {
 	struct virtnet_info *vi = rq->vq->vdev->priv;
 	unsigned int len, received = 0, bytes = 0;
@@ -1321,7 +1327,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct send_queue *sq;
 	unsigned int received, qp;
-	bool xdp_xmit = false;
+	unsigned int xdp_xmit = 0;
 
 	virtnet_poll_cleantx(rq);
 
@@ -1331,12 +1337,14 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 	if (received < budget)
 		virtqueue_napi_complete(napi, rq->vq, received);
 
-	if (xdp_xmit) {
+	if (xdp_xmit & VIRTIO_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_xmit & VIRTIO_XDP_TX) {
 		qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
 		     smp_processor_id();
 		sq = &vi->sq[qp];
 		virtqueue_kick(sq->vq);
-		xdp_do_flush_map();
 	}
 
 	return received;

From e3f329c600033f011a978a8bc4ddb1e2e94c4f4d Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Tue, 19 Jun 2018 15:20:50 +0200
Subject: [PATCH 086/382] dmaengine: pl330: report BURST residue granularity

The reported residue is already calculated in BURST unit granularity, so
advertise this capability properly to other devices in the system.

Fixes: aee4d1fac887 ("dmaengine: pl330: improve pl330_tx_status() function")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/pl330.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index defcdde4d358..de0957fe9668 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -3033,7 +3033,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	pd->src_addr_widths = PL330_DMA_BUSWIDTHS;
 	pd->dst_addr_widths = PL330_DMA_BUSWIDTHS;
 	pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-	pd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ?
 			 1 : PL330_MAX_BURST);
 

From c4c2b7644cc9a41f17a8cc8904efe3f66ae4c7ed Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 22 Jun 2018 14:15:47 +0300
Subject: [PATCH 087/382] dmaengine: k3dma: Off by one in
 k3_of_dma_simple_xlate()

The d->chans[] array has d->dma_requests elements so the > should be
>= here.

Fixes: 8e6152bc660e ("dmaengine: Add hisilicon k3 DMA engine driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/k3dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index fa31cccbe04f..6bfa217ed6d0 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -794,7 +794,7 @@ static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 	struct k3_dma_dev *d = ofdma->of_dma_data;
 	unsigned int request = dma_spec->args[0];
 
-	if (request > d->dma_requests)
+	if (request >= d->dma_requests)
 		return NULL;
 
 	return dma_get_slave_channel(&(d->chans[request].vc.chan));

From 933e671f8cff87552d4d26cf3874633b11ae78ba Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuah@kernel.org>
Date: Tue, 26 Jun 2018 12:22:28 -0600
Subject: [PATCH 088/382] selftests/net: Fix permissions for fib_tests.sh
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fib_tests.sh became non-executable at some point. This is
what happens:
  selftests: net: fib_tests.sh: Warning: file fib_tests.sh is
  not executable, correct this.
  not ok 1..11 selftests: net: fib_tests.sh [FAIL]

Fixes: d69faad76584 ("selftests: fib_tests: Add prefix route tests with metric")

Signed-off-by: Daniel Díaz <daniel.diaz@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_tests.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 tools/testing/selftests/net/fib_tests.sh

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
old mode 100644
new mode 100755

From 977c7114ebda2e746a114840d3a875e0cdb826fb Mon Sep 17 00:00:00 2001
From: Doron Roberts-Kedes <doronrk@fb.com>
Date: Tue, 26 Jun 2018 18:33:33 -0700
Subject: [PATCH 089/382] strparser: Remove early eaten to fix full tcp receive
 buffer stall

On receving an incomplete message, the existing code stores the
remaining length of the cloned skb in the early_eaten field instead of
incrementing the value returned by __strp_recv. This defers invocation
of sock_rfree for the current skb until the next invocation of
__strp_recv, which returns early_eaten if early_eaten is non-zero.

This behavior causes a stall when the current message occupies the very
tail end of a massive skb, and strp_peek/need_bytes indicates that the
remainder of the current message has yet to arrive on the socket. The
TCP receive buffer is totally full, causing the TCP window to go to
zero, so the remainder of the message will never arrive.

Incrementing the value returned by __strp_recv by the amount otherwise
stored in early_eaten prevents stalls of this nature.

Signed-off-by: Doron Roberts-Kedes <doronrk@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/strparser/strparser.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 373836615c57..625acb27efcc 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -35,7 +35,6 @@ struct _strp_msg {
 	 */
 	struct strp_msg strp;
 	int accum_len;
-	int early_eaten;
 };
 
 static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
@@ -115,20 +114,6 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 	head = strp->skb_head;
 	if (head) {
 		/* Message already in progress */
-
-		stm = _strp_msg(head);
-		if (unlikely(stm->early_eaten)) {
-			/* Already some number of bytes on the receive sock
-			 * data saved in skb_head, just indicate they
-			 * are consumed.
-			 */
-			eaten = orig_len <= stm->early_eaten ?
-				orig_len : stm->early_eaten;
-			stm->early_eaten -= eaten;
-
-			return eaten;
-		}
-
 		if (unlikely(orig_offset)) {
 			/* Getting data with a non-zero offset when a message is
 			 * in progress is not expected. If it does happen, we
@@ -297,9 +282,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
 				}
 
 				stm->accum_len += cand_len;
+				eaten += cand_len;
 				strp->need_bytes = stm->strp.full_len -
 						       stm->accum_len;
-				stm->early_eaten = cand_len;
 				STRP_STATS_ADD(strp->stats.bytes, cand_len);
 				desc->count = 0; /* Stop reading socket */
 				break;

From 8e75887d321d102200abf3a9fa621e2c10ff4cc5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 26 Jun 2018 20:13:48 -0700
Subject: [PATCH 090/382] bpfilter: include bpfilter_umh in assembly instead of
 using objcopy

What we want here is to embed a user-space program into the kernel.
Instead of the complex ELF magic, let's simply wrap it in the assembly
with the '.incbin' directive.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bpfilter/Makefile            | 17 ++---------------
 net/bpfilter/bpfilter_kern.c     | 11 +++++------
 net/bpfilter/bpfilter_umh_blob.S |  7 +++++++
 3 files changed, 14 insertions(+), 21 deletions(-)
 create mode 100644 net/bpfilter/bpfilter_umh_blob.S

diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 051dc18b8ccb..39c6980b5d99 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -15,20 +15,7 @@ ifeq ($(CONFIG_BPFILTER_UMH), y)
 HOSTLDFLAGS += -static
 endif
 
-# a bit of elf magic to convert bpfilter_umh binary into a binary blob
-# inside bpfilter_umh.o elf file referenced by
-# _binary_net_bpfilter_bpfilter_umh_start symbol
-# which bpfilter_kern.c passes further into umh blob loader at run-time
-quiet_cmd_copy_umh = GEN $@
-      cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
-      $(OBJCOPY) -I binary \
-          `LC_ALL=C $(OBJDUMP) -f net/bpfilter/bpfilter_umh \
-          |awk -F' |,' '/file format/{print "-O",$$NF} \
-          /^architecture:/{print "-B",$$2}'` \
-      --rename-section .data=.init.rodata $< $@
-
-$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
-	$(call cmd,copy_umh)
+$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
 
 obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
-bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
+bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 09522573f611..f0fc182d3db7 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -10,11 +10,8 @@
 #include <linux/file.h>
 #include "msgfmt.h"
 
-#define UMH_start _binary_net_bpfilter_bpfilter_umh_start
-#define UMH_end _binary_net_bpfilter_bpfilter_umh_end
-
-extern char UMH_start;
-extern char UMH_end;
+extern char bpfilter_umh_start;
+extern char bpfilter_umh_end;
 
 static struct umh_info info;
 /* since ip_getsockopt() can run in parallel, serialize access to umh */
@@ -93,7 +90,9 @@ static int __init load_umh(void)
 	int err;
 
 	/* fork usermode process */
-	err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info);
+	err = fork_usermode_blob(&bpfilter_umh_start,
+				 &bpfilter_umh_end - &bpfilter_umh_start,
+				 &info);
 	if (err)
 		return err;
 	pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
new file mode 100644
index 000000000000..40311d10d2f2
--- /dev/null
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	.section .init.rodata, "a"
+	.global bpfilter_umh_start
+bpfilter_umh_start:
+	.incbin "net/bpfilter/bpfilter_umh"
+	.global bpfilter_umh_end
+bpfilter_umh_end:

From 15ecbe94a45ef88491ca459b26efdd02f91edb6d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 27 Jun 2018 08:47:21 -0700
Subject: [PATCH 091/382] tcp: add one more quick ack after after ECN events

Larry Brakmo proposal ( https://patchwork.ozlabs.org/patch/935233/
tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink
about our recent patch removing ~16 quick acks after ECN events.

tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent,
but in the case the sender cwnd was lowered to 1, we do not want
to have a delayed ack for the next packet we will receive.

Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Cc: Lawrence Brakmo <brakmo@fb.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..045d930d01a9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -265,7 +265,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 		 * it is probably a retransmit.
 		 */
 		if (tp->ecn_flags & TCP_ECN_SEEN)
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 		break;
 	case INET_ECN_CE:
 		if (tcp_ca_needs_ecn(sk))
@@ -273,7 +273,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
 
 		if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
 			/* Better not delay acks, sender can have a very low cwnd */
-			tcp_enter_quickack_mode(sk, 1);
+			tcp_enter_quickack_mode(sk, 2);
 			tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
 		}
 		tp->ecn_flags |= TCP_ECN_SEEN;

From 24ac3a08e65845a098140ff270229dec4a897404 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Wed, 27 Jun 2018 17:59:50 +0200
Subject: [PATCH 092/382] net/smc: rebuild nonblocking connect

The recent poll change may lead to stalls for non-blocking connecting
SMC sockets, since sock_poll_wait is no longer performed on the
internal CLC socket, but on the outer SMC socket.  kernel_connect() on
the internal CLC socket returns with -EINPROGRESS, but the wake up
logic does not work in all cases. If the internal CLC socket is still
in state TCP_SYN_SENT when polled, sock_poll_wait() from sock_poll()
does not sleep. It is supposed to sleep till the state of the internal
CLC socket switches to TCP_ESTABLISHED.

This problem triggered a redesign of the SMC nonblocking connect logic.
This patch introduces a connect worker covering all connect steps
followed by a wake up of socket waiters. It allows to get rid of all
delays and locks in smc_poll().

Fixes: c0129a061442 ("smc: convert to ->poll_mask")
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 91 +++++++++++++++++++++++++++++++++---------------
 net/smc/smc.h    |  8 +++++
 2 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index da7f02edcd37..4d1d4ddb2f49 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -45,6 +45,7 @@ static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
 						 */
 
 static void smc_tcp_listen_work(struct work_struct *);
+static void smc_connect_work(struct work_struct *);
 
 static void smc_set_keepalive(struct sock *sk, int val)
 {
@@ -122,6 +123,12 @@ static int smc_release(struct socket *sock)
 		goto out;
 
 	smc = smc_sk(sk);
+
+	/* cleanup for a dangling non-blocking connect */
+	flush_work(&smc->connect_work);
+	kfree(smc->connect_info);
+	smc->connect_info = NULL;
+
 	if (sk->sk_state == SMC_LISTEN)
 		/* smc_close_non_accepted() is called and acquires
 		 * sock lock for child sockets again
@@ -186,6 +193,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
 	sk->sk_protocol = protocol;
 	smc = smc_sk(sk);
 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
+	INIT_WORK(&smc->connect_work, smc_connect_work);
 	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
 	INIT_LIST_HEAD(&smc->accept_q);
 	spin_lock_init(&smc->accept_q_lock);
@@ -576,6 +584,35 @@ static int __smc_connect(struct smc_sock *smc)
 	return 0;
 }
 
+static void smc_connect_work(struct work_struct *work)
+{
+	struct smc_sock *smc = container_of(work, struct smc_sock,
+					    connect_work);
+	int rc;
+
+	lock_sock(&smc->sk);
+	rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
+			    smc->connect_info->alen, smc->connect_info->flags);
+	if (smc->clcsock->sk->sk_err) {
+		smc->sk.sk_err = smc->clcsock->sk->sk_err;
+		goto out;
+	}
+	if (rc < 0) {
+		smc->sk.sk_err = -rc;
+		goto out;
+	}
+
+	rc = __smc_connect(smc);
+	if (rc < 0)
+		smc->sk.sk_err = -rc;
+
+out:
+	smc->sk.sk_state_change(&smc->sk);
+	kfree(smc->connect_info);
+	smc->connect_info = NULL;
+	release_sock(&smc->sk);
+}
+
 static int smc_connect(struct socket *sock, struct sockaddr *addr,
 		       int alen, int flags)
 {
@@ -605,15 +642,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
 
 	smc_copy_sock_settings_to_clc(smc);
 	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
-	rc = kernel_connect(smc->clcsock, addr, alen, flags);
-	if (rc)
-		goto out;
+	if (flags & O_NONBLOCK) {
+		if (smc->connect_info) {
+			rc = -EALREADY;
+			goto out;
+		}
+		smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
+		if (!smc->connect_info) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		smc->connect_info->alen = alen;
+		smc->connect_info->flags = flags ^ O_NONBLOCK;
+		memcpy(&smc->connect_info->addr, addr, alen);
+		schedule_work(&smc->connect_work);
+		rc = -EINPROGRESS;
+	} else {
+		rc = kernel_connect(smc->clcsock, addr, alen, flags);
+		if (rc)
+			goto out;
 
-	rc = __smc_connect(smc);
-	if (rc < 0)
-		goto out;
-	else
-		rc = 0; /* success cases including fallback */
+		rc = __smc_connect(smc);
+		if (rc < 0)
+			goto out;
+		else
+			rc = 0; /* success cases including fallback */
+	}
 
 out:
 	release_sock(sk);
@@ -1278,34 +1332,17 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events)
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
 	struct smc_sock *smc;
-	int rc;
 
 	if (!sk)
 		return EPOLLNVAL;
 
 	smc = smc_sk(sock->sk);
-	sock_hold(sk);
-	lock_sock(sk);
 	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
 		/* delegate to CLC child sock */
-		release_sock(sk);
 		mask = smc->clcsock->ops->poll_mask(smc->clcsock, events);
-		lock_sock(sk);
 		sk->sk_err = smc->clcsock->sk->sk_err;
-		if (sk->sk_err) {
+		if (sk->sk_err)
 			mask |= EPOLLERR;
-		} else {
-			/* if non-blocking connect finished ... */
-			if (sk->sk_state == SMC_INIT &&
-			    mask & EPOLLOUT &&
-			    smc->clcsock->sk->sk_state != TCP_CLOSE) {
-				rc = __smc_connect(smc);
-				if (rc < 0)
-					mask |= EPOLLERR;
-				/* success cases including fallback */
-				mask |= EPOLLOUT | EPOLLWRNORM;
-			}
-		}
 	} else {
 		if (sk->sk_err)
 			mask |= EPOLLERR;
@@ -1334,8 +1371,6 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events)
 			mask |= EPOLLPRI;
 
 	}
-	release_sock(sk);
-	sock_put(sk);
 
 	return mask;
 }
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 51ae1f10d81a..d7ca26570482 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -187,11 +187,19 @@ struct smc_connection {
 	struct work_struct	close_work;	/* peer sent some closing */
 };
 
+struct smc_connect_info {
+	int			flags;
+	int			alen;
+	struct sockaddr		addr;
+};
+
 struct smc_sock {				/* smc sock container */
 	struct sock		sk;
 	struct socket		*clcsock;	/* internal tcp socket */
 	struct smc_connection	conn;		/* smc connection */
 	struct smc_sock		*listen_smc;	/* listen parent */
+	struct smc_connect_info *connect_info;	/* connect address & flags */
+	struct work_struct	connect_work;	/* handle non-blocking connect*/
 	struct work_struct	tcp_listen_work;/* handle tcp socket accepts */
 	struct work_struct	smc_listen_work;/* prepare new accept socket */
 	struct list_head	accept_q;	/* sockets to be accepted */

From 4205c88eaf17b5f3ee30032d68df55cd5d9077a1 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Wed, 27 Jun 2018 15:03:20 +0100
Subject: [PATCH 093/382] net: stmmac: Set DMA buffer size in HW

This is clearly a bug.

We need to set the DMA buffer size in the HW otherwise corruption can
occur when receiving packets.

This is probably not occuring because of small MTU values and because HW
has a default value internally (which currently is bigger than default
buffer size).

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Vitor Soares <soares@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c  | 12 ++++++++++++
 drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h  |  2 ++
 drivers/net/ethernet/stmicro/stmmac/hwif.h        |  3 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |  2 ++
 4 files changed, 19 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index d37f17ca62fe..65bc3556bd8f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -407,6 +407,16 @@ static void dwmac4_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
 	}
 }
 
+static void dwmac4_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
+{
+	u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan));
+
+	value &= ~DMA_RBSZ_MASK;
+	value |= (bfsize << DMA_RBSZ_SHIFT) & DMA_RBSZ_MASK;
+
+	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
+}
+
 const struct stmmac_dma_ops dwmac4_dma_ops = {
 	.reset = dwmac4_dma_reset,
 	.init = dwmac4_dma_init,
@@ -431,6 +441,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = {
 	.set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
 	.set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
 	.enable_tso = dwmac4_enable_tso,
+	.set_bfsize = dwmac4_set_bfsize,
 };
 
 const struct stmmac_dma_ops dwmac410_dma_ops = {
@@ -457,4 +468,5 @@ const struct stmmac_dma_ops dwmac410_dma_ops = {
 	.set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
 	.set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
 	.enable_tso = dwmac4_enable_tso,
+	.set_bfsize = dwmac4_set_bfsize,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index c63c1fe3f26b..22a4a6dbb1a4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -120,6 +120,8 @@
 
 /* DMA Rx Channel X Control register defines */
 #define DMA_CONTROL_SR			BIT(0)
+#define DMA_RBSZ_MASK			GENMASK(14, 1)
+#define DMA_RBSZ_SHIFT			1
 
 /* Interrupt status per channel */
 #define DMA_CHAN_STATUS_REB		GENMASK(21, 19)
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index e44e7b26ce82..fe8b536b13f8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -183,6 +183,7 @@ struct stmmac_dma_ops {
 	void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 	void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
 	void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
+	void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
 };
 
 #define stmmac_reset(__priv, __args...) \
@@ -235,6 +236,8 @@ struct stmmac_dma_ops {
 	stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
 #define stmmac_enable_tso(__priv, __args...) \
 	stmmac_do_void_callback(__priv, dma, enable_tso, __args)
+#define stmmac_set_dma_bfsize(__priv, __args...) \
+	stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
 
 struct mac_device_info;
 struct net_device;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index cba46b62a1cd..60f59abab009 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1804,6 +1804,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 
 		stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
 				rxfifosz, qmode);
+		stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz,
+				chan);
 	}
 
 	for (chan = 0; chan < tx_channels_count; chan++) {

From 5a267832c2ec47b2dad0fdb291a96bb5b8869315 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Fri, 22 Jun 2018 10:55:45 -0700
Subject: [PATCH 094/382] MIPS: Call dump_stack() from show_regs()

The generic nmi_cpu_backtrace() function calls show_regs() when a struct
pt_regs is available, and dump_stack() otherwise. If we were to make use
of the generic nmi_cpu_backtrace() with MIPS' current implementation of
show_regs() this would mean that we see only register data with no
accompanying stack information, in contrast with our current
implementation which calls dump_stack() regardless of whether register
state is available.

In preparation for making use of the generic nmi_cpu_backtrace() to
implement arch_trigger_cpumask_backtrace(), have our implementation of
show_regs() call dump_stack() and drop the explicit dump_stack() call in
arch_dump_stack() which is invoked by arch_trigger_cpumask_backtrace().

This will allow the output we produce to remain the same after a later
patch switches to using nmi_cpu_backtrace(). It may mean that we produce
extra stack output in other uses of show_regs(), but this:

  1) Seems harmless.
  2) Is good for consistency between arch_trigger_cpumask_backtrace()
     and other users of show_regs().
  3) Matches the behaviour of the ARM & PowerPC architectures.

Marked for stable back to v4.9 as a prerequisite of the following patch
"MIPS: Call dump_stack() from show_regs()".

Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/19596/
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v4.9+
---
 arch/mips/kernel/process.c | 4 ++--
 arch/mips/kernel/traps.c   | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 8d85046adcc8..d4cfeb931382 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -663,8 +663,8 @@ static void arch_dump_stack(void *info)
 
 	if (regs)
 		show_regs(regs);
-
-	dump_stack();
+	else
+		dump_stack();
 }
 
 void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index d67fa74622ee..8d505a21396e 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -351,6 +351,7 @@ static void __show_regs(const struct pt_regs *regs)
 void show_regs(struct pt_regs *regs)
 {
 	__show_regs((struct pt_regs *)regs);
+	dump_stack();
 }
 
 void show_registers(struct pt_regs *regs)

From 363a3d3fb7d478d7dd49b8c6294436b8ba5984cc Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Mon, 11 Jun 2018 16:46:40 +0800
Subject: [PATCH 095/382] drm/amd/powerplay: correct vega12 thermal support as
 true

Thermal support is enabled on vega12.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 782e2098824d..6955c359b706 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -81,6 +81,7 @@ static void vega12_set_default_registry_data(struct pp_hwmgr *hwmgr)
 
 	data->registry_data.disallowed_features = 0x0;
 	data->registry_data.od_state_in_dc_support = 0;
+	data->registry_data.thermal_support = 1;
 	data->registry_data.skip_baco_hardware = 0;
 
 	data->registry_data.log_avfs_param = 0;

From 36953aa94a8fcc884b970d63d3e4c650257dd03e Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Mon, 28 May 2018 08:59:16 +0800
Subject: [PATCH 096/382] drm/amd/powerplay: correct vega12 bootup values
 settings

The vbios firmware structure changed between v3_1 and v3_2. So,
the code to setup bootup values needs different paths based
on header version.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/powerplay/hwmgr/ppatomfwctrl.c    | 94 ++++++++++++++++---
 .../drm/amd/powerplay/hwmgr/ppatomfwctrl.h    |  3 +
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    |  3 +
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.h    |  3 +
 4 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index 5325661fedff..aa2faffef034 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -512,14 +512,82 @@ int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKI
 	return 0;
 }
 
+static void pp_atomfwctrl_copy_vbios_bootup_values_3_2(struct pp_hwmgr *hwmgr,
+			struct pp_atomfwctrl_bios_boot_up_values *boot_values,
+			struct atom_firmware_info_v3_2 *fw_info)
+{
+	uint32_t frequency = 0;
+
+	boot_values->ulRevision = fw_info->firmware_revision;
+	boot_values->ulGfxClk   = fw_info->bootup_sclk_in10khz;
+	boot_values->ulUClk     = fw_info->bootup_mclk_in10khz;
+	boot_values->usVddc     = fw_info->bootup_vddc_mv;
+	boot_values->usVddci    = fw_info->bootup_vddci_mv;
+	boot_values->usMvddc    = fw_info->bootup_mvddc_mv;
+	boot_values->usVddGfx   = fw_info->bootup_vddgfx_mv;
+	boot_values->ucCoolingID = fw_info->coolingsolution_id;
+	boot_values->ulSocClk   = 0;
+	boot_values->ulDCEFClk   = 0;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_SOCCLK_ID, &frequency))
+		boot_values->ulSocClk   = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCEFCLK_ID, &frequency))
+		boot_values->ulDCEFClk  = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_ECLK_ID, &frequency))
+		boot_values->ulEClk     = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_VCLK_ID, &frequency))
+		boot_values->ulVClk     = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCLK_ID, &frequency))
+		boot_values->ulDClk     = frequency;
+}
+
+static void pp_atomfwctrl_copy_vbios_bootup_values_3_1(struct pp_hwmgr *hwmgr,
+			struct pp_atomfwctrl_bios_boot_up_values *boot_values,
+			struct atom_firmware_info_v3_1 *fw_info)
+{
+	uint32_t frequency = 0;
+
+	boot_values->ulRevision = fw_info->firmware_revision;
+	boot_values->ulGfxClk   = fw_info->bootup_sclk_in10khz;
+	boot_values->ulUClk     = fw_info->bootup_mclk_in10khz;
+	boot_values->usVddc     = fw_info->bootup_vddc_mv;
+	boot_values->usVddci    = fw_info->bootup_vddci_mv;
+	boot_values->usMvddc    = fw_info->bootup_mvddc_mv;
+	boot_values->usVddGfx   = fw_info->bootup_vddgfx_mv;
+	boot_values->ucCoolingID = fw_info->coolingsolution_id;
+	boot_values->ulSocClk   = 0;
+	boot_values->ulDCEFClk   = 0;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, &frequency))
+		boot_values->ulSocClk   = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, &frequency))
+		boot_values->ulDCEFClk  = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_ECLK_ID, &frequency))
+		boot_values->ulEClk     = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_VCLK_ID, &frequency))
+		boot_values->ulVClk     = frequency;
+
+	if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCLK_ID, &frequency))
+		boot_values->ulDClk     = frequency;
+}
+
 int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
 			struct pp_atomfwctrl_bios_boot_up_values *boot_values)
 {
-	struct atom_firmware_info_v3_1 *info = NULL;
+	struct atom_firmware_info_v3_2 *fwinfo_3_2;
+	struct atom_firmware_info_v3_1 *fwinfo_3_1;
+	struct atom_common_table_header *info = NULL;
 	uint16_t ix;
 
 	ix = GetIndexIntoMasterDataTable(firmwareinfo);
-	info = (struct atom_firmware_info_v3_1 *)
+	info = (struct atom_common_table_header *)
 		smu_atom_get_data_table(hwmgr->adev,
 				ix, NULL, NULL, NULL);
 
@@ -528,16 +596,18 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr,
 		return -EINVAL;
 	}
 
-	boot_values->ulRevision = info->firmware_revision;
-	boot_values->ulGfxClk   = info->bootup_sclk_in10khz;
-	boot_values->ulUClk     = info->bootup_mclk_in10khz;
-	boot_values->usVddc     = info->bootup_vddc_mv;
-	boot_values->usVddci    = info->bootup_vddci_mv;
-	boot_values->usMvddc    = info->bootup_mvddc_mv;
-	boot_values->usVddGfx   = info->bootup_vddgfx_mv;
-	boot_values->ucCoolingID = info->coolingsolution_id;
-	boot_values->ulSocClk   = 0;
-	boot_values->ulDCEFClk   = 0;
+	if ((info->format_revision == 3) && (info->content_revision == 2)) {
+		fwinfo_3_2 = (struct atom_firmware_info_v3_2 *)info;
+		pp_atomfwctrl_copy_vbios_bootup_values_3_2(hwmgr,
+				boot_values, fwinfo_3_2);
+	} else if ((info->format_revision == 3) && (info->content_revision == 1)) {
+		fwinfo_3_1 = (struct atom_firmware_info_v3_1 *)info;
+		pp_atomfwctrl_copy_vbios_bootup_values_3_1(hwmgr,
+				boot_values, fwinfo_3_1);
+	} else {
+		pr_info("Fw info table revision does not match!");
+		return -EINVAL;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index fe10aa4db5e6..745bd3809549 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -136,6 +136,9 @@ struct pp_atomfwctrl_bios_boot_up_values {
 	uint32_t   ulUClk;
 	uint32_t   ulSocClk;
 	uint32_t   ulDCEFClk;
+	uint32_t   ulEClk;
+	uint32_t   ulVClk;
+	uint32_t   ulDClk;
 	uint16_t   usVddc;
 	uint16_t   usVddci;
 	uint16_t   usMvddc;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 6955c359b706..c98e5de777cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -804,6 +804,9 @@ static int vega12_init_smc_table(struct pp_hwmgr *hwmgr)
 		data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk;
 		data->vbios_boot_state.dcef_clock = boot_up_values.ulDCEFClk;
 		data->vbios_boot_state.uc_cooling_id = boot_up_values.ucCoolingID;
+		data->vbios_boot_state.eclock = boot_up_values.ulEClk;
+		data->vbios_boot_state.dclock = boot_up_values.ulDClk;
+		data->vbios_boot_state.vclock = boot_up_values.ulVClk;
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 				PPSMC_MSG_SetMinDeepSleepDcefclk,
 			(uint32_t)(data->vbios_boot_state.dcef_clock / 100));
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
index e81ded1ec198..49b38df8c7f2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h
@@ -167,6 +167,9 @@ struct vega12_vbios_boot_state {
 	uint32_t    mem_clock;
 	uint32_t    soc_clock;
 	uint32_t    dcef_clock;
+	uint32_t    eclock;
+	uint32_t    dclock;
+	uint32_t    vclock;
 };
 
 #define DPMTABLE_OD_UPDATE_SCLK     0x00000001

From 1513b1c93f825f74751897258e0bdde64fd3bb6a Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Mon, 11 Jun 2018 15:20:39 +0800
Subject: [PATCH 097/382] drm/amd/powerplay: smc_dpm_info structure change

A new member Vr2_I2C_address is added.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/atomfirmware.h                   | 5 ++++-
 drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c           | 2 ++
 drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h           | 2 ++
 drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c | 2 ++
 drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h    | 5 ++++-
 5 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 092d800b703a..33b4de4ad66e 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1433,7 +1433,10 @@ struct atom_smc_dpm_info_v4_1
 	uint8_t  acggfxclkspreadpercent;
 	uint16_t acggfxclkspreadfreq;
 
-	uint32_t boardreserved[10];
+	uint8_t Vr2_I2C_address;
+	uint8_t padding_vr2[3];
+
+	uint32_t boardreserved[9];
 };
 
 /* 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
index aa2faffef034..d27c1c9df286 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c
@@ -699,5 +699,7 @@ int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr,
 	param->acggfxclkspreadpercent = info->acggfxclkspreadpercent;
 	param->acggfxclkspreadfreq = info->acggfxclkspreadfreq;
 
+	param->Vr2_I2C_address = info->Vr2_I2C_address;
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
index 745bd3809549..22e21668c93a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h
@@ -210,6 +210,8 @@ struct pp_atomfwctrl_smc_dpm_parameters
 	uint8_t  acggfxclkspreadenabled;
 	uint8_t  acggfxclkspreadpercent;
 	uint16_t acggfxclkspreadfreq;
+
+	uint8_t Vr2_I2C_address;
 };
 
 int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
index 888ddca902d8..29914700ee82 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c
@@ -230,6 +230,8 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
 		ppsmc_pptable->AcgThresholdFreqLow = 0xFFFF;
 	}
 
+	ppsmc_pptable->Vr2_I2C_address = smc_dpm_table.Vr2_I2C_address;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h
index 2f8a3b983cce..b08526fd1619 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h
@@ -499,7 +499,10 @@ typedef struct {
 	uint8_t      AcgGfxclkSpreadPercent;
 	uint16_t     AcgGfxclkSpreadFreq;
 
-	uint32_t     BoardReserved[10];
+  uint8_t      Vr2_I2C_address;
+  uint8_t      padding_vr2[3];
+
+  uint32_t     BoardReserved[9];
 
 
   uint32_t     MmHubPadding[7];

From bda3153998f3eb2cafa4a6311971143628eacdbc Mon Sep 17 00:00:00 2001
From: BingJing Chang <bingjingc@synology.com>
Date: Thu, 28 Jun 2018 18:40:11 +0800
Subject: [PATCH 098/382] md/raid10: fix that replacement cannot complete
 recovery after reassemble

During assemble, the spare marked for replacement is not checked.
conf->fullsync cannot be updated to be 1. As a result, recovery will
treat it as a clean array. All recovering sectors are skipped. Original
device is replaced with the not-recovered spare.

mdadm -C /dev/md0 -l10 -n4 -pn2 /dev/loop[0123]
mdadm /dev/md0 -a /dev/loop4
mdadm /dev/md0 --replace /dev/loop0
mdadm -S /dev/md0 # stop array during recovery

mdadm -A /dev/md0 /dev/loop[01234]

After reassemble, you can see recovery go on, but it completes
immediately. In fact, recovery is not actually processed.

To solve this problem, we just add the missing logics for replacment
spares. (In raid1.c or raid5.c, they have already been checked.)

Reported-by: Alex Chen <alexchen@synology.com>
Reviewed-by: Alex Wu <alexwu@synology.com>
Reviewed-by: Chung-Chiang Cheng <cccheng@synology.com>
Signed-off-by: BingJing Chang <bingjingc@synology.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 478cf446827f..35bd3a62451b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3893,6 +3893,13 @@ static int raid10_run(struct mddev *mddev)
 			    disk->rdev->saved_raid_disk < 0)
 				conf->fullsync = 1;
 		}
+
+		if (disk->replacement &&
+		    !test_bit(In_sync, &disk->replacement->flags) &&
+		    disk->replacement->saved_raid_disk < 0) {
+			conf->fullsync = 1;
+		}
+
 		disk->recovery_disabled = mddev->recovery_disabled - 1;
 	}
 

From 2cd5fe22d9a45cdf11c62bbe8db3ce9101207510 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 25 Jun 2018 21:09:04 -0400
Subject: [PATCH 099/382] drm/amdgpu: Make struct amdgpu_atif private to
 amdgpu_acpi.c

Currently, there is nothing in amdgpu that actually uses these structs
other than amdgpu_acpi.c. Additionally, since we're about to start
saving the correct ACPI handle to use for calling ATIF in this struct
this saves us from having to handle making sure that the acpi_handle
(and by proxy, the type definition for acpi_handle and all of the other
acpi headers) doesn't need to be included within the amdgpu_drv struct
itself. This follows the example set by amdgpu_atpx_handler.c.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      | 40 +-----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 54 ++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a59c07590cee..7df5d3d11aff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -190,6 +190,7 @@ struct amdgpu_job;
 struct amdgpu_irq_src;
 struct amdgpu_fpriv;
 struct amdgpu_bo_va_mapping;
+struct amdgpu_atif;
 
 enum amdgpu_cp_irq {
 	AMDGPU_CP_IRQ_GFX_EOP = 0,
@@ -1269,43 +1270,6 @@ struct amdgpu_vram_scratch {
 /*
  * ACPI
  */
-struct amdgpu_atif_notification_cfg {
-	bool enabled;
-	int command_code;
-};
-
-struct amdgpu_atif_notifications {
-	bool display_switch;
-	bool expansion_mode_change;
-	bool thermal_state;
-	bool forced_power_state;
-	bool system_power_state;
-	bool display_conf_change;
-	bool px_gfx_switch;
-	bool brightness_change;
-	bool dgpu_display_event;
-};
-
-struct amdgpu_atif_functions {
-	bool system_params;
-	bool sbios_requests;
-	bool select_active_disp;
-	bool lid_state;
-	bool get_tv_standard;
-	bool set_tv_standard;
-	bool get_panel_expansion_mode;
-	bool set_panel_expansion_mode;
-	bool temperature_change;
-	bool graphics_device_types;
-};
-
-struct amdgpu_atif {
-	struct amdgpu_atif_notifications notifications;
-	struct amdgpu_atif_functions functions;
-	struct amdgpu_atif_notification_cfg notification_cfg;
-	struct amdgpu_encoder *encoder_for_bl;
-};
-
 struct amdgpu_atcs_functions {
 	bool get_ext_state;
 	bool pcie_perf_req;
@@ -1466,7 +1430,7 @@ struct amdgpu_device {
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry			*debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
 #endif
-	struct amdgpu_atif		atif;
+	struct amdgpu_atif		*atif;
 	struct amdgpu_atcs		atcs;
 	struct mutex			srbm_mutex;
 	/* GRBM index mutex. Protects concurrent access to GRBM index */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 8fa850a070e0..22c7e8ec0b9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -34,6 +34,43 @@
 #include "amd_acpi.h"
 #include "atom.h"
 
+struct amdgpu_atif_notification_cfg {
+	bool enabled;
+	int command_code;
+};
+
+struct amdgpu_atif_notifications {
+	bool display_switch;
+	bool expansion_mode_change;
+	bool thermal_state;
+	bool forced_power_state;
+	bool system_power_state;
+	bool display_conf_change;
+	bool px_gfx_switch;
+	bool brightness_change;
+	bool dgpu_display_event;
+};
+
+struct amdgpu_atif_functions {
+	bool system_params;
+	bool sbios_requests;
+	bool select_active_disp;
+	bool lid_state;
+	bool get_tv_standard;
+	bool set_tv_standard;
+	bool get_panel_expansion_mode;
+	bool set_panel_expansion_mode;
+	bool temperature_change;
+	bool graphics_device_types;
+};
+
+struct amdgpu_atif {
+	struct amdgpu_atif_notifications notifications;
+	struct amdgpu_atif_functions functions;
+	struct amdgpu_atif_notification_cfg notification_cfg;
+	struct amdgpu_encoder *encoder_for_bl;
+};
+
 /* Call the ATIF method
  */
 /**
@@ -292,7 +329,7 @@ static int amdgpu_atif_get_sbios_requests(acpi_handle handle,
 static int amdgpu_atif_handler(struct amdgpu_device *adev,
 			struct acpi_bus_event *event)
 {
-	struct amdgpu_atif *atif = &adev->atif;
+	struct amdgpu_atif *atif = adev->atif;
 	struct atif_sbios_requests req;
 	acpi_handle handle;
 	int count;
@@ -303,7 +340,8 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 	if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0)
 		return NOTIFY_DONE;
 
-	if (!atif->notification_cfg.enabled ||
+	if (!atif ||
+	    !atif->notification_cfg.enabled ||
 	    event->type != atif->notification_cfg.command_code)
 		/* Not our event */
 		return NOTIFY_DONE;
@@ -642,7 +680,7 @@ static int amdgpu_acpi_event(struct notifier_block *nb,
 int amdgpu_acpi_init(struct amdgpu_device *adev)
 {
 	acpi_handle handle;
-	struct amdgpu_atif *atif = &adev->atif;
+	struct amdgpu_atif *atif;
 	struct amdgpu_atcs *atcs = &adev->atcs;
 	int ret;
 
@@ -659,11 +697,19 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 	}
 
 	/* Call the ATIF method */
+	atif = kzalloc(sizeof(*atif), GFP_KERNEL);
+	if (!atif) {
+		DRM_WARN("Not enough memory to initialize ATIF\n");
+		goto out;
+	}
+
 	ret = amdgpu_atif_verify_interface(handle, atif);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret);
+		kfree(atif);
 		goto out;
 	}
+	adev->atif = atif;
 
 	if (atif->notifications.brightness_change) {
 		struct drm_encoder *tmp;
@@ -720,4 +766,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 void amdgpu_acpi_fini(struct amdgpu_device *adev)
 {
 	unregister_acpi_notifier(&adev->acpi_nb);
+	if (adev->atif)
+		kfree(adev->atif);
 }

From 4aa5d5eb82bb237d0bb3a38b2a7555054d018081 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 25 Jun 2018 21:09:06 -0400
Subject: [PATCH 100/382] drm/amdgpu: Add amdgpu_atpx_get_dhandle()

Since it seems that some vendors are storing the ATIF ACPI methods under
the same handle that ATPX lives under instead of the device's own
handle, we're going to need to be able to retrieve this handle later so
we can probe for ATIF there.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h              | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7df5d3d11aff..7dcbac8af9a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1858,6 +1858,12 @@ static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false;
 static inline bool amdgpu_has_atpx(void) { return false; }
 #endif
 
+#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI)
+void *amdgpu_atpx_get_dhandle(void);
+#else
+static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; }
+#endif
+
 /*
  * KMS
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index daa06e7c5bb7..9ab89371d9e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -90,6 +90,12 @@ bool amdgpu_atpx_dgpu_req_power_for_displays(void) {
 	return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays;
 }
 
+#if defined(CONFIG_ACPI)
+void *amdgpu_atpx_get_dhandle(void) {
+	return amdgpu_atpx_priv.dhandle;
+}
+#endif
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *

From f9ff68521a5541e1fdaeb0ef11871c035b30e409 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 25 Jun 2018 21:09:07 -0400
Subject: [PATCH 101/382] drm/amdgpu: Dynamically probe for ATIF handle (v2)

The other day I was testing one of the HP laptops at my office with an
i915/amdgpu hybrid setup and noticed that hotplugging was non-functional
on almost all of the display outputs. I eventually discovered that all
of the external outputs were connected to the amdgpu device instead of
i915, and that the hotplugs weren't being detected so long as the GPU
was in runtime suspend. After some talking with folks at AMD, I learned
that amdgpu is actually supposed to support hotplug detection in runtime
suspend so long as the OEM has implemented it properly in the firmware.

On this HP ZBook 15 G4 (the machine in question), amdgpu wasn't managing
to find the ATIF handle at all despite the fact that I could see acpi
events being sent in response to any hotplugging. After going through
dumps of the firmware, I discovered that this machine did in fact
support ATIF, but that it's ATIF method lived in an entirely different
namespace than this device's handle (the device handle was
\_SB_.PCI0.PEG0.PEGP, but ATIF lives in ATPX's handle at
\_SB_.PCI0.GFX0).

So, fix this by probing ATPX's ACPI parent's namespace if we can't find
ATIF elsewhere, along with storing a pointer to the proper handle to use
for ATIF and using that instead of the device's handle.

This fixes HPD detection while in runtime suspend for this ZBook!

v2: Update the comment to reflect how the namespaces are arranged
based on the system configuration. (Alex)

Signed-off-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 79 +++++++++++++++++-------
 1 file changed, 58 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 22c7e8ec0b9a..0d8c3fc6eace 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -65,6 +65,8 @@ struct amdgpu_atif_functions {
 };
 
 struct amdgpu_atif {
+	acpi_handle handle;
+
 	struct amdgpu_atif_notifications notifications;
 	struct amdgpu_atif_functions functions;
 	struct amdgpu_atif_notification_cfg notification_cfg;
@@ -83,8 +85,9 @@ struct amdgpu_atif {
  * Executes the requested ATIF function (all asics).
  * Returns a pointer to the acpi output buffer.
  */
-static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function,
-		struct acpi_buffer *params)
+static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
+					   int function,
+					   struct acpi_buffer *params)
 {
 	acpi_status status;
 	union acpi_object atif_arg_elements[2];
@@ -107,7 +110,8 @@ static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function,
 		atif_arg_elements[1].integer.value = 0;
 	}
 
-	status = acpi_evaluate_object(handle, "ATIF", &atif_arg, &buffer);
+	status = acpi_evaluate_object(atif->handle, NULL, &atif_arg,
+				      &buffer);
 
 	/* Fail only if calling the method fails and ATIF is supported */
 	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
@@ -178,15 +182,14 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
  * (all asics).
  * returns 0 on success, error on failure.
  */
-static int amdgpu_atif_verify_interface(acpi_handle handle,
-		struct amdgpu_atif *atif)
+static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif)
 {
 	union acpi_object *info;
 	struct atif_verify_interface output;
 	size_t size;
 	int err = 0;
 
-	info = amdgpu_atif_call(handle, ATIF_FUNCTION_VERIFY_INTERFACE, NULL);
+	info = amdgpu_atif_call(atif, ATIF_FUNCTION_VERIFY_INTERFACE, NULL);
 	if (!info)
 		return -EIO;
 
@@ -213,6 +216,35 @@ static int amdgpu_atif_verify_interface(acpi_handle handle,
 	return err;
 }
 
+static acpi_handle amdgpu_atif_probe_handle(acpi_handle dhandle)
+{
+	acpi_handle handle = NULL;
+	char acpi_method_name[255] = { 0 };
+	struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name };
+	acpi_status status;
+
+	/* For PX/HG systems, ATIF and ATPX are in the iGPU's namespace, on dGPU only
+	 * systems, ATIF is in the dGPU's namespace.
+	 */
+	status = acpi_get_handle(dhandle, "ATIF", &handle);
+	if (ACPI_SUCCESS(status))
+		goto out;
+
+	if (amdgpu_has_atpx()) {
+		status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATIF",
+					 &handle);
+		if (ACPI_SUCCESS(status))
+			goto out;
+	}
+
+	DRM_DEBUG_DRIVER("No ATIF handle found\n");
+	return NULL;
+out:
+	acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer);
+	DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name);
+	return handle;
+}
+
 /**
  * amdgpu_atif_get_notification_params - determine notify configuration
  *
@@ -225,15 +257,16 @@ static int amdgpu_atif_verify_interface(acpi_handle handle,
  * where n is specified in the result if a notifier is used.
  * Returns 0 on success, error on failure.
  */
-static int amdgpu_atif_get_notification_params(acpi_handle handle,
-		struct amdgpu_atif_notification_cfg *n)
+static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif)
 {
 	union acpi_object *info;
+	struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg;
 	struct atif_system_params params;
 	size_t size;
 	int err = 0;
 
-	info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS, NULL);
+	info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS,
+				NULL);
 	if (!info) {
 		err = -EIO;
 		goto out;
@@ -287,14 +320,15 @@ static int amdgpu_atif_get_notification_params(acpi_handle handle,
  * (all asics).
  * Returns 0 on success, error on failure.
  */
-static int amdgpu_atif_get_sbios_requests(acpi_handle handle,
-		struct atif_sbios_requests *req)
+static int amdgpu_atif_get_sbios_requests(struct amdgpu_atif *atif,
+					  struct atif_sbios_requests *req)
 {
 	union acpi_object *info;
 	size_t size;
 	int count = 0;
 
-	info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS, NULL);
+	info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS,
+				NULL);
 	if (!info)
 		return -EIO;
 
@@ -327,11 +361,10 @@ static int amdgpu_atif_get_sbios_requests(acpi_handle handle,
  * Returns NOTIFY code
  */
 static int amdgpu_atif_handler(struct amdgpu_device *adev,
-			struct acpi_bus_event *event)
+			       struct acpi_bus_event *event)
 {
 	struct amdgpu_atif *atif = adev->atif;
 	struct atif_sbios_requests req;
-	acpi_handle handle;
 	int count;
 
 	DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n",
@@ -347,8 +380,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
 		return NOTIFY_DONE;
 
 	/* Check pending SBIOS requests */
-	handle = ACPI_HANDLE(&adev->pdev->dev);
-	count = amdgpu_atif_get_sbios_requests(handle, &req);
+	count = amdgpu_atif_get_sbios_requests(atif, &req);
 
 	if (count <= 0)
 		return NOTIFY_DONE;
@@ -679,7 +711,7 @@ static int amdgpu_acpi_event(struct notifier_block *nb,
  */
 int amdgpu_acpi_init(struct amdgpu_device *adev)
 {
-	acpi_handle handle;
+	acpi_handle handle, atif_handle;
 	struct amdgpu_atif *atif;
 	struct amdgpu_atcs *atcs = &adev->atcs;
 	int ret;
@@ -696,14 +728,20 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 		DRM_DEBUG_DRIVER("Call to ATCS verify_interface failed: %d\n", ret);
 	}
 
-	/* Call the ATIF method */
+	/* Probe for ATIF, and initialize it if found */
+	atif_handle = amdgpu_atif_probe_handle(handle);
+	if (!atif_handle)
+		goto out;
+
 	atif = kzalloc(sizeof(*atif), GFP_KERNEL);
 	if (!atif) {
 		DRM_WARN("Not enough memory to initialize ATIF\n");
 		goto out;
 	}
+	atif->handle = atif_handle;
 
-	ret = amdgpu_atif_verify_interface(handle, atif);
+	/* Call the ATIF method */
+	ret = amdgpu_atif_verify_interface(atif);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret);
 		kfree(atif);
@@ -739,8 +777,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 	}
 
 	if (atif->functions.system_params) {
-		ret = amdgpu_atif_get_notification_params(handle,
-				&atif->notification_cfg);
+		ret = amdgpu_atif_get_notification_params(atif);
 		if (ret) {
 			DRM_DEBUG_DRIVER("Call to GET_SYSTEM_PARAMS failed: %d\n",
 					ret);

From b63e132b6433a41cf311e8bc382d33fd2b73b505 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Fri, 22 Jun 2018 10:55:46 -0700
Subject: [PATCH 102/382] MIPS: Use async IPIs for
 arch_trigger_cpumask_backtrace()

The current MIPS implementation of arch_trigger_cpumask_backtrace() is
broken because it attempts to use synchronous IPIs despite the fact that
it may be run with interrupts disabled.

This means that when arch_trigger_cpumask_backtrace() is invoked, for
example by the RCU CPU stall watchdog, we may:

  - Deadlock due to use of synchronous IPIs with interrupts disabled,
    causing the CPU that's attempting to generate the backtrace output
    to hang itself.

  - Not succeed in generating the desired output from remote CPUs.

  - Produce warnings about this from smp_call_function_many(), for
    example:

    [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks:
    [42760.535755]  0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0
    [42760.547874]  1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0
    [42760.559869]  (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33)
    [42760.568927] ------------[ cut here ]------------
    [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c
    [42760.587839] Modules linked in:
    [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2
    [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8
    [42760.616937]         95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000
    [42760.630095]         00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0
    [42760.643169]         00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0
    [42760.656282]         8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca
    [42760.669424]         ...
    [42760.673919] Call Trace:
    [42760.678672] [<27fde568>] show_stack+0x70/0xf0
    [42760.685417] [<84751641>] dump_stack+0xaa/0xd0
    [42760.692188] [<699d671c>] __warn+0x80/0x92
    [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36
    [42760.705912] [<f7c76c1c>] smp_call_function_many+0x88/0x20c
    [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a
    [42760.722216] [<f845bd33>] rcu_dump_cpu_stacks+0x6a/0x98
    [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac
    [42760.737476] [<059b3b43>] update_process_times+0x18/0x34
    [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38
    [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50
    [42760.759882] [<e56ea39f>] __hrtimer_run_queues+0xc6/0x226
    [42760.767418] [<e88bbcae>] hrtimer_interrupt+0x88/0x19a
    [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a
    [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168
    [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c
    [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86
    [42760.805545] [<b2ada1c7>] gic_irq_dispatch+0xa/0x14
    [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c
    [42760.820086] [<c7521934>] do_IRQ+0x16/0x20
    [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94
    [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78
    [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c
    [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c
    [42760.855693] [<ab9fc705>] flush_tlb_mm+0x2a/0x98
    [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44
    [42760.869628] [<cb259b74>] arch_tlb_finish_mmu+0x26/0x3e
    [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66
    [42760.883907] [<b3fce717>] exit_mmap+0x76/0xea
    [42760.890428] [<c4c8a2f6>] mmput+0x80/0x11a
    [42760.896632] [<a41a08f4>] do_exit+0x1f4/0x80c
    [42760.903158] [<ee01cef6>] do_group_exit+0x20/0x7e
    [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e
    [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c
    [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c
    [42760.931765] ---[ end trace 02aa09da9dc52a60 ]---
    [42760.938342] ------------[ cut here ]------------
    [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8
    ...

This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async
IPIs & smp_call_function_single_async() in order to resolve this
problem. We ensure use of the pre-allocated call_single_data_t
structures is serialized by maintaining a cpumask indicating that
they're busy, and refusing to attempt to send an IPI when a CPU's bit is
set in this mask. This should only happen if a CPU hasn't responded to a
previous backtrace IPI - ie. if it's hung - and we print a warning to
the console in this case.

I've marked this for stable branches as far back as v4.9, to which it
applies cleanly. Strictly speaking the faulty MIPS implementation can be
traced further back to commit 856839b76836 ("MIPS: Add
arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel
versions v3.19 through v4.8 will require further work to backport due to
the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more
trigger_*_cpu_backtrace() methods").

Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/19597/
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v4.9+
Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function")
Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods")
---
 arch/mips/kernel/process.c | 45 +++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index d4cfeb931382..9670e70139fd 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -29,6 +29,7 @@
 #include <linux/kallsyms.h>
 #include <linux/random.h>
 #include <linux/prctl.h>
+#include <linux/nmi.h>
 
 #include <asm/asm.h>
 #include <asm/bootinfo.h>
@@ -655,28 +656,42 @@ unsigned long arch_align_stack(unsigned long sp)
 	return sp & ALMASK;
 }
 
-static void arch_dump_stack(void *info)
+static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
+static struct cpumask backtrace_csd_busy;
+
+static void handle_backtrace(void *info)
 {
-	struct pt_regs *regs;
+	nmi_cpu_backtrace(get_irq_regs());
+	cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
+}
 
-	regs = get_irq_regs();
+static void raise_backtrace(cpumask_t *mask)
+{
+	call_single_data_t *csd;
+	int cpu;
 
-	if (regs)
-		show_regs(regs);
-	else
-		dump_stack();
+	for_each_cpu(cpu, mask) {
+		/*
+		 * If we previously sent an IPI to the target CPU & it hasn't
+		 * cleared its bit in the busy cpumask then it didn't handle
+		 * our previous IPI & it's not safe for us to reuse the
+		 * call_single_data_t.
+		 */
+		if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) {
+			pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n",
+				cpu);
+			continue;
+		}
+
+		csd = &per_cpu(backtrace_csd, cpu);
+		csd->func = handle_backtrace;
+		smp_call_function_single_async(cpu, csd);
+	}
 }
 
 void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
-	long this_cpu = get_cpu();
-
-	if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
-		dump_stack();
-
-	smp_call_function_many(mask, arch_dump_stack, NULL, 1);
-
-	put_cpu();
+	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
 }
 
 int mips_get_process_fp_mode(struct task_struct *task)

From 3203c9010060806ff88c9989aeab4dc8d9a474dc Mon Sep 17 00:00:00 2001
From: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Date: Wed, 27 Jun 2018 17:19:21 +0200
Subject: [PATCH 103/382] test_bpf: flag tests that cannot be jited on s390

Flag with FLAG_EXPECTED_FAIL the BPF_MAXINSNS tests that cannot be jited
on s390 because they exceed BPF_SIZE_MAX and fail when
CONFIG_BPF_JIT_ALWAYS_ON is set. Also set .expected_errcode to -ENOTSUPP
so the tests pass in that case.

Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 lib/test_bpf.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 60aedc879361..08d3d59dca17 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5282,21 +5282,31 @@ static struct bpf_test tests[] = {
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Ctx heavy transformations",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC,
+#endif
 		{ },
 		{
 			{  1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) },
 			{ 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }
 		},
 		.fill_helper = bpf_fill_maxinsns6,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Call heavy transformations",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC | FLAG_NO_DATA,
+#endif
 		{ },
 		{ { 1, 0 }, { 10, 0 } },
 		.fill_helper = bpf_fill_maxinsns7,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Jump heavy test",
@@ -5347,18 +5357,28 @@ static struct bpf_test tests[] = {
 	{
 		"BPF_MAXINSNS: exec all MSH",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC,
+#endif
 		{ 0xfa, 0xfb, 0xfc, 0xfd, },
 		{ { 4, 0xababab83 } },
 		.fill_helper = bpf_fill_maxinsns13,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{
 		"BPF_MAXINSNS: ld_abs+get_processor_id",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
+		CLASSIC | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC,
+#endif
 		{ },
 		{ { 1, 0xbee } },
 		.fill_helper = bpf_fill_ld_abs_get_processor_id,
+		.expected_errcode = -ENOTSUPP,
 	},
 	/*
 	 * LD_IND / LD_ABS on fragmented SKBs

From 4c79579b44b1876444f4d04de31c1a37098a0350 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 26 Jun 2018 16:21:18 -0700
Subject: [PATCH 104/382] bpf: Change bpf_fib_lookup to return lookup status

For ACLs implemented using either FIB rules or FIB entries, the BPF
program needs the FIB lookup status to be able to drop the packet.
Since the bpf_fib_lookup API has not reached a released kernel yet,
change the return code to contain an encoding of the FIB lookup
result and return the nexthop device index in the params struct.

In addition, inform the BPF program of any post FIB lookup reason as
to why the packet needs to go up the stack.

The fib result for unicast routes must have an egress device, so remove
the check that it is non-NULL.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h   | 28 ++++++++++---
 net/core/filter.c          | 86 ++++++++++++++++++++++++--------------
 samples/bpf/xdp_fwd_kern.c |  8 ++--
 3 files changed, 81 insertions(+), 41 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 59b19b6a40d7..b7db3261c62d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1857,7 +1857,8 @@ union bpf_attr {
  *		is resolved), the nexthop address is returned in ipv4_dst
  *		or ipv6_dst based on family, smac is set to mac address of
  *		egress device, dmac is set to nexthop mac address, rt_metric
- *		is set to metric from route (IPv4/IPv6 only).
+ *		is set to metric from route (IPv4/IPv6 only), and ifindex
+ *		is set to the device index of the nexthop from the FIB lookup.
  *
  *             *plen* argument is the size of the passed in struct.
  *             *flags* argument can be a combination of one or more of the
@@ -1873,9 +1874,10 @@ union bpf_attr {
  *             *ctx* is either **struct xdp_md** for XDP programs or
  *             **struct sk_buff** tc cls_act programs.
  *     Return
- *             Egress device index on success, 0 if packet needs to continue
- *             up the stack for further processing or a negative error in case
- *             of failure.
+ *		* < 0 if any input argument is invalid
+ *		*   0 on success (packet is forwarded, nexthop neighbor exists)
+ *		* > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ *		*     packet is not forwarded or needs assist from full stack
  *
  * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
  *	Description
@@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
 #define BPF_FIB_LOOKUP_DIRECT  BIT(0)
 #define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
 
+enum {
+	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
+	BPF_FIB_LKUP_RET_BLACKHOLE,    /* dest is blackholed; can be dropped */
+	BPF_FIB_LKUP_RET_UNREACHABLE,  /* dest is unreachable; can be dropped */
+	BPF_FIB_LKUP_RET_PROHIBIT,     /* dest not allowed; can be dropped */
+	BPF_FIB_LKUP_RET_NOT_FWDED,    /* packet is not forwarded */
+	BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+	BPF_FIB_LKUP_RET_UNSUPP_LWT,   /* fwd requires encapsulation */
+	BPF_FIB_LKUP_RET_NO_NEIGH,     /* no neighbor entry for nh */
+	BPF_FIB_LKUP_RET_FRAG_NEEDED,  /* fragmentation required to fwd */
+};
+
 struct bpf_fib_lookup {
 	/* input:  network family for lookup (AF_INET, AF_INET6)
 	 * output: network family of egress nexthop
@@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
 
 	/* total length of packet from network header - used for MTU check */
 	__u16	tot_len;
-	__u32	ifindex;  /* L3 device index for lookup */
+
+	/* input: L3 device index for lookup
+	 * output: device index from FIB lookup
+	 */
+	__u32	ifindex;
 
 	union {
 		/* inputs to lookup */
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..0ca6907d7efe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
 	memcpy(params->smac, dev->dev_addr, ETH_ALEN);
 	params->h_vlan_TCI = 0;
 	params->h_vlan_proto = 0;
+	params->ifindex = dev->ifindex;
 
-	return dev->ifindex;
+	return 0;
 }
 #endif
 
@@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	/* verify forwarding is enabled on this interface */
 	in_dev = __in_dev_get_rcu(dev);
 	if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
-		return 0;
+		return BPF_FIB_LKUP_RET_FWD_DISABLED;
 
 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
 		fl4.flowi4_iif = 1;
@@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 		tb = fib_get_table(net, tbid);
 		if (unlikely(!tb))
-			return 0;
+			return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
 	} else {
@@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
 	}
 
-	if (err || res.type != RTN_UNICAST)
-		return 0;
+	if (err) {
+		/* map fib lookup errors to RTN_ type */
+		if (err == -EINVAL)
+			return BPF_FIB_LKUP_RET_BLACKHOLE;
+		if (err == -EHOSTUNREACH)
+			return BPF_FIB_LKUP_RET_UNREACHABLE;
+		if (err == -EACCES)
+			return BPF_FIB_LKUP_RET_PROHIBIT;
+
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
+	}
+
+	if (res.type != RTN_UNICAST)
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 	if (res.fi->fib_nhs > 1)
 		fib_select_path(net, &res, &fl4, NULL);
@@ -4144,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (check_mtu) {
 		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
 		if (params->tot_len > mtu)
-			return 0;
+			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
 	nh = &res.fi->fib_nh[res.nh_sel];
 
 	/* do not handle lwt encaps right now */
 	if (nh->nh_lwtstate)
-		return 0;
+		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
 	dev = nh->nh_dev;
-	if (unlikely(!dev))
-		return 0;
-
 	if (nh->nh_gw)
 		params->ipv4_dst = nh->nh_gw;
 
@@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	 * rcu_read_lock_bh is not needed here
 	 */
 	neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
-	if (neigh)
-		return bpf_fib_set_fwd_params(params, neigh, dev);
+	if (!neigh)
+		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
-	return 0;
+	return bpf_fib_set_fwd_params(params, neigh, dev);
 }
 #endif
 
@@ -4190,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 	/* link local addresses are never forwarded */
 	if (rt6_need_strict(dst) || rt6_need_strict(src))
-		return 0;
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 	dev = dev_get_by_index_rcu(net, params->ifindex);
 	if (unlikely(!dev))
@@ -4198,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 	idev = __in6_dev_get_safely(dev);
 	if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
-		return 0;
+		return BPF_FIB_LKUP_RET_FWD_DISABLED;
 
 	if (flags & BPF_FIB_LOOKUP_OUTPUT) {
 		fl6.flowi6_iif = 1;
@@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 		tb = ipv6_stub->fib6_get_table(net, tbid);
 		if (unlikely(!tb))
-			return 0;
+			return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 		f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
 	} else {
@@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	}
 
 	if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
-		return 0;
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
-	if (unlikely(f6i->fib6_flags & RTF_REJECT ||
-	    f6i->fib6_type != RTN_UNICAST))
-		return 0;
+	if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
+		switch (f6i->fib6_type) {
+		case RTN_BLACKHOLE:
+			return BPF_FIB_LKUP_RET_BLACKHOLE;
+		case RTN_UNREACHABLE:
+			return BPF_FIB_LKUP_RET_UNREACHABLE;
+		case RTN_PROHIBIT:
+			return BPF_FIB_LKUP_RET_PROHIBIT;
+		default:
+			return BPF_FIB_LKUP_RET_NOT_FWDED;
+		}
+	}
+
+	if (f6i->fib6_type != RTN_UNICAST)
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
 
 	if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
 		f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	if (check_mtu) {
 		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
 		if (params->tot_len > mtu)
-			return 0;
+			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
 	if (f6i->fib6_nh.nh_lwtstate)
-		return 0;
+		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
 	if (f6i->fib6_flags & RTF_GATEWAY)
 		*dst = f6i->fib6_nh.nh_gw;
@@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	 */
 	neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
 				      ndisc_hashfn, dst, dev);
-	if (neigh)
-		return bpf_fib_set_fwd_params(params, neigh, dev);
+	if (!neigh)
+		return BPF_FIB_LKUP_RET_NO_NEIGH;
 
-	return 0;
+	return bpf_fib_set_fwd_params(params, neigh, dev);
 }
 #endif
 
@@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
 {
 	struct net *net = dev_net(skb->dev);
-	int index = -EAFNOSUPPORT;
+	int rc = -EAFNOSUPPORT;
 
 	if (plen < sizeof(*params))
 		return -EINVAL;
@@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
 	switch (params->family) {
 #if IS_ENABLED(CONFIG_INET)
 	case AF_INET:
-		index = bpf_ipv4_fib_lookup(net, params, flags, false);
+		rc = bpf_ipv4_fib_lookup(net, params, flags, false);
 		break;
 #endif
 #if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
-		index = bpf_ipv6_fib_lookup(net, params, flags, false);
+		rc = bpf_ipv6_fib_lookup(net, params, flags, false);
 		break;
 #endif
 	}
 
-	if (index > 0) {
+	if (!rc) {
 		struct net_device *dev;
 
-		dev = dev_get_by_index_rcu(net, index);
+		dev = dev_get_by_index_rcu(net, params->ifindex);
 		if (!is_skb_forwardable(dev, skb))
-			index = 0;
+			rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
-	return index;
+	return rc;
 }
 
 static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c
index 6673cdb9f55c..a7e94e7ff87d 100644
--- a/samples/bpf/xdp_fwd_kern.c
+++ b/samples/bpf/xdp_fwd_kern.c
@@ -48,9 +48,9 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 	struct ethhdr *eth = data;
 	struct ipv6hdr *ip6h;
 	struct iphdr *iph;
-	int out_index;
 	u16 h_proto;
 	u64 nh_off;
+	int rc;
 
 	nh_off = sizeof(*eth);
 	if (data + nh_off > data_end)
@@ -101,7 +101,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 
 	fib_params.ifindex = ctx->ingress_ifindex;
 
-	out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
+	rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
 
 	/* verify egress index has xdp support
 	 * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
@@ -109,7 +109,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 	 * NOTE: without verification that egress index supports XDP
 	 *       forwarding packets are dropped.
 	 */
-	if (out_index > 0) {
+	if (rc == 0) {
 		if (h_proto == htons(ETH_P_IP))
 			ip_decrease_ttl(iph);
 		else if (h_proto == htons(ETH_P_IPV6))
@@ -117,7 +117,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
 
 		memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
 		memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
-		return bpf_redirect_map(&tx_port, out_index, 0);
+		return bpf_redirect_map(&tx_port, fib_params.ifindex, 0);
 	}
 
 	return XDP_PASS;

From b62cc6fdd793eaac50e4191c8637ffff9e9574d6 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 18 Jun 2018 10:07:35 -0700
Subject: [PATCH 105/382] libnvdimm, pmem: Fix memcpy_mcsafe() return code
 handling in nsio_rw_bytes()

Commit 60622d68227d "x86/asm/memcpy_mcsafe: Return bytes remaining"
converted callers of memcpy_mcsafe() to expect a positive 'bytes
remaining' value rather than a negative error code. The nsio_rw_bytes()
conversion failed to return success. The failure is benign in that
nsio_rw_bytes() will end up writing back what it just read.

Fixes: 60622d68227d ("x86/asm/memcpy_mcsafe: Return bytes remaining")
Cc: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/claim.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 2e96b34bc936..fb667bf469c7 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -278,6 +278,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 			return -EIO;
 		if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
 			return -EIO;
+		return 0;
 	}
 
 	if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {

From 5a14e91d559aee5bdb0e002e1153fd9c4338a29e Mon Sep 17 00:00:00 2001
From: Jeff Moyer <jmoyer@redhat.com>
Date: Wed, 27 Jun 2018 11:43:58 -0400
Subject: [PATCH 106/382] dev-dax: check_vma: ratelimit dev_info-s

This is easily triggered from userspace, so let's ratelimit the
messages.

Signed-off-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/device.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index de2f8297a210..108c37fca782 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -189,14 +189,16 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
 
 	/* prevent private mappings from being established */
 	if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) {
-		dev_info(dev, "%s: %s: fail, attempted private mapping\n",
+		dev_info_ratelimited(dev,
+				"%s: %s: fail, attempted private mapping\n",
 				current->comm, func);
 		return -EINVAL;
 	}
 
 	mask = dax_region->align - 1;
 	if (vma->vm_start & mask || vma->vm_end & mask) {
-		dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
+		dev_info_ratelimited(dev,
+				"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
 				current->comm, func, vma->vm_start, vma->vm_end,
 				mask);
 		return -EINVAL;
@@ -204,13 +206,15 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
 
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
 			&& (vma->vm_flags & VM_DONTCOPY) == 0) {
-		dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n",
+		dev_info_ratelimited(dev,
+				"%s: %s: fail, dax range requires MADV_DONTFORK\n",
 				current->comm, func);
 		return -EINVAL;
 	}
 
 	if (!vma_is_dax(vma)) {
-		dev_info(dev, "%s: %s: fail, vma is not DAX capable\n",
+		dev_info_ratelimited(dev,
+				"%s: %s: fail, vma is not DAX capable\n",
 				current->comm, func);
 		return -EINVAL;
 	}

From e7441c9274a6a5453e06f4c2b8b5f72eca0a3f17 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 19 Jun 2018 10:39:50 -0500
Subject: [PATCH 107/382] mac80211: disable BHs/preemption in
 ieee80211_tx_control_port()

On pre-emption enabled kernels the following print was being seen due to
missing local_bh_disable/local_bh_enable calls.  mac80211 assumes that
pre-emption is disabled in the data path.

    BUG: using smp_processor_id() in preemptible [00000000] code: iwd/517
    caller is __ieee80211_subif_start_xmit+0x144/0x210 [mac80211]
    [...]
    Call Trace:
    dump_stack+0x5c/0x80
    check_preemption_disabled.cold.0+0x46/0x51
    __ieee80211_subif_start_xmit+0x144/0x210 [mac80211]

Fixes: 911806491425 ("mac80211: Add support for tx_control_port")
Signed-off-by: Denis Kenzior <denkenz@gmail.com>
[commit message rewrite, fixes tag]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 44b5dfe8727d..fa1f1e63a264 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4845,7 +4845,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 	skb_reset_network_header(skb);
 	skb_reset_mac_header(skb);
 
+	local_bh_disable();
 	__ieee80211_subif_start_xmit(skb, skb->dev, flags);
+	local_bh_enable();
 
 	return 0;
 }

From 188f60ab8e787fcbb5ac9d64ede23a0070231f09 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Sun, 24 Jun 2018 21:10:49 -0400
Subject: [PATCH 108/382] nl80211: relax ht operation checks for mesh

Commit 9757235f451c, "nl80211: correct checks for
NL80211_MESHCONF_HT_OPMODE value") relaxed the range for the HT
operation field in meshconf, while also adding checks requiring
the non-greenfield and non-ht-sta bits to be set in certain
circumstances.  The latter bit is actually reserved for mesh BSSes
according to Table 9-168 in 802.11-2016, so in fact it should not
be set.

wpa_supplicant sets these bits because the mesh and AP code share
the same implementation, but authsae does not.  As a result, some
meshconf updates from authsae which set only the NONHT_MIXED
protection bits were being rejected.

In order to avoid breaking userspace by changing the rules again,
simply accept the values with or without the bits set, and mask
off the reserved bit to match the spec.

While in here, update the 802.11-2012 reference to 802.11-2016.

Fixes: 9757235f451c ("nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value")
Cc: Masashi Honma <masashi.honma@gmail.com>
Signed-off-by: Bob Copeland <bobcopeland@fb.com>
Reviewed-by: Masashi Honma <masashi.honma@gmail.com>
Reviewed-by: Masashi Honma <masashi.honma@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c7bbe5f0aae8..351eeaf16abe 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6231,7 +6231,7 @@ do {									    \
 				  nl80211_check_s32);
 	/*
 	 * Check HT operation mode based on
-	 * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+	 * IEEE 802.11-2016 9.4.2.57 HT Operation element.
 	 */
 	if (tb[NL80211_MESHCONF_HT_OPMODE]) {
 		ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
@@ -6241,22 +6241,9 @@ do {									    \
 				  IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
 			return -EINVAL;
 
-		if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
-		    (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
-			return -EINVAL;
+		/* NON_HT_STA bit is reserved, but some programs set it */
+		ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
 
-		switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
-		case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
-		case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
-			if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
-				return -EINVAL;
-			break;
-		case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
-		case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
-			if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
-				return -EINVAL;
-			break;
-		}
 		cfg->ht_opmode = ht_opmode;
 		mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1));
 	}

From 95bca62fb723a121954fc7ae5473bb2c1f0d5986 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 29 Jun 2018 09:33:39 +0200
Subject: [PATCH 109/382] nl80211: check nla_parse_nested() return values

At the very least we should check the return value if
nla_parse_nested() is called with a non-NULL policy.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 351eeaf16abe..4eece06be1e7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10949,9 +10949,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 				    rem) {
 			u8 *mask_pat;
 
-			nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
-					 nl80211_packet_pattern_policy,
-					 info->extack);
+			err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+					       nl80211_packet_pattern_policy,
+					       info->extack);
+			if (err)
+				goto error;
+
 			err = -EINVAL;
 			if (!pat_tb[NL80211_PKTPAT_MASK] ||
 			    !pat_tb[NL80211_PKTPAT_PATTERN])
@@ -11200,8 +11203,11 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
 			    rem) {
 		u8 *mask_pat;
 
-		nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
-				 nl80211_packet_pattern_policy, NULL);
+		err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+				       nl80211_packet_pattern_policy, NULL);
+		if (err)
+			return err;
+
 		if (!pat_tb[NL80211_PKTPAT_MASK] ||
 		    !pat_tb[NL80211_PKTPAT_PATTERN])
 			return -EINVAL;

From 69705ad21029b6e92c9facb0eb12cde1a4fd52b7 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:06 +0200
Subject: [PATCH 110/382] drm/exynos: ipp: Rework checking for the correct
 buffer formats

Prepare a common function for size and scale checks and call it for
source and destination buffers. Then also move there the state-less checks
from exynos_drm_ipp_task_setup_buffer, so the format information is already
available in limits processing. Finally perform the IPP_LIMIT_BUFFER check
on the real width of the buffer (the width calculated from the provided
buffer pitch).

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_ipp.c | 108 +++++++++++++-----------
 1 file changed, 57 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index 26374e58c557..8840e6ec8e43 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -345,27 +345,6 @@ static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf,
 	int ret = 0;
 	int i;
 
-	/* basic checks */
-	if (buf->buf.width == 0 || buf->buf.height == 0)
-		return -EINVAL;
-	buf->format = drm_format_info(buf->buf.fourcc);
-	for (i = 0; i < buf->format->num_planes; i++) {
-		unsigned int width = (i == 0) ? buf->buf.width :
-			     DIV_ROUND_UP(buf->buf.width, buf->format->hsub);
-
-		if (buf->buf.pitch[i] == 0)
-			buf->buf.pitch[i] = width * buf->format->cpp[i];
-		if (buf->buf.pitch[i] < width * buf->format->cpp[i])
-			return -EINVAL;
-		if (!buf->buf.gem_id[i])
-			return -ENOENT;
-	}
-
-	/* pitch for additional planes must match */
-	if (buf->format->num_planes > 2 &&
-	    buf->buf.pitch[1] != buf->buf.pitch[2])
-		return -EINVAL;
-
 	/* get GEM buffers and check their size */
 	for (i = 0; i < buf->format->num_planes; i++) {
 		unsigned int height = (i == 0) ? buf->buf.height :
@@ -495,12 +474,13 @@ static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf,
 	enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA;
 	struct drm_ipp_limit l;
 	struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v;
+	int real_width = buf->buf.pitch[0] / buf->format->cpp[0];
 
 	if (!limits)
 		return 0;
 
 	__get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l);
-	if (!__size_limit_check(buf->buf.width, &l.h) ||
+	if (!__size_limit_check(real_width, &l.h) ||
 	    !__size_limit_check(buf->buf.height, &l.v))
 		return -EINVAL;
 
@@ -560,10 +540,62 @@ static int exynos_drm_ipp_check_scale_limits(
 	return 0;
 }
 
+static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task,
+				       struct exynos_drm_ipp_buffer *buf,
+				       struct exynos_drm_ipp_buffer *src,
+				       struct exynos_drm_ipp_buffer *dst,
+				       bool rotate, bool swap)
+{
+	const struct exynos_drm_ipp_formats *fmt;
+	int ret, i;
+
+	fmt = __ipp_format_get(task->ipp, buf->buf.fourcc, buf->buf.modifier,
+			       buf == src ? DRM_EXYNOS_IPP_FORMAT_SOURCE :
+					    DRM_EXYNOS_IPP_FORMAT_DESTINATION);
+	if (!fmt) {
+		DRM_DEBUG_DRIVER("Task %pK: %s format not supported\n", task,
+				 buf == src ? "src" : "dst");
+		return -EINVAL;
+	}
+
+	/* basic checks */
+	if (buf->buf.width == 0 || buf->buf.height == 0)
+		return -EINVAL;
+
+	buf->format = drm_format_info(buf->buf.fourcc);
+	for (i = 0; i < buf->format->num_planes; i++) {
+		unsigned int width = (i == 0) ? buf->buf.width :
+			     DIV_ROUND_UP(buf->buf.width, buf->format->hsub);
+
+		if (buf->buf.pitch[i] == 0)
+			buf->buf.pitch[i] = width * buf->format->cpp[i];
+		if (buf->buf.pitch[i] < width * buf->format->cpp[i])
+			return -EINVAL;
+		if (!buf->buf.gem_id[i])
+			return -ENOENT;
+	}
+
+	/* pitch for additional planes must match */
+	if (buf->format->num_planes > 2 &&
+	    buf->buf.pitch[1] != buf->buf.pitch[2])
+		return -EINVAL;
+
+	/* check driver limits */
+	ret = exynos_drm_ipp_check_size_limits(buf, fmt->limits,
+					       fmt->num_limits,
+					       rotate,
+					       buf == dst ? swap : false);
+	if (ret)
+		return ret;
+	ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
+						fmt->limits,
+						fmt->num_limits, swap);
+	return ret;
+}
+
 static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
 {
 	struct exynos_drm_ipp *ipp = task->ipp;
-	const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt;
 	struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
 	unsigned int rotation = task->transform.rotation;
 	int ret = 0;
@@ -607,37 +639,11 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
 		return -EINVAL;
 	}
 
-	src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier,
-				   DRM_EXYNOS_IPP_FORMAT_SOURCE);
-	if (!src_fmt) {
-		DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task);
-		return -EINVAL;
-	}
-	ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits,
-					       src_fmt->num_limits,
-					       rotate, false);
-	if (ret)
-		return ret;
-	ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
-						src_fmt->limits,
-						src_fmt->num_limits, swap);
+	ret = exynos_drm_ipp_check_format(task, src, src, dst, rotate, swap);
 	if (ret)
 		return ret;
 
-	dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier,
-				   DRM_EXYNOS_IPP_FORMAT_DESTINATION);
-	if (!dst_fmt) {
-		DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task);
-		return -EINVAL;
-	}
-	ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits,
-					       dst_fmt->num_limits,
-					       false, swap);
-	if (ret)
-		return ret;
-	ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
-						dst_fmt->limits,
-						dst_fmt->num_limits, swap);
+	ret = exynos_drm_ipp_check_format(task, dst, src, dst, false, swap);
 	if (ret)
 		return ret;
 

From 1b0966c3e951da6ae523c4e954b1f43b22927948 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:07 +0200
Subject: [PATCH 111/382] drm/exynos: rotator: Fix DRM_MODE_REFLECT_{X,Y}
 interpretation

Horizontal (DRM_MODE_REFLECT_Y) and vertical (DMR_MODE_REFLECT_Y) flip
were swapped in Rotator driver. Fix this by swapping code for interpreting
them.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_rotator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 1a76dd3d52e1..a820a68429b9 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -168,9 +168,9 @@ static void rotator_dst_set_transf(struct rot_context *rot,
 	val &= ~ROT_CONTROL_FLIP_MASK;
 
 	if (rotation & DRM_MODE_REFLECT_X)
-		val |= ROT_CONTROL_FLIP_HORIZONTAL;
-	if (rotation & DRM_MODE_REFLECT_Y)
 		val |= ROT_CONTROL_FLIP_VERTICAL;
+	if (rotation & DRM_MODE_REFLECT_Y)
+		val |= ROT_CONTROL_FLIP_HORIZONTAL;
 
 	val &= ~ROT_CONTROL_ROT_MASK;
 

From 280e54c9f614c88292685383cf2d65057586e9fb Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Thu, 7 Jun 2018 13:06:08 +0200
Subject: [PATCH 112/382] drm/exynos: scaler: Reset hardware before starting
 the operation

Ensure that Scaler hardware is properly reset and interrupts are cleared
before processing next image.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_scaler.c | 32 ++++++++++++++++++++--
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index 91d4382343d0..d25069e88186 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -30,6 +30,7 @@
 #define scaler_write(cfg, offset)	writel(cfg, scaler->regs + (offset))
 #define SCALER_MAX_CLK			4
 #define SCALER_AUTOSUSPEND_DELAY	2000
+#define SCALER_RESET_WAIT_RETRIES	100
 
 struct scaler_data {
 	const char	*clk_name[SCALER_MAX_CLK];
@@ -100,6 +101,23 @@ static u32 scaler_get_format(u32 drm_fmt)
 	return 0;
 }
 
+static inline int scaler_reset(struct scaler_context *scaler)
+{
+	int retry = SCALER_RESET_WAIT_RETRIES;
+
+	scaler_write(SCALER_CFG_SOFT_RESET, SCALER_CFG);
+	do {
+		cpu_relax();
+	} while (retry > 1 &&
+		 scaler_read(SCALER_CFG) & SCALER_CFG_SOFT_RESET);
+	do {
+		cpu_relax();
+		scaler_write(1, SCALER_INT_EN);
+	} while (retry > 0 && scaler_read(SCALER_INT_EN) != 1);
+
+	return retry ? 0 : -EIO;
+}
+
 static inline void scaler_enable_int(struct scaler_context *scaler)
 {
 	u32 val;
@@ -354,9 +372,13 @@ static int scaler_commit(struct exynos_drm_ipp *ipp,
 	u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc);
 	struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect;
 
-	scaler->task = task;
-
 	pm_runtime_get_sync(scaler->dev);
+	if (scaler_reset(scaler)) {
+		pm_runtime_put(scaler->dev);
+		return -EIO;
+	}
+
+	scaler->task = task;
 
 	scaler_set_src_fmt(scaler, src_fmt);
 	scaler_set_src_base(scaler, &task->src);
@@ -394,7 +416,11 @@ static inline void scaler_disable_int(struct scaler_context *scaler)
 
 static inline u32 scaler_get_int_status(struct scaler_context *scaler)
 {
-	return scaler_read(SCALER_INT_STATUS);
+	u32 val = scaler_read(SCALER_INT_STATUS);
+
+	scaler_write(val, SCALER_INT_STATUS);
+
+	return val;
 }
 
 static inline int scaler_task_done(u32 val)

From 4e1a6230313ad76c08633fa57853349de56f60a2 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:09 +0200
Subject: [PATCH 113/382] drm/exynos: scaler: Fix support for YUV420, YUV422
 and YUV444 modes

Fix Cb/CR components order in two-planar YUV420, YUV422 and YUV444 modes.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_scaler.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
index d25069e88186..0ddb6eec7b11 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -52,9 +52,9 @@ struct scaler_context {
 static u32 scaler_get_format(u32 drm_fmt)
 {
 	switch (drm_fmt) {
-	case DRM_FORMAT_NV21:
-		return SCALER_YUV420_2P_UV;
 	case DRM_FORMAT_NV12:
+		return SCALER_YUV420_2P_UV;
+	case DRM_FORMAT_NV21:
 		return SCALER_YUV420_2P_VU;
 	case DRM_FORMAT_YUV420:
 		return SCALER_YUV420_3P;
@@ -64,15 +64,15 @@ static u32 scaler_get_format(u32 drm_fmt)
 		return SCALER_YUV422_1P_UYVY;
 	case DRM_FORMAT_YVYU:
 		return SCALER_YUV422_1P_YVYU;
-	case DRM_FORMAT_NV61:
-		return SCALER_YUV422_2P_UV;
 	case DRM_FORMAT_NV16:
+		return SCALER_YUV422_2P_UV;
+	case DRM_FORMAT_NV61:
 		return SCALER_YUV422_2P_VU;
 	case DRM_FORMAT_YUV422:
 		return SCALER_YUV422_3P;
-	case DRM_FORMAT_NV42:
-		return SCALER_YUV444_2P_UV;
 	case DRM_FORMAT_NV24:
+		return SCALER_YUV444_2P_UV;
+	case DRM_FORMAT_NV42:
 		return SCALER_YUV444_2P_VU;
 	case DRM_FORMAT_YUV444:
 		return SCALER_YUV444_3P;

From 4958a1c0c9c4a48d6ba9e2b184d93cab0dce68e1 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:10 +0200
Subject: [PATCH 114/382] drm/exynos: gsc: Use real buffer width for
 configuring the hardware

DMA hardware should respect buffer pitch, so use the width calculated from
the buffer pitch instead of the virtual one.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_gsc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 35ac66730563..8af7f167015b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -577,7 +577,7 @@ static void gsc_src_set_size(struct gsc_context *ctx,
 	cfg &= ~(GSC_SRCIMG_HEIGHT_MASK |
 		GSC_SRCIMG_WIDTH_MASK);
 
-	cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) |
+	cfg |= (GSC_SRCIMG_WIDTH(buf->buf.pitch[0] / buf->format->cpp[0]) |
 		GSC_SRCIMG_HEIGHT(buf->buf.height));
 
 	gsc_write(cfg, GSC_SRCIMG_SIZE);
@@ -868,7 +868,7 @@ static void gsc_dst_set_size(struct gsc_context *ctx,
 	/* original size */
 	cfg = gsc_read(GSC_DSTIMG_SIZE);
 	cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK);
-	cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) |
+	cfg |= GSC_DSTIMG_WIDTH(buf->buf.pitch[0] / buf->format->cpp[0]) |
 	       GSC_DSTIMG_HEIGHT(buf->buf.height);
 	gsc_write(cfg, GSC_DSTIMG_SIZE);
 

From 28b676329cc1adfa37b5291e13055e0819a80e42 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:11 +0200
Subject: [PATCH 115/382] drm/exynos: gsc: Increase Exynos5433 buffer width
 alignment to 16 pixels

Investigation revealed that GScaler hardware requires the real buffer width
(pitch) to be aligned to 16 pixels.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 8af7f167015b..246300771d55 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1341,7 +1341,7 @@ static const struct drm_exynos_ipp_limit gsc_5420_limits[] = {
 };
 
 static const struct drm_exynos_ipp_limit gsc_5433_limits[] = {
-	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) },
+	{ IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 16 }, .v = { 16, 8191, 2 }) },
 	{ IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) },
 	{ IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) },
 	{ IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },

From 4cc11a5f53049c8a2b02763a53ecd42371355c10 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:12 +0200
Subject: [PATCH 116/382] drm/exynos: gsc: Fix DRM_MODE_REFLECT_{X,Y}
 interpretation

Horizontal (DRM_MODE_REFLECT_Y) and vertical (DMR_MODE_REFLECT_Y) flip
were swapped in GScaler driver. Fix this by swapping code for interpreting
them.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_gsc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 246300771d55..177e31c3a51d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -523,30 +523,30 @@ static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation)
 
 	switch (degree) {
 	case DRM_MODE_ROTATE_0:
-		if (rotation & DRM_MODE_REFLECT_Y)
-			cfg |= GSC_IN_ROT_XFLIP;
 		if (rotation & DRM_MODE_REFLECT_X)
+			cfg |= GSC_IN_ROT_XFLIP;
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg |= GSC_IN_ROT_YFLIP;
 		break;
 	case DRM_MODE_ROTATE_90:
 		cfg |= GSC_IN_ROT_90;
-		if (rotation & DRM_MODE_REFLECT_Y)
-			cfg |= GSC_IN_ROT_XFLIP;
 		if (rotation & DRM_MODE_REFLECT_X)
+			cfg |= GSC_IN_ROT_XFLIP;
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg |= GSC_IN_ROT_YFLIP;
 		break;
 	case DRM_MODE_ROTATE_180:
 		cfg |= GSC_IN_ROT_180;
-		if (rotation & DRM_MODE_REFLECT_Y)
-			cfg &= ~GSC_IN_ROT_XFLIP;
 		if (rotation & DRM_MODE_REFLECT_X)
+			cfg &= ~GSC_IN_ROT_XFLIP;
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg &= ~GSC_IN_ROT_YFLIP;
 		break;
 	case DRM_MODE_ROTATE_270:
 		cfg |= GSC_IN_ROT_270;
-		if (rotation & DRM_MODE_REFLECT_Y)
-			cfg &= ~GSC_IN_ROT_XFLIP;
 		if (rotation & DRM_MODE_REFLECT_X)
+			cfg &= ~GSC_IN_ROT_XFLIP;
+		if (rotation & DRM_MODE_REFLECT_Y)
 			cfg &= ~GSC_IN_ROT_YFLIP;
 		break;
 	}

From dd209ef809080ced903e7747ee3ef640c923a1d2 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:13 +0200
Subject: [PATCH 117/382] drm/exynos: gsc: Fix support for NV16/61,
 YUV420/YVU420 and YUV422 modes

Fix following issues related to planar YUV pixel format configuration:
- NV16/61 modes were incorrectly programmed as NV12/21,
- YVU420 was programmed as YUV420 on source,
- YVU420 and YUV422 were programmed as YUV420 on output.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_gsc.c | 29 +++++++++++++++++--------
 drivers/gpu/drm/exynos/regs-gsc.h       |  1 +
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 177e31c3a51d..7ba414b52faa 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -492,21 +492,25 @@ static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt)
 			GSC_IN_CHROMA_ORDER_CRCB);
 		break;
 	case DRM_FORMAT_NV21:
+		cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_2P);
+		break;
 	case DRM_FORMAT_NV61:
-		cfg |= (GSC_IN_CHROMA_ORDER_CRCB |
-			GSC_IN_YUV420_2P);
+		cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV422_2P);
 		break;
 	case DRM_FORMAT_YUV422:
 		cfg |= GSC_IN_YUV422_3P;
 		break;
 	case DRM_FORMAT_YUV420:
+		cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_3P);
+		break;
 	case DRM_FORMAT_YVU420:
-		cfg |= GSC_IN_YUV420_3P;
+		cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_3P);
 		break;
 	case DRM_FORMAT_NV12:
+		cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P);
+		break;
 	case DRM_FORMAT_NV16:
-		cfg |= (GSC_IN_CHROMA_ORDER_CBCR |
-			GSC_IN_YUV420_2P);
+		cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV422_2P);
 		break;
 	}
 
@@ -672,18 +676,25 @@ static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt)
 			GSC_OUT_CHROMA_ORDER_CRCB);
 		break;
 	case DRM_FORMAT_NV21:
-	case DRM_FORMAT_NV61:
 		cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_2P);
 		break;
+	case DRM_FORMAT_NV61:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV422_2P);
+		break;
 	case DRM_FORMAT_YUV422:
+		cfg |= GSC_OUT_YUV422_3P;
+		break;
 	case DRM_FORMAT_YUV420:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_3P);
+		break;
 	case DRM_FORMAT_YVU420:
-		cfg |= GSC_OUT_YUV420_3P;
+		cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_3P);
 		break;
 	case DRM_FORMAT_NV12:
+		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P);
+		break;
 	case DRM_FORMAT_NV16:
-		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR |
-			GSC_OUT_YUV420_2P);
+		cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV422_2P);
 		break;
 	}
 
diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h
index 4704a993cbb7..16b39734115c 100644
--- a/drivers/gpu/drm/exynos/regs-gsc.h
+++ b/drivers/gpu/drm/exynos/regs-gsc.h
@@ -138,6 +138,7 @@
 #define GSC_OUT_YUV420_3P		(3 << 4)
 #define GSC_OUT_YUV422_1P		(4 << 4)
 #define GSC_OUT_YUV422_2P		(5 << 4)
+#define GSC_OUT_YUV422_3P		(6 << 4)
 #define GSC_OUT_YUV444			(7 << 4)
 #define GSC_OUT_TILE_TYPE_MASK		(1 << 2)
 #define GSC_OUT_TILE_C_16x8		(0 << 2)

From 5d5657aac0b40867fe57c50d395e9bb08274ceb8 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:06:14 +0200
Subject: [PATCH 118/382] drm/exynos: fimc: Use real buffer width for
 configuring the hardware

DMA hardware should respect buffer pitch, so use the width calculated from
the buffer pitch instead of the virtual one.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fimc.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 6127ef25acd6..e8d0670bb5f8 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -470,17 +470,18 @@ static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation)
 static void fimc_set_window(struct fimc_context *ctx,
 			    struct exynos_drm_ipp_buffer *buf)
 {
+	unsigned int real_width = buf->buf.pitch[0] / buf->format->cpp[0];
 	u32 cfg, h1, h2, v1, v2;
 
 	/* cropped image */
 	h1 = buf->rect.x;
-	h2 = buf->buf.width - buf->rect.w - buf->rect.x;
+	h2 = real_width - buf->rect.w - buf->rect.x;
 	v1 = buf->rect.y;
 	v2 = buf->buf.height - buf->rect.h - buf->rect.y;
 
 	DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n",
 		buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h,
-		buf->buf.width, buf->buf.height);
+		real_width, buf->buf.height);
 	DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2);
 
 	/*
@@ -503,12 +504,13 @@ static void fimc_set_window(struct fimc_context *ctx,
 static void fimc_src_set_size(struct fimc_context *ctx,
 			      struct exynos_drm_ipp_buffer *buf)
 {
+	unsigned int real_width = buf->buf.pitch[0] / buf->format->cpp[0];
 	u32 cfg;
 
-	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
+	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", real_width, buf->buf.height);
 
 	/* original size */
-	cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) |
+	cfg = (EXYNOS_ORGISIZE_HORIZONTAL(real_width) |
 		EXYNOS_ORGISIZE_VERTICAL(buf->buf.height));
 
 	fimc_write(ctx, cfg, EXYNOS_ORGISIZE);
@@ -529,7 +531,7 @@ static void fimc_src_set_size(struct fimc_context *ctx,
 	 * for now, we support only ITU601 8 bit mode
 	 */
 	cfg = (EXYNOS_CISRCFMT_ITU601_8BIT |
-		EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) |
+		EXYNOS_CISRCFMT_SOURCEHSIZE(real_width) |
 		EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height));
 	fimc_write(ctx, cfg, EXYNOS_CISRCFMT);
 
@@ -842,12 +844,13 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc)
 static void fimc_dst_set_size(struct fimc_context *ctx,
 			     struct exynos_drm_ipp_buffer *buf)
 {
+	unsigned int real_width = buf->buf.pitch[0] / buf->format->cpp[0];
 	u32 cfg, cfg_ext;
 
-	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
+	DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", real_width, buf->buf.height);
 
 	/* original size */
-	cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) |
+	cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(real_width) |
 		EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height));
 
 	fimc_write(ctx, cfg, EXYNOS_ORGOSIZE);

From ab337fc274a1957ff0771f19e826c736253f7c39 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:07:40 +0200
Subject: [PATCH 119/382] drm/exynos: decon5433: Fix per-plane global alpha for
 XRGB modes

Set per-plane global alpha to maximum value to get proper blending of
XRGB and ARGB planes. This fixes the strange order of overlapping planes.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 82c95c34447f..92d28b5b1077 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -356,8 +356,8 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc,
 		writel(val, ctx->addr + DECON_VIDOSDxB(win));
 	}
 
-	val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) |
-		VIDOSD_Wx_ALPHA_B_F(0x0);
+	val = VIDOSD_Wx_ALPHA_R_F(0xff) | VIDOSD_Wx_ALPHA_G_F(0xff) |
+		VIDOSD_Wx_ALPHA_B_F(0xff);
 	writel(val, ctx->addr + DECON_VIDOSDxC(win));
 
 	val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) |

From 7b7aa62c05eac9789c208b946f515983a9255d8d Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 7 Jun 2018 13:07:49 +0200
Subject: [PATCH 120/382] drm/exynos: decon5433: Fix WINCONx reset value

The only bits that should be preserved in decon_win_set_fmt() is
WINCONx_ENWIN_F. All other bits depends on the selected pixel formats and
are set by the mentioned function.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 92d28b5b1077..e868773ea509 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -265,7 +265,7 @@ static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win,
 	unsigned long val;
 
 	val = readl(ctx->addr + DECON_WINCONx(win));
-	val &= ~WINCONx_BPPMODE_MASK;
+	val &= WINCONx_ENWIN_F;
 
 	switch (fb->format->format) {
 	case DRM_FORMAT_XRGB1555:

From e94595b0f59c26581f6ae8a3972b99f56a415b76 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 17 Jun 2018 10:55:29 +0200
Subject: [PATCH 121/382] drm/exynos: ipp: use correct enum type

The limit_id_fallback array uses enum drm_ipp_size_id to index its
content. The content itself is of type enum drm_exynos_ipp_limit_type.

Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_ipp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index 8840e6ec8e43..b435db8fc916 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -407,7 +407,7 @@ enum drm_ipp_size_id {
 	IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX
 };
 
-static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = {
+static const enum drm_exynos_ipp_limit_type limit_id_fallback[IPP_LIMIT_MAX][4] = {
 	[IPP_LIMIT_BUFFER]  = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
 	[IPP_LIMIT_AREA]    = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA,
 				DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },

From e699e2c6a654ff8d7303f5297ab5dd83da7b23e0 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Wed, 27 Jun 2018 15:16:42 -0700
Subject: [PATCH 122/382] net, mm: account sock objects to kmemcg

Currently the kernel accounts the memory for network traffic through
mem_cgroup_[un]charge_skmem() interface. However the memory accounted
only includes the truesize of sk_buff which does not include the size of
sock objects. In our production environment, with opt-out kmem
accounting, the sock kmem caches (TCP[v6], UDP[v6], RAW[v6], UNIX) are
among the top most charged kmem caches and consume a significant amount
of memory which can not be left as system overhead. So, this patch
converts the kmem caches of all sock objects to SLAB_ACCOUNT.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index bcc41829a16d..9e8f65585b81 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3243,7 +3243,8 @@ static int req_prot_init(const struct proto *prot)
 
 	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
 					   rsk_prot->obj_size, 0,
-					   prot->slab_flags, NULL);
+					   SLAB_ACCOUNT | prot->slab_flags,
+					   NULL);
 
 	if (!rsk_prot->slab) {
 		pr_crit("%s: Can't create request sock SLAB cache!\n",
@@ -3258,7 +3259,8 @@ int proto_register(struct proto *prot, int alloc_slab)
 	if (alloc_slab) {
 		prot->slab = kmem_cache_create_usercopy(prot->name,
 					prot->obj_size, 0,
-					SLAB_HWCACHE_ALIGN | prot->slab_flags,
+					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
+					prot->slab_flags,
 					prot->useroffset, prot->usersize,
 					NULL);
 
@@ -3281,6 +3283,7 @@ int proto_register(struct proto *prot, int alloc_slab)
 				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
 						  prot->twsk_prot->twsk_obj_size,
 						  0,
+						  SLAB_ACCOUNT |
 						  prot->slab_flags,
 						  NULL);
 			if (prot->twsk_prot->twsk_slab == NULL)

From 18d405af30bf6506bf2fc49056de7691c949812e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 28 Jun 2018 23:34:57 +0200
Subject: [PATCH 123/382] bpf, arm32: fix to use bpf_jit_binary_lock_ro api

Any eBPF JIT that where its underlying arch supports ARCH_HAS_SET_MEMORY
would need to use bpf_jit_binary_{un,}lock_ro() pair instead of the
set_memory_{ro,rw}() pair directly as otherwise changes to the former
might break. arm32's eBPF conversion missed to change it, so fix this
up here.

Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arm/net/bpf_jit_32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6e8b71613039..f6a62ae44a65 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1844,7 +1844,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		/* there are 2 passes here */
 		bpf_jit_dump(prog->len, image_size, 2, ctx.target);
 
-	set_memory_ro((unsigned long)header, header->pages);
+	bpf_jit_binary_lock_ro(header);
 	prog->bpf_func = (void *)ctx.target;
 	prog->jited = 1;
 	prog->jited_len = image_size;

From f605ce5eb26ac934fb8106d75d46a2c875a2bf23 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 28 Jun 2018 23:34:58 +0200
Subject: [PATCH 124/382] bpf, s390: fix potential memleak when later
 bpf_jit_prog fails

If we would ever fail in the bpf_jit_prog() pass that writes the
actual insns to the image after we got header via bpf_jit_binary_alloc()
then we also need to make sure to free it through bpf_jit_binary_free()
again when bailing out. Given we had prior bpf_jit_prog() passes to
initially probe for clobbered registers, program size and to fill in
addrs arrray for jump targets, this is more of a theoretical one,
but at least make sure this doesn't break with future changes.

Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/s390/net/bpf_jit_comp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index d2db8acb1a55..5f0234ec8038 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -1286,6 +1286,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		goto free_addrs;
 	}
 	if (bpf_jit_prog(&jit, fp)) {
+		bpf_jit_binary_free(header);
 		fp = orig_fp;
 		goto free_addrs;
 	}

From 85782e037f8aba8922dadb24a1523ca0b82ab8bc Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 28 Jun 2018 23:34:59 +0200
Subject: [PATCH 125/382] bpf: undo prog rejection on read-only lock failure

Partially undo commit 9facc336876f ("bpf: reject any prog that failed
read-only lock") since it caused a regression, that is, syzkaller was
able to manage to cause a panic via fault injection deep in set_memory_ro()
path by letting an allocation fail: In x86's __change_page_attr_set_clr()
it was able to change the attributes of the primary mapping but not in
the alias mapping via cpa_process_alias(), so the second, inner call
to the __change_page_attr() via __change_page_attr_set_clr() had to split
a larger page and failed in the alloc_pages() with the artifically triggered
allocation error which is then propagated down to the call site.

Thus, for set_memory_ro() this means that it returned with an error, but
from debugging a probe_kernel_write() revealed EFAULT on that memory since
the primary mapping succeeded to get changed. Therefore the subsequent
hdr->locked = 0 reset triggered the panic as it was performed on read-only
memory, so call-site assumptions were infact wrong to assume that it would
either succeed /or/ not succeed at all since there's no such rollback in
set_memory_*() calls from partial change of mappings, in other words, we're
left in a state that is "half done". A later undo via set_memory_rw() is
succeeding though due to matching permissions on that part (aka due to the
try_preserve_large_page() succeeding). While reproducing locally with
explicitly triggering this error, the initial splitting only happens on
rare occasions and in real world it would additionally need oom conditions,
but that said, it could partially fail. Therefore, it is definitely wrong
to bail out on set_memory_ro() error and reject the program with the
set_memory_*() semantics we have today. Shouldn't have gone the extra mile
since no other user in tree today infact checks for any set_memory_*()
errors, e.g. neither module_enable_ro() / module_disable_ro() for module
RO/NX handling which is mostly default these days nor kprobes core with
alloc_insn_page() / free_insn_page() as examples that could be invoked long
after bootup and original 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit
against spraying attacks") did neither when it got first introduced to BPF
so "improving" with bailing out was clearly not right when set_memory_*()
cannot handle it today.

Kees suggested that if set_memory_*() can fail, we should annotate it with
__must_check, and all callers need to deal with it gracefully given those
set_memory_*() markings aren't "advisory", but they're expected to actually
do what they say. This might be an option worth to move forward in future
but would at the same time require that set_memory_*() calls from supporting
archs are guaranteed to be "atomic" in that they provide rollback if part
of the range fails, once that happened, the transition from RW -> RO could
be made more robust that way, while subsequent RO -> RW transition /must/
continue guaranteeing to always succeed the undo part.

Reported-by: syzbot+a4eb8c7766952a1ca872@syzkaller.appspotmail.com
Reported-by: syzbot+d866d1925855328eac3b@syzkaller.appspotmail.com
Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock")
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 56 ++++++------------------------------------
 kernel/bpf/core.c      | 28 ---------------------
 2 files changed, 8 insertions(+), 76 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20f2659dd829..300baad62c88 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -470,9 +470,7 @@ struct sock_fprog_kern {
 };
 
 struct bpf_binary_header {
-	u16 pages;
-	u16 locked:1;
-
+	u32 pages;
 	/* Some arches need word alignment for their instructions */
 	u8 image[] __aligned(4);
 };
@@ -481,7 +479,7 @@ struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
 				jit_requested:1,/* archs need to JIT the prog */
-				locked:1,	/* Program image locked? */
+				undo_set_mem:1,	/* Passed set_memory_ro() checkpoint */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
 				dst_needed:1,	/* Do we need dst entry? */
@@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	fp->locked = 1;
-	if (set_memory_ro((unsigned long)fp, fp->pages))
-		fp->locked = 0;
-#endif
+	fp->undo_set_mem = 1;
+	set_memory_ro((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (fp->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		fp->locked = 0;
-	}
-#endif
+	if (fp->undo_set_mem)
+		set_memory_rw((unsigned long)fp, fp->pages);
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	hdr->locked = 1;
-	if (set_memory_ro((unsigned long)hdr, hdr->pages))
-		hdr->locked = 0;
-#endif
+	set_memory_ro((unsigned long)hdr, hdr->pages);
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	if (hdr->locked) {
-		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-		/* In case set_memory_rw() fails, we want to be the first
-		 * to crash here instead of some random place later on.
-		 */
-		hdr->locked = 0;
-	}
-#endif
+	set_memory_rw((unsigned long)hdr, hdr->pages);
 }
 
 static inline struct bpf_binary_header *
@@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
 	return (void *)addr;
 }
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
-{
-	if (!fp->locked)
-		return -ENOLCK;
-	if (fp->jited) {
-		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
-
-		if (!hdr->locked)
-			return -ENOLCK;
-	}
-
-	return 0;
-}
-#endif
-
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a9e6c04d0f4a..1e5625d46414 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 	bpf_fill_ill_insns(hdr, size);
 
 	hdr->pages = size / PAGE_SIZE;
-	hdr->locked = 0;
-
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
 	return 0;
 }
 
-static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
-{
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
-	int i, err;
-
-	for (i = 0; i < fp->aux->func_cnt; i++) {
-		err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
-		if (err)
-			return err;
-	}
-
-	return bpf_prog_check_pages_ro_single(fp);
-#endif
-	return 0;
-}
-
 static void bpf_prog_select_func(struct bpf_prog *fp)
 {
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -1524,17 +1506,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	 * all eBPF JITs might immediately support all features.
 	 */
 	*err = bpf_check_tail_call(fp);
-	if (*err)
-		return fp;
 
-	/* Checkpoint: at this point onwards any cBPF -> eBPF or
-	 * native eBPF program is read-only. If we failed to change
-	 * the page attributes (e.g. allocation failure from
-	 * splitting large pages), then reject the whole program
-	 * in order to guarantee not ending up with any W+X pages
-	 * from BPF side in kernel.
-	 */
-	*err = bpf_prog_check_pages_ro_locked(fp);
 	return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

From 55c5e0c602c20cb6f350e5ae357cfd7e04ebb189 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Sat, 2 Jun 2018 11:02:02 -0300
Subject: [PATCH 126/382] dt-bindings: clock: imx6ul: Do not change the clock
 definition order

Commit f5a4670de966 ("clk: imx: Add new clo01 and clo2 controlled
by CCOSR") introduced the CLK_CLKO definitions, but didn't put them
at the end of the list, which may cause dtb breakage when running an old
dtb with a newer kernel.

In order to avoid that, simply add the new CLK_CKO clock definitions
at the end of the list.

Fixes: f5a4670de966 ("clk: imx: Add new clo01 and clo2 controlled by CCOSR")
Reported-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/imx6ul-clock.h | 40 +++++++++++-------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/include/dt-bindings/clock/imx6ul-clock.h b/include/dt-bindings/clock/imx6ul-clock.h
index 9564597cbfac..0aa1d9c3e0b9 100644
--- a/include/dt-bindings/clock/imx6ul-clock.h
+++ b/include/dt-bindings/clock/imx6ul-clock.h
@@ -235,27 +235,25 @@
 #define IMX6UL_CLK_CSI_PODF		222
 #define IMX6UL_CLK_PLL3_120M		223
 #define IMX6UL_CLK_KPP			224
-#define IMX6UL_CLK_CKO1_SEL		225
-#define IMX6UL_CLK_CKO1_PODF		226
-#define IMX6UL_CLK_CKO1			227
-#define IMX6UL_CLK_CKO2_SEL		228
-#define IMX6UL_CLK_CKO2_PODF		229
-#define IMX6UL_CLK_CKO2			230
-#define IMX6UL_CLK_CKO			231
-
-/* For i.MX6ULL */
-#define IMX6ULL_CLK_ESAI_PRED		232
-#define IMX6ULL_CLK_ESAI_PODF		233
-#define IMX6ULL_CLK_ESAI_EXTAL		234
-#define IMX6ULL_CLK_ESAI_MEM		235
-#define IMX6ULL_CLK_ESAI_IPG		236
-#define IMX6ULL_CLK_DCP_CLK		237
-#define IMX6ULL_CLK_EPDC_PRE_SEL	238
-#define IMX6ULL_CLK_EPDC_SEL		239
-#define IMX6ULL_CLK_EPDC_PODF		240
-#define IMX6ULL_CLK_EPDC_ACLK		241
-#define IMX6ULL_CLK_EPDC_PIX		242
-#define IMX6ULL_CLK_ESAI_SEL		243
+#define IMX6ULL_CLK_ESAI_PRED		225
+#define IMX6ULL_CLK_ESAI_PODF		226
+#define IMX6ULL_CLK_ESAI_EXTAL		227
+#define IMX6ULL_CLK_ESAI_MEM		228
+#define IMX6ULL_CLK_ESAI_IPG		229
+#define IMX6ULL_CLK_DCP_CLK		230
+#define IMX6ULL_CLK_EPDC_PRE_SEL	231
+#define IMX6ULL_CLK_EPDC_SEL		232
+#define IMX6ULL_CLK_EPDC_PODF		233
+#define IMX6ULL_CLK_EPDC_ACLK		234
+#define IMX6ULL_CLK_EPDC_PIX		235
+#define IMX6ULL_CLK_ESAI_SEL		236
+#define IMX6UL_CLK_CKO1_SEL		237
+#define IMX6UL_CLK_CKO1_PODF		238
+#define IMX6UL_CLK_CKO1			239
+#define IMX6UL_CLK_CKO2_SEL		240
+#define IMX6UL_CLK_CKO2_PODF		241
+#define IMX6UL_CLK_CKO2			242
+#define IMX6UL_CLK_CKO			243
 #define IMX6UL_CLK_END			244
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6UL_H */

From 4050360f964694a3ac0c83badd1a441207c86889 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Fri, 29 Jun 2018 13:47:38 -0500
Subject: [PATCH 127/382] PCI: rcar: Shut the PHY down in failpath

If anything fails past phy_init_fn() and the system is a Gen3 with
a PHY, the PHY will be left on and inited. This is caused by the
phy_init_fn, which is in fact a pointer to rcar_pcie_phy_init_gen3()
function, which starts the PHY, yet has no counterpart in the failpath.
Add that counterpart.

Fixes: 517ca93a7159 ("PCI: rcar: Add R-Car gen3 PHY support")
Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Phil Edworthy <phil.edworthy@renesas.com>
Cc: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/pci/controller/pcie-rcar.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pcie-rcar.c b/drivers/pci/controller/pcie-rcar.c
index 874d75c9ee4a..8b0d3206cbc4 100644
--- a/drivers/pci/controller/pcie-rcar.c
+++ b/drivers/pci/controller/pcie-rcar.c
@@ -1165,7 +1165,7 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 	if (rcar_pcie_hw_init(pcie)) {
 		dev_info(dev, "PCIe link down\n");
 		err = -ENODEV;
-		goto err_clk_disable;
+		goto err_phy_shutdown;
 	}
 
 	data = rcar_pci_read_reg(pcie, MACSR);
@@ -1177,7 +1177,7 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 			dev_err(dev,
 				"failed to enable MSI support: %d\n",
 				err);
-			goto err_clk_disable;
+			goto err_phy_shutdown;
 		}
 	}
 
@@ -1191,6 +1191,12 @@ static int rcar_pcie_probe(struct platform_device *pdev)
 	if (IS_ENABLED(CONFIG_PCI_MSI))
 		rcar_pcie_teardown_msi(pcie);
 
+err_phy_shutdown:
+	if (pcie->phy) {
+		phy_power_off(pcie->phy);
+		phy_exit(pcie->phy);
+	}
+
 err_clk_disable:
 	clk_disable_unprepare(pcie->bus_clk);
 

From 3c5777c372b6eb2e17802b3dc4bd5ebea45d9bcc Mon Sep 17 00:00:00 2001
From: Marek Vasut <marek.vasut@gmail.com>
Date: Fri, 29 Jun 2018 13:48:15 -0500
Subject: [PATCH 128/382] PCI: rcar: Clean up PHY init on failure

If the Gen3 PHY fails to power up, the code does not undo the
initialization caused by phy_init(). Add the missing failure
handling to the rcar_pcie_phy_init_gen3() function.

Fixes: 517ca93a7159 ("PCI: rcar: Add R-Car gen3 PHY support")
Reported-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Marek Vasut <marek.vasut+renesas@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Phil Edworthy <phil.edworthy@renesas.com>
Cc: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/pci/controller/pcie-rcar.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pcie-rcar.c b/drivers/pci/controller/pcie-rcar.c
index 8b0d3206cbc4..c8febb009454 100644
--- a/drivers/pci/controller/pcie-rcar.c
+++ b/drivers/pci/controller/pcie-rcar.c
@@ -680,7 +680,11 @@ static int rcar_pcie_phy_init_gen3(struct rcar_pcie *pcie)
 	if (err)
 		return err;
 
-	return phy_power_on(pcie->phy);
+	err = phy_power_on(pcie->phy);
+	if (err)
+		phy_exit(pcie->phy);
+
+	return err;
 }
 
 static int rcar_msi_alloc(struct rcar_msi *chip)

From 9bc5f0833a55c7bf768d517b5fdcf89ead43b794 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 29 Jun 2018 13:49:04 -0500
Subject: [PATCH 129/382] PCI: controller: dwc: Do not let PCIE_DW_PLAT_HOST
 default to yes

PCIE_DW_PLAT_HOST does not have any platform dependency, so it should
not default to yes.

Fixes: 1d906b22076e12cf ("PCI: dwc: Add support for EP mode")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
---
 drivers/pci/controller/dwc/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index 16f52c626b4b..91b0194240a5 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -58,7 +58,6 @@ config PCIE_DW_PLAT_HOST
 	depends on PCI && PCI_MSI_IRQ_DOMAIN
 	select PCIE_DW_HOST
 	select PCIE_DW_PLAT
-	default y
 	help
 	  Enables support for the PCIe controller in the Designware IP to
 	  work in host mode. There are two instances of PCIe controller in

From fb0de5b8dcc68da4a0d39b0f5fcd5368085ad7c1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 29 Jun 2018 13:49:24 -0500
Subject: [PATCH 130/382] PCI: endpoint: Use after free in
 pci_epf_unregister_driver()

We need to use list_for_each_entry_safe() because the
pci_ep_cfs_remove_epf_group() function frees "group".

Fixes: ef1433f717a2 ("PCI: endpoint: Create configfs entry for each pci_epf_device_id table entry")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
[lorenzo.pieralisi@arm.com: updated commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/pci/endpoint/pci-epf-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c
index 523a8cab3bfb..bf53fad636a5 100644
--- a/drivers/pci/endpoint/pci-epf-core.c
+++ b/drivers/pci/endpoint/pci-epf-core.c
@@ -145,10 +145,10 @@ EXPORT_SYMBOL_GPL(pci_epf_alloc_space);
  */
 void pci_epf_unregister_driver(struct pci_epf_driver *driver)
 {
-	struct config_group *group;
+	struct config_group *group, *tmp;
 
 	mutex_lock(&pci_epf_mutex);
-	list_for_each_entry(group, &driver->epf_group, group_entry)
+	list_for_each_entry_safe(group, tmp, &driver->epf_group, group_entry)
 		pci_ep_cfs_remove_epf_group(group);
 	list_del(&driver->epf_group);
 	mutex_unlock(&pci_epf_mutex);

From 8c3f9bd851a4d3acf0a0f222d4e9e41c0cd1ea8e Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Fri, 29 Jun 2018 13:49:54 -0500
Subject: [PATCH 131/382] PCI: xilinx: Add missing of_node_put()

The call to of_get_next_child() returns a node pointer with refcount
incremented thus it must be explicitly decremented here after the last
usage.

Fixes: 8961def56845 ("PCI: xilinx: Add Xilinx AXI PCIe Host Bridge IP driver")
Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
[lorenzo.pieralisi@arm.com: reworked commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/controller/pcie-xilinx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c
index b110a3a814e3..7b1389d8e2a5 100644
--- a/drivers/pci/controller/pcie-xilinx.c
+++ b/drivers/pci/controller/pcie-xilinx.c
@@ -509,6 +509,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port)
 	port->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
 						 &intx_domain_ops,
 						 port);
+	of_node_put(pcie_intc_node);
 	if (!port->leg_domain) {
 		dev_err(dev, "Failed to get a INTx IRQ domain\n");
 		return -ENODEV;

From 342639d996f18bc0a4db2f42a84230c0a966dc94 Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Fri, 29 Jun 2018 13:50:10 -0500
Subject: [PATCH 132/382] PCI: xilinx-nwl: Add missing of_node_put()

The call to of_get_next_child() returns a node pointer with
refcount incremented thus it must be explicitly decremented
here after the last usage.

Fixes: ab597d35ef11 ("PCI: xilinx-nwl: Add support for Xilinx NWL PCIe Host Controller")
Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
[lorenzo.pieralisi@arm.com: updated commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/controller/pcie-xilinx-nwl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 6a4bbb5b3de0..fb32840ce8e6 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -559,7 +559,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie)
 							PCI_NUM_INTX,
 							&legacy_domain_ops,
 							pcie);
-
+	of_node_put(legacy_intc_node);
 	if (!pcie->legacy_irq_domain) {
 		dev_err(dev, "failed to create IRQ domain\n");
 		return -ENOMEM;

From 3dc6ddfedc2818eaaa36842fbb049191e0c5e50f Mon Sep 17 00:00:00 2001
From: Nicholas Mc Guire <hofrat@osadl.org>
Date: Fri, 29 Jun 2018 13:50:27 -0500
Subject: [PATCH 133/382] PCI: faraday: Add missing of_node_put()

The call to of_get_next_child() returns a node pointer with refcount
incremented thus it must be explicitly decremented here in the error
path and after the last usage.

Fixes: d3c68e0a7e34 ("PCI: faraday: Add Faraday Technology FTPCI100 PCI Host Bridge driver")
Signed-off-by: Nicholas Mc Guire <hofrat@osadl.org>
[lorenzo.pieralisi@arm.com: updated commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pci/controller/pci-ftpci100.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c
index a1ebe9ed441f..20bb2564a6b3 100644
--- a/drivers/pci/controller/pci-ftpci100.c
+++ b/drivers/pci/controller/pci-ftpci100.c
@@ -355,11 +355,13 @@ static int faraday_pci_setup_cascaded_irq(struct faraday_pci *p)
 	irq = of_irq_get(intc, 0);
 	if (irq <= 0) {
 		dev_err(p->dev, "failed to get parent IRQ\n");
+		of_node_put(intc);
 		return irq ?: -EINVAL;
 	}
 
 	p->irqdomain = irq_domain_add_linear(intc, PCI_NUM_INTX,
 					     &faraday_pci_irqdomain_ops, p);
+	of_node_put(intc);
 	if (!p->irqdomain) {
 		dev_err(p->dev, "failed to create Gemini PCI IRQ domain\n");
 		return -EINVAL;

From 38972375ef7bdc7dd989bcb48d5448662a95bca2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 29 Jun 2018 15:08:52 -0500
Subject: [PATCH 134/382] PCI/IOV: Reset total_VFs limit after detaching PF
 driver

The TotalVFs register in the SR-IOV capability is the hardware limit on the
number of VFs.  A PF driver can limit the number of VFs further with
pci_sriov_set_totalvfs().  When the PF driver is removed, reset any VF
limit that was imposed by the driver because that limit may not apply to
other drivers.

Before 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0"),
pci_sriov_set_totalvfs(pdev, 0) meant "we can enable TotalVFs virtual
functions", and the nfp driver used that to remove the VF limit when the
driver unloads.

8d85a7a4f2c9 broke that because instead of removing the VF limit,
pci_sriov_set_totalvfs(pdev, 0) actually sets the limit to zero, and that
limit persists even if another driver is loaded.

We could fix that by making the nfp driver reset the limit when it unloads,
but it seems more robust to do it in the PCI core instead of relying on the
driver.

The regression scenario is:

  nfp_pci_probe (driver 1)
  ...
  nfp_pci_remove
    pci_sriov_set_totalvfs(pf->pdev, 0)   # limits VFs to 0

  ...
  nfp_pci_probe (driver 2)
    nfp_rtsym_read_le("nfd_vf_cfg_max_vfs")
    # no VF limit from firmware

Now driver 2 is broken because the VF limit is still 0 from driver 1.

Fixes: 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
[bhelgaas: changelog, rename functions]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/iov.c        | 16 ++++++++++++++++
 drivers/pci/pci-driver.c |  1 +
 drivers/pci/pci.h        |  4 ++++
 3 files changed, 21 insertions(+)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index d0d73dbbd5ca..0f04ae648cf1 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -574,6 +574,22 @@ void pci_iov_release(struct pci_dev *dev)
 		sriov_release(dev);
 }
 
+/**
+ * pci_iov_remove - clean up SR-IOV state after PF driver is detached
+ * @dev: the PCI device
+ */
+void pci_iov_remove(struct pci_dev *dev)
+{
+	struct pci_sriov *iov = dev->sriov;
+
+	if (!dev->is_physfn)
+		return;
+
+	iov->driver_max_VFs = iov->total_VFs;
+	if (iov->num_VFs)
+		pci_warn(dev, "driver left SR-IOV enabled after remove\n");
+}
+
 /**
  * pci_iov_update_resource - update a VF BAR
  * @dev: the PCI device
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index c125d53033c6..6792292b5fc7 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -445,6 +445,7 @@ static int pci_device_remove(struct device *dev)
 		}
 		pcibios_free_irq(pci_dev);
 		pci_dev->driver = NULL;
+		pci_iov_remove(pci_dev);
 	}
 
 	/* Undo the runtime PM settings in local_pci_probe() */
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index c358e7a07f3f..882f1f9596df 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -311,6 +311,7 @@ static inline void pci_restore_ats_state(struct pci_dev *dev)
 #ifdef CONFIG_PCI_IOV
 int pci_iov_init(struct pci_dev *dev);
 void pci_iov_release(struct pci_dev *dev);
+void pci_iov_remove(struct pci_dev *dev);
 void pci_iov_update_resource(struct pci_dev *dev, int resno);
 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
 void pci_restore_iov_state(struct pci_dev *dev);
@@ -323,6 +324,9 @@ static inline int pci_iov_init(struct pci_dev *dev)
 }
 static inline void pci_iov_release(struct pci_dev *dev)
 
+{
+}
+static inline void pci_iov_remove(struct pci_dev *dev)
 {
 }
 static inline void pci_restore_iov_state(struct pci_dev *dev)

From 83235822b8b4fe47ecbd6b6bcbcc902860ac00fc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 29 Jun 2018 15:09:00 -0500
Subject: [PATCH 135/382] nfp: stop limiting VFs to 0

Before 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0"),
pci_sriov_set_totalvfs(pdev, 0) meant "we can enable TotalVFs virtual
functions".  After 8d85a7a4f2c9, it means "we can't enable *any* VFs".

That broke this scenario where nfp intends to remove any limit on the
number of VFs that can be enabled:

  nfp_pci_probe
    nfp_pcie_sriov_read_nfd_limit
      nfp_rtsym_read_le("nfd_vf_cfg_max_vfs", &err)
      pci_sriov_set_totalvfs(pf->pdev, 0)   # if FW didn't expose a limit

  ...
  # userspace writes N to sysfs "sriov_numvfs":
  sriov_numvfs_store
    pci_sriov_get_totalvfs                  # now returns 0
    return -ERANGE

Prior to 8d85a7a4f2c9, pci_sriov_get_totalvfs() returned TotalVFs, but it
now returns 0.

Remove the pci_sriov_set_totalvfs(pdev, 0) calls so we don't limit the
number of VFs that can be enabled.

Fixes: 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
[bhelgaas: changelog]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_main.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 46b76d5a726c..152283d7e59c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -240,7 +240,6 @@ static int nfp_pcie_sriov_read_nfd_limit(struct nfp_pf *pf)
 		return pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs);
 
 	pf->limit_vfs = ~0;
-	pci_sriov_set_totalvfs(pf->pdev, 0); /* 0 is unset */
 	/* Allow any setting for backwards compatibility if symbol not found */
 	if (err == -ENOENT)
 		return 0;
@@ -668,7 +667,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
 
 	err = nfp_net_pci_probe(pf);
 	if (err)
-		goto err_sriov_unlimit;
+		goto err_fw_unload;
 
 	err = nfp_hwmon_register(pf);
 	if (err) {
@@ -680,8 +679,6 @@ static int nfp_pci_probe(struct pci_dev *pdev,
 
 err_net_remove:
 	nfp_net_pci_remove(pf);
-err_sriov_unlimit:
-	pci_sriov_set_totalvfs(pf->pdev, 0);
 err_fw_unload:
 	kfree(pf->rtbl);
 	nfp_mip_close(pf->mip);
@@ -715,7 +712,6 @@ static void nfp_pci_remove(struct pci_dev *pdev)
 	nfp_hwmon_unregister(pf);
 
 	nfp_pcie_sriov_disable(pdev);
-	pci_sriov_set_totalvfs(pf->pdev, 0);
 
 	nfp_net_pci_remove(pf);
 

From c860e997e9170a6d68f9d1e6e2cf61f572191aaf Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 27 Jun 2018 16:04:48 -0700
Subject: [PATCH 136/382] tcp: fix Fast Open key endianness

Fast Open key could be stored in different endian based on the CPU.
Previously hosts in different endianness in a server farm using
the same key config (sysctl value) would produce different cookies.
This patch fixes it by always storing it as little endian to keep
same API for LE hosts.

Reported-by: Daniele Iamartino <danielei@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/sysctl_net_ipv4.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d06247ba08b2..af0a857d8352 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -265,8 +265,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 	    ipv4.sysctl_tcp_fastopen);
 	struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
 	struct tcp_fastopen_context *ctxt;
-	int ret;
 	u32  user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
+	__le32 key[4];
+	int ret, i;
 
 	tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
 	if (!tbl.data)
@@ -275,11 +276,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 	rcu_read_lock();
 	ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
 	if (ctxt)
-		memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+		memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
 	else
-		memset(user_key, 0, sizeof(user_key));
+		memset(key, 0, sizeof(key));
 	rcu_read_unlock();
 
+	for (i = 0; i < ARRAY_SIZE(key); i++)
+		user_key[i] = le32_to_cpu(key[i]);
+
 	snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
 		user_key[0], user_key[1], user_key[2], user_key[3]);
 	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
@@ -290,13 +294,17 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 			ret = -EINVAL;
 			goto bad_key;
 		}
-		tcp_fastopen_reset_cipher(net, NULL, user_key,
+
+		for (i = 0; i < ARRAY_SIZE(user_key); i++)
+			key[i] = cpu_to_le32(user_key[i]);
+
+		tcp_fastopen_reset_cipher(net, NULL, key,
 					  TCP_FASTOPEN_KEY_LENGTH);
 	}
 
 bad_key:
 	pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
-	       user_key[0], user_key[1], user_key[2], user_key[3],
+		user_key[0], user_key[1], user_key[2], user_key[3],
 	       (char *)tbl.data, ret);
 	kfree(tbl.data);
 	return ret;

From 92291c95e71a7898109c1f95adfb48aa69e5ba7b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 28 Jun 2018 12:24:42 +0300
Subject: [PATCH 137/382] atm: iphase: fix a 64 bit bug

The code assumes that there is 4 bytes in a pointer and it doesn't
allocate enough memory.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/iphase.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index ff81a576347e..82532c299bb5 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -1618,7 +1618,7 @@ static int rx_init(struct atm_dev *dev)
 	skb_queue_head_init(&iadev->rx_dma_q);  
 	iadev->rx_free_desc_qhead = NULL;   
 
-	iadev->rx_open = kcalloc(4, iadev->num_vc, GFP_KERNEL);
+	iadev->rx_open = kcalloc(iadev->num_vc, sizeof(void *), GFP_KERNEL);
 	if (!iadev->rx_open) {
 		printk(KERN_ERR DEV_LABEL "itf %d couldn't get free page\n",
 		dev->number);  

From 5037c6280606396ab4d3065b066d4b574df020dc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 28 Jun 2018 12:31:25 +0300
Subject: [PATCH 138/382] cnic: tidy up a size calculation

Static checkers complain that id_tbl->table points to longs and 4 bytes
is smaller than sizeof(long).  But the since other side is dividing by
32 instead of sizeof(long), that means the current code works fine.

Anyway, it's more conventional to use the BITS_TO_LONGS() macro when
we're allocating a bitmap.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/cnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 30273a7717e2..4fd829b5e65d 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -660,7 +660,7 @@ static int cnic_init_id_tbl(struct cnic_id_tbl *id_tbl, u32 size, u32 start_id,
 	id_tbl->max = size;
 	id_tbl->next = next;
 	spin_lock_init(&id_tbl->lock);
-	id_tbl->table = kcalloc(DIV_ROUND_UP(size, 32), 4, GFP_KERNEL);
+	id_tbl->table = kcalloc(BITS_TO_LONGS(size), sizeof(long), GFP_KERNEL);
 	if (!id_tbl->table)
 		return -ENOMEM;
 

From 484c016d9392786ce5c74017c206c706f29f823d Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Thu, 28 Jun 2018 04:52:15 -0700
Subject: [PATCH 139/382] bnx2x: Fix receiving tx-timeout in error or recovery
 state.

Driver performs the internal reload when it receives tx-timeout event from
the OS. Internal reload might fail in some scenarios e.g., fatal HW issues.
In such cases OS still see the link, which would result in undesirable
functionalities such as re-generation of tx-timeouts.
The patch addresses this issue by indicating the link-down to OS when
tx-timeout is detected, and keeping the link in down state till the
internal reload is successful.

Please consider applying it to 'net' branch.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h      | 1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c  | 6 ++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 6 ++++++
 3 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index d847e1b9c37b..be1506169076 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1533,6 +1533,7 @@ struct bnx2x {
 	struct link_vars	link_vars;
 	u32			link_cnt;
 	struct bnx2x_link_report_data last_reported_link;
+	bool			force_link_down;
 
 	struct mdio_if_info	mdio;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 8cd73ff5debc..af7b5a4d8ba0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1261,6 +1261,11 @@ void __bnx2x_link_report(struct bnx2x *bp)
 {
 	struct bnx2x_link_report_data cur_data;
 
+	if (bp->force_link_down) {
+		bp->link_vars.link_up = 0;
+		return;
+	}
+
 	/* reread mf_cfg */
 	if (IS_PF(bp) && !CHIP_IS_E1(bp))
 		bnx2x_read_mf_cfg(bp);
@@ -2817,6 +2822,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 		bp->pending_max = 0;
 	}
 
+	bp->force_link_down = false;
 	if (bp->port.pmf) {
 		rc = bnx2x_initial_phy_init(bp, load_mode);
 		if (rc)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 5b1ed240bf18..57348f2b49a3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10279,6 +10279,12 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
 		bp->sp_rtnl_state = 0;
 		smp_mb();
 
+		/* Immediately indicate link as down */
+		bp->link_vars.link_up = 0;
+		bp->force_link_down = true;
+		netif_carrier_off(bp->dev);
+		BNX2X_ERR("Indicating link is down due to Tx-timeout\n");
+
 		bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
 		/* When ret value shows failure of allocation failure,
 		 * the nic is rebooted again. If open still fails, a error

From b95f6fbc8e15803a596ca5e5e21008fba29694c6 Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Thu, 28 Jun 2018 15:26:50 +0300
Subject: [PATCH 140/382] fsl/fman: fix parser reporting bad checksum on short
 frames

The FMan hardware parser needs to be configured to remove the
short frame padding from the checksum calculation, otherwise
short UDP and TCP frames are likely to be marked as having a
bad checksum.

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fman/fman_port.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c
index ce6e24c74978..ecbf6187e13a 100644
--- a/drivers/net/ethernet/freescale/fman/fman_port.c
+++ b/drivers/net/ethernet/freescale/fman/fman_port.c
@@ -324,6 +324,10 @@ struct fman_port_qmi_regs {
 #define HWP_HXS_PHE_REPORT 0x00000800
 #define HWP_HXS_PCAC_PSTAT 0x00000100
 #define HWP_HXS_PCAC_PSTOP 0x00000001
+#define HWP_HXS_TCP_OFFSET 0xA
+#define HWP_HXS_UDP_OFFSET 0xB
+#define HWP_HXS_SH_PAD_REM 0x80000000
+
 struct fman_port_hwp_regs {
 	struct {
 		u32 ssa; /* Soft Sequence Attachment */
@@ -728,6 +732,10 @@ static void init_hwp(struct fman_port *port)
 		iowrite32be(0xffffffff, &regs->pmda[i].lcv);
 	}
 
+	/* Short packet padding removal from checksum calculation */
+	iowrite32be(HWP_HXS_SH_PAD_REM, &regs->pmda[HWP_HXS_TCP_OFFSET].ssa);
+	iowrite32be(HWP_HXS_SH_PAD_REM, &regs->pmda[HWP_HXS_UDP_OFFSET].ssa);
+
 	start_port_hwp(port);
 }
 

From 595e802e53f24642a145cf7f3e4ac9afab4c21ec Mon Sep 17 00:00:00 2001
From: Madalin Bucur <madalin.bucur@nxp.com>
Date: Thu, 28 Jun 2018 15:26:51 +0300
Subject: [PATCH 141/382] dpaa_eth: DPAA SGT needs to be 256B

The DPAA HW requires that at least 256 bytes from the start of the
first scatter-gather table entry are allocated and accessible. The
hardware reads the maximum size the table can have in one access,
thus requiring that the allocation and mapping to be done for the
maximum size of 256B even if there is a smaller number of entries
in the table.

Signed-off-by: Madalin Bucur <madalin.bucur@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 5f4e1ffa7b95..ab02057ac730 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -125,6 +125,9 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
 /* Default alignment for start of data in an Rx FD */
 #define DPAA_FD_DATA_ALIGNMENT  16
 
+/* The DPAA requires 256 bytes reserved and mapped for the SGT */
+#define DPAA_SGT_SIZE 256
+
 /* Values for the L3R field of the FM Parse Results
  */
 /* L3 Type field: First IP Present IPv4 */
@@ -1617,8 +1620,8 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
 
 	if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
 		nr_frags = skb_shinfo(skb)->nr_frags;
-		dma_unmap_single(dev, addr, qm_fd_get_offset(fd) +
-				 sizeof(struct qm_sg_entry) * (1 + nr_frags),
+		dma_unmap_single(dev, addr,
+				 qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
 				 dma_dir);
 
 		/* The sgt buffer has been allocated with netdev_alloc_frag(),
@@ -1903,8 +1906,7 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 	void *sgt_buf;
 
 	/* get a page frag to store the SGTable */
-	sz = SKB_DATA_ALIGN(priv->tx_headroom +
-		sizeof(struct qm_sg_entry) * (1 + nr_frags));
+	sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE);
 	sgt_buf = netdev_alloc_frag(sz);
 	if (unlikely(!sgt_buf)) {
 		netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
@@ -1972,9 +1974,8 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
 	skbh = (struct sk_buff **)buffer_start;
 	*skbh = skb;
 
-	addr = dma_map_single(dev, buffer_start, priv->tx_headroom +
-			      sizeof(struct qm_sg_entry) * (1 + nr_frags),
-			      dma_dir);
+	addr = dma_map_single(dev, buffer_start,
+			      priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
 	if (unlikely(dma_mapping_error(dev, addr))) {
 		dev_err(dev, "DMA mapping failed");
 		err = -EINVAL;

From d14b56f508ad70eca3e659545aab3c45200f258c Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Thu, 28 Jun 2018 17:53:06 +0200
Subject: [PATCH 142/382] net: cleanup gfp mask in alloc_skb_with_frags

alloc_skb_with_frags uses __GFP_NORETRY for non-sleeping allocations
which is just a noop and a little bit confusing.

__GFP_NORETRY was added by ed98df3361f0 ("net: use __GFP_NORETRY for
high order allocations") to prevent from the OOM killer. Yet this was
not enough because fb05e7a89f50 ("net: don't wait for order-3 page
allocation") didn't want an excessive reclaim for non-costly orders
so it made it completely NOWAIT while it preserved __GFP_NORETRY in
place which is now redundant.

Drop the pointless __GFP_NORETRY because this function is used as
copy&paste source for other places.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c642304f178c..eba8dae22c25 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5276,8 +5276,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 			if (npages >= 1 << order) {
 				page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
 						   __GFP_COMP |
-						   __GFP_NOWARN |
-						   __GFP_NORETRY,
+						   __GFP_NOWARN,
 						   order);
 				if (page)
 					goto fill_page;

From e7c7faa936680a2f0e78fc6d9683a5728421a150 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 28 Jun 2018 13:36:55 -0700
Subject: [PATCH 143/382] net/ipv6: Fix updates to prefix route

Sowmini reported that a recent commit broke prefix routes for linklocal
addresses. The newly added modify_prefix_route is attempting to add a
new prefix route when the ifp priority does not match the route metric
however the check needs to account for the default priority. In addition,
the route add fails because the route already exists, and then the delete
removes the one that exists. Flip the order to do the delete first.

Fixes: 8308f3ff1753 ("net/ipv6: Add support for specifying metric of connected routes")
Reported-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Tested-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c134286d6a41..91580c62bb86 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4528,6 +4528,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
 			       unsigned long expires, u32 flags)
 {
 	struct fib6_info *f6i;
+	u32 prio;
 
 	f6i = addrconf_get_prefix_route(&ifp->addr,
 					ifp->prefix_len,
@@ -4536,13 +4537,15 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
 	if (!f6i)
 		return -ENOENT;
 
-	if (f6i->fib6_metric != ifp->rt_priority) {
+	prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF;
+	if (f6i->fib6_metric != prio) {
+		/* delete old one */
+		ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+
 		/* add new one */
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
 				      ifp->rt_priority, ifp->idev->dev,
 				      expires, flags, GFP_KERNEL);
-		/* delete old one */
-		ip6_del_rt(dev_net(ifp->idev->dev), f6i);
 	} else {
 		if (!expires)
 			fib6_clean_expires(f6i);

From 713b4a33524721767703956654f4d6713c556f2b Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Fri, 29 Jun 2018 10:28:31 -0500
Subject: [PATCH 144/382] net: phy: DP83TC811: Fix disabling interrupts

Fix a bug where INT_STAT1 was written twice and
INT_STAT2 was ignored when disabling interrupts.

Fixes: b753a9faaf9a ("net: phy: DP83TC811: Introduce support for the DP83TC811 phy")
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83tc811.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c
index 081d99aa3985..49ac678eb2dc 100644
--- a/drivers/net/phy/dp83tc811.c
+++ b/drivers/net/phy/dp83tc811.c
@@ -222,7 +222,7 @@ static int dp83811_config_intr(struct phy_device *phydev)
 		if (err < 0)
 			return err;
 
-		err = phy_write(phydev, MII_DP83811_INT_STAT1, 0);
+		err = phy_write(phydev, MII_DP83811_INT_STAT2, 0);
 	}
 
 	return err;

From cafb39600e7a73263122a0e2db052d691686378f Mon Sep 17 00:00:00 2001
From: Bert Kenward <bkenward@solarflare.com>
Date: Fri, 29 Jun 2018 16:29:28 +0100
Subject: [PATCH 145/382] sfc: correctly initialise filter rwsem for farch

Fixes: fc7a6c287ff3 ("sfc: use a semaphore to lock farch filters too")
Suggested-by: Joseph Korty <joe.korty@concurrent-rt.com>
Signed-off-by: Bert Kenward <bkenward@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/farch.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 8edf20967c82..e045a5d6b938 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -2794,6 +2794,7 @@ int efx_farch_filter_table_probe(struct efx_nic *efx)
 	if (!state)
 		return -ENOMEM;
 	efx->filter_state = state;
+	init_rwsem(&state->lock);
 
 	table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
 	table->id = EFX_FARCH_FILTER_TABLE_RX_IP;

From bc800e8b39bad60ccdb83be828da63af71ab87b3 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 29 Jun 2018 17:51:26 +0200
Subject: [PATCH 146/382] alx: take rtnl before calling __alx_open from resume

The __alx_open function can be called from ndo_open, which is called
under RTNL, or from alx_resume, which isn't. Since commit d768319cd427,
we're calling the netif_set_real_num_{tx,rx}_queues functions, which
need to be called under RTNL.

This is similar to commit 0c2cc02e571a ("igb: Move the calls to set the
Tx and Rx queues into igb_open").

Fixes: d768319cd427 ("alx: enable multiple tx queues")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 567ee54504bc..5e5022fa1d04 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1897,13 +1897,19 @@ static int alx_resume(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct alx_priv *alx = pci_get_drvdata(pdev);
 	struct alx_hw *hw = &alx->hw;
+	int err;
 
 	alx_reset_phy(hw);
 
 	if (!netif_running(alx->dev))
 		return 0;
 	netif_device_attach(alx->dev);
-	return __alx_open(alx, true);
+
+	rtnl_lock();
+	err = __alx_open(alx, true);
+	rtnl_unlock();
+
+	return err;
 }
 
 static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume);

From 4664610537d398d55be19432f9cd9c29c831e159 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 29 Jun 2018 19:45:50 +0200
Subject: [PATCH 147/382] Revert "s390/qeth: use Read device to query
 hypervisor for MAC"

This reverts commit b7493e91c11a757cf0f8ab26989642ee4bb2c642.

On its own, querying RDEV for a MAC address works fine. But when upgrading
from a qeth that previously queried DDEV on a z/VM NIC (ie. any kernel with
commit ec61bd2fd2a2), the RDEV query now returns a _different_ MAC address
than the DDEV query.

If the NIC is configured with MACPROTECT, z/VM apparently requires us to
use the MAC that was initially returned (on DDEV) and registered. So after
upgrading to a kernel that uses RDEV, the SETVMAC registration cmd for the
new MAC address fails and we end up with a non-operabel interface.

To avoid regressions on upgrade, switch back to using DDEV for the MAC
address query. The downgrade path (first RDEV, later DDEV) is fine, in this
case both queries return the same MAC address.

Fixes: b7493e91c11a ("s390/qeth: use Read device to query hypervisor for MAC")
Reported-by: Michal Kubecek <mkubecek@suse.com>
Tested-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 8e1474f1ffac..9d9182ed8ac4 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4834,7 +4834,7 @@ int qeth_vm_request_mac(struct qeth_card *card)
 		goto out;
 	}
 
-	ccw_device_get_id(CARD_RDEV(card), &id);
+	ccw_device_get_id(CARD_DDEV(card), &id);
 	request->resp_buf_len = sizeof(*response);
 	request->resp_version = DIAG26C_VERSION2;
 	request->op_code = DIAG26C_GET_MAC;

From 4789a21880488048105590049fc41a99f53d565d Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 29 Jun 2018 19:45:51 +0200
Subject: [PATCH 148/382] s390/qeth: fix race when setting MAC address

When qeth_l2_set_mac_address() finds the card in a non-reachable state,
it merely copies the new MAC address into dev->dev_addr so that
__qeth_l2_set_online() can later register it with the HW.

But __qeth_l2_set_online() may very well be running concurrently, so we
can't trust the card state without appropriate locking:
If the online sequence is past the point where it registers
dev->dev_addr (but not yet in SOFTSETUP state), any address change needs
to be properly programmed into the HW. Otherwise the netdevice ends up
with a different MAC address than what's set in the HW, and inbound
traffic is not forwarded as expected.

This is most likely to occur for OSD in LPAR, where
commit 21b1702af12e ("s390/qeth: improve fallback to random MAC address")
now triggers eg. systemd to immediately change the MAC when the netdevice
is registered with a NET_ADDR_RANDOM address.

Fixes: bcacfcbc82b4 ("s390/qeth: fix MAC address update sequence")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index a7cb37da6a21..7daf125dae76 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -501,27 +501,34 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
 		return -ERESTARTSYS;
 	}
 
+	/* avoid racing against concurrent state change: */
+	if (!mutex_trylock(&card->conf_mutex))
+		return -EAGAIN;
+
 	if (!qeth_card_hw_is_reachable(card)) {
 		ether_addr_copy(dev->dev_addr, addr->sa_data);
-		return 0;
+		goto out_unlock;
 	}
 
 	/* don't register the same address twice */
 	if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) &&
 	    (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
-		return 0;
+		goto out_unlock;
 
 	/* add the new address, switch over, drop the old */
 	rc = qeth_l2_send_setmac(card, addr->sa_data);
 	if (rc)
-		return rc;
+		goto out_unlock;
 	ether_addr_copy(old_addr, dev->dev_addr);
 	ether_addr_copy(dev->dev_addr, addr->sa_data);
 
 	if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)
 		qeth_l2_remove_mac(card, old_addr);
 	card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
-	return 0;
+
+out_unlock:
+	mutex_unlock(&card->conf_mutex);
+	return rc;
 }
 
 static void qeth_promisc_to_bridge(struct qeth_card *card)

From 9d0a58fb9747afd27d490c02a97889a1b59f6be4 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 29 Jun 2018 19:45:52 +0200
Subject: [PATCH 149/382] s390/qeth: avoid using is_multicast_ether_addr_64bits
 on (u8 *)[6]

*ether_addr*_64bits functions have been introduced to optimize
performance critical paths, which access 6-byte ethernet address as u64
value to get "nice" assembly. A harmless hack works nicely on ethernet
addresses shoved into a structure or a larger buffer, until busted by
Kasan on smth like plain (u8 *)[6].

qeth_l2_set_mac_address calls qeth_l2_remove_mac passing
u8 old_addr[ETH_ALEN] as an argument.

Adding/removing macs for an ethernet adapter is not that performance
critical. Moreover is_multicast_ether_addr_64bits itself on s390 is not
faster than is_multicast_ether_addr:

is_multicast_ether_addr(%r2) -> %r2
llc	%r2,0(%r2)
risbg	%r2,%r2,63,191,0

is_multicast_ether_addr_64bits(%r2) -> %r2
llgc	%r2,0(%r2)
risbg	%r2,%r2,63,191,0

So, let's just use is_multicast_ether_addr instead of
is_multicast_ether_addr_64bits.

Fixes: bcacfcbc82b4 ("s390/qeth: fix MAC address update sequence")
Reviewed-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l2_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 7daf125dae76..5464515b71f1 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -140,7 +140,7 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac)
 
 static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
 {
-	enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ?
+	enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ?
 					IPA_CMD_SETGMAC : IPA_CMD_SETVMAC;
 	int rc;
 
@@ -157,7 +157,7 @@ static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
 
 static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac)
 {
-	enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ?
+	enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ?
 					IPA_CMD_DELGMAC : IPA_CMD_DELVMAC;
 	int rc;
 

From ce28867fd20c23cd769e78b4d619c4755bf71a1c Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 29 Jun 2018 19:45:53 +0200
Subject: [PATCH 150/382] s390/qeth: don't clobber buffer on async TX
 completion

If qeth_qdio_output_handler() detects that a transmit requires async
completion, it replaces the pending buffer's metadata object
(qeth_qdio_out_buffer) so that this queue buffer can be re-used while
the data is pending completion.

Later when the CQ indicates async completion of such a metadata object,
qeth_qdio_cq_handler() tries to free any data associated with this
object (since HW has now completed the transfer). By calling
qeth_clear_output_buffer(), it erronously operates on the queue buffer
that _previously_ belonged to this transfer ... but which has been
potentially re-used several times by now.
This results in double-free's of the buffer's data, and failing
transmits as the buffer descriptor is scrubbed in mid-air.

The correct way of handling this situation is to
1. scrub the queue buffer when it is prepared for re-use, and
2. later obtain the data addresses from the async-completion notifier
   (ie. the AOB), instead of the queue buffer.

All this only affects qeth devices used for af_iucv HiperTransport.

Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      | 11 +++++++++++
 drivers/s390/net/qeth_core_main.c | 22 ++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 2a5fec55bf60..940fd7b558d3 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -829,6 +829,17 @@ struct qeth_trap_id {
 /*some helper functions*/
 #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "")
 
+static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
+					  unsigned int elements)
+{
+	unsigned int i;
+
+	for (i = 0; i < elements; i++)
+		memset(&buf->element[i], 0, sizeof(struct qdio_buffer_element));
+	buf->element[14].sflags = 0;
+	buf->element[15].sflags = 0;
+}
+
 /**
  * qeth_get_elements_for_range() -	find number of SBALEs to cover range.
  * @start:				Start of the address range.
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9d9182ed8ac4..d20a69a3bc40 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -73,9 +73,6 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
 		struct qeth_qdio_out_buffer *buf,
 		enum iucv_tx_notify notification);
 static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf);
-static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
-		struct qeth_qdio_out_buffer *buf,
-		enum qeth_qdio_buffer_states newbufstate);
 static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int);
 
 struct workqueue_struct *qeth_wq;
@@ -489,6 +486,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 	struct qaob *aob;
 	struct qeth_qdio_out_buffer *buffer;
 	enum iucv_tx_notify notification;
+	unsigned int i;
 
 	aob = (struct qaob *) phys_to_virt(phys_aob_addr);
 	QETH_CARD_TEXT(card, 5, "haob");
@@ -513,10 +511,18 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
 	qeth_notify_skbs(buffer->q, buffer, notification);
 
 	buffer->aob = NULL;
-	qeth_clear_output_buffer(buffer->q, buffer,
-				 QETH_QDIO_BUF_HANDLED_DELAYED);
+	/* Free dangling allocations. The attached skbs are handled by
+	 * qeth_cleanup_handled_pending().
+	 */
+	for (i = 0;
+	     i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card);
+	     i++) {
+		if (aob->sba[i] && buffer->is_header[i])
+			kmem_cache_free(qeth_core_header_cache,
+					(void *) aob->sba[i]);
+	}
+	atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED);
 
-	/* from here on: do not touch buffer anymore */
 	qdio_release_aob(aob);
 }
 
@@ -3759,6 +3765,10 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
 			QETH_CARD_TEXT(queue->card, 5, "aob");
 			QETH_CARD_TEXT_(queue->card, 5, "%lx",
 					virt_to_phys(buffer->aob));
+
+			/* prepare the queue slot for re-use: */
+			qeth_scrub_qdio_buffer(buffer->buffer,
+					       QETH_MAX_BUFFER_ELEMENTS(card));
 			if (qeth_init_qdio_out_buf(queue, bidx)) {
 				QETH_CARD_TEXT(card, 2, "outofbuf");
 				qeth_schedule_recovery(card);

From d025da9eb1e48d3e5f2a2ff13ac5ac536ba4be43 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 29 Jun 2018 19:45:54 +0200
Subject: [PATCH 151/382] s390/qeth: consistently re-enable device features

commit e830baa9c3f0 ("qeth: restore device features after recovery") and
commit ce3443564145 ("s390/qeth: rely on kernel for feature recovery")
made sure that the HW functions for device features get re-programmed
after recovery.

But we missed that the same handling is also required when a card is
first set offline (destroying all HW context), and then online again.
Fix this by moving the re-enable action out of the recovery-only path.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  2 +-
 drivers/s390/net/qeth_core_main.c | 23 +++++++++++------------
 drivers/s390/net/qeth_l2_main.c   |  5 ++---
 drivers/s390/net/qeth_l3_main.c   |  3 ++-
 4 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 940fd7b558d3..a246a618f9a4 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1040,7 +1040,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
 						 __u16, __u16,
 						 enum qeth_prot_versions);
 int qeth_set_features(struct net_device *, netdev_features_t);
-void qeth_recover_features(struct net_device *dev);
+void qeth_enable_hw_features(struct net_device *dev);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
 netdev_features_t qeth_features_check(struct sk_buff *skb,
 				      struct net_device *dev,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index d20a69a3bc40..d01ac29fd986 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -6469,28 +6469,27 @@ static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on)
 #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \
 			  NETIF_F_IPV6_CSUM)
 /**
- * qeth_recover_features() - Restore device features after recovery
- * @dev:	the recovering net_device
- *
- * Caller must hold rtnl lock.
+ * qeth_enable_hw_features() - (Re-)Enable HW functions for device features
+ * @dev:	a net_device
  */
-void qeth_recover_features(struct net_device *dev)
+void qeth_enable_hw_features(struct net_device *dev)
 {
-	netdev_features_t features = dev->features;
 	struct qeth_card *card = dev->ml_priv;
+	netdev_features_t features;
 
+	rtnl_lock();
+	features = dev->features;
 	/* force-off any feature that needs an IPA sequence.
 	 * netdev_update_features() will restart them.
 	 */
 	dev->features &= ~QETH_HW_FEATURES;
 	netdev_update_features(dev);
-
-	if (features == dev->features)
-		return;
-	dev_warn(&card->gdev->dev,
-		 "Device recovery failed to restore all offload features\n");
+	if (features != dev->features)
+		dev_warn(&card->gdev->dev,
+			 "Device recovery failed to restore all offload features\n");
+	rtnl_unlock();
 }
-EXPORT_SYMBOL_GPL(qeth_recover_features);
+EXPORT_SYMBOL_GPL(qeth_enable_hw_features);
 
 int qeth_set_features(struct net_device *dev, netdev_features_t features)
 {
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 5464515b71f1..2487f0aeb165 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1119,6 +1119,8 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		netif_carrier_off(card->dev);
 
 	qeth_set_allowed_threads(card, 0xffffffff, 0);
+
+	qeth_enable_hw_features(card->dev);
 	if (recover_flag == CARD_STATE_RECOVER) {
 		if (recovery_mode &&
 		    card->info.type != QETH_CARD_TYPE_OSN) {
@@ -1130,9 +1132,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		}
 		/* this also sets saved unicast addresses */
 		qeth_l2_set_rx_mode(card->dev);
-		rtnl_lock();
-		qeth_recover_features(card->dev);
-		rtnl_unlock();
 	}
 	/* let user_space know that device is online */
 	kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e7fa479adf47..5905dc63e256 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2662,6 +2662,8 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		netif_carrier_on(card->dev);
 	else
 		netif_carrier_off(card->dev);
+
+	qeth_enable_hw_features(card->dev);
 	if (recover_flag == CARD_STATE_RECOVER) {
 		rtnl_lock();
 		if (recovery_mode)
@@ -2669,7 +2671,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		else
 			dev_open(card->dev);
 		qeth_l3_set_rx_mode(card->dev);
-		qeth_recover_features(card->dev);
 		rtnl_unlock();
 	}
 	qeth_trace_features(card);

From ced9e191501e52b95e1b57b8e0db00943869eed0 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 29 Jun 2018 13:28:07 -0500
Subject: [PATCH 152/382] atm: zatm: Fix potential Spectre v1

pool can be indirectly controlled by user-space, hence leading to
a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/atm/zatm.c:1491 zatm_ioctl() warn: potential spectre issue
'zatm_dev->pool_info' (local cap)

Fix this by sanitizing pool before using it to index
zatm_dev->pool_info

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/zatm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index a8d2eb0ceb8d..2c288d1f42bb 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1483,6 +1483,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
 					return -EFAULT;
 				if (pool < 0 || pool > ZATM_LAST_POOL)
 					return -EINVAL;
+				pool = array_index_nospec(pool,
+							  ZATM_LAST_POOL + 1);
 				if (copy_from_user(&info,
 				    &((struct zatm_pool_req __user *) arg)->info,
 				    sizeof(info))) return -EFAULT;

From 3f76df198288ceec92fc9eddecad1e73c52769b0 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 29 Jun 2018 13:42:48 -0700
Subject: [PATCH 153/382] net: use dev_change_tx_queue_len() for SIOCSIFTXQLEN

As noticed by Eric, we need to switch to the helper
dev_change_tx_queue_len() for SIOCSIFTXQLEN call path too,
otheriwse still miss dev_qdisc_change_tx_queue_len().

Fixes: 6a643ddb5624 ("net: introduce helper dev_change_tx_queue_len()")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_ioctl.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index a04e1e88bf3a..50537ff961a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 		if (ifr->ifr_qlen < 0)
 			return -EINVAL;
 		if (dev->tx_queue_len ^ ifr->ifr_qlen) {
-			unsigned int orig_len = dev->tx_queue_len;
-
-			dev->tx_queue_len = ifr->ifr_qlen;
-			err = call_netdevice_notifiers(
-					NETDEV_CHANGE_TX_QUEUE_LEN, dev);
-			err = notifier_to_errno(err);
-			if (err) {
-				dev->tx_queue_len = orig_len;
+			err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
+			if (err)
 				return err;
-			}
 		}
 		return 0;
 

From 3ffe64f1a641b80a82d9ef4efa7a05ce69049871 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <sthemmin@microsoft.com>
Date: Fri, 29 Jun 2018 14:07:16 -0700
Subject: [PATCH 154/382] hv_netvsc: split sub-channel setup into async and
 sync

When doing device hotplug the sub channel must be async to avoid
deadlock issues because device is discovered in softirq context.

When doing changes to MTU and number of channels, the setup
must be synchronous to avoid races such as when MTU and device
settings are done in a single ip command.

Reported-by: Thomas Walker <Thomas.Walker@twosigma.com>
Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
Fixes: 732e49850c5e ("netvsc: fix race on sub channel creation")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h   |  2 +-
 drivers/net/hyperv/netvsc.c       | 37 ++++++++++++++++++-
 drivers/net/hyperv/netvsc_drv.c   | 17 ++++++++-
 drivers/net/hyperv/rndis_filter.c | 61 ++++++-------------------------
 4 files changed, 65 insertions(+), 52 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 1a924b867b07..4b6e308199d2 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -210,7 +210,7 @@ int netvsc_recv_callback(struct net_device *net,
 void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
 
-void rndis_set_subchannel(struct work_struct *w);
+int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 5d5bd513847f..8e9d0ee1572b 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -65,6 +65,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
 			       VM_PKT_DATA_INBAND, 0);
 }
 
+/* Worker to setup sub channels on initial setup
+ * Initial hotplug event occurs in softirq context
+ * and can't wait for channels.
+ */
+static void netvsc_subchan_work(struct work_struct *w)
+{
+	struct netvsc_device *nvdev =
+		container_of(w, struct netvsc_device, subchan_work);
+	struct rndis_device *rdev;
+	int i, ret;
+
+	/* Avoid deadlock with device removal already under RTNL */
+	if (!rtnl_trylock()) {
+		schedule_work(w);
+		return;
+	}
+
+	rdev = nvdev->extension;
+	if (rdev) {
+		ret = rndis_set_subchannel(rdev->ndev, nvdev);
+		if (ret == 0) {
+			netif_device_attach(rdev->ndev);
+		} else {
+			/* fallback to only primary channel */
+			for (i = 1; i < nvdev->num_chn; i++)
+				netif_napi_del(&nvdev->chan_table[i].napi);
+
+			nvdev->max_chn = 1;
+			nvdev->num_chn = 1;
+		}
+	}
+
+	rtnl_unlock();
+}
+
 static struct netvsc_device *alloc_net_device(void)
 {
 	struct netvsc_device *net_device;
@@ -81,7 +116,7 @@ static struct netvsc_device *alloc_net_device(void)
 
 	init_completion(&net_device->channel_init_wait);
 	init_waitqueue_head(&net_device->subchan_open);
-	INIT_WORK(&net_device->subchan_work, rndis_set_subchannel);
+	INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);
 
 	return net_device;
 }
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index fe2256bf1d13..dd1d6e115145 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -905,8 +905,20 @@ static int netvsc_attach(struct net_device *ndev,
 	if (IS_ERR(nvdev))
 		return PTR_ERR(nvdev);
 
-	/* Note: enable and attach happen when sub-channels setup */
+	if (nvdev->num_chn > 1) {
+		ret = rndis_set_subchannel(ndev, nvdev);
 
+		/* if unavailable, just proceed with one queue */
+		if (ret) {
+			nvdev->max_chn = 1;
+			nvdev->num_chn = 1;
+		}
+	}
+
+	/* In any case device is now ready */
+	netif_device_attach(ndev);
+
+	/* Note: enable and attach happen when sub-channels setup */
 	netif_carrier_off(ndev);
 
 	if (netif_running(ndev)) {
@@ -2089,6 +2101,9 @@ static int netvsc_probe(struct hv_device *dev,
 
 	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
 
+	if (nvdev->num_chn > 1)
+		schedule_work(&nvdev->subchan_work);
+
 	/* hw_features computed in rndis_netdev_set_hwcaps() */
 	net->features = net->hw_features |
 		NETIF_F_HIGHDMA | NETIF_F_SG |
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 5428bb261102..9b4e3c3787e5 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1062,29 +1062,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
  * This breaks overlap of processing the host message for the
  * new primary channel with the initialization of sub-channels.
  */
-void rndis_set_subchannel(struct work_struct *w)
+int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev)
 {
-	struct netvsc_device *nvdev
-		= container_of(w, struct netvsc_device, subchan_work);
 	struct nvsp_message *init_packet = &nvdev->channel_init_pkt;
-	struct net_device_context *ndev_ctx;
-	struct rndis_device *rdev;
-	struct net_device *ndev;
-	struct hv_device *hv_dev;
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	struct hv_device *hv_dev = ndev_ctx->device_ctx;
+	struct rndis_device *rdev = nvdev->extension;
 	int i, ret;
 
-	if (!rtnl_trylock()) {
-		schedule_work(w);
-		return;
-	}
-
-	rdev = nvdev->extension;
-	if (!rdev)
-		goto unlock;	/* device was removed */
-
-	ndev = rdev->ndev;
-	ndev_ctx = netdev_priv(ndev);
-	hv_dev = ndev_ctx->device_ctx;
+	ASSERT_RTNL();
 
 	memset(init_packet, 0, sizeof(struct nvsp_message));
 	init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
@@ -1100,13 +1086,13 @@ void rndis_set_subchannel(struct work_struct *w)
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret) {
 		netdev_err(ndev, "sub channel allocate send failed: %d\n", ret);
-		goto failed;
+		return ret;
 	}
 
 	wait_for_completion(&nvdev->channel_init_wait);
 	if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
 		netdev_err(ndev, "sub channel request failed\n");
-		goto failed;
+		return -EIO;
 	}
 
 	nvdev->num_chn = 1 +
@@ -1125,21 +1111,7 @@ void rndis_set_subchannel(struct work_struct *w)
 	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
 		ndev_ctx->tx_table[i] = i % nvdev->num_chn;
 
-	netif_device_attach(ndev);
-	rtnl_unlock();
-	return;
-
-failed:
-	/* fallback to only primary channel */
-	for (i = 1; i < nvdev->num_chn; i++)
-		netif_napi_del(&nvdev->chan_table[i].napi);
-
-	nvdev->max_chn = 1;
-	nvdev->num_chn = 1;
-
-	netif_device_attach(ndev);
-unlock:
-	rtnl_unlock();
+	return 0;
 }
 
 static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
@@ -1360,21 +1332,12 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 		netif_napi_add(net, &net_device->chan_table[i].napi,
 			       netvsc_poll, NAPI_POLL_WEIGHT);
 
-	if (net_device->num_chn > 1)
-		schedule_work(&net_device->subchan_work);
+	return net_device;
 
 out:
-	/* if unavailable, just proceed with one queue */
-	if (ret) {
-		net_device->max_chn = 1;
-		net_device->num_chn = 1;
-	}
-
-	/* No sub channels, device is ready */
-	if (net_device->num_chn == 1)
-		netif_device_attach(net);
-
-	return net_device;
+	/* setting up multiple channels failed */
+	net_device->max_chn = 1;
+	net_device->num_chn = 1;
 
 err_dev_remv:
 	rndis_filter_device_remove(dev, net_device);

From 35e8c7ba0863acadd4b9badd09740af7a8331125 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Fri, 29 Jun 2018 14:32:15 -0700
Subject: [PATCH 155/382] net: fib_rules: bring back rule_exists to match rule
 during add

After commit f9d4b0c1e969 ("fib_rules: move common handling of newrule
delrule msgs into fib_nl2rule"), rule_exists got replaced by rule_find
for existing rule lookup in both the add and del paths. While this
is good for the delete path, it solves a few problems but opens up
a few invalid key matches in the add path.

$ip -4 rule add table main tos 10 fwmark 1
$ip -4 rule add table main tos 10
RTNETLINK answers: File exists

The problem here is rule_find does not check if the key masks in
the new and old rule are the same and hence ends up matching a more
secific rule. Rule key masks cannot be easily compared today without
an elaborate if-else block. Its best to introduce key masks for easier
and accurate rule comparison in the future. Until then, due to fear of
regressions this patch re-introduces older loose rule_exists during add.
Also fixes both rule_exists and rule_find to cover missing attributes.

Fixes: f9d4b0c1e969 ("fib_rules: move common handling of newrule delrule msgs into fib_nl2rule")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 72 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index bc8425d81022..f64aa13811ea 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -444,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
 		if (rule->ip_proto && r->ip_proto != rule->ip_proto)
 			continue;
 
+		if (rule->proto && r->proto != rule->proto)
+			continue;
+
 		if (fib_rule_port_range_set(&rule->sport_range) &&
 		    !fib_rule_port_range_compare(&r->sport_range,
 						 &rule->sport_range))
@@ -653,6 +656,73 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return err;
 }
 
+static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
+		       struct nlattr **tb, struct fib_rule *rule)
+{
+	struct fib_rule *r;
+
+	list_for_each_entry(r, &ops->rules_list, list) {
+		if (r->action != rule->action)
+			continue;
+
+		if (r->table != rule->table)
+			continue;
+
+		if (r->pref != rule->pref)
+			continue;
+
+		if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
+			continue;
+
+		if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
+			continue;
+
+		if (r->mark != rule->mark)
+			continue;
+
+		if (r->suppress_ifgroup != rule->suppress_ifgroup)
+			continue;
+
+		if (r->suppress_prefixlen != rule->suppress_prefixlen)
+			continue;
+
+		if (r->mark_mask != rule->mark_mask)
+			continue;
+
+		if (r->tun_id != rule->tun_id)
+			continue;
+
+		if (r->fr_net != rule->fr_net)
+			continue;
+
+		if (r->l3mdev != rule->l3mdev)
+			continue;
+
+		if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+		    !uid_eq(r->uid_range.end, rule->uid_range.end))
+			continue;
+
+		if (r->ip_proto != rule->ip_proto)
+			continue;
+
+		if (r->proto != rule->proto)
+			continue;
+
+		if (!fib_rule_port_range_compare(&r->sport_range,
+						 &rule->sport_range))
+			continue;
+
+		if (!fib_rule_port_range_compare(&r->dport_range,
+						 &rule->dport_range))
+			continue;
+
+		if (!ops->compare(r, frh, tb))
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		   struct netlink_ext_ack *extack)
 {
@@ -687,7 +757,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto errout;
 
 	if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
-	    rule_find(ops, frh, tb, rule, user_priority)) {
+	    rule_exists(ops, frh, tb, rule)) {
 		err = -EEXIST;
 		goto errout_free;
 	}

From c1985cefd844e26bd19673a6df8d8f0b1918c2db Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 28 Jun 2018 09:56:55 -0700
Subject: [PATCH 156/382] acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always
 return a value

cmd_rc is passed in by reference to the acpi_nfit_ctl() function and the
caller expects a value returned. However, when the package is pass through
via the ND_CMD_CALL command, cmd_rc is not touched. Make sure cmd_rc is
always set.

Fixes: aef253382266 ("libnvdimm, nfit: centralize command status translation")

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index d15814e1727f..471402cee1f1 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -408,6 +408,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	const guid_t *guid;
 	int rc, i;
 
+	*cmd_rc = -EINVAL;
 	func = cmd;
 	if (cmd == ND_CMD_CALL) {
 		call_pkg = buf;
@@ -518,6 +519,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		 * If we return an error (like elsewhere) then caller wouldn't
 		 * be able to rely upon data returned to make calculation.
 		 */
+		*cmd_rc = 0;
 		return 0;
 	}
 

From 1273c253c32b9a073a4d8921ed079177ccc7c8af Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Mon, 18 Jun 2018 17:17:02 -0600
Subject: [PATCH 157/382] tools/testing/nvdimm: advertise a write cache for
 nfit_test

Commit 546eb0317cfa "libnvdimm, pmem: Do not flush power-fail protected CPU caches"
fixed the write_cache detection to correctly show the lack of a write
cache based on the platform capabilities described in the ACPI NFIT. The
nfit_test unit tests expected a write cache to be present, so change the
nfit test namespaces to only advertise a persistence domain limited to
the memory controller. This allows the kernel to show a write_cache
attribute, and the test behaviour remains unchanged.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a8fb63edcf89..e2926f72a821 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1991,8 +1991,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
 	pcap->header.length = sizeof(*pcap);
 	pcap->highest_capability = 1;
-	pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
-		ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+	pcap->capabilities = ACPI_NFIT_CAPABILITY_MEM_FLUSH;
 	offset += pcap->header.length;
 
 	if (t->setup_hotplug) {

From bb94b55af3461e26b32f0e23d455abeae0cfca5d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Fri, 29 Jun 2018 11:31:50 -0600
Subject: [PATCH 158/382] vfio: Use get_user_pages_longterm correctly

The patch noted in the fixes below converted get_user_pages_fast() to
get_user_pages_longterm(), however the two calls differ in a few ways.

First _fast() is documented to not require the mmap_sem, while _longterm()
is documented to need it. Hold the mmap sem as required.

Second, _fast accepts an 'int write' while _longterm uses 'unsigned int
gup_flags', so the expression '!!(prot & IOMMU_WRITE)' is only working by
luck as FOLL_WRITE is currently == 0x1. Use the expected FOLL_WRITE
constant instead.

Fixes: 94db151dc892 ("vfio: disable filesystem-dax page pinning")
Cc: <stable@vger.kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2c75b33db4ac..3e5b17710a4f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -343,18 +343,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 	struct page *page[1];
 	struct vm_area_struct *vma;
 	struct vm_area_struct *vmas[1];
+	unsigned int flags = 0;
 	int ret;
 
+	if (prot & IOMMU_WRITE)
+		flags |= FOLL_WRITE;
+
+	down_read(&mm->mmap_sem);
 	if (mm == current->mm) {
-		ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE),
-					      page, vmas);
+		ret = get_user_pages_longterm(vaddr, 1, flags, page, vmas);
 	} else {
-		unsigned int flags = 0;
-
-		if (prot & IOMMU_WRITE)
-			flags |= FOLL_WRITE;
-
-		down_read(&mm->mmap_sem);
 		ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
 					    vmas, NULL);
 		/*
@@ -368,8 +366,8 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 			ret = -EOPNOTSUPP;
 			put_page(page[0]);
 		}
-		up_read(&mm->mmap_sem);
 	}
+	up_read(&mm->mmap_sem);
 
 	if (ret == 1) {
 		*pfn = page_to_pfn(page[0]);

From 26112ddc254c98681b224aa9ededefc01b6e02d2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 30 Jun 2018 23:19:33 +0200
Subject: [PATCH 159/382] PCI / ACPI / PM: Resume bridges w/o drivers on
 suspend-to-RAM

It is reported that commit c62ec4610c40 (PM / core: Fix direct_complete
handling for devices with no callbacks) introduced a system suspend
regression on Samsung 305V4A by allowing a PCI bridge (not a PCIe
port) to stay in D3 over suspend-to-RAM, which is a side effect of
setting power.direct_complete for the children of that bridge that
have no PM callbacks.

On the majority of systems PCI bridges are not allowed to be
runtime-suspended (the power/control sysfs attribute is set to "on"
for them by default), but user space can change that setting and if
it does so and a given bridge has no children with PM callbacks, the
direct_complete optimization will be applied to it and it will stay
in suspend over system suspend.  Apparently, that confuses the
platform firmware on the affected machine and that may very well
happen elsewhere, so avoid the direct_complete optimization for
PCI bridges with no drivers (if there is a driver, it should take
care of the PM handling) on suspend-to-RAM altogether (that should
not matter for suspend-to-idle as platform firmware is not involved
in it).

Fixes: c62ec4610c40 (PM / core: Fix direct_complete handling for devices with no callbacks)
Link: https://bugzilla.kernel.org/show_bug.cgi?id=199941
Reported-by: n0000b.n000b@gmail.com
Tested-by: n0000b.n000b@gmail.com
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: 4.15+ <stable@vger.kernel.org> # 4.15+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/pci-acpi.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 65113b6eed14..89ee6a2b6eb8 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -629,6 +629,18 @@ static bool acpi_pci_need_resume(struct pci_dev *dev)
 {
 	struct acpi_device *adev = ACPI_COMPANION(&dev->dev);
 
+	/*
+	 * In some cases (eg. Samsung 305V4A) leaving a bridge in suspend over
+	 * system-wide suspend/resume confuses the platform firmware, so avoid
+	 * doing that, unless the bridge has a driver that should take care of
+	 * the PM handling.  According to Section 16.1.6 of ACPI 6.2, endpoint
+	 * devices are expected to be in D3 before invoking the S3 entry path
+	 * from the firmware, so they should not be affected by this issue.
+	 */
+	if (pci_is_bridge(dev) && !dev->driver &&
+	    acpi_target_system_state() != ACPI_STATE_S0)
+		return true;
+
 	if (!adev || !acpi_device_power_manageable(adev))
 		return false;
 

From a0d5f3b69af7733f3173a8e19d51f68a08017c76 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 30 Jun 2018 23:24:04 +0200
Subject: [PATCH 160/382] ACPICA: Drop leading newlines from error messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 5088814a6e93 (ACPICA: AML parser: attempt to continue loading
table after error) unintentionally added leading newlines to error
messages emitted by ACPICA which caused unexpected things to be
printed to the kernel log.  Drop these newlines (which effectively
reverts the part of commit 5088814a6e93 adding them).

Fixes: 5088814a6e93 (ACPICA: AML parser: attempt to continue loading table after error)
Reported-by: Toralf Förster <toralf.foerster@gmx.de>
Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: 4.17+ <stable@vger.kernel.org> # 4.17+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/uterror.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/acpica/uterror.c b/drivers/acpi/acpica/uterror.c
index 5a64ddaed8a3..e47430272692 100644
--- a/drivers/acpi/acpica/uterror.c
+++ b/drivers/acpi/acpica/uterror.c
@@ -182,19 +182,19 @@ acpi_ut_prefixed_namespace_error(const char *module_name,
 	switch (lookup_status) {
 	case AE_ALREADY_EXISTS:
 
-		acpi_os_printf("\n" ACPI_MSG_BIOS_ERROR);
+		acpi_os_printf(ACPI_MSG_BIOS_ERROR);
 		message = "Failure creating";
 		break;
 
 	case AE_NOT_FOUND:
 
-		acpi_os_printf("\n" ACPI_MSG_BIOS_ERROR);
+		acpi_os_printf(ACPI_MSG_BIOS_ERROR);
 		message = "Could not resolve";
 		break;
 
 	default:
 
-		acpi_os_printf("\n" ACPI_MSG_ERROR);
+		acpi_os_printf(ACPI_MSG_ERROR);
 		message = "Failure resolving";
 		break;
 	}

From 9901c5d77e969d8215a8e8d087ef02e6feddc84c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 30 Jun 2018 06:17:36 -0700
Subject: [PATCH 161/382] bpf: sockmap, fix crash when ipv6 sock is added

This fixes a crash where we assign tcp_prot to IPv6 sockets instead
of tcpv6_prot.

Previously we overwrote the sk->prot field with tcp_prot even in the
AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
are used.

Tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
crashing case here.

Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 58 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 81d0c55a77aa..bfdfbd199c3b 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
+static void bpf_tcp_close(struct sock *sk, long timeout);
 
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 {
@@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
 	return !empty;
 }
 
-static struct proto tcp_bpf_proto;
+enum {
+	SOCKMAP_IPV4,
+	SOCKMAP_IPV6,
+	SOCKMAP_NUM_PROTS,
+};
+
+enum {
+	SOCKMAP_BASE,
+	SOCKMAP_TX,
+	SOCKMAP_NUM_CONFIGS,
+};
+
+static struct proto *saved_tcpv6_prot __read_mostly;
+static DEFINE_SPINLOCK(tcpv6_prot_lock);
+static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
+static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
+			 struct proto *base)
+{
+	prot[SOCKMAP_BASE]			= *base;
+	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
+	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
+	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
+
+	prot[SOCKMAP_TX]			= prot[SOCKMAP_BASE];
+	prot[SOCKMAP_TX].sendmsg		= bpf_tcp_sendmsg;
+	prot[SOCKMAP_TX].sendpage		= bpf_tcp_sendpage;
+}
+
+static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
+{
+	int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
+	int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
+
+	sk->sk_prot = &bpf_tcp_prots[family][conf];
+}
+
 static int bpf_tcp_init(struct sock *sk)
 {
 	struct smap_psock *psock;
@@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk)
 	psock->save_close = sk->sk_prot->close;
 	psock->sk_proto = sk->sk_prot;
 
-	if (psock->bpf_tx_msg) {
-		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
-		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
-		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
-		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
+	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
+	if (sk->sk_family == AF_INET6 &&
+	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+		spin_lock_bh(&tcpv6_prot_lock);
+		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+			build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
+			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+		}
+		spin_unlock_bh(&tcpv6_prot_lock);
 	}
-
-	sk->sk_prot = &tcp_bpf_proto;
+	update_sk_prot(sk, psock);
 	rcu_read_unlock();
 	return 0;
 }
@@ -1111,8 +1150,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
 
 static int bpf_tcp_ulp_register(void)
 {
-	tcp_bpf_proto = tcp_prot;
-	tcp_bpf_proto.close = bpf_tcp_close;
+	build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
 	/* Once BPF TX ULP is registered it is never unregistered. It
 	 * will be in the ULP list for the lifetime of the system. Doing
 	 * duplicate registers is not a problem.

From 54fedb42c6537dcb0102e4a58a88456a6286999d Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 30 Jun 2018 06:17:41 -0700
Subject: [PATCH 162/382] bpf: sockmap, fix smap_list_map_remove when psock is
 in many maps

If a hashmap is free'd with open socks it removes the reference to
the hash entry from the psock. If that is the last reference to the
psock then it will also be free'd by the reference counting logic.
However the current logic that removes the hash reference from the
list of references is broken. In smap_list_remove() we first check
if the sockmap entry matches and then check if the hashmap entry
matches. But, the sockmap entry sill always match because its NULL in
this case which causes the first entry to be removed from the list.
If this is always the "right" entry (because the user adds/removes
entries in order) then everything is OK but otherwise a subsequent
bpf_tcp_close() may reference a free'd object.

To fix this create two list handlers one for sockmap and one for
sockhash.

Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com
Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index bfdfbd199c3b..65a937ed5762 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1602,17 +1602,27 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static void smap_list_remove(struct smap_psock *psock,
-			     struct sock **entry,
-			     struct htab_elem *hash_link)
+static void smap_list_map_remove(struct smap_psock *psock,
+				 struct sock **entry)
 {
 	struct smap_psock_map_entry *e, *tmp;
 
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		if (e->entry == entry || e->hash_link == hash_link) {
+		if (e->entry == entry)
+			list_del(&e->list);
+	}
+}
+
+static void smap_list_hash_remove(struct smap_psock *psock,
+				  struct htab_elem *hash_link)
+{
+	struct smap_psock_map_entry *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+		struct htab_elem *c = e->hash_link;
+
+		if (c == hash_link)
 			list_del(&e->list);
-			break;
-		}
 	}
 }
 
@@ -1647,7 +1657,7 @@ static void sock_map_free(struct bpf_map *map)
 		 * to be null and queued for garbage collection.
 		 */
 		if (likely(psock)) {
-			smap_list_remove(psock, &stab->sock_map[i], NULL);
+			smap_list_map_remove(psock, &stab->sock_map[i]);
 			smap_release_sock(psock, sock);
 		}
 		write_unlock_bh(&sock->sk_callback_lock);
@@ -1706,7 +1716,7 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 
 	if (psock->bpf_parse)
 		smap_stop_sock(psock, sock);
-	smap_list_remove(psock, &stab->sock_map[k], NULL);
+	smap_list_map_remove(psock, &stab->sock_map[k]);
 	smap_release_sock(psock, sock);
 out:
 	write_unlock_bh(&sock->sk_callback_lock);
@@ -1908,7 +1918,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		struct smap_psock *opsock = smap_psock_sk(osock);
 
 		write_lock_bh(&osock->sk_callback_lock);
-		smap_list_remove(opsock, &stab->sock_map[i], NULL);
+		smap_list_map_remove(opsock, &stab->sock_map[i]);
 		smap_release_sock(opsock, osock);
 		write_unlock_bh(&osock->sk_callback_lock);
 	}
@@ -2142,7 +2152,7 @@ static void sock_hash_free(struct bpf_map *map)
 			 * (psock) to be null and queued for garbage collection.
 			 */
 			if (likely(psock)) {
-				smap_list_remove(psock, NULL, l);
+				smap_list_hash_remove(psock, l);
 				smap_release_sock(psock, sock);
 			}
 			write_unlock_bh(&sock->sk_callback_lock);
@@ -2322,7 +2332,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		psock = smap_psock_sk(l_old->sk);
 
 		hlist_del_rcu(&l_old->hash_node);
-		smap_list_remove(psock, NULL, l_old);
+		smap_list_hash_remove(psock, l_old);
 		smap_release_sock(psock, l_old->sk);
 		free_htab_elem(htab, l_old);
 	}
@@ -2390,7 +2400,7 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
 		 * to be null and queued for garbage collection.
 		 */
 		if (likely(psock)) {
-			smap_list_remove(psock, NULL, l);
+			smap_list_hash_remove(psock, l);
 			smap_release_sock(psock, sock);
 		}
 		write_unlock_bh(&sock->sk_callback_lock);

From e9db4ef6bf4ca9894bb324c76e01b8f1a16b2650 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 30 Jun 2018 06:17:47 -0700
Subject: [PATCH 163/382] bpf: sockhash fix omitted bucket lock in sock_close

First the sk_callback_lock() was being used to protect both the
sock callback hooks and the psock->maps list. This got overly
convoluted after the addition of sockhash (in sockmap it made
some sense because masp and callbacks were tightly coupled) so
lets split out a specific lock for maps and only use the callback
lock for its intended purpose. This fixes a couple cases where
we missed using maps lock when it was in fact needed. Also this
makes it easier to follow the code because now we can put the
locking closer to the actual code its serializing.

Next, in sock_hash_delete_elem() the pattern was as follows,

  sock_hash_delete_elem()
     [...]
     spin_lock(bucket_lock)
     l = lookup_elem_raw()
     if (l)
        hlist_del_rcu()
        write_lock(sk_callback_lock)
         .... destroy psock ...
        write_unlock(sk_callback_lock)
     spin_unlock(bucket_lock)

The ordering is necessary because we only know the {p}sock after
dereferencing the hash table which we can't do unless we have the
bucket lock held. Once we have the bucket lock and the psock element
it is deleted from the hashmap to ensure any other path doing a lookup
will fail. Finally, the refcnt is decremented and if zero the psock
is destroyed.

In parallel with the above (or free'ing the map) a tcp close event
may trigger tcp_close(). Which at the moment omits the bucket lock
altogether (oops!) where the flow looks like this,

  bpf_tcp_close()
     [...]
     write_lock(sk_callback_lock)
     for each psock->maps // list of maps this sock is part of
         hlist_del_rcu(ref_hash_node);
         .... destroy psock ...
     write_unlock(sk_callback_lock)

Obviously, and demonstrated by syzbot, this is broken because
we can have multiple threads deleting entries via hlist_del_rcu().

To fix this we might be tempted to wrap the hlist operation in a
bucket lock but that would create a lock inversion problem. In
summary to follow locking rules the psocks maps list needs the
sk_callback_lock (after this patch maps_lock) but we need the bucket
lock to do the hlist_del_rcu.

To resolve the lock inversion problem pop the head of the maps list
repeatedly and remove the reference until no more are left. If a
delete happens in parallel from the BPF API that is OK as well because
it will do a similar action, lookup the lock in the map/hash, delete
it from the map/hash, and dec the refcnt. We check for this case
before doing a destroy on the psock to ensure we don't have two
threads tearing down a psock. The new logic is as follows,

  bpf_tcp_close()
  e = psock_map_pop(psock->maps) // done with map lock
  bucket_lock() // lock hash list bucket
  l = lookup_elem_raw(head, hash, key, key_size);
  if (l) {
     //only get here if elmnt was not already removed
     hlist_del_rcu()
     ... destroy psock...
  }
  bucket_unlock()

And finally for all the above to work add missing locking around  map
operations per above. Then add RCU annotations and use
rcu_dereference/rcu_assign_pointer to manage values relying on RCU so
that the object is not free'd from sock_hash_free() while it is being
referenced in bpf_tcp_close().

Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com
Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 145 ++++++++++++++++++++++++++++---------------
 1 file changed, 96 insertions(+), 49 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 65a937ed5762..ac09b35a9567 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -72,6 +72,7 @@ struct bpf_htab {
 	u32 n_buckets;
 	u32 elem_size;
 	struct bpf_sock_progs progs;
+	struct rcu_head rcu;
 };
 
 struct htab_elem {
@@ -89,8 +90,8 @@ enum smap_psock_state {
 struct smap_psock_map_entry {
 	struct list_head list;
 	struct sock **entry;
-	struct htab_elem *hash_link;
-	struct bpf_htab *htab;
+	struct htab_elem __rcu *hash_link;
+	struct bpf_htab __rcu *htab;
 };
 
 struct smap_psock {
@@ -120,6 +121,7 @@ struct smap_psock {
 	struct bpf_prog *bpf_parse;
 	struct bpf_prog *bpf_verdict;
 	struct list_head maps;
+	spinlock_t maps_lock;
 
 	/* Back reference used when sock callback trigger sockmap operations */
 	struct sock *sock;
@@ -258,16 +260,54 @@ static void bpf_tcp_release(struct sock *sk)
 	rcu_read_unlock();
 }
 
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
+					 u32 hash, void *key, u32 key_size)
+{
+	struct htab_elem *l;
+
+	hlist_for_each_entry_rcu(l, head, hash_node) {
+		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+			return l;
+	}
+
+	return NULL;
+}
+
+static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+	return &__select_bucket(htab, hash)->head;
+}
+
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
 	atomic_dec(&htab->count);
 	kfree_rcu(l, rcu);
 }
 
+static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
+						  struct smap_psock *psock)
+{
+	struct smap_psock_map_entry *e;
+
+	spin_lock_bh(&psock->maps_lock);
+	e = list_first_entry_or_null(&psock->maps,
+				     struct smap_psock_map_entry,
+				     list);
+	if (e)
+		list_del(&e->list);
+	spin_unlock_bh(&psock->maps_lock);
+	return e;
+}
+
 static void bpf_tcp_close(struct sock *sk, long timeout)
 {
 	void (*close_fun)(struct sock *sk, long timeout);
-	struct smap_psock_map_entry *e, *tmp;
+	struct smap_psock_map_entry *e;
 	struct sk_msg_buff *md, *mtmp;
 	struct smap_psock *psock;
 	struct sock *osk;
@@ -286,7 +326,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	 */
 	close_fun = psock->save_close;
 
-	write_lock_bh(&sk->sk_callback_lock);
 	if (psock->cork) {
 		free_start_sg(psock->sock, psock->cork);
 		kfree(psock->cork);
@@ -299,20 +338,38 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 		kfree(md);
 	}
 
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+	e = psock_map_pop(sk, psock);
+	while (e) {
 		if (e->entry) {
 			osk = cmpxchg(e->entry, sk, NULL);
 			if (osk == sk) {
-				list_del(&e->list);
 				smap_release_sock(psock, sk);
 			}
 		} else {
-			hlist_del_rcu(&e->hash_link->hash_node);
-			smap_release_sock(psock, e->hash_link->sk);
-			free_htab_elem(e->htab, e->hash_link);
+			struct htab_elem *link = rcu_dereference(e->hash_link);
+			struct bpf_htab *htab = rcu_dereference(e->htab);
+			struct hlist_head *head;
+			struct htab_elem *l;
+			struct bucket *b;
+
+			b = __select_bucket(htab, link->hash);
+			head = &b->head;
+			raw_spin_lock_bh(&b->lock);
+			l = lookup_elem_raw(head,
+					    link->hash, link->key,
+					    htab->map.key_size);
+			/* If another thread deleted this object skip deletion.
+			 * The refcnt on psock may or may not be zero.
+			 */
+			if (l) {
+				hlist_del_rcu(&link->hash_node);
+				smap_release_sock(psock, link->sk);
+				free_htab_elem(htab, link);
+			}
+			raw_spin_unlock_bh(&b->lock);
 		}
+		e = psock_map_pop(sk, psock);
 	}
-	write_unlock_bh(&sk->sk_callback_lock);
 	rcu_read_unlock();
 	close_fun(sk, timeout);
 }
@@ -1395,7 +1452,9 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
 {
 	if (refcount_dec_and_test(&psock->refcnt)) {
 		tcp_cleanup_ulp(sock);
+		write_lock_bh(&sock->sk_callback_lock);
 		smap_stop_sock(psock, sock);
+		write_unlock_bh(&sock->sk_callback_lock);
 		clear_bit(SMAP_TX_RUNNING, &psock->state);
 		rcu_assign_sk_user_data(sock, NULL);
 		call_rcu_sched(&psock->rcu, smap_destroy_psock);
@@ -1546,6 +1605,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock, int node)
 	INIT_LIST_HEAD(&psock->maps);
 	INIT_LIST_HEAD(&psock->ingress);
 	refcount_set(&psock->refcnt, 1);
+	spin_lock_init(&psock->maps_lock);
 
 	rcu_assign_sk_user_data(sock, psock);
 	sock_hold(sock);
@@ -1607,10 +1667,12 @@ static void smap_list_map_remove(struct smap_psock *psock,
 {
 	struct smap_psock_map_entry *e, *tmp;
 
+	spin_lock_bh(&psock->maps_lock);
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
 		if (e->entry == entry)
 			list_del(&e->list);
 	}
+	spin_unlock_bh(&psock->maps_lock);
 }
 
 static void smap_list_hash_remove(struct smap_psock *psock,
@@ -1618,12 +1680,14 @@ static void smap_list_hash_remove(struct smap_psock *psock,
 {
 	struct smap_psock_map_entry *e, *tmp;
 
+	spin_lock_bh(&psock->maps_lock);
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		struct htab_elem *c = e->hash_link;
+		struct htab_elem *c = rcu_dereference(e->hash_link);
 
 		if (c == hash_link)
 			list_del(&e->list);
 	}
+	spin_unlock_bh(&psock->maps_lock);
 }
 
 static void sock_map_free(struct bpf_map *map)
@@ -1649,7 +1713,6 @@ static void sock_map_free(struct bpf_map *map)
 		if (!sock)
 			continue;
 
-		write_lock_bh(&sock->sk_callback_lock);
 		psock = smap_psock_sk(sock);
 		/* This check handles a racing sock event that can get the
 		 * sk_callback_lock before this case but after xchg happens
@@ -1660,7 +1723,6 @@ static void sock_map_free(struct bpf_map *map)
 			smap_list_map_remove(psock, &stab->sock_map[i]);
 			smap_release_sock(psock, sock);
 		}
-		write_unlock_bh(&sock->sk_callback_lock);
 	}
 	rcu_read_unlock();
 
@@ -1709,7 +1771,6 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 	if (!sock)
 		return -EINVAL;
 
-	write_lock_bh(&sock->sk_callback_lock);
 	psock = smap_psock_sk(sock);
 	if (!psock)
 		goto out;
@@ -1719,7 +1780,6 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
 	smap_list_map_remove(psock, &stab->sock_map[k]);
 	smap_release_sock(psock, sock);
 out:
-	write_unlock_bh(&sock->sk_callback_lock);
 	return 0;
 }
 
@@ -1800,7 +1860,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
 		}
 	}
 
-	write_lock_bh(&sock->sk_callback_lock);
 	psock = smap_psock_sk(sock);
 
 	/* 2. Do not allow inheriting programs if psock exists and has
@@ -1857,7 +1916,9 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
 		if (err)
 			goto out_free;
 		smap_init_progs(psock, verdict, parse);
+		write_lock_bh(&sock->sk_callback_lock);
 		smap_start_sock(psock, sock);
+		write_unlock_bh(&sock->sk_callback_lock);
 	}
 
 	/* 4. Place psock in sockmap for use and stop any programs on
@@ -1867,9 +1928,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
 	 */
 	if (map_link) {
 		e->entry = map_link;
+		spin_lock_bh(&psock->maps_lock);
 		list_add_tail(&e->list, &psock->maps);
+		spin_unlock_bh(&psock->maps_lock);
 	}
-	write_unlock_bh(&sock->sk_callback_lock);
 	return err;
 out_free:
 	smap_release_sock(psock, sock);
@@ -1880,7 +1942,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
 	}
 	if (tx_msg)
 		bpf_prog_put(tx_msg);
-	write_unlock_bh(&sock->sk_callback_lock);
 	kfree(e);
 	return err;
 }
@@ -1917,10 +1978,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 	if (osock) {
 		struct smap_psock *opsock = smap_psock_sk(osock);
 
-		write_lock_bh(&osock->sk_callback_lock);
 		smap_list_map_remove(opsock, &stab->sock_map[i]);
 		smap_release_sock(opsock, osock);
-		write_unlock_bh(&osock->sk_callback_lock);
 	}
 out:
 	return err;
@@ -2109,14 +2168,13 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
 	return ERR_PTR(err);
 }
 
-static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+static void __bpf_htab_free(struct rcu_head *rcu)
 {
-	return &htab->buckets[hash & (htab->n_buckets - 1)];
-}
+	struct bpf_htab *htab;
 
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &__select_bucket(htab, hash)->head;
+	htab = container_of(rcu, struct bpf_htab, rcu);
+	bpf_map_area_free(htab->buckets);
+	kfree(htab);
 }
 
 static void sock_hash_free(struct bpf_map *map)
@@ -2135,16 +2193,18 @@ static void sock_hash_free(struct bpf_map *map)
 	 */
 	rcu_read_lock();
 	for (i = 0; i < htab->n_buckets; i++) {
-		struct hlist_head *head = select_bucket(htab, i);
+		struct bucket *b = __select_bucket(htab, i);
+		struct hlist_head *head;
 		struct hlist_node *n;
 		struct htab_elem *l;
 
+		raw_spin_lock_bh(&b->lock);
+		head = &b->head;
 		hlist_for_each_entry_safe(l, n, head, hash_node) {
 			struct sock *sock = l->sk;
 			struct smap_psock *psock;
 
 			hlist_del_rcu(&l->hash_node);
-			write_lock_bh(&sock->sk_callback_lock);
 			psock = smap_psock_sk(sock);
 			/* This check handles a racing sock event that can get
 			 * the sk_callback_lock before this case but after xchg
@@ -2155,13 +2215,12 @@ static void sock_hash_free(struct bpf_map *map)
 				smap_list_hash_remove(psock, l);
 				smap_release_sock(psock, sock);
 			}
-			write_unlock_bh(&sock->sk_callback_lock);
-			kfree(l);
+			free_htab_elem(htab, l);
 		}
+		raw_spin_unlock_bh(&b->lock);
 	}
 	rcu_read_unlock();
-	bpf_map_area_free(htab->buckets);
-	kfree(htab);
+	call_rcu(&htab->rcu, __bpf_htab_free);
 }
 
 static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
@@ -2188,19 +2247,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
 	return l_new;
 }
 
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
-					 u32 hash, void *key, u32 key_size)
-{
-	struct htab_elem *l;
-
-	hlist_for_each_entry_rcu(l, head, hash_node) {
-		if (l->hash == hash && !memcmp(&l->key, key, key_size))
-			return l;
-	}
-
-	return NULL;
-}
-
 static inline u32 htab_map_hash(const void *key, u32 key_len)
 {
 	return jhash(key, key_len, 0);
@@ -2320,9 +2366,12 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 		goto bucket_err;
 	}
 
-	e->hash_link = l_new;
-	e->htab = container_of(map, struct bpf_htab, map);
+	rcu_assign_pointer(e->hash_link, l_new);
+	rcu_assign_pointer(e->htab,
+			   container_of(map, struct bpf_htab, map));
+	spin_lock_bh(&psock->maps_lock);
 	list_add_tail(&e->list, &psock->maps);
+	spin_unlock_bh(&psock->maps_lock);
 
 	/* add new element to the head of the list, so that
 	 * concurrent search will find it before old elem
@@ -2392,7 +2441,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
 		struct smap_psock *psock;
 
 		hlist_del_rcu(&l->hash_node);
-		write_lock_bh(&sock->sk_callback_lock);
 		psock = smap_psock_sk(sock);
 		/* This check handles a racing sock event that can get the
 		 * sk_callback_lock before this case but after xchg happens
@@ -2403,7 +2451,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
 			smap_list_hash_remove(psock, l);
 			smap_release_sock(psock, sock);
 		}
-		write_unlock_bh(&sock->sk_callback_lock);
 		free_htab_elem(htab, l);
 		ret = 0;
 	}

From caac76a5170eb508529bbff9d9300e9c57126444 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 30 Jun 2018 06:17:52 -0700
Subject: [PATCH 164/382] bpf: sockhash, add release routine

Add map_release_uref pointer to hashmap ops. This was dropped when
original sockhash code was ported into bpf-next before initial
commit.

Fixes: 81110384441a ("bpf: sockmap, add hash map support")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index ac09b35a9567..cf7b6a6dbd1f 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -2496,6 +2496,7 @@ const struct bpf_map_ops sock_hash_ops = {
 	.map_get_next_key = sock_hash_get_next_key,
 	.map_update_elem = sock_hash_update_elem,
 	.map_delete_elem = sock_hash_delete_elem,
+	.map_release_uref = sock_map_release,
 };
 
 BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,

From 06d793b114e9d922c03aa077ac6c5c51fdda2722 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick@shmanahar.org>
Date: Thu, 21 Jun 2018 19:10:00 +0100
Subject: [PATCH 165/382] ARM: dts: imx51-zii-rdu1: fix touchscreen pinctrl

The pinctrl settings were incorrect for the touchscreen interrupt line, causing
an interrupt storm. This change has been tested with both the atmel_mxt_ts and
RMI4 drivers on the RDU1 units.

The value 0x4 comes from the value of register IOMUXC_SW_PAD_CTL_PAD_CSI1_D8
from the old vendor kernel.

Signed-off-by: Nick Dyer <nick@shmanahar.org>
Fixes: ceef0396f367 ("ARM: dts: imx: add ZII RDU1 board")
Cc: <stable@vger.kernel.org> # 4.15+
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx51-zii-rdu1.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts
index df9eca94d812..8a878687197b 100644
--- a/arch/arm/boot/dts/imx51-zii-rdu1.dts
+++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts
@@ -770,7 +770,7 @@ MX51_PAD_NANDF_D15__GPIO3_25		0x21e6
 
 	pinctrl_ts: tsgrp {
 		fsl,pins = <
-			MX51_PAD_CSI1_D8__GPIO3_12		0x85
+			MX51_PAD_CSI1_D8__GPIO3_12		0x04
 			MX51_PAD_CSI1_D9__GPIO3_13		0x85
 		>;
 	};

From ae15a41a641449f536578b0d9ec0e4ade130deb5 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Sun, 1 Jul 2018 14:17:36 +0900
Subject: [PATCH 166/382] openrisc: entry: Fix delay slot exception detection

Originally in patch e6d20c55a4 ("openrisc: entry: Fix delay slot
detection") I fixed delay slot detection, but only for QEMU.  We missed
that hardware delay slot detection using delay slot exception flag (DSX)
was still broken.  This was because QEMU set the DSX flag in both
pre-exception supervision register (ESR) and supervision register (SR)
register, but on real hardware the DSX flag is only set on the SR
register during exceptions.

Fix this by carrying the DSX flag into the SR register during exception.
We also update the DSX flag read locations to read the value from the SR
register not the pt_regs SR register which represents ESR.  The ESR
should never have the DSX flag set.

In the process I updated/removed a few comments to match the current
state.  Including removing a comment saying that the DSX detection logic
was inefficient and needed to be rewritten.

I have tested this on QEMU with a patch ensuring it matches the hardware
specification.

Link: https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg00000.html
Fixes: e6d20c55a4 ("openrisc: entry: Fix delay slot detection")
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/kernel/entry.S | 8 +-------
 arch/openrisc/kernel/head.S  | 9 ++++++---
 arch/openrisc/kernel/traps.c | 2 +-
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index 690d55272ba6..0c826ad6e994 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -277,12 +277,6 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
 	l.addi  r3,r1,0                    // pt_regs
 	/* r4 set be EXCEPTION_HANDLE */   // effective address of fault
 
-	/*
-	 * __PHX__: TODO
-	 *
-	 * all this can be written much simpler. look at
-	 * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part
-	 */
 #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
 	l.lwz   r6,PT_PC(r3)               // address of an offending insn
 	l.lwz   r6,0(r6)                   // instruction that caused pf
@@ -314,7 +308,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
 
 #else
 
-	l.lwz   r6,PT_SR(r3)               // SR
+	l.mfspr r6,r0,SPR_SR               // SR
 	l.andi  r6,r6,SPR_SR_DSX           // check for delay slot exception
 	l.sfne  r6,r0                      // exception happened in delay slot
 	l.bnf   7f
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index fb02b2a1d6f2..9fc6b60140f0 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -210,8 +210,7 @@
  *	 r4  - EEAR     exception EA
  *	 r10 - current	pointing to current_thread_info struct
  *	 r12 - syscall  0, since we didn't come from syscall
- *	 r13 - temp	it actually contains new SR, not needed anymore
- *	 r31 - handler	address of the handler we'll jump to
+ *	 r30 - handler	address of the handler we'll jump to
  *
  *	 handler has to save remaining registers to the exception
  *	 ksp frame *before* tainting them!
@@ -244,6 +243,7 @@
 	/* r1 is KSP, r30 is __pa(KSP) */			;\
 	tophys  (r30,r1)					;\
 	l.sw    PT_GPR12(r30),r12				;\
+	/* r4 use for tmp before EA */				;\
 	l.mfspr r12,r0,SPR_EPCR_BASE				;\
 	l.sw    PT_PC(r30),r12					;\
 	l.mfspr r12,r0,SPR_ESR_BASE				;\
@@ -263,7 +263,10 @@
 	/* r12 == 1 if we come from syscall */			;\
 	CLEAR_GPR(r12)						;\
 	/* ----- turn on MMU ----- */				;\
-	l.ori	r30,r0,(EXCEPTION_SR)				;\
+	/* Carry DSX into exception SR */			;\
+	l.mfspr r30,r0,SPR_SR					;\
+	l.andi	r30,r30,SPR_SR_DSX				;\
+	l.ori	r30,r30,(EXCEPTION_SR)				;\
 	l.mtspr	r0,r30,SPR_ESR_BASE				;\
 	/* r30:	EA address of handler */			;\
 	LOAD_SYMBOL_2_GPR(r30,handler)				;\
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index fac246e6f37a..d8981cbb852a 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -300,7 +300,7 @@ static inline int in_delay_slot(struct pt_regs *regs)
 		return 0;
 	}
 #else
-	return regs->sr & SPR_SR_DSX;
+	return mfspr(SPR_SR) & SPR_SR_DSX;
 #endif
 }
 

From 1236f22fbae15df3736ab4a984c64c0c6ee6254c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@helsinki.fi>
Date: Fri, 29 Jun 2018 13:07:53 +0300
Subject: [PATCH 167/382] tcp: prevent bogus FRTO undos with non-SACK flows
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If SACK is not enabled and the first cumulative ACK after the RTO
retransmission covers more than the retransmitted skb, a spurious
FRTO undo will trigger (assuming FRTO is enabled for that RTO).
The reason is that any non-retransmitted segment acknowledged will
set FLAG_ORIG_SACK_ACKED in tcp_clean_rtx_queue even if there is
no indication that it would have been delivered for real (the
scoreboard is not kept with TCPCB_SACKED_ACKED bits in the non-SACK
case so the check for that bit won't help like it does with SACK).
Having FLAG_ORIG_SACK_ACKED set results in the spurious FRTO undo
in tcp_process_loss.

We need to use more strict condition for non-SACK case and check
that none of the cumulatively ACKed segments were retransmitted
to prove that progress is due to original transmissions. Only then
keep FLAG_ORIG_SACK_ACKED set, allowing FRTO undo to proceed in
non-SACK case.

(FLAG_ORIG_SACK_ACKED is planned to be renamed to FLAG_ORIG_PROGRESS
to better indicate its purpose but to keep this change minimal, it
will be done in another patch).

Besides burstiness and congestion control violations, this problem
can result in RTO loop: When the loss recovery is prematurely
undoed, only new data will be transmitted (if available) and
the next retransmission can occur only after a new RTO which in case
of multiple losses (that are not for consecutive packets) requires
one RTO per loss to recover.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Tested-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 045d930d01a9..8e5522c6833a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3181,6 +3181,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
 
 		if (tcp_is_reno(tp)) {
 			tcp_remove_reno_sacks(sk, pkts_acked);
+
+			/* If any of the cumulatively ACKed segments was
+			 * retransmitted, non-SACK case cannot confirm that
+			 * progress was due to original transmission due to
+			 * lack of TCPCB_SACKED_ACKED bits even if some of
+			 * the packets may have been never retransmitted.
+			 */
+			if (flag & FLAG_RETRANS_DATA_ACKED)
+				flag &= ~FLAG_ORIG_SACK_ACKED;
 		} else {
 			int delta;
 

From a068b94d74ddb7776ca707b6d39d1ac0d2d057e6 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Jun 2018 15:33:23 -0700
Subject: [PATCH 168/382] crypto: arm/speck - fix building in Thumb2 mode

Building the kernel with CONFIG_THUMB2_KERNEL=y and
CONFIG_CRYPTO_SPECK_NEON set fails with the following errors:

    arch/arm/crypto/speck-neon-core.S: Assembler messages:

    arch/arm/crypto/speck-neon-core.S:419: Error: r13 not allowed here -- `bic sp,#0xf'
    arch/arm/crypto/speck-neon-core.S:423: Error: r13 not allowed here -- `bic sp,#0xf'
    arch/arm/crypto/speck-neon-core.S:427: Error: r13 not allowed here -- `bic sp,#0xf'
    arch/arm/crypto/speck-neon-core.S:431: Error: r13 not allowed here -- `bic sp,#0xf'

The problem is that the 'bic' instruction can't operate on the 'sp'
register in Thumb2 mode.  Fix it by using a temporary register.  This
isn't in the main loop, so the performance difference is negligible.
This also matches what aes-neonbs-core.S does.

Reported-by: Stefan Agner <stefan@agner.ch>
Fixes: ede9622162fa ("crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/speck-neon-core.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S
index 3c1e203e53b9..57caa742016e 100644
--- a/arch/arm/crypto/speck-neon-core.S
+++ b/arch/arm/crypto/speck-neon-core.S
@@ -272,9 +272,11 @@
 	 * Allocate stack space to store 128 bytes worth of tweaks.  For
 	 * performance, this space is aligned to a 16-byte boundary so that we
 	 * can use the load/store instructions that declare 16-byte alignment.
+	 * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'.
 	 */
-	sub		sp, #128
-	bic		sp, #0xf
+	sub		r12, sp, #128
+	bic		r12, #0xf
+	mov		sp, r12
 
 .if \n == 64
 	// Load first tweak

From 221e00d1fce976d8a04ff591a0150caf84e176f8 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@alien8.de>
Date: Sat, 23 Jun 2018 12:36:22 +0200
Subject: [PATCH 169/382] crypto: x86 - Add missing RETs

Add explicit RETs to the tail calls of AEGIS and MORUS crypto algorithms
otherwise they run into INT3 padding due to

  ("x86/asm: Pad assembly functions with INT3 instructions")

leading to spurious debug exceptions.

Mike Galbraith <efault@gmx.de> took care of all the remaining callsites.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Ondrej Mosnacek <omosnacek@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aegis128-aesni-asm.S  | 1 +
 arch/x86/crypto/aegis128l-aesni-asm.S | 1 +
 arch/x86/crypto/aegis256-aesni-asm.S  | 1 +
 arch/x86/crypto/morus1280-avx2-asm.S  | 1 +
 arch/x86/crypto/morus1280-sse2-asm.S  | 1 +
 arch/x86/crypto/morus640-sse2-asm.S   | 1 +
 6 files changed, 6 insertions(+)

diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 9254e0b6cc06..717bf0776421 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -535,6 +535,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail)
 	movdqu STATE3, 0x40(STATEP)
 
 	FRAME_END
+	ret
 ENDPROC(crypto_aegis128_aesni_enc_tail)
 
 .macro decrypt_block a s0 s1 s2 s3 s4 i
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
index 9263c344f2c7..4eda2b8db9e1 100644
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ b/arch/x86/crypto/aegis128l-aesni-asm.S
@@ -645,6 +645,7 @@ ENTRY(crypto_aegis128l_aesni_enc_tail)
 	state_store0
 
 	FRAME_END
+	ret
 ENDPROC(crypto_aegis128l_aesni_enc_tail)
 
 /*
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
index 1d977d515bf9..32aae8397268 100644
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ b/arch/x86/crypto/aegis256-aesni-asm.S
@@ -543,6 +543,7 @@ ENTRY(crypto_aegis256_aesni_enc_tail)
 	state_store0
 
 	FRAME_END
+	ret
 ENDPROC(crypto_aegis256_aesni_enc_tail)
 
 /*
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
index 37d422e77931..07653d4582a6 100644
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ b/arch/x86/crypto/morus1280-avx2-asm.S
@@ -453,6 +453,7 @@ ENTRY(crypto_morus1280_avx2_enc_tail)
 	vmovdqu STATE4, (4 * 32)(%rdi)
 
 	FRAME_END
+	ret
 ENDPROC(crypto_morus1280_avx2_enc_tail)
 
 /*
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
index 1fe637c7be9d..bd1aa1b60869 100644
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ b/arch/x86/crypto/morus1280-sse2-asm.S
@@ -652,6 +652,7 @@ ENTRY(crypto_morus1280_sse2_enc_tail)
 	movdqu STATE4_HI, (9 * 16)(%rdi)
 
 	FRAME_END
+	ret
 ENDPROC(crypto_morus1280_sse2_enc_tail)
 
 /*
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
index 71c72a0a0862..efa02816d921 100644
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ b/arch/x86/crypto/morus640-sse2-asm.S
@@ -437,6 +437,7 @@ ENTRY(crypto_morus640_sse2_enc_tail)
 	movdqu STATE4, (4 * 16)(%rdi)
 
 	FRAME_END
+	ret
 ENDPROC(crypto_morus640_sse2_enc_tail)
 
 /*

From ecd60532e060e45c63c57ecf1c8549b1d656d34d Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@linux-m68k.org>
Date: Mon, 18 Jun 2018 15:34:14 +1000
Subject: [PATCH 170/382] m68k: fix "bad page state" oops on ColdFire boot
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Booting a ColdFire m68k core with MMU enabled causes a "bad page state"
oops since commit 1d40a5ea01d5 ("mm: mark pages in use for page tables"):

 BUG: Bad page state in process sh  pfn:01ce2
 page:004fefc8 count:0 mapcount:-1024 mapping:00000000 index:0x0
 flags: 0x0()
 raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000
 raw: 039c4000
 page dumped because: nonzero mapcount
 Modules linked in:
 CPU: 0 PID: 22 Comm: sh Not tainted 4.17.0-07461-g1d40a5ea01d5 #13

Fix by calling pgtable_page_dtor() in our __pte_free_tlb() code path,
so that the PG_table flag is cleared before we free the pte page.

Note that I had to change the type of pte_free() to be static from
extern. Otherwise you get a lot of warnings like this:

./arch/m68k/include/asm/mcf_pgalloc.h:80:2: warning: ‘pgtable_page_dtor’ is static but used in inline function ‘pte_free’ which is not static
  pgtable_page_dtor(page);
  ^

And making it static is consistent with our use of this in the other
m68k pgalloc definitions of pte_free().

Signed-off-by: Greg Ungerer <gerg@linux-m68k.org>
CC: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/include/asm/mcf_pgalloc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 8b707c249026..12fe700632f4 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -44,6 +44,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 				  unsigned long address)
 {
+	pgtable_page_dtor(page);
 	__free_page(page);
 }
 
@@ -74,8 +75,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm,
 	return page;
 }
 
-extern inline void pte_free(struct mm_struct *mm, struct page *page)
+static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
+	pgtable_page_dtor(page);
 	__free_page(page);
 }
 

From fa7743b141678116c19b89c5492b330671a51e02 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tdz@users.sourceforge.net>
Date: Mon, 18 Jun 2018 15:17:37 +0200
Subject: [PATCH 171/382] drm/exynos: Replace drm_framebuffer_{un/reference}
 with put,get functions

This patch unifies the naming of DRM functions for reference counting
of struct drm_framebuffer. The resulting code is more aligned with the
rest of the Linux kernel interfaces.

Signed-off-by: Thomas Zimmermann <tdz@users.sourceforge.net>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c
index 38a2a7f1204b..7098c6d35266 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_plane.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c
@@ -132,7 +132,7 @@ static void exynos_drm_plane_reset(struct drm_plane *plane)
 	if (plane->state) {
 		exynos_state = to_exynos_plane_state(plane->state);
 		if (exynos_state->base.fb)
-			drm_framebuffer_unreference(exynos_state->base.fb);
+			drm_framebuffer_put(exynos_state->base.fb);
 		kfree(exynos_state);
 		plane->state = NULL;
 	}

From af7d9101a08d752bb6da9a80df6edbc618fab4cc Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tdz@users.sourceforge.net>
Date: Mon, 18 Jun 2018 15:17:38 +0200
Subject: [PATCH 172/382] drm/exynos: Replace
 drm_gem_object_unreference_unlocked with put function

This patch unifies the naming of DRM functions for reference counting
of struct drm_gem_object. The resulting code is more aligned with the
rest of the Linux kernel interfaces.

Signed-off-by: Thomas Zimmermann <tdz@users.sourceforge.net>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fb.c  |  2 +-
 drivers/gpu/drm/exynos/exynos_drm_gem.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index 7fcc1a7ab1a0..27b7d34d776c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -138,7 +138,7 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 
 err:
 	while (i--)
-		drm_gem_object_unreference_unlocked(&exynos_gem[i]->base);
+		drm_gem_object_put_unlocked(&exynos_gem[i]->base);
 
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 6e1494fa71b4..bdf5a7655228 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -143,7 +143,7 @@ static int exynos_drm_gem_handle_create(struct drm_gem_object *obj,
 	DRM_DEBUG_KMS("gem handle = 0x%x\n", *handle);
 
 	/* drop reference from allocate - handle holds it now. */
-	drm_gem_object_unreference_unlocked(obj);
+	drm_gem_object_put_unlocked(obj);
 
 	return 0;
 }
@@ -186,7 +186,7 @@ unsigned long exynos_drm_gem_get_size(struct drm_device *dev,
 
 	exynos_gem = to_exynos_gem(obj);
 
-	drm_gem_object_unreference_unlocked(obj);
+	drm_gem_object_put_unlocked(obj);
 
 	return exynos_gem->size;
 }
@@ -329,13 +329,13 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev,
 		return;
 	}
 
-	drm_gem_object_unreference_unlocked(obj);
+	drm_gem_object_put_unlocked(obj);
 
 	/*
 	 * decrease obj->refcount one more time because we has already
 	 * increased it at exynos_drm_gem_get_dma_addr().
 	 */
-	drm_gem_object_unreference_unlocked(obj);
+	drm_gem_object_put_unlocked(obj);
 }
 
 static int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem *exynos_gem,
@@ -383,7 +383,7 @@ int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data,
 	args->flags = exynos_gem->flags;
 	args->size = exynos_gem->size;
 
-	drm_gem_object_unreference_unlocked(obj);
+	drm_gem_object_put_unlocked(obj);
 
 	return 0;
 }

From aab109b340eaf3968337e1d19d71ff0551c57365 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tdz@users.sourceforge.net>
Date: Mon, 18 Jun 2018 15:17:39 +0200
Subject: [PATCH 173/382] drm/exynos: Replace drm_dev_unref with drm_dev_put

This patch unifies the naming of DRM functions for reference counting
of struct drm_device. The resulting code is more aligned with the rest
of the Linux kernel interfaces.

Signed-off-by: Thomas Zimmermann <tdz@users.sourceforge.net>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index a81b4a5e24a7..ed3cc2989f93 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -420,7 +420,7 @@ static int exynos_drm_bind(struct device *dev)
 err_free_private:
 	kfree(private);
 err_free_drm:
-	drm_dev_unref(drm);
+	drm_dev_put(drm);
 
 	return ret;
 }
@@ -444,7 +444,7 @@ static void exynos_drm_unbind(struct device *dev)
 	drm->dev_private = NULL;
 	dev_set_drvdata(dev, NULL);
 
-	drm_dev_unref(drm);
+	drm_dev_put(drm);
 }
 
 static const struct component_master_ops exynos_drm_ops = {

From 510fe10b6180137773dce9032b51bb82ff946c2d Mon Sep 17 00:00:00 2001
From: Zhao Yan <yan.y.zhao@intel.com>
Date: Tue, 19 Jun 2018 15:44:11 +0800
Subject: [PATCH 174/382] drm/i915/gvt: fix a bug of partially write ggtt
 enties

when guest writes ggtt entries, it could write 8 bytes a time if
gtt_entry_size is 8. But, qemu could split the 8 bytes into 2 consecutive
4-byte writes.

If each 4-byte partial write could trigger a host ggtt write, it is very
possible that a wrong combination is written to the host ggtt. E.g.
the higher 4 bytes is the old value, but the lower 4 bytes is the new
value, and this 8-byte combination is wrong but written to the ggtt, thus
causing bugs.

To handle this condition, we just record the first 4-byte write, then wait
until the second 4-byte write comes and write the combined 64-bit data to
host ggtt table.

To save memory space and to spot partial write as early as possible, we
don't keep this information for every ggtt index. Instread, we just record
the last ggtt write position, and assume the two 4-byte writes come in
consecutively for each vgpu.

This assumption is right based on the characteristic of ggtt entry which
stores memory address. When gtt_entry_size is 8, the guest memory physical
address should be 64 bits, so any sane guest driver should write 8-byte
long data at a time, so 2 consecutive 4-byte writes at the same ggtt index
should be trapped in gvt.

v2:
when incomplete ggtt entry write is located, e.g.
    1. guest only writes 4 bytes at a ggtt offset and no long writes the
       rest 4 bytes.
    2. guest writes 4 bytes of a ggtt offset, then write at other ggtt
       offsets, then return back to write the left 4 bytes of the first
       ggtt offset.
add error handling logic to remap host entry to scratch page, and mark
guest virtual ggtt entry as not present.  (zhenyu wang)

Signed-off-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 58 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/gvt/gtt.h |  2 ++
 2 files changed, 60 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 23296547da95..4efec8fa6c1d 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1592,6 +1592,7 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
 		vgpu_free_mm(mm);
 		return ERR_PTR(-ENOMEM);
 	}
+	mm->ggtt_mm.last_partial_off = -1UL;
 
 	return mm;
 }
@@ -1616,6 +1617,7 @@ void _intel_vgpu_mm_release(struct kref *mm_ref)
 		invalidate_ppgtt_mm(mm);
 	} else {
 		vfree(mm->ggtt_mm.virtual_ggtt);
+		mm->ggtt_mm.last_partial_off = -1UL;
 	}
 
 	vgpu_free_mm(mm);
@@ -1868,6 +1870,62 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
 			bytes);
 
+	/* If ggtt entry size is 8 bytes, and it's split into two 4 bytes
+	 * write, we assume the two 4 bytes writes are consecutive.
+	 * Otherwise, we abort and report error
+	 */
+	if (bytes < info->gtt_entry_size) {
+		if (ggtt_mm->ggtt_mm.last_partial_off == -1UL) {
+			/* the first partial part*/
+			ggtt_mm->ggtt_mm.last_partial_off = off;
+			ggtt_mm->ggtt_mm.last_partial_data = e.val64;
+			return 0;
+		} else if ((g_gtt_index ==
+				(ggtt_mm->ggtt_mm.last_partial_off >>
+				info->gtt_entry_size_shift)) &&
+			(off !=	ggtt_mm->ggtt_mm.last_partial_off)) {
+			/* the second partial part */
+
+			int last_off = ggtt_mm->ggtt_mm.last_partial_off &
+				(info->gtt_entry_size - 1);
+
+			memcpy((void *)&e.val64 + last_off,
+				(void *)&ggtt_mm->ggtt_mm.last_partial_data +
+				last_off, bytes);
+
+			ggtt_mm->ggtt_mm.last_partial_off = -1UL;
+		} else {
+			int last_offset;
+
+			gvt_vgpu_err("failed to populate guest ggtt entry: abnormal ggtt entry write sequence, last_partial_off=%lx, offset=%x, bytes=%d, ggtt entry size=%d\n",
+					ggtt_mm->ggtt_mm.last_partial_off, off,
+					bytes, info->gtt_entry_size);
+
+			/* set host ggtt entry to scratch page and clear
+			 * virtual ggtt entry as not present for last
+			 * partially write offset
+			 */
+			last_offset = ggtt_mm->ggtt_mm.last_partial_off &
+					(~(info->gtt_entry_size - 1));
+
+			ggtt_get_host_entry(ggtt_mm, &m, last_offset);
+			ggtt_invalidate_pte(vgpu, &m);
+			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
+			ops->clear_present(&m);
+			ggtt_set_host_entry(ggtt_mm, &m, last_offset);
+			ggtt_invalidate(gvt->dev_priv);
+
+			ggtt_get_guest_entry(ggtt_mm, &e, last_offset);
+			ops->clear_present(&e);
+			ggtt_set_guest_entry(ggtt_mm, &e, last_offset);
+
+			ggtt_mm->ggtt_mm.last_partial_off = off;
+			ggtt_mm->ggtt_mm.last_partial_data = e.val64;
+
+			return 0;
+		}
+	}
+
 	if (ops->test_present(&e)) {
 		gfn = ops->get_pfn(&e);
 		m = e;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 3792f2b7f4ff..97e62647418a 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -150,6 +150,8 @@ struct intel_vgpu_mm {
 		} ppgtt_mm;
 		struct {
 			void *virtual_ggtt;
+			unsigned long last_partial_off;
+			u64 last_partial_data;
 		} ggtt_mm;
 	};
 };

From a4cae23cc05ccec749c2fc70fa9d8cda7c582319 Mon Sep 17 00:00:00 2001
From: Xiaolin Zhang <xiaolin.zhang@intel.com>
Date: Thu, 21 Jun 2018 14:33:43 +0800
Subject: [PATCH 175/382] drm/i915/gvt: changed DDI mode emulation type

changed gvt display transcode DDI mode from DP_SST to
DVI to address below calltrace issue during guest booting
up which is caused by zero dotclock initial value with DP_SST
mode. transcode DVI mode emulation also align with native with DP
connection.

[drm:drm_calc_timestamping_constants]
ERROR crtc 41: Can't calculate constants, dotclock = 0!

WARNING: at drivers/gpu/drm/drm_vblank.c:620
drm_calc_vbltimestamp_from_scanoutpos

Call Trace:
? drm_calc_timestamping_constants+0x144/0x150 [drm]
drm_get_last_vbltimestamp+0x54/0x90 [drm]
drm_reset_vblank_timestamp+0x59/0xd0 [drm]
drm_crtc_vblank_on+0x7b/0xd0 [drm]
intel_modeset_setup_hw_state+0xb67/0xfd0 [i915]
? gen2_read32+0x110/0x110 [i915]
? drm_modeset_lock+0x30/0xa0 [drm]
intel_modeset_init+0x794/0x19d0 [i915]
? intel_setup_gmbus+0x232/0x2e0 [i915]
i915_driver_load+0xb4a/0xf40 [i915]

Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/display.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 6d8180e8d1e2..4b072ade8c38 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -196,7 +196,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
 			TRANS_DDI_PORT_MASK);
 		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
-			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI |
 			(PORT_B << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
 		if (IS_BROADWELL(dev_priv)) {
@@ -216,7 +216,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
 			TRANS_DDI_PORT_MASK);
 		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
-			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI |
 			(PORT_C << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
 		if (IS_BROADWELL(dev_priv)) {
@@ -236,7 +236,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
 			TRANS_DDI_PORT_MASK);
 		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
-			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI |
 			(PORT_D << TRANS_DDI_PORT_SHIFT) |
 			TRANS_DDI_FUNC_ENABLE);
 		if (IS_BROADWELL(dev_priv)) {

From 08b393d01c88aff27347ed2b1b354eb4db2f1532 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 27 Jun 2018 17:03:45 -0500
Subject: [PATCH 176/382] objtool: Support GCC 8 '-fnoreorder-functions'

Since the following commit:

  cd77849a69cf ("objtool: Fix GCC 8 cold subfunction detection for aliased functions")

... if the kernel is built with EXTRA_CFLAGS='-fno-reorder-functions',
objtool can get stuck in an infinite loop.

That flag causes the new GCC 8 cold subfunctions to be placed in .text
instead of .text.unlikely.  But it also has an unfortunate quirk: in the
symbol table, the subfunction (e.g., nmi_panic.cold.7) is nested inside
the parent (nmi_panic).

That function overlap confuses objtool, and causes it to get into an
infinite loop in next_insn_same_func().  Here's Allan's description of
the loop:

  "Objtool iterates through the instructions in nmi_panic using
  next_insn_same_func. Once it reaches the end of nmi_panic at 0x534 it
  jumps to 0x528 as that's the start of nmi_panic.cold.7. However, since
  the instructions starting at 0x528 are still associated with nmi_panic
  objtool will get stuck in a loop, continually jumping back to 0x528
  after reaching 0x534."

Fix it by shortening the length of the parent function so that the
functions no longer overlap.

Reported-and-analyzed-by: Allan Xavier <allan.x.xavier@oracle.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Allan Xavier <allan.x.xavier@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/9e704c52bee651129b036be14feda317ae5606ae.1530136978.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/elf.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 4e60e105583e..0d1acb704f64 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -302,19 +302,34 @@ static int read_symbols(struct elf *elf)
 				continue;
 			sym->pfunc = sym->cfunc = sym;
 			coldstr = strstr(sym->name, ".cold.");
-			if (coldstr) {
-				coldstr[0] = '\0';
-				pfunc = find_symbol_by_name(elf, sym->name);
-				coldstr[0] = '.';
+			if (!coldstr)
+				continue;
 
-				if (!pfunc) {
-					WARN("%s(): can't find parent function",
-					     sym->name);
-					goto err;
-				}
+			coldstr[0] = '\0';
+			pfunc = find_symbol_by_name(elf, sym->name);
+			coldstr[0] = '.';
 
-				sym->pfunc = pfunc;
-				pfunc->cfunc = sym;
+			if (!pfunc) {
+				WARN("%s(): can't find parent function",
+				     sym->name);
+				goto err;
+			}
+
+			sym->pfunc = pfunc;
+			pfunc->cfunc = sym;
+
+			/*
+			 * Unfortunately, -fnoreorder-functions puts the child
+			 * inside the parent.  Remove the overlap so we can
+			 * have sane assumptions.
+			 *
+			 * Note that pfunc->len now no longer matches
+			 * pfunc->sym.st_size.
+			 */
+			if (sym->sec == pfunc->sec &&
+			    sym->offset >= pfunc->offset &&
+			    sym->offset + sym->len == pfunc->offset + pfunc->len) {
+				pfunc->len -= sym->len;
 			}
 		}
 	}

From 920c92448839bd4f8eb87a92b08cad56d449caff Mon Sep 17 00:00:00 2001
From: Murray McAllister <murray.mcallister@insomniasec.com>
Date: Mon, 2 Jul 2018 13:07:28 +1200
Subject: [PATCH 177/382] staging: rtl8723bs: Prevent an underflow in
 rtw_check_beacon_data().

Dan Carpenter reported an integer underflow issue in the rtl8188eu driver.
This is also needed for the length (signed integer) in rtl8723bs, as it is
later converted to an unsigned integer and used in a memcpy operation.

Original issue is at https://patchwork.kernel.org/patch/9796371/

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Murray McAllister <murray.mcallister@insomniasec.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_ap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_ap.c b/drivers/staging/rtl8723bs/core/rtw_ap.c
index 45c05527a57a..faf4b4158cfa 100644
--- a/drivers/staging/rtl8723bs/core/rtw_ap.c
+++ b/drivers/staging/rtl8723bs/core/rtw_ap.c
@@ -1051,7 +1051,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf,  int len)
 		return _FAIL;
 
 
-	if (len > MAX_IE_SZ)
+	if (len < 0 || len > MAX_IE_SZ)
 		return _FAIL;
 
 	pbss_network->IELength = len;

From 3284da34a87ab7a527a593f89bbdaf6debe9e713 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.ibm.com>
Date: Tue, 12 Jun 2018 16:47:10 +0200
Subject: [PATCH 178/382] s390/dasd: reduce the default queue depth and nr of
 hardware queues

Reduce the default values for the number of hardware queues and queue depth
to significantly reduce the memory footprint of a DASD device.
The memory consumption per DASD device reduces from approximately 40MB to
approximately 1.5MB.

This is necessary to build systems with a large number of DASD devices and
a reasonable amount of memory.
Performance measurements showed that good performance results are possible
with the new default values even on systems with lots of CPUs and lots of
alias devices.

Fixes: e443343e509a ("s390/dasd: blk-mq conversion")
Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Reviewed-by: Peter Oberparleiter <oberpar@linux.ibm.com>
Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/dasd.c     | 13 +++++++++++--
 drivers/s390/block/dasd_int.h |  8 --------
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index d3a38c421503..a9f60d0ee02e 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -41,6 +41,15 @@
 
 #define DASD_DIAG_MOD		"dasd_diag_mod"
 
+static unsigned int queue_depth = 32;
+static unsigned int nr_hw_queues = 4;
+
+module_param(queue_depth, uint, 0444);
+MODULE_PARM_DESC(queue_depth, "Default queue depth for new DASD devices");
+
+module_param(nr_hw_queues, uint, 0444);
+MODULE_PARM_DESC(nr_hw_queues, "Default number of hardware queues for new DASD devices");
+
 /*
  * SECTION: exported variables of dasd.c
  */
@@ -3115,8 +3124,8 @@ static int dasd_alloc_queue(struct dasd_block *block)
 
 	block->tag_set.ops = &dasd_mq_ops;
 	block->tag_set.cmd_size = sizeof(struct dasd_ccw_req);
-	block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES;
-	block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV;
+	block->tag_set.nr_hw_queues = nr_hw_queues;
+	block->tag_set.queue_depth = queue_depth;
 	block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 
 	rc = blk_mq_alloc_tag_set(&block->tag_set);
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 976b6bd4fb05..de6b96036aa4 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -228,14 +228,6 @@ struct dasd_ccw_req {
 #define DASD_CQR_SUPPRESS_IL	6	/* Suppress 'Incorrect Length' error */
 #define DASD_CQR_SUPPRESS_CR	7	/* Suppress 'Command Reject' error */
 
-/*
- * There is no reliable way to determine the number of available CPUs on
- * LPAR but there is no big performance difference between 1 and the
- * maximum CPU number.
- * 64 is a good trade off performance wise.
- */
-#define DASD_NR_HW_QUEUES 64
-#define DASD_MAX_LCU_DEV 256
 #define DASD_REQ_PER_DEV 4
 
 /* Signature for error recovery functions. */

From dfa758638fd2d1184760deb2693abf76e982c53a Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 29 Jun 2018 19:54:01 +0200
Subject: [PATCH 179/382] s390/mm: fix refcount usage for 4K pgste

s390 no longer uses the _mapcount field in struct page to identify
the page table format being used. While the code was diligent in handling
the different mappings, it neglected to turn "off" the map bits when
alloc_pgste was being used. This resulted in bits remaining "on" in the
_refcount field, and thus an artifically huge "in use" count that prevents
the pages from actually being released by __free_page.

There's opportunity for improvement in the "1 vs 3" vs "1U vs 3U" vs
"0x1 vs 0x11" etc. variations for all these calls, I am just keeping
things simple compared to neighboring code.

Fixes: 620b4e903179 ("s390: use _refcount for pgtables")
Reported-by: Halil Pasic <pasic@linux.ibm.com>
Bisected-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgalloc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 84bd6329a88d..e3bd5627afef 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -252,6 +252,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 		spin_unlock_bh(&mm->context.lock);
 		if (mask != 0)
 			return;
+	} else {
+		atomic_xor_bits(&page->_refcount, 3U << 24);
 	}
 
 	pgtable_page_dtor(page);
@@ -304,6 +306,8 @@ static void __tlb_remove_table(void *_table)
 			break;
 		/* fallthrough */
 	case 3:		/* 4K page table with pgstes */
+		if (mask & 3)
+			atomic_xor_bits(&page->_refcount, 3 << 24);
 		pgtable_page_dtor(page);
 		__free_page(page);
 		break;

From 07eaa43e66f505980d00e0f5fe697f3da7c6a730 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Tue, 29 May 2018 12:00:54 +0300
Subject: [PATCH 180/382] ARM: dts: dra7: Disable metastability workaround for
 USB2

Disable the metastability workaround for USB2. The original
patch disabled the workaround on the wrong USB port.

Fixes: b8c9c6fa2002 ("ARM: dts: dra7: Disable USB metastability workaround for USB2")
Cc: <stable@vger.kernel.org>        [4.16+]
Signed-off-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/dra7.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 9dcd14edc202..e03495a799ce 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1580,7 +1580,6 @@ usb1: usb@48890000 {
 				dr_mode = "otg";
 				snps,dis_u3_susphy_quirk;
 				snps,dis_u2_susphy_quirk;
-				snps,dis_metastability_quirk;
 			};
 		};
 
@@ -1608,6 +1607,7 @@ usb2: usb@488d0000 {
 				dr_mode = "otg";
 				snps,dis_u3_susphy_quirk;
 				snps,dis_u2_susphy_quirk;
+				snps,dis_metastability_quirk;
 			};
 		};
 

From 207490517cf80d99f471d1b54e747eb95a4b8bea Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 18 Jun 2018 10:45:49 -0700
Subject: [PATCH 181/382] bus: ti-sysc: Use 2-factor allocator arguments

This adjusts the allocator calls to use 2-factor argument call style, as
done treewide already for improved defense against allocation overflows.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 1cc29629d238..80d60f43db56 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -169,9 +169,9 @@ static int sysc_get_clocks(struct sysc *ddata)
 	const char *name;
 	int nr_fck = 0, nr_ick = 0, i, error = 0;
 
-	ddata->clock_roles = devm_kzalloc(ddata->dev,
-					  sizeof(*ddata->clock_roles) *
+	ddata->clock_roles = devm_kcalloc(ddata->dev,
 					  SYSC_MAX_CLOCKS,
+					  sizeof(*ddata->clock_roles),
 					  GFP_KERNEL);
 	if (!ddata->clock_roles)
 		return -ENOMEM;
@@ -200,8 +200,8 @@ static int sysc_get_clocks(struct sysc *ddata)
 		return -EINVAL;
 	}
 
-	ddata->clocks = devm_kzalloc(ddata->dev,
-				     sizeof(*ddata->clocks) * ddata->nr_clocks,
+	ddata->clocks = devm_kcalloc(ddata->dev,
+				     ddata->nr_clocks, sizeof(*ddata->clocks),
 				     GFP_KERNEL);
 	if (!ddata->clocks)
 		return -ENOMEM;

From 603d4cf8fe095b1ee78f423d514427be507fb513 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Sat, 30 Jun 2018 17:38:55 +0200
Subject: [PATCH 182/382] net: fix use-after-free in GRO with ESP

Since the addition of GRO for ESP, gro_receive can consume the skb and
return -EINPROGRESS. In that case, the lower layer GRO handler cannot
touch the skb anymore.

Commit 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") converted
some of the gro_receive handlers that can lead to ESP's gro_receive so
that they wouldn't access the skb when -EINPROGRESS is returned, but
missed other spots, mainly in tunneling protocols.

This patch finishes the conversion to using skb_gro_flush_final(), and
adds a new helper, skb_gro_flush_final_remcsum(), used in VXLAN and
GUE.

Fixes: 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c      |  2 +-
 drivers/net/vxlan.c       |  4 +---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 net/8021q/vlan.c          |  2 +-
 net/ipv4/fou.c            |  4 +---
 net/ipv4/gre_offload.c    |  2 +-
 net/ipv4/udp_offload.c    |  2 +-
 7 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 750eaa53bf0c..ada33c2d9ac2 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -476,7 +476,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index aee0e60471f1..f6bb1d54d4bd 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -623,9 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
 	flush = 0;
 
 out:
-	skb_gro_remcsum_cleanup(skb, &grc);
-	skb->remcsum_offload = 0;
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 
 	return pp;
 }
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec9850c7936..3d0cc0b5cec2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2789,11 +2789,31 @@ static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp,
 	if (PTR_ERR(pp) != -EINPROGRESS)
 		NAPI_GRO_CB(skb)->flush |= flush;
 }
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff **pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	if (PTR_ERR(pp) != -EINPROGRESS) {
+		NAPI_GRO_CB(skb)->flush |= flush;
+		skb_gro_remcsum_cleanup(skb, grc);
+		skb->remcsum_offload = 0;
+	}
+}
 #else
 static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
 {
 	NAPI_GRO_CB(skb)->flush |= flush;
 }
+static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
+					       struct sk_buff **pp,
+					       int flush,
+					       struct gro_remcsum *grc)
+{
+	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_remcsum_cleanup(skb, grc);
+	skb->remcsum_offload = 0;
+}
 #endif
 
 static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..8ccee3d01822 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..c9ec1603666b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -448,9 +448,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
-	skb_gro_remcsum_cleanup(skb, &grc);
-	skb->remcsum_offload = 0;
+	skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
 
 	return pp;
 }
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..6a7d980105f6 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 
 	return pp;
 }
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..69c54540d5b4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -394,7 +394,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
 out_unlock:
 	rcu_read_unlock();
 out:
-	NAPI_GRO_CB(skb)->flush |= flush;
+	skb_gro_flush_final(skb, pp, flush);
 	return pp;
 }
 EXPORT_SYMBOL(udp_gro_receive);

From fc9c2029e37c3ae9efc28bf47045e0b87e09660c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 30 Jun 2018 15:26:56 -0700
Subject: [PATCH 183/382] ipv6: sr: fix passing wrong flags to
 crypto_alloc_shash()

The 'mask' argument to crypto_alloc_shash() uses the CRYPTO_ALG_* flags,
not 'gfp_t'.  So don't pass GFP_KERNEL to it.

Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_hmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 33fb35cbfac1..558fe8cc6d43 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void)
 			return -ENOMEM;
 
 		for_each_possible_cpu(cpu) {
-			tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL);
+			tfm = crypto_alloc_shash(algo->name, 0, 0);
 			if (IS_ERR(tfm))
 				return PTR_ERR(tfm);
 			p_tfm = per_cpu_ptr(algo->tfms, cpu);

From 5dc2d3996a8b221c20dd0900bdad45031a572530 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sun, 1 Jul 2018 16:21:21 +0800
Subject: [PATCH 184/382] ipvlan: call dev_change_flags when ipvlan mode is
 reset

After we change the ipvlan mode from l3 to l2, or vice versa, we only
reset IFF_NOARP flag, but don't flush the ARP table cache, which will
cause eth->h_dest to be equal to eth->h_source in ipvlan_xmit_mode_l2().
Then the message will not come out of host.

Here is the reproducer on local host:

ip link set eth1 up
ip addr add 192.168.1.1/24 dev eth1
ip link add link eth1 ipvlan1 type ipvlan mode l3

ip netns add net1
ip link set ipvlan1 netns net1
ip netns exec net1 ip link set ipvlan1 up
ip netns exec net1 ip addr add 192.168.2.1/24 dev ipvlan1

ip route add 192.168.2.0/24 via 192.168.1.2
ping 192.168.2.2 -c 2

ip netns exec net1 ip link set ipvlan1 type ipvlan mode l2
ping 192.168.2.2 -c 2

Add the same configuration on remote host. After we set the mode to l2,
we could find that the src/dst MAC addresses are the same on eth1:

21:26:06.648565 00:b7:13:ad:d3:05 > 00:b7:13:ad:d3:05, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 58356, offset 0, flags [DF], proto ICMP (1), length 84)
    192.168.2.1 > 192.168.2.2: ICMP echo request, id 22686, seq 1, length 64

Fix this by calling dev_change_flags(), which will call netdevice notifier
with flag change info.

v2:
a) As pointed out by Wang Cong, check return value for dev_change_flags() when
change dev flags.
b) As suggested by Stefano and Sabrina, move flags setting before l3mdev_ops.
So we don't need to redo ipvlan_{, un}register_nf_hook() again in err path.

Reported-by: Jianlin Shi <jishi@redhat.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Fixes: 2ad7bf3638411 ("ipvlan: Initial check-in of the IPVLAN driver.")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipvlan/ipvlan_main.c | 36 +++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 23c1d6600241..4a949569ec4c 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -75,10 +75,23 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
 {
 	struct ipvl_dev *ipvlan;
 	struct net_device *mdev = port->dev;
-	int err = 0;
+	unsigned int flags;
+	int err;
 
 	ASSERT_RTNL();
 	if (port->mode != nval) {
+		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+			flags = ipvlan->dev->flags;
+			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
+				err = dev_change_flags(ipvlan->dev,
+						       flags | IFF_NOARP);
+			} else {
+				err = dev_change_flags(ipvlan->dev,
+						       flags & ~IFF_NOARP);
+			}
+			if (unlikely(err))
+				goto fail;
+		}
 		if (nval == IPVLAN_MODE_L3S) {
 			/* New mode is L3S */
 			err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
@@ -86,21 +99,28 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
 				mdev->l3mdev_ops = &ipvl_l3mdev_ops;
 				mdev->priv_flags |= IFF_L3MDEV_MASTER;
 			} else
-				return err;
+				goto fail;
 		} else if (port->mode == IPVLAN_MODE_L3S) {
 			/* Old mode was L3S */
 			mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
 			ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
 			mdev->l3mdev_ops = NULL;
 		}
-		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
-				ipvlan->dev->flags |= IFF_NOARP;
-			else
-				ipvlan->dev->flags &= ~IFF_NOARP;
-		}
 		port->mode = nval;
 	}
+	return 0;
+
+fail:
+	/* Undo the flags changes that have been done so far. */
+	list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
+		flags = ipvlan->dev->flags;
+		if (port->mode == IPVLAN_MODE_L3 ||
+		    port->mode == IPVLAN_MODE_L3S)
+			dev_change_flags(ipvlan->dev, flags | IFF_NOARP);
+		else
+			dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP);
+	}
+
 	return err;
 }
 

From bb7858ba1102f82470a917e041fd23e6385c31be Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Sun, 1 Jul 2018 20:03:05 -0700
Subject: [PATCH 185/382] qed: Limit msix vectors in kdump kernel to the
 minimum required count.

Memory size is limited in the kdump kernel environment. Allocation of more
msix-vectors (or queues) consumes few tens of MBs of memory, which might
lead to the kdump kernel failure.
This patch adds changes to limit the number of MSI-X vectors in kdump
kernel to minimum required value (i.e., 2 per engine).

Fixes: fe56b9e6a ("qed: Add module with basic common support")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 5c10fd7210c3..0cbc74d6ca8b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -789,6 +789,14 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
 	/* We want a minimum of one slowpath and one fastpath vector per hwfn */
 	cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2;
 
+	if (is_kdump_kernel()) {
+		DP_INFO(cdev,
+			"Kdump kernel: Limit the max number of requested MSI-X vectors to %hd\n",
+			cdev->int_params.in.min_msix_cnt);
+		cdev->int_params.in.num_vectors =
+			cdev->int_params.in.min_msix_cnt;
+	}
+
 	rc = qed_set_int_mode(cdev, false);
 	if (rc)  {
 		DP_ERR(cdev, "qed_slowpath_setup_int ERR\n");

From 538f8d00ba8bb417c4d9e76c61dee59d812d8287 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Sun, 1 Jul 2018 20:03:06 -0700
Subject: [PATCH 186/382] qed: Fix setting of incorrect eswitch mode.

By default, driver sets the eswitch mode incorrectly as VEB (virtual
Ethernet bridging).
Need to set VEB eswitch mode only when sriov is enabled, and it should be
to set NONE by default. The patch incorporates this change.

Fixes: 0fefbfbaa ("qed*: Management firmware - notifications and defaults")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c   |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_sriov.c | 19 +++++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 329781cda77f..e5249b4741d0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1804,7 +1804,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			DP_INFO(p_hwfn, "Failed to update driver state\n");
 
 		rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt,
-					       QED_OV_ESWITCH_VEB);
+					       QED_OV_ESWITCH_NONE);
 		if (rc)
 			DP_INFO(p_hwfn, "Failed to update eswitch mode\n");
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index f01bf52bc381..fd59cf45f4be 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -4513,6 +4513,8 @@ static void qed_sriov_enable_qid_config(struct qed_hwfn *hwfn,
 static int qed_sriov_enable(struct qed_dev *cdev, int num)
 {
 	struct qed_iov_vf_init_params params;
+	struct qed_hwfn *hwfn;
+	struct qed_ptt *ptt;
 	int i, j, rc;
 
 	if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) {
@@ -4525,8 +4527,8 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
 
 	/* Initialize HW for VF access */
 	for_each_hwfn(cdev, j) {
-		struct qed_hwfn *hwfn = &cdev->hwfns[j];
-		struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
+		hwfn = &cdev->hwfns[j];
+		ptt = qed_ptt_acquire(hwfn);
 
 		/* Make sure not to use more than 16 queues per VF */
 		params.num_queues = min_t(int,
@@ -4562,6 +4564,19 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
 		goto err;
 	}
 
+	hwfn = QED_LEADING_HWFN(cdev);
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt) {
+		DP_ERR(hwfn, "Failed to acquire ptt\n");
+		rc = -EBUSY;
+		goto err;
+	}
+
+	rc = qed_mcp_ov_update_eswitch(hwfn, ptt, QED_OV_ESWITCH_VEB);
+	if (rc)
+		DP_INFO(cdev, "Failed to update eswitch mode\n");
+	qed_ptt_release(hwfn, ptt);
+
 	return num;
 
 err:

From cc9b27cdf7bd3c86df73439758ac1564bc8f5bbe Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Sun, 1 Jul 2018 20:03:07 -0700
Subject: [PATCH 187/382] qed: Fix use of incorrect size in memcpy call.

Use the correct size value while copying chassis/port id values.

Fixes: 6ad8c632e ("qed: Add support for query/config dcbx.")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index f0b01385d5cb..e0680ce91328 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -709,9 +709,9 @@ qed_dcbx_get_local_lldp_params(struct qed_hwfn *p_hwfn,
 	p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE];
 
 	memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id,
-	       ARRAY_SIZE(p_local->local_chassis_id));
+	       sizeof(p_local->local_chassis_id));
 	memcpy(params->lldp_local.local_port_id, p_local->local_port_id,
-	       ARRAY_SIZE(p_local->local_port_id));
+	       sizeof(p_local->local_port_id));
 }
 
 static void
@@ -723,9 +723,9 @@ qed_dcbx_get_remote_lldp_params(struct qed_hwfn *p_hwfn,
 	p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE];
 
 	memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id,
-	       ARRAY_SIZE(p_remote->peer_chassis_id));
+	       sizeof(p_remote->peer_chassis_id));
 	memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id,
-	       ARRAY_SIZE(p_remote->peer_port_id));
+	       sizeof(p_remote->peer_port_id));
 }
 
 static int

From 82a4e71b1565dea8387f54503e806cf374e779ec Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Sun, 1 Jul 2018 20:03:08 -0700
Subject: [PATCH 188/382] qede: Adverstise software timestamp caps when PHC is
 not available.

When ptp clock is not available for a PF (e.g., higher PFs in NPAR mode),
get-tsinfo() callback should return the software timestamp capabilities
instead of returning the error.

Fixes: 4c55215c ("qede: Add driver support for PTP")
Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_ptp.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index 02adb513f475..013ff567283c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -337,8 +337,14 @@ int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info)
 {
 	struct qede_ptp *ptp = edev->ptp;
 
-	if (!ptp)
-		return -EIO;
+	if (!ptp) {
+		info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+					SOF_TIMESTAMPING_RX_SOFTWARE |
+					SOF_TIMESTAMPING_SOFTWARE;
+		info->phc_index = -1;
+
+		return 0;
+	}
 
 	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
 				SOF_TIMESTAMPING_RX_SOFTWARE |

From fe6e04941aa12479a1a58656362bec74100bf7d7 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 29 Jun 2018 19:01:44 +0900
Subject: [PATCH 189/382] mmc: renesas_sdhi_internal_dmac: Fix missing unmap in
 error patch

This patch fixes an issue that lacks the dma_unmap_sg() calling in
the error patch of renesas_sdhi_internal_dmac_start_dma().

Fixes: 0cbc94daa554 ("mmc: renesas_sdhi_internal_dmac: limit DMA RX for old SoCs")
Cc: <stable@vger.kernel.org> # v4.17+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi_internal_dmac.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index f7f9773d161f..d676a20d959f 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -164,17 +164,14 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
 		goto force_pio;
 
 	/* This DMAC cannot handle if buffer is not 8-bytes alignment */
-	if (!IS_ALIGNED(sg_dma_address(sg), 8)) {
-		dma_unmap_sg(&host->pdev->dev, sg, host->sg_len,
-			     mmc_get_dma_dir(data));
-		goto force_pio;
-	}
+	if (!IS_ALIGNED(sg_dma_address(sg), 8))
+		goto force_pio_with_unmap;
 
 	if (data->flags & MMC_DATA_READ) {
 		dtran_mode |= DTRAN_MODE_CH_NUM_CH1;
 		if (test_bit(SDHI_INTERNAL_DMAC_ONE_RX_ONLY, &global_flags) &&
 		    test_and_set_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags))
-			goto force_pio;
+			goto force_pio_with_unmap;
 	} else {
 		dtran_mode |= DTRAN_MODE_CH_NUM_CH0;
 	}
@@ -189,6 +186,9 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host,
 
 	return;
 
+force_pio_with_unmap:
+	dma_unmap_sg(&host->pdev->dev, sg, host->sg_len, mmc_get_dma_dir(data));
+
 force_pio:
 	host->force_pio = true;
 	renesas_sdhi_internal_dmac_enable_dma(host, false);

From 25a98edd5795719c5187e16ea271e8de86e02809 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Fri, 29 Jun 2018 19:01:45 +0900
Subject: [PATCH 190/382] mmc: renesas_sdhi_internal_dmac: Cannot clear the
 RX_IN_USE in abort

This patch is fixes an issue that the SDHI_INTERNAL_DMAC_RX_IN_USE
flag cannot be cleared because tmio_mmc_core sets the host->data
to NULL before the tmio_mmc_core calls tmio_mmc_abort_dma().

So, this patch clears the SDHI_INTERNAL_DMAC_RX_IN_USE in
the renesas_sdhi_internal_dmac_abort_dma() anyway. This doesn't
cause any side effects.

Fixes: 0cbc94daa554 ("mmc: renesas_sdhi_internal_dmac: limit DMA RX for old SoCs")
Cc: <stable@vger.kernel.org> # v4.17+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi_internal_dmac.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
index d676a20d959f..d032bd63444d 100644
--- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c
+++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c
@@ -139,8 +139,7 @@ renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) {
 	renesas_sdhi_internal_dmac_dm_write(host, DM_CM_RST,
 					    RST_RESERVED_BITS | val);
 
-	if (host->data && host->data->flags & MMC_DATA_READ)
-		clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
+	clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags);
 
 	renesas_sdhi_internal_dmac_enable_dma(host, true);
 }

From aaa23f86001bdb82d2f937c5c7bce0a1e11a6c5b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 27 Jun 2018 07:25:32 +0100
Subject: [PATCH 191/382] ALSA: hda - Handle pm failure during hotplug

Obtaining the runtime pm wakeref can fail, especially in a hotplug
scenario where i915.ko has been unloaded. If we do not catch the
failure, we end up with an unbalanced pm.

v2 additions by tiwai:
hdmi_present_sense() checks the return value and handle only a
negative error case and bails out only if it's really still suspended.
Also, snd_hda_power_down() is called at the error path so that the
refcount is balanced.

Along with it, the spec->pcm_lock is taken outside
hdmi_present_sense() in the caller side, so that it won't cause
deadlock at reentrace via runtime resume.

v3 fix by tiwai:
Missing linux/pm_runtime.h is included.

References: 222bde03881c ("ALSA: hda - Fix mutex deadlock at HDMI/DP hotplug")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_hdmi.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 98e1c411c56a..8a49415aebac 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -33,6 +33,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pm_runtime.h>
 #include <sound/core.h>
 #include <sound/jack.h>
 #include <sound/asoundef.h>
@@ -764,8 +765,10 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid,
 
 	if (pin_idx < 0)
 		return;
+	mutex_lock(&spec->pcm_lock);
 	if (hdmi_present_sense(get_pin(spec, pin_idx), 1))
 		snd_hda_jack_report_sync(codec);
+	mutex_unlock(&spec->pcm_lock);
 }
 
 static void jack_callback(struct hda_codec *codec,
@@ -1628,21 +1631,23 @@ static void sync_eld_via_acomp(struct hda_codec *codec,
 static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll)
 {
 	struct hda_codec *codec = per_pin->codec;
-	struct hdmi_spec *spec = codec->spec;
 	int ret;
 
 	/* no temporary power up/down needed for component notifier */
-	if (!codec_has_acomp(codec))
-		snd_hda_power_up_pm(codec);
+	if (!codec_has_acomp(codec)) {
+		ret = snd_hda_power_up_pm(codec);
+		if (ret < 0 && pm_runtime_suspended(hda_codec_dev(codec))) {
+			snd_hda_power_down_pm(codec);
+			return false;
+		}
+	}
 
-	mutex_lock(&spec->pcm_lock);
 	if (codec_has_acomp(codec)) {
 		sync_eld_via_acomp(codec, per_pin);
 		ret = false; /* don't call snd_hda_jack_report_sync() */
 	} else {
 		ret = hdmi_present_sense_via_verbs(per_pin, repoll);
 	}
-	mutex_unlock(&spec->pcm_lock);
 
 	if (!codec_has_acomp(codec))
 		snd_hda_power_down_pm(codec);
@@ -1654,12 +1659,16 @@ static void hdmi_repoll_eld(struct work_struct *work)
 {
 	struct hdmi_spec_per_pin *per_pin =
 	container_of(to_delayed_work(work), struct hdmi_spec_per_pin, work);
+	struct hda_codec *codec = per_pin->codec;
+	struct hdmi_spec *spec = codec->spec;
 
 	if (per_pin->repoll_count++ > 6)
 		per_pin->repoll_count = 0;
 
+	mutex_lock(&spec->pcm_lock);
 	if (hdmi_present_sense(per_pin, per_pin->repoll_count))
 		snd_hda_jack_report_sync(per_pin->codec);
+	mutex_unlock(&spec->pcm_lock);
 }
 
 static void intel_haswell_fixup_connect_list(struct hda_codec *codec,

From 64dafbc9530c10300acffc57fae3269d95fa8f93 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 25 Jun 2018 11:39:52 +0200
Subject: [PATCH 192/382] drbd: fix access after free

We have
  struct drbd_requests { ... struct bio *private_bio;  ... }
to hold a bio clone for local submission.

On local IO completion, we put that bio, and in case we want to use the
result later, we overload that member to hold the ERR_PTR() of the
completion result,

Which, before v4.3, used to be the passed in "int error",
so we could first bio_put(), then assign.

v4.3-rc1~100^2~21 4246a0b63bd8 block: add a bi_error field to struct bio
changed that:
  	bio_put(req->private_bio);
 -	req->private_bio = ERR_PTR(error);
 +	req->private_bio = ERR_PTR(bio->bi_error);

Which introduces an access after free,
because it was non obvious that req->private_bio == bio.

Impact of that was mostly unnoticable, because we only use that value
in a multiple-failure case, and even then map any "unexpected" error
code to EIO, so worst case we could potentially mask a more specific
error with EIO in a multiple failure case.

Unless the pointed to memory region was unmapped, as is the case with
CONFIG_DEBUG_PAGEALLOC, in which case this results in

  BUG: unable to handle kernel paging request

v4.13-rc1~70^2~75 4e4cbee93d56 block: switch bios to blk_status_t
changes it further to
  	bio_put(req->private_bio);
  	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));

And blk_status_to_errno() now contains a WARN_ON_ONCE() for unexpected
values, which catches this "sometimes", if the memory has been reused
quickly enough for other things.

Should also go into stable since 4.3, with the trivial change around 4.13.

Cc: stable@vger.kernel.org
Fixes: 4246a0b63bd8 block: add a bi_error field to struct bio
Reported-by: Sarah Newman <srn@prgmr.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_worker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 1476cb3439f4..5e793dd7adfb 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -282,8 +282,8 @@ void drbd_request_endio(struct bio *bio)
 		what = COMPLETED_OK;
 	}
 
-	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
+	bio_put(bio);
 
 	/* not req_mod(), we need irqsave here! */
 	spin_lock_irqsave(&device->resource->req_lock, flags);

From 8740fa6f5c834a881b9b0d560e62c6fe3c1e60ef Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 2 Jul 2018 08:30:22 +0100
Subject: [PATCH 193/382] sata_nv: remove redundant pointers sdev0 and sdev1

Pointers sdev0 and sdev1 are being assigned but are never used hence they
are redundant and can be removed.

Cleans up clang warnings:
warning: variable 'sdev0' set but not used [-Wunused-but-set-variable]
warning: variable 'sdev1' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/sata_nv.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 10ae11aa1926..72c9b922a77b 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -675,7 +675,6 @@ static int nv_adma_slave_config(struct scsi_device *sdev)
 	struct ata_port *ap = ata_shost_to_port(sdev->host);
 	struct nv_adma_port_priv *pp = ap->private_data;
 	struct nv_adma_port_priv *port0, *port1;
-	struct scsi_device *sdev0, *sdev1;
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 	unsigned long segment_boundary, flags;
 	unsigned short sg_tablesize;
@@ -736,8 +735,6 @@ static int nv_adma_slave_config(struct scsi_device *sdev)
 
 	port0 = ap->host->ports[0]->private_data;
 	port1 = ap->host->ports[1]->private_data;
-	sdev0 = ap->host->ports[0]->link.device[0].sdev;
-	sdev1 = ap->host->ports[1]->link.device[0].sdev;
 	if ((port0->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) ||
 	    (port1->flags & NV_ADMA_ATAPI_SETUP_COMPLETE)) {
 		/*

From 240630e61870e62e39a97225048f9945848fa5f5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Jul 2018 12:15:46 +0200
Subject: [PATCH 194/382] ahci: Disable LPM on Lenovo 50 series laptops with a
 too old BIOS

There have been several reports of LPM related hard freezes about once
a day on multiple Lenovo 50 series models. Strange enough these reports
where not disk model specific as LPM issues usually are and some users
with the exact same disk + laptop where seeing them while other users
where not seeing these issues.

It turns out that enabling LPM triggers a firmware bug somewhere, which
has been fixed in later BIOS versions.

This commit adds a new ahci_broken_lpm() function and a new ATA_FLAG_NO_LPM
for dealing with this.

The ahci_broken_lpm() function contains DMI match info for the 4 models
which are known to be affected by this and the DMI BIOS date field for
known good BIOS versions. If the BIOS date is older then the one in the
table LPM will be disabled and a warning will be printed.

Note the BIOS dates are for known good versions, some older versions may
work too, but we don't know for sure, the table is using dates from BIOS
versions for which users have confirmed that upgrading to that version
makes the problem go away.

Unfortunately I've been unable to get hold of the reporter who reported
that BIOS version 2.35 fixed the problems on the W541 for him. I've been
able to verify the DMI_SYS_VENDOR and DMI_PRODUCT_VERSION from an older
dmidecode, but I don't know the exact BIOS date as reported in the DMI.
Lenovo keeps a changelog with dates in their release notes, but the
dates there are the release dates not the build dates which are in DMI.
So I've chosen to set the date to which we compare to one day past the
release date of the 2.34 BIOS. I plan to fix this with a follow up
commit once I've the necessary info.

Cc: stable@vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci.c        | 59 +++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata-core.c |  3 ++
 include/linux/libata.h    |  1 +
 3 files changed, 63 insertions(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 738fb22978dd..fdeb3b4d0f4a 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1280,6 +1280,59 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
 	return strcmp(buf, dmi->driver_data) < 0;
 }
 
+static bool ahci_broken_lpm(struct pci_dev *pdev)
+{
+	static const struct dmi_system_id sysids[] = {
+		/* Various Lenovo 50 series have LPM issues with older BIOSen */
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X250"),
+			},
+			.driver_data = "20180406", /* 1.31 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L450"),
+			},
+			.driver_data = "20180420", /* 1.28 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T450s"),
+			},
+			.driver_data = "20180315", /* 1.33 */
+		},
+		{
+			.matches = {
+				DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+				DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"),
+			},
+			/*
+			 * Note date based on release notes, 2.35 has been
+			 * reported to be good, but I've been unable to get
+			 * a hold of the reporter to get the DMI BIOS date.
+			 * TODO: fix this.
+			 */
+			.driver_data = "20180310", /* 2.35 */
+		},
+		{ }	/* terminate list */
+	};
+	const struct dmi_system_id *dmi = dmi_first_match(sysids);
+	int year, month, date;
+	char buf[9];
+
+	if (!dmi)
+		return false;
+
+	dmi_get_date(DMI_BIOS_DATE, &year, &month, &date);
+	snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date);
+
+	return strcmp(buf, dmi->driver_data) < 0;
+}
+
 static bool ahci_broken_online(struct pci_dev *pdev)
 {
 #define ENCODE_BUSDEVFN(bus, slot, func)			\
@@ -1694,6 +1747,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			"quirky BIOS, skipping spindown on poweroff\n");
 	}
 
+	if (ahci_broken_lpm(pdev)) {
+		pi.flags |= ATA_FLAG_NO_LPM;
+		dev_warn(&pdev->dev,
+			 "BIOS update required for Link Power Management support\n");
+	}
+
 	if (ahci_broken_suspend(pdev)) {
 		hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
 		dev_warn(&pdev->dev,
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 27d15ed7fa3d..cc71c63df381 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2493,6 +2493,9 @@ int ata_dev_configure(struct ata_device *dev)
 	    (id[ATA_ID_SATA_CAPABILITY] & 0xe) == 0x2)
 		dev->horkage |= ATA_HORKAGE_NOLPM;
 
+	if (ap->flags & ATA_FLAG_NO_LPM)
+		dev->horkage |= ATA_HORKAGE_NOLPM;
+
 	if (dev->horkage & ATA_HORKAGE_NOLPM) {
 		ata_dev_warn(dev, "LPM support broken, forcing max_power\n");
 		dev->link->ap->target_lpm_policy = ATA_LPM_MAX_POWER;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index a2257e380789..32f247cb5e9e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -210,6 +210,7 @@ enum {
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
 					    /* (doesn't imply presence) */
 	ATA_FLAG_SATA		= (1 << 1),
+	ATA_FLAG_NO_LPM		= (1 << 2), /* host not happy with LPM */
 	ATA_FLAG_NO_LOG_PAGE	= (1 << 5), /* do not issue log page read */
 	ATA_FLAG_NO_ATAPI	= (1 << 6), /* No ATAPI support */
 	ATA_FLAG_PIO_DMA	= (1 << 7), /* PIO cmds via DMA */

From 74cb319bd97dd62881f97ea5a3228f7c2546bf56 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Mon, 2 Jul 2018 17:13:31 +0300
Subject: [PATCH 195/382] usb: xhci: dbc: Don't decrement runtime PM counter if
 DBC is not started

pm_runtime_put_sync() gets called everytime in xhci_dbc_stop().

If dbc is not started, this makes the runtime PM counter incorrectly
becomes 0, and calls autosuspend function. Then we'll keep seeing this:
[54664.762220] xhci_hcd 0000:00:14.0: Root hub is not suspended

So only calls pm_runtime_put_sync() when dbc was started.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-dbgcap.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c
index 1fbfd89d0a0f..387f124a8334 100644
--- a/drivers/usb/host/xhci-dbgcap.c
+++ b/drivers/usb/host/xhci-dbgcap.c
@@ -508,16 +508,18 @@ static int xhci_do_dbc_start(struct xhci_hcd *xhci)
 	return 0;
 }
 
-static void xhci_do_dbc_stop(struct xhci_hcd *xhci)
+static int xhci_do_dbc_stop(struct xhci_hcd *xhci)
 {
 	struct xhci_dbc		*dbc = xhci->dbc;
 
 	if (dbc->state == DS_DISABLED)
-		return;
+		return -1;
 
 	writel(0, &dbc->regs->control);
 	xhci_dbc_mem_cleanup(xhci);
 	dbc->state = DS_DISABLED;
+
+	return 0;
 }
 
 static int xhci_dbc_start(struct xhci_hcd *xhci)
@@ -544,6 +546,7 @@ static int xhci_dbc_start(struct xhci_hcd *xhci)
 
 static void xhci_dbc_stop(struct xhci_hcd *xhci)
 {
+	int ret;
 	unsigned long		flags;
 	struct xhci_dbc		*dbc = xhci->dbc;
 	struct dbc_port		*port = &dbc->port;
@@ -556,10 +559,11 @@ static void xhci_dbc_stop(struct xhci_hcd *xhci)
 		xhci_dbc_tty_unregister_device(xhci);
 
 	spin_lock_irqsave(&dbc->lock, flags);
-	xhci_do_dbc_stop(xhci);
+	ret = xhci_do_dbc_stop(xhci);
 	spin_unlock_irqrestore(&dbc->lock, flags);
 
-	pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller);
+	if (!ret)
+		pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller);
 }
 
 static void

From 146971e6244b18eca60f614a1b94048844066d23 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Jul 2018 11:48:34 +0200
Subject: [PATCH 196/382] typec: tcpm: Correctly report power_supply current
 and voltage for non pd supply

Commit f2a8aa053c17 ("typec: tcpm: Represent source supply through
power_supply") moved the code to register a power_supply representing
the device supplying power to the type-C connector, from the fusb302
code to the generic tcpm code so that we have a psy reporting the
supply voltage and current for all tcpm devices.

This broke the reporting of current and voltage through the psy interface
when supplied by a a non pd supply (5V, current as reported by
get_current_limit). The cause of this breakage is port->supply_voltage
and port->current_limit not being set in that case.

This commit fixes this by setting port->supply_voltage and
port->current_limit from tcpm_set_current_limit().

This commit also removes setting supply_voltage and current_limit
from tcpm_reset_port() as that calls tcpm_set_current_limit(0, 0)
which now already sets these to 0.

Fixes: f2a8aa053c17 ("typec: tcpm: Represent source supply through...")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c
index d961f1ec0e08..150f43668bec 100644
--- a/drivers/usb/typec/tcpm.c
+++ b/drivers/usb/typec/tcpm.c
@@ -725,6 +725,9 @@ static int tcpm_set_current_limit(struct tcpm_port *port, u32 max_ma, u32 mv)
 
 	tcpm_log(port, "Setting voltage/current limit %u mV %u mA", mv, max_ma);
 
+	port->supply_voltage = mv;
+	port->current_limit = max_ma;
+
 	if (port->tcpc->set_current_limit)
 		ret = port->tcpc->set_current_limit(port->tcpc, max_ma, mv);
 
@@ -2595,8 +2598,6 @@ static void tcpm_reset_port(struct tcpm_port *port)
 	tcpm_set_attached_state(port, false);
 	port->try_src_count = 0;
 	port->try_snk_count = 0;
-	port->supply_voltage = 0;
-	port->current_limit = 0;
 	port->usb_type = POWER_SUPPLY_USB_TYPE_C;
 
 	power_supply_changed(port->psy);

From b3a653288e1aeebcc7367dc6d5a42bccbff8bde1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Jul 2018 11:48:35 +0200
Subject: [PATCH 197/382] i2c-cht-wc: Fix bq24190 supplier

Commit f2a8aa053c17 ("typec: tcpm: Represent source supply through
power_supply") moved the code to register a power_supply representing
the device supplying power to the type-C connector, from the fusb302
code to the generic tcpm code.

This has caused the power-supply registered by the fusb302 driver,
which determines how much current the bq24190 can draw, to change name
from "fusb302-typec-source" to "tcpm-source-psy-i2c-fusb302".

Fixes: f2a8aa053c17 ("typec: tcpm: Represent source supply through...")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-cht-wc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c
index 44cffad43701..c4d176f5ed79 100644
--- a/drivers/i2c/busses/i2c-cht-wc.c
+++ b/drivers/i2c/busses/i2c-cht-wc.c
@@ -234,7 +234,8 @@ static const struct irq_chip cht_wc_i2c_irq_chip = {
 	.name			= "cht_wc_ext_chrg_irq_chip",
 };
 
-static const char * const bq24190_suppliers[] = { "fusb302-typec-source" };
+static const char * const bq24190_suppliers[] = {
+	"tcpm-source-psy-i2c-fusb302" };
 
 static const struct property_entry bq24190_props[] = {
 	PROPERTY_ENTRY_STRING_ARRAY("supplied-from", bq24190_suppliers),

From ba44579141f9e2c0229e6e7eeb00b5fa68f0f74a Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 27 Jun 2018 15:15:40 +0300
Subject: [PATCH 198/382] ahci: Add Intel Ice Lake LP PCI ID

This should also be using the default LPM policy for mobile chipsets so
add the PCI ID to the driver list of supported devices.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/ata/ahci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index fdeb3b4d0f4a..b2b9eba1d214 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -400,6 +400,7 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x0f23), board_ahci_mobile }, /* Bay Trail AHCI */
 	{ PCI_VDEVICE(INTEL, 0x22a3), board_ahci_mobile }, /* Cherry Tr. AHCI */
 	{ PCI_VDEVICE(INTEL, 0x5ae3), board_ahci_mobile }, /* ApolloLake AHCI */
+	{ PCI_VDEVICE(INTEL, 0x34d3), board_ahci_mobile }, /* Ice Lake LP AHCI */
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,

From b320a0a9f23c98f21631eb27bcbbca91c79b1c6e Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Tue, 26 Jun 2018 20:56:54 +0900
Subject: [PATCH 199/382] ata: Fix ZBC_OUT command block check

The block (LBA) specified must not exceed the last addressable LBA,
which is dev->nr_sectors - 1. So fix the correct check is
"if (block >= dev->n_sectors)" and not "if (block > dev->n_sectords)".

Additionally, the asc/ascq to return for an LBA that is not a zone start
LBA should be ILLEGAL REQUEST, regardless if the bad LBA is out of
range.

Reported-by: David Butterfield <david.butterfield@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-scsi.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 6a91d04351d9..a5543751f446 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3805,8 +3805,13 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
 		 */
 		goto invalid_param_len;
 	}
-	if (block > dev->n_sectors)
-		goto out_of_range;
+	if (block >= dev->n_sectors) {
+		/*
+		 * Block must be a valid zone ID (a zone start LBA).
+		 */
+		fp = 2;
+		goto invalid_fld;
+	}
 
 	all = cdb[14] & 0x1;
 
@@ -3837,10 +3842,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
  invalid_fld:
 	ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff);
 	return 1;
- out_of_range:
-	/* "Logical Block Address out of range" */
-	ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x00);
-	return 1;
 invalid_param_len:
 	/* "Parameter list length error" */
 	ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0);

From 6edf1d4cb0acde3a0a5dac849f33031bd7abb7b1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Tue, 26 Jun 2018 20:56:55 +0900
Subject: [PATCH 200/382] ata: Fix ZBC_OUT all bit handling

If the ALL bit is set in the ZBC_OUT command, the command zone ID field
(block) should be ignored.

Reported-by: David Butterfield <david.butterfield@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-scsi.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index a5543751f446..aad1b01447de 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3805,7 +3805,14 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
 		 */
 		goto invalid_param_len;
 	}
-	if (block >= dev->n_sectors) {
+
+	all = cdb[14] & 0x1;
+	if (all) {
+		/*
+		 * Ignore the block address (zone ID) as defined by ZBC.
+		 */
+		block = 0;
+	} else if (block >= dev->n_sectors) {
 		/*
 		 * Block must be a valid zone ID (a zone start LBA).
 		 */
@@ -3813,8 +3820,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc)
 		goto invalid_fld;
 	}
 
-	all = cdb[14] & 0x1;
-
 	if (ata_ncq_enabled(qc->dev) &&
 	    ata_fpdma_zac_mgmt_out_supported(qc->dev)) {
 		tf->protocol = ATA_PROT_NCQ_NODATA;

From 718b5406cd76f1aa6434311241b7febf0e8571ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 29 Jun 2018 16:27:10 +0200
Subject: [PATCH 201/382] drm: Use kvzalloc for allocating blob property memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The property size may be controlled by userspace, can be large (I've
seen failure with order 4, i.e. 16 pages / 64 KB) and doesn't need to be
physically contiguous.

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180629142710.2069-1-michel@daenzer.net
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/drm_property.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 1f8031e30f53..cdb10f885a4f 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -532,7 +532,7 @@ static void drm_property_free_blob(struct kref *kref)
 
 	drm_mode_object_unregister(blob->dev, &blob->base);
 
-	kfree(blob);
+	kvfree(blob);
 }
 
 /**
@@ -559,7 +559,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length,
 	if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob))
 		return ERR_PTR(-EINVAL);
 
-	blob = kzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
+	blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
 	if (!blob)
 		return ERR_PTR(-ENOMEM);
 
@@ -576,7 +576,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length,
 	ret = __drm_mode_object_add(dev, &blob->base, DRM_MODE_OBJECT_BLOB,
 				    true, drm_property_free_blob);
 	if (ret) {
-		kfree(blob);
+		kvfree(blob);
 		return ERR_PTR(-EINVAL);
 	}
 

From 0859df22ab7cfb3ad2df2caed76cadce6ac33a80 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 28 Jun 2018 12:38:53 -0500
Subject: [PATCH 202/382] drm/amdgpu: fix swapped emit_ib_size in vce3

The phys and vm versions had the values swapped.

Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 0999c843f623..a71b97519cc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -900,7 +900,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
 	.emit_frame_size =
 		4 + /* vce_v3_0_emit_pipeline_sync */
 		6, /* amdgpu_vce_ring_emit_fence x1 no user fence */
-	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
+	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
 	.emit_ib = amdgpu_vce_ring_emit_ib,
 	.emit_fence = amdgpu_vce_ring_emit_fence,
 	.test_ring = amdgpu_vce_ring_test_ring,
@@ -924,7 +924,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = {
 		6 + /* vce_v3_0_emit_vm_flush */
 		4 + /* vce_v3_0_emit_pipeline_sync */
 		6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */
-	.emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */
+	.emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */
 	.emit_ib = vce_v3_0_ring_emit_ib,
 	.emit_vm_flush = vce_v3_0_emit_vm_flush,
 	.emit_pipeline_sync = vce_v3_0_emit_pipeline_sync,

From c3d0280be8d5c6de9e3848e280493f561799bb67 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 28 Jun 2018 12:44:25 -0500
Subject: [PATCH 203/382] drm/amdgpu/pm: fix display count in non-DC path

new_active_crtcs is a bitmask, new_active_crtc_count is the
actual count.

Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index b455da487782..fc818b4d849c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1882,7 +1882,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 		if (!amdgpu_device_has_dc_support(adev)) {
 			mutex_lock(&adev->pm.mutex);
 			amdgpu_dpm_get_active_displays(adev);
-			adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs;
+			adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count;
 			adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
 			adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev);
 			/* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */

From d55bac2754476624f23bdf3b908d117f3cdf469b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 13:03:07 +0200
Subject: [PATCH 204/382] ata: Remove depends on HAS_DMA in case of platform
 dependency

Remove dependencies on HAS_DMA where a Kconfig symbol depends on another
symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST".
In most cases this other symbol is an architecture or platform specific
symbol, or PCI.

Generic symbols and drivers without platform dependencies keep their
dependencies on HAS_DMA, to prevent compiling subsystems or drivers that
cannot work anyway.

This simplifies the dependencies, and allows to improve compile-testing.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 2b16e7c8fff3..39b181d6bd0d 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -398,7 +398,6 @@ config SATA_DWC_VDEBUG
 
 config SATA_HIGHBANK
 	tristate "Calxeda Highbank SATA support"
-	depends on HAS_DMA
 	depends on ARCH_HIGHBANK || COMPILE_TEST
 	help
 	  This option enables support for the Calxeda Highbank SoC's
@@ -408,7 +407,6 @@ config SATA_HIGHBANK
 
 config SATA_MV
 	tristate "Marvell SATA support"
-	depends on HAS_DMA
 	depends on PCI || ARCH_DOVE || ARCH_MV78XX0 || \
 		   ARCH_MVEBU || ARCH_ORION5X || COMPILE_TEST
 	select GENERIC_PHY

From 30998033f62a9915e0f884d680569a39bc9ce133 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <Sudeep.Holla@arm.com>
Date: Fri, 29 Jun 2018 17:17:57 +0100
Subject: [PATCH 205/382] ACPI / PPTT: use ACPI ID whenever
 ACPI_PPTT_ACPI_PROCESSOR_ID_VALID is set
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, we use the ACPI processor ID only for the leaf/processor nodes
as the specification states it must match the value of the ACPI processor
ID field in the processor’s entry in the MADT.

However, if a PPTT structure represents a processors group, it
matches a processor container UID in the namespace and the
ACPI_PPTT_ACPI_PROCESSOR_ID_VALID flag indicates whether the
ACPI processor ID is valid.

Let's use UID whenever ACPI_PPTT_ACPI_PROCESSOR_ID_VALID is set to be
consistent instead of using table offset as it's currently done for
non-leaf nodes.

Fixes: 2bd00bcd73e5 (ACPI/PPTT: Add Processor Properties Topology Table parsing)
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Jeremy Linton <jeremy.linton@arm.com>
[ rjw: Changelog (minor) ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/pptt.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index e5ea1974d1e3..d1e26cb599bf 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -481,8 +481,14 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
 	if (cpu_node) {
 		cpu_node = acpi_find_processor_package_id(table, cpu_node,
 							  level, flag);
-		/* Only the first level has a guaranteed id */
-		if (level == 0)
+		/*
+		 * As per specification if the processor structure represents
+		 * an actual processor, then ACPI processor ID must be valid.
+		 * For processor containers ACPI_PPTT_ACPI_PROCESSOR_ID_VALID
+		 * should be set if the UID is valid
+		 */
+		if (level == 0 ||
+		    cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID)
 			return cpu_node->acpi_processor_id;
 		return ACPI_PTR_DIFF(cpu_node, table);
 	}

From a17712c8e4be4fa5404d20e9cd3b2b21eae7bc56 Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Mon, 2 Jul 2018 18:45:18 -0400
Subject: [PATCH 206/382] ext4: check superblock mapped prior to committing

This patch attempts to close a hole leading to a BUG seen with hot
removals during writes [1].

A block device (NVME namespace in this test case) is formatted to EXT4
without partitions. It's mounted and write I/O is run to a file, then
the device is hot removed from the slot. The superblock attempts to be
written to the drive which is no longer present.

The typical chain of events leading to the BUG:
ext4_commit_super()
  __sync_dirty_buffer()
    submit_bh()
      submit_bh_wbc()
        BUG_ON(!buffer_mapped(bh));

This fix checks for the superblock's buffer head being mapped prior to
syncing.

[1] https://www.spinics.net/lists/linux-ext4/msg56527.html

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/super.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b37b00befd65..f36c9f774e83 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4808,6 +4808,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 
 	if (!sbh || block_device_ejected(sb))
 		return error;
+
+	/*
+	 * The superblock bh should be mapped, but it might not be if the
+	 * device was hot-removed. Not much we can do but fail the I/O.
+	 */
+	if (!buffer_mapped(sbh))
+		return error;
+
 	/*
 	 * If the file system is mounted read-only, don't update the
 	 * superblock write time.  This avoids updating the superblock

From 60feca8f59a0bbdee9545ab52775cad0171151ff Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Mon, 2 Jul 2018 09:27:47 +0800
Subject: [PATCH 207/382] mmc: core: cd_label must be last entry of mmc_gpio
 struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit bfd694d5e21c ("mmc: core: Add tunable delay
before detecting card after card is inserted") adds
"u32 cd_debounce_delay_ms" to the last of mmc_gpio
struct and cause "char cd_label[0]" NOT work as string
pointer of card detect label, when "cat /proc/interrupts",
the devname for card detect gpio is incorrect as below:

144:          0  gpio-mxc  22 Edge      ▒
161:          0  gpio-mxc   7 Edge      ▒

Move the cd_label field down to fix this, and drop the
zero from the array size to prevent future similar bugs,
the result is correct as below:

144:          0  gpio-mxc  22 Edge      2198000.mmc cd
161:          0  gpio-mxc   7 Edge      2190000.mmc cd

Fixes: bfd694d5e21c ("mmc: core: Add tunable delay before detecting card after card is inserted")
Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Tested-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/slot-gpio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index ef05e0039378..2a833686784b 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -27,8 +27,8 @@ struct mmc_gpio {
 	bool override_cd_active_level;
 	irqreturn_t (*cd_gpio_isr)(int irq, void *dev_id);
 	char *ro_label;
-	char cd_label[0];
 	u32 cd_debounce_delay_ms;
+	char cd_label[];
 };
 
 static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id)

From 5d1c115241e4aa0034ddf44693c6bd2e89e10831 Mon Sep 17 00:00:00 2001
From: Robert Nelson <robertcnelson@gmail.com>
Date: Mon, 2 Jul 2018 15:21:43 -0500
Subject: [PATCH 208/382] ARM: dts: am335x-bone-common: Fix mmc0 Write Protect

Mainline Commit b74c2b21e1551018af53ee6c1efc051dfce2d788 added the pinmux
settings for mmc1, however this pin (0x9a0) is routed to P9_42 on the cape
header. Thus any BeagleBone cape that utilizes P9_42 triggers mmc0's Write
Protect.

Fixes: b74c2b21e155 ("ARM: dts: am33xx: Add pinmux data for mmc1 in
am335x-evm, evmsk and beaglebone")
Signed-off-by: Robert Nelson <robertcnelson@gmail.com>
CC: Faiz Abbas <faiz_abbas@ti.com>
CC: Tony Lindgren <tony@atomide.com>
CC: Jason Kridner <jkridner@beagleboard.org>
CC: Drew Fustini <drew@beagleboard.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am335x-bone-common.dtsi | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index f9e8667f5886..73b514dddf65 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -168,7 +168,6 @@ AM33XX_IOPAD(0x8f4, PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_dat2.mmc0_dat2 */
 			AM33XX_IOPAD(0x8f0, PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_dat3.mmc0_dat3 */
 			AM33XX_IOPAD(0x904, PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_cmd.mmc0_cmd */
 			AM33XX_IOPAD(0x900, PIN_INPUT_PULLUP | MUX_MODE0)	/* mmc0_clk.mmc0_clk */
-			AM33XX_IOPAD(0x9a0, PIN_INPUT | MUX_MODE4)		/* mcasp0_aclkr.mmc0_sdwp */
 		>;
 	};
 

From d9c0ffcabd6aae7ff1e34e8078354c13bb9f1183 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 28 Jun 2018 18:29:41 +0200
Subject: [PATCH 209/382] sched/nohz: Skip remote tick on idle task entirely

Some people have reported that the warning in sched_tick_remote()
occasionally triggers, especially in favour of some RCU-Torture
pressure:

	WARNING: CPU: 11 PID: 906 at kernel/sched/core.c:3138 sched_tick_remote+0xb6/0xc0
	Modules linked in:
	CPU: 11 PID: 906 Comm: kworker/u32:3 Not tainted 4.18.0-rc2+ #1
	Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
	Workqueue: events_unbound sched_tick_remote
	RIP: 0010:sched_tick_remote+0xb6/0xc0
	Code: e8 0f 06 b8 00 c6 03 00 fb eb 9d 8b 43 04 85 c0 75 8d 48 8b 83 e0 0a 00 00 48 85 c0 75 81 eb 88 48 89 df e8 bc fe ff ff eb aa <0f> 0b eb
	+c5 66 0f 1f 44 00 00 bf 17 00 00 00 e8 b6 2e fe ff 0f b6
	Call Trace:
	 process_one_work+0x1df/0x3b0
	 worker_thread+0x44/0x3d0
	 kthread+0xf3/0x130
	 ? set_worker_desc+0xb0/0xb0
	 ? kthread_create_worker_on_cpu+0x70/0x70
	 ret_from_fork+0x35/0x40

This happens when the remote tick applies on an idle task. Usually the
idle_cpu() check avoids that, but it is performed before we lock the
runqueue and it is therefore racy. It was intended to be that way in
order to prevent from useless runqueue locks since idle task tick
callback is a no-op.

Now if the racy check slips out of our hands and we end up remotely
ticking an idle task, the empty task_tick_idle() is harmless. Still
it won't pass the WARN_ON_ONCE() test that ensures rq_clock_task() is
not too far from curr->se.exec_start because update_curr_idle() doesn't
update the exec_start value like other scheduler policies. Hence the
reported false positive.

So let's have another check, while the rq is locked, to make sure we
don't remote tick on an idle task. The lockless idle_cpu() still applies
to avoid unecessary rq lock contention.

Reported-by: Jacek Tomaka <jacekt@dug.com>
Reported-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reported-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1530203381-31234-1-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78d8facba456..22fce36426c0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3113,7 +3113,9 @@ static void sched_tick_remote(struct work_struct *work)
 	struct tick_work *twork = container_of(dwork, struct tick_work, work);
 	int cpu = twork->cpu;
 	struct rq *rq = cpu_rq(cpu);
+	struct task_struct *curr;
 	struct rq_flags rf;
+	u64 delta;
 
 	/*
 	 * Handle the tick only if it appears the remote CPU is running in full
@@ -3122,24 +3124,28 @@ static void sched_tick_remote(struct work_struct *work)
 	 * statistics and checks timeslices in a time-independent way, regardless
 	 * of when exactly it is running.
 	 */
-	if (!idle_cpu(cpu) && tick_nohz_tick_stopped_cpu(cpu)) {
-		struct task_struct *curr;
-		u64 delta;
+	if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+		goto out_requeue;
 
-		rq_lock_irq(rq, &rf);
-		update_rq_clock(rq);
-		curr = rq->curr;
-		delta = rq_clock_task(rq) - curr->se.exec_start;
+	rq_lock_irq(rq, &rf);
+	curr = rq->curr;
+	if (is_idle_task(curr))
+		goto out_unlock;
 
-		/*
-		 * Make sure the next tick runs within a reasonable
-		 * amount of time.
-		 */
-		WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
-		curr->sched_class->task_tick(rq, curr, 0);
-		rq_unlock_irq(rq, &rf);
-	}
+	update_rq_clock(rq);
+	delta = rq_clock_task(rq) - curr->se.exec_start;
 
+	/*
+	 * Make sure the next tick runs within a reasonable
+	 * amount of time.
+	 */
+	WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+	curr->sched_class->task_tick(rq, curr, 0);
+
+out_unlock:
+	rq_unlock_irq(rq, &rf);
+
+out_requeue:
 	/*
 	 * Run the remote tick once per second (1Hz). This arbitrary
 	 * frequency is large enough to avoid overload but short enough

From 296b2ffe7fa9ed756c41415c6b1512bc4ad687b1 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Tue, 26 Jun 2018 15:53:22 +0200
Subject: [PATCH 210/382] sched/rt: Fix call to cpufreq_update_util()

With commit:

  8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support")

the schedutil governor uses rq->rt.rt_nr_running to detect whether an
RT task is currently running on the CPU and to set frequency to max
if necessary.

cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq() but
rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is
called so schedutil still considers that an RT task is running when the
last task is dequeued. The update of rq->rt.rt_nr_running happens later
in dequeue_rt_stack().

In fact, we can take advantage of the sequence that the dequeue then
re-enqueue rt entities when a rt task is enqueued or dequeued;
As a result enqueue_top_rt_rq() is always called when a task is
enqueued or dequeued and also when groups are throttled or unthrottled.
The only place that not use enqueue_top_rt_rq() is when root rt_rq is
throttled.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: efault@gmx.de
Cc: juri.lelli@redhat.com
Cc: patrick.bellasi@arm.com
Cc: viresh.kumar@linaro.org
Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support')
Link: http://lkml.kernel.org/r/1530021202-21695-1-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/cpufreq_schedutil.c |  2 +-
 kernel/sched/rt.c                | 16 ++++++++++------
 kernel/sched/sched.h             |  5 +++++
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 3cde46483f0a..c907fde01eaa 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -192,7 +192,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 {
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
 
-	if (rq->rt.rt_nr_running)
+	if (rt_rq_is_runnable(&rq->rt))
 		return sg_cpu->max;
 
 	/*
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 47556b0c9a95..572567078b60 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 
 	rt_se = rt_rq->tg->rt_se[cpu];
 
-	if (!rt_se)
+	if (!rt_se) {
 		dequeue_top_rt_rq(rt_rq);
+		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
+	}
 	else if (on_rt_rq(rt_se))
 		dequeue_rt_entity(rt_se, 0);
 }
@@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 	sub_nr_running(rq, rt_rq->rt_nr_running);
 	rt_rq->rt_queued = 0;
 
-	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_util(rq, 0);
 }
 
 static void
@@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
 
 	if (rt_rq->rt_queued)
 		return;
-	if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
+
+	if (rt_rq_throttled(rt_rq))
 		return;
 
-	add_nr_running(rq, rt_rq->rt_nr_running);
-	rt_rq->rt_queued = 1;
+	if (rt_rq->rt_nr_running) {
+		add_nr_running(rq, rt_rq->rt_nr_running);
+		rt_rq->rt_queued = 1;
+	}
 
 	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
 	cpufreq_update_util(rq, 0);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6601baf2361c..27ddec334601 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -609,6 +609,11 @@ struct rt_rq {
 #endif
 };
 
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_queued && rt_rq->rt_nr_running;
+}
+
 /* Deadline class' related fields in a runqueue */
 struct dl_rq {
 	/* runqueue is an rbtree, ordered by deadline */

From 512ac999d2755d2b7109e996a76b6fb8b888631d Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@linux.alibaba.com>
Date: Wed, 20 Jun 2018 18:18:33 +0800
Subject: [PATCH 211/382] sched/fair: Fix bandwidth timer clock drift condition

I noticed that cgroup task groups constantly get throttled even
if they have low CPU usage, this causes some jitters on the response
time to some of our business containers when enabling CPU quotas.

It's very simple to reproduce:

  mkdir /sys/fs/cgroup/cpu/test
  cd /sys/fs/cgroup/cpu/test
  echo 100000 > cpu.cfs_quota_us
  echo $$ > tasks

then repeat:

  cat cpu.stat | grep nr_throttled  # nr_throttled will increase steadily

After some analysis, we found that cfs_rq::runtime_remaining will
be cleared by expire_cfs_rq_runtime() due to two equal but stale
"cfs_{b|q}->runtime_expires" after period timer is re-armed.

The current condition to judge clock drift in expire_cfs_rq_runtime()
is wrong, the two runtime_expires are actually the same when clock
drift happens, so this condtion can never hit. The orginal design was
correctly done by this commit:

  a9cf55b28610 ("sched: Expire invalid runtime")

... but was changed to be the current implementation due to its locking bug.

This patch introduces another way, it adds a new field in both structures
cfs_rq and cfs_bandwidth to record the expiration update sequence, and
uses them to figure out if clock drift happens (true if they are equal).

Signed-off-by: Xunlei Pang <xlpang@linux.alibaba.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 51f2176d74ac ("sched/fair: Fix unlocked reads of some cfs_b->quota/period")
Link: http://lkml.kernel.org/r/20180620101834.24455-1-xlpang@linux.alibaba.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c  | 14 ++++++++------
 kernel/sched/sched.h |  6 ++++--
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1866e64792a7..791707c56886 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4590,6 +4590,7 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
 	now = sched_clock_cpu(smp_processor_id());
 	cfs_b->runtime = cfs_b->quota;
 	cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
+	cfs_b->expires_seq++;
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4612,6 +4613,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	struct task_group *tg = cfs_rq->tg;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
 	u64 amount = 0, min_amount, expires;
+	int expires_seq;
 
 	/* note: this is a positive sum as runtime_remaining <= 0 */
 	min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -4628,6 +4630,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 			cfs_b->idle = 0;
 		}
 	}
+	expires_seq = cfs_b->expires_seq;
 	expires = cfs_b->runtime_expires;
 	raw_spin_unlock(&cfs_b->lock);
 
@@ -4637,8 +4640,10 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	 * spread between our sched_clock and the one on which runtime was
 	 * issued.
 	 */
-	if ((s64)(expires - cfs_rq->runtime_expires) > 0)
+	if (cfs_rq->expires_seq != expires_seq) {
+		cfs_rq->expires_seq = expires_seq;
 		cfs_rq->runtime_expires = expires;
+	}
 
 	return cfs_rq->runtime_remaining > 0;
 }
@@ -4664,12 +4669,9 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	 * has not truly expired.
 	 *
 	 * Fortunately we can check determine whether this the case by checking
-	 * whether the global deadline has advanced. It is valid to compare
-	 * cfs_b->runtime_expires without any locks since we only care about
-	 * exact equality, so a partial write will still work.
+	 * whether the global deadline(cfs_b->expires_seq) has advanced.
 	 */
-
-	if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
+	if (cfs_rq->expires_seq == cfs_b->expires_seq) {
 		/* extend local deadline, drift is bounded above by 2 ticks */
 		cfs_rq->runtime_expires += TICK_NSEC;
 	} else {
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 27ddec334601..c7742dcc136c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -334,9 +334,10 @@ struct cfs_bandwidth {
 	u64			runtime;
 	s64			hierarchical_quota;
 	u64			runtime_expires;
+	int			expires_seq;
 
-	int			idle;
-	int			period_active;
+	short			idle;
+	short			period_active;
 	struct hrtimer		period_timer;
 	struct hrtimer		slack_timer;
 	struct list_head	throttled_cfs_rq;
@@ -551,6 +552,7 @@ struct cfs_rq {
 
 #ifdef CONFIG_CFS_BANDWIDTH
 	int			runtime_enabled;
+	int			expires_seq;
 	u64			runtime_expires;
 	s64			runtime_remaining;
 

From f1d1be8aee6c461652aea8f58bedebaa73d7f4d3 Mon Sep 17 00:00:00 2001
From: Xunlei Pang <xlpang@linux.alibaba.com>
Date: Wed, 20 Jun 2018 18:18:34 +0800
Subject: [PATCH 212/382] sched/fair: Advance global expiration when period
 timer is restarted

When period gets restarted after some idle time, start_cfs_bandwidth()
doesn't update the expiration information, expire_cfs_rq_runtime() will
see cfs_rq->runtime_expires smaller than rq clock and go to the clock
drift logic, wasting needless CPU cycles on the scheduler hot path.

Update the global expiration in start_cfs_bandwidth() to avoid frequent
expire_cfs_rq_runtime() calls once a new period begins.

Signed-off-by: Xunlei Pang <xlpang@linux.alibaba.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180620101834.24455-2-xlpang@linux.alibaba.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 791707c56886..840b92ee6f89 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5204,13 +5204,18 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
+	u64 overrun;
+
 	lockdep_assert_held(&cfs_b->lock);
 
-	if (!cfs_b->period_active) {
-		cfs_b->period_active = 1;
-		hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
-		hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
-	}
+	if (cfs_b->period_active)
+		return;
+
+	cfs_b->period_active = 1;
+	overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+	cfs_b->runtime_expires += (overrun + 1) * ktime_to_ns(cfs_b->period);
+	cfs_b->expires_seq++;
+	hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
 }
 
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)

From 3482d98bbc730758b63a5d1cf41d05ea17481412 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Thu, 14 Jun 2018 12:33:00 +0200
Subject: [PATCH 213/382] sched/util_est: Fix util_est_dequeue() for throttled
 cfs_rq

When a cfs_rq is throttled, parent cfs_rq->nr_running is decreased and
everything happens at cfs_rq level. Currently util_est stays unchanged
in such case and it keeps accounting the utilization of throttled tasks.
This can somewhat make sense as we don't dequeue tasks but only throttled
cfs_rq.

If a task of another group is enqueued/dequeued and root cfs_rq becomes
idle during the dequeue, util_est will be cleared whereas it was
accounting util_est of throttled tasks before. So the behavior of util_est
is not always the same regarding throttled tasks and depends of side
activity. Furthermore, util_est will not be updated when the cfs_rq is
unthrottled as everything happens at cfs_rq level. Main results is that
util_est will stay null whereas we now have running tasks. We have to wait
for the next dequeue/enqueue of the previously throttled tasks to get an
up to date util_est.

Remove the assumption that cfs_rq's estimated utilization of a CPU is 0
if there is no running task so the util_est of a task remains until the
latter is dequeued even if its cfs_rq has been throttled.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Patrick Bellasi <patrick.bellasi@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 7f65ea42eb00 ("sched/fair: Add util_est on top of PELT")
Link: http://lkml.kernel.org/r/1528972380-16268-1-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 840b92ee6f89..2f0a0be4d344 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3982,18 +3982,10 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 	if (!sched_feat(UTIL_EST))
 		return;
 
-	/*
-	 * Update root cfs_rq's estimated utilization
-	 *
-	 * If *p is the last task then the root cfs_rq's estimated utilization
-	 * of a CPU is 0 by definition.
-	 */
-	ue.enqueued = 0;
-	if (cfs_rq->nr_running) {
-		ue.enqueued  = cfs_rq->avg.util_est.enqueued;
-		ue.enqueued -= min_t(unsigned int, ue.enqueued,
-				     (_task_util_est(p) | UTIL_AVG_UNCHANGED));
-	}
+	/* Update root cfs_rq's estimated utilization */
+	ue.enqueued  = cfs_rq->avg.util_est.enqueued;
+	ue.enqueued -= min_t(unsigned int, ue.enqueued,
+			     (_task_util_est(p) | UTIL_AVG_UNCHANGED));
 	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
 
 	/*

From 1cef1150ef40ec52f507436a14230cbc2623299c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 7 Jun 2018 11:45:49 +0200
Subject: [PATCH 214/382] kthread, sched/core: Fix kthread_parkme() (again...)

Gaurav reports that commit:

  85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue")

isn't working for him. Because of the following race:

> controller Thread                               CPUHP Thread
> takedown_cpu
> kthread_park
> kthread_parkme
> Set KTHREAD_SHOULD_PARK
>                                                 smpboot_thread_fn
>                                                 set Task interruptible
>
>
> wake_up_process
>  if (!(p->state & state))
>                 goto out;
>
>                                                 Kthread_parkme
>                                                 SET TASK_PARKED
>                                                 schedule
>                                                 raw_spin_lock(&rq->lock)
> ttwu_remote
> waiting for __task_rq_lock
>                                                 context_switch
>
>                                                 finish_lock_switch
>
>
>
>                                                 Case TASK_PARKED
>                                                 kthread_park_complete
>
>
> SET Running

Furthermore, Oleg noticed that the whole scheduler TASK_PARKED
handling is buggered because the TASK_DEAD thing is done with
preemption disabled, the current code can still complete early on
preemption :/

So basically revert that earlier fix and go with a variant of the
alternative mentioned in the commit. Promote TASK_PARKED to special
state to avoid the store-store issue on task->state leading to the
WARN in kthread_unpark() -> __kthread_bind().

But in addition, add wait_task_inactive() to kthread_park() to ensure
the task really is PARKED when we return from kthread_park(). This
avoids the whole kthread still gets migrated nonsense -- although it
would be really good to get this done differently.

Reported-by: Gaurav Kohli <gkohli@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kthread.h |  1 -
 include/linux/sched.h   |  2 +-
 kernel/kthread.c        | 30 ++++++++++++++++++++++++------
 kernel/sched/core.c     | 31 +++++++++++--------------------
 4 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2803264c512f..c1961761311d 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
-void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9256118bd40c..43731fe51c97 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,7 +118,7 @@ struct task_group;
  * the comment with set_special_state().
  */
 #define is_special_task_state(state)				\
-	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
 
 #define __set_current_state(state_value)			\
 	do {							\
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 481951bf091d..750cb8082694 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task)
 static void __kthread_parkme(struct kthread *self)
 {
 	for (;;) {
-		set_current_state(TASK_PARKED);
+		/*
+		 * TASK_PARKED is a special state; we must serialize against
+		 * possible pending wakeups to avoid store-store collisions on
+		 * task->state.
+		 *
+		 * Such a collision might possibly result in the task state
+		 * changin from TASK_PARKED and us failing the
+		 * wait_task_inactive() in kthread_park().
+		 */
+		set_special_state(TASK_PARKED);
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
+
+		complete_all(&self->parked);
 		schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -191,11 +202,6 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
-void kthread_park_complete(struct task_struct *k)
-{
-	complete_all(&to_kthread(k)->parked);
-}
-
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k)
 
 	reinit_completion(&kthread->parked);
 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
+	 */
 	wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k)
 	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	if (k != current) {
 		wake_up_process(k);
+		/*
+		 * Wait for __kthread_parkme() to complete(), this means we
+		 * _will_ have TASK_PARKED and are about to call schedule().
+		 */
 		wait_for_completion(&kthread->parked);
+		/*
+		 * Now wait for that schedule() to complete and the task to
+		 * get scheduled out.
+		 */
+		WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
 	}
 
 	return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 22fce36426c0..fe365c9a08e9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,7 +7,6 @@
  */
 #include "sched.h"
 
-#include <linux/kthread.h>
 #include <linux/nospec.h>
 
 #include <linux/kcov.h>
@@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
 	}
-	if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
-		switch (prev_state) {
-		case TASK_DEAD:
-			if (prev->sched_class->task_dead)
-				prev->sched_class->task_dead(prev);
+	if (unlikely(prev_state == TASK_DEAD)) {
+		if (prev->sched_class->task_dead)
+			prev->sched_class->task_dead(prev);
 
-			/*
-			 * Remove function-return probe instances associated with this
-			 * task and put them back on the free list.
-			 */
-			kprobe_flush_task(prev);
+		/*
+		 * Remove function-return probe instances associated with this
+		 * task and put them back on the free list.
+		 */
+		kprobe_flush_task(prev);
 
-			/* Task is done with its stack. */
-			put_task_stack(prev);
+		/* Task is done with its stack. */
+		put_task_stack(prev);
 
-			put_task_struct(prev);
-			break;
-
-		case TASK_PARKED:
-			kthread_park_complete(prev);
-			break;
-		}
+		put_task_struct(prev);
 	}
 
 	tick_nohz_task_switch();

From 4fb5f58e8d191f7c81637ad81284e4848afb4244 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Mon, 2 Jul 2018 23:49:54 -0700
Subject: [PATCH 215/382] x86/mm/32: Initialize the CR4 shadow before
 __flush_tlb_all()

On 32-bit kernels, __flush_tlb_all() may have read the CR4 shadow before the
initialization of CR4 shadow in cpu_init().

Fix it by adding an explicit cr4_init_shadow() call into start_secondary()
which is the first function called on non-boot SMP CPUs - ahead of the
__flush_tlb_all() call.

( This is somewhat of a layering violation, but start_secondary() does
  CR4 bootstrap in the PCID case anyway. )

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: http://lkml.kernel.org/r/b07b6ae9-4b57-4b40-b9bc-50c2c67f1d91@default
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/smpboot.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c2f7d1d2a5c3..db9656e13ea0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -221,6 +221,11 @@ static void notrace start_secondary(void *unused)
 #ifdef CONFIG_X86_32
 	/* switch away from the initial page table */
 	load_cr3(swapper_pg_dir);
+	/*
+	 * Initialize the CR4 shadow before doing anything that could
+	 * try to read it.
+	 */
+	cr4_init_shadow();
 	__flush_tlb_all();
 #endif
 	load_current_idt();

From 0144eb204cdcdf09a76794b4a294291388e739bc Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Mon, 25 Jun 2018 07:41:33 -0500
Subject: [PATCH 216/382] ARM: dts: omap3: Fix am3517 mdio and emac clock
 references

A previous patch removed OMAP clock aliases that were perceived
to be unnecessary.  Unfortunately, it broke the ethernet on the
am3517-evm.  This patch enables the MDIO clock and EMAC clock.

Fixes: 0ed266d7ae5e ("clk: ti: omap3: cleanup unnecessary clock aliases")
Cc: stable@vger.kernel.org #4.16+

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am3517.dtsi | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi
index ca294914bbb1..4b6062b631b1 100644
--- a/arch/arm/boot/dts/am3517.dtsi
+++ b/arch/arm/boot/dts/am3517.dtsi
@@ -39,6 +39,8 @@ davinci_emac: ethernet@5c000000 {
 			ti,davinci-ctrl-ram-size = <0x2000>;
 			ti,davinci-rmii-en = /bits/ 8 <1>;
 			local-mac-address = [ 00 00 00 00 00 00 ];
+			clocks = <&emac_ick>;
+			clock-names = "ick";
 		};
 
 		davinci_mdio: ethernet@5c030000 {
@@ -49,6 +51,8 @@ davinci_mdio: ethernet@5c030000 {
 			bus_freq = <1000000>;
 			#address-cells = <1>;
 			#size-cells = <0>;
+			clocks = <&emac_fck>;
+			clock-names = "fck";
 		};
 
 		uart4: serial@4809e000 {

From 845d382bb15c6e7dc5026c0ff919c5b13fc7e11b Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 2 Jul 2018 16:35:53 -0500
Subject: [PATCH 217/382] x86/bugs: Update when to check for the LS_CFG SSBD
 mitigation

If either the X86_FEATURE_AMD_SSBD or X86_FEATURE_VIRT_SSBD features are
present, then there is no need to perform the check for the LS_CFG SSBD
mitigation support.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180702213553.29202.21089.stgit@tlendack-t1.amdoffice.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 082d7875cef8..38915fbfae73 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -543,7 +543,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 		nodes_per_socket = ((value >> 3) & 7) + 1;
 	}
 
-	if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+	if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) &&
+	    !boot_cpu_has(X86_FEATURE_VIRT_SSBD) &&
+	    c->x86 >= 0x15 && c->x86 <= 0x17) {
 		unsigned int bit;
 
 		switch (c->x86) {

From 612bc3b3d4be749f73a513a17d9b3ee1330d3487 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 2 Jul 2018 16:36:02 -0500
Subject: [PATCH 218/382] x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR

On AMD, the presence of the MSR_SPEC_CTRL feature does not imply that the
SSBD mitigation support should use the SPEC_CTRL MSR. Other features could
have caused the MSR_SPEC_CTRL feature to be set, while a different SSBD
mitigation option is in place.

Update the SSBD support to check for the actual SSBD features that will
use the SPEC_CTRL MSR.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 6ac2f49edb1e ("x86/bugs: Add AMD's SPEC_CTRL MSR usage")
Link: http://lkml.kernel.org/r/20180702213602.29202.33151.stgit@tlendack-t1.amdoffice.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/bugs.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 404df26b7de8..5c0ea39311fe 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -155,7 +155,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 		guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
 
 		/* SSBD controlled in MSR_SPEC_CTRL */
-		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+		if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+		    static_cpu_has(X86_FEATURE_AMD_SSBD))
 			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
 
 		if (hostval != guestval) {
@@ -533,9 +534,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
 		 * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
 		 * use a completely different MSR and bit dependent on family.
 		 */
-		if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+		if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) &&
+		    !static_cpu_has(X86_FEATURE_AMD_SSBD)) {
 			x86_amd_ssb_disable();
-		else {
+		} else {
 			x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
 			x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
 			wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);

From aa7eee8a143a7e8b530eb1e75fb86cae793d1e21 Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Sat, 30 Jun 2018 16:24:21 +0530
Subject: [PATCH 219/382] mtd: spi-nor: cadence-quadspi: Fix direct mode write
 timeouts

Sometimes when writing large size files to flash in direct/memory mapped
mode, it is seen that flash write enable command times out with error:
[  503.146293] cadence-qspi 47040000.ospi: Flash command execution timed out.

This is because, we need to make sure previous direct write operation
is complete by polling for IDLE bit in CONFIG_REG before starting the
next operation.

Fix this by polling for IDLE bit after memory mapped write.

Fixes: a27f2eaf2b27 ("mtd: spi-nor: cadence-quadspi: Add support for direct access mode")
Cc: stable@vger.kernel.org
Signed-off-by: Vignesh R <vigneshr@ti.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/cadence-quadspi.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index c3f7aaa5d18f..d7e10b36a0b9 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -926,10 +926,12 @@ static ssize_t cqspi_write(struct spi_nor *nor, loff_t to,
 	if (ret)
 		return ret;
 
-	if (f_pdata->use_direct_mode)
+	if (f_pdata->use_direct_mode) {
 		memcpy_toio(cqspi->ahb_base + to, buf, len);
-	else
+		ret = cqspi_wait_idle(cqspi);
+	} else {
 		ret = cqspi_indirect_write_execute(nor, to, buf, len);
+	}
 	if (ret)
 		return ret;
 

From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:22 -0700
Subject: [PATCH 220/382] compiler-gcc.h: Add __attribute__((gnu_inline)) to
 all inline declarations

Functions marked extern inline do not emit an externally visible
function when the gnu89 C standard is used. Some KBUILD Makefiles
overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without
an explicit C standard specified, the default is gnu11. Since c99, the
semantics of extern inline have changed such that an externally visible
function is always emitted. This can lead to multiple definition errors
of extern inline functions at link time of compilation units whose build
files have removed an explicit C standard compiler flag for users of GCC
5.1+ or Clang.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: sedat.dilek@gmail.com
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/compiler-gcc.h | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index fd282c7d3e5e..573f5a7d42d4 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -65,6 +65,18 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #endif
 
+/*
+ * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
+ * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
+ * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
+ * defined so the gnu89 semantics are the default.
+ */
+#ifdef __GNUC_STDC_INLINE__
+# define __gnu_inline	__attribute__((gnu_inline))
+#else
+# define __gnu_inline
+#endif
+
 /*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old.
@@ -72,19 +84,22 @@
  * -Wunused-function.  This turns out to avoid the need for complex #ifdef
  * directives.  Suppress the warning in clang as well by using "unused"
  * function attribute, which is redundant but not harmful for gcc.
+ * Prefer gnu_inline, so that extern inline functions do not emit an
+ * externally visible function. This makes extern inline behave as per gnu89
+ * semantics rather than c99. This prevents multiple symbol definition errors
+ * of extern inline functions at link time.
+ * A lot of inline functions can cause havoc with function tracing.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) ||		\
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-#define inline inline		__attribute__((always_inline,unused)) notrace
-#define __inline__ __inline__	__attribute__((always_inline,unused)) notrace
-#define __inline __inline	__attribute__((always_inline,unused)) notrace
+#define inline \
+	inline __attribute__((always_inline, unused)) notrace __gnu_inline
 #else
-/* A lot of inline functions can cause havoc with function tracing */
-#define inline inline		__attribute__((unused)) notrace
-#define __inline__ __inline__	__attribute__((unused)) notrace
-#define __inline __inline	__attribute__((unused)) notrace
+#define inline inline		__attribute__((unused)) notrace __gnu_inline
 #endif
 
+#define __inline__ inline
+#define __inline inline
 #define __always_inline	inline __attribute__((always_inline))
 #define  noinline	__attribute__((noinline))
 

From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Thu, 21 Jun 2018 09:23:23 -0700
Subject: [PATCH 221/382] x86/asm: Add _ASM_ARG* constants for argument
 registers to <asm/asm.h>

i386 and x86-64 uses different registers for arguments; make them
available so we don't have to #ifdef in the actual code.

Native size and specified size (q, l, w, b) versions are provided.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Sedat Dilek <sedat.dilek@gmail.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tstellar@redhat.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/asm.h | 59 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 219faaec51df..990770f9e76b 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -46,6 +46,65 @@
 #define _ASM_SI		__ASM_REG(si)
 #define _ASM_DI		__ASM_REG(di)
 
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1	_ASM_AX
+#define _ASM_ARG2	_ASM_DX
+#define _ASM_ARG3	_ASM_CX
+
+#define _ASM_ARG1L	eax
+#define _ASM_ARG2L	edx
+#define _ASM_ARG3L	ecx
+
+#define _ASM_ARG1W	ax
+#define _ASM_ARG2W	dx
+#define _ASM_ARG3W	cx
+
+#define _ASM_ARG1B	al
+#define _ASM_ARG2B	dl
+#define _ASM_ARG3B	cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1	_ASM_DI
+#define _ASM_ARG2	_ASM_SI
+#define _ASM_ARG3	_ASM_DX
+#define _ASM_ARG4	_ASM_CX
+#define _ASM_ARG5	r8
+#define _ASM_ARG6	r9
+
+#define _ASM_ARG1Q	rdi
+#define _ASM_ARG2Q	rsi
+#define _ASM_ARG3Q	rdx
+#define _ASM_ARG4Q	rcx
+#define _ASM_ARG5Q	r8
+#define _ASM_ARG6Q	r9
+
+#define _ASM_ARG1L	edi
+#define _ASM_ARG2L	esi
+#define _ASM_ARG3L	edx
+#define _ASM_ARG4L	ecx
+#define _ASM_ARG5L	r8d
+#define _ASM_ARG6L	r9d
+
+#define _ASM_ARG1W	di
+#define _ASM_ARG2W	si
+#define _ASM_ARG3W	dx
+#define _ASM_ARG4W	cx
+#define _ASM_ARG5W	r8w
+#define _ASM_ARG6W	r9w
+
+#define _ASM_ARG1B	dil
+#define _ASM_ARG2B	sil
+#define _ASM_ARG3B	dl
+#define _ASM_ARG4B	cl
+#define _ASM_ARG5B	r8b
+#define _ASM_ARG6B	r9b
+
+#endif
+
 /*
  * Macros to generate condition code outputs from inline assembly,
  * The output operand must be type "bool".

From d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 21 Jun 2018 09:23:24 -0700
Subject: [PATCH 222/382] x86/paravirt: Make native_save_fl() extern inline

native_save_fl() is marked static inline, but by using it as
a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined.

paravirt's use of native_save_fl() also requires that no GPRs other than
%rax are clobbered.

Compilers have different heuristics which they use to emit stack guard
code, the emittance of which can break paravirt's callee saved assumption
by clobbering %rcx.

Marking a function definition extern inline means that if this version
cannot be inlined, then the out-of-line version will be preferred. By
having the out-of-line version be implemented in assembly, it cannot be
instrumented with a stack protector, which might violate custom calling
conventions that code like paravirt rely on.

The semantics of extern inline has changed since gnu89. This means that
folks using GCC versions >= 5.1 may see symbol redefinition errors at
link time for subdirs that override KBUILD_CFLAGS (making the C standard
used implicit) regardless of this patch. This has been cleaned up
earlier in the patch set, but is left as a note in the commit message
for future travelers.

Reports:
 https://lkml.org/lkml/2018/5/7/534
 https://github.com/ClangBuiltLinux/linux/issues/16

Discussion:
 https://bugs.llvm.org/show_bug.cgi?id=37512
 https://lkml.org/lkml/2018/5/24/1371

Thanks to the many folks that participated in the discussion.

Debugged-by: Alistair Strachan <astrachan@google.com>
Debugged-by: Matthias Kaehlcke <mka@chromium.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Suggested-by: Tom Stellar <tstellar@redhat.com>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: akataria@vmware.com
Cc: akpm@linux-foundation.org
Cc: andrea.parri@amarulasolutions.com
Cc: ard.biesheuvel@linaro.org
Cc: aryabinin@virtuozzo.com
Cc: astrachan@google.com
Cc: boris.ostrovsky@oracle.com
Cc: brijesh.singh@amd.com
Cc: caoj.fnst@cn.fujitsu.com
Cc: geert@linux-m68k.org
Cc: ghackmann@google.com
Cc: gregkh@linuxfoundation.org
Cc: jan.kiszka@siemens.com
Cc: jarkko.sakkinen@linux.intel.com
Cc: joe@perches.com
Cc: jpoimboe@redhat.com
Cc: keescook@google.com
Cc: kirill.shutemov@linux.intel.com
Cc: kstewart@linuxfoundation.org
Cc: linux-efi@vger.kernel.org
Cc: linux-kbuild@vger.kernel.org
Cc: manojgupta@google.com
Cc: mawilcox@microsoft.com
Cc: michal.lkml@markovi.net
Cc: mjg59@google.com
Cc: mka@chromium.org
Cc: pombredanne@nexb.com
Cc: rientjes@google.com
Cc: rostedt@goodmis.org
Cc: thomas.lendacky@amd.com
Cc: tweek@google.com
Cc: virtualization@lists.linux-foundation.org
Cc: will.deacon@arm.com
Cc: yamada.masahiro@socionext.com
Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/irqflags.h |  2 +-
 arch/x86/kernel/Makefile        |  1 +
 arch/x86/kernel/irqflags.S      | 26 ++++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/irqflags.S

diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 89f08955fff7..c4fc17220df9 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -13,7 +13,7 @@
  * Interrupt control:
  */
 
-static inline unsigned long native_save_fl(void)
+extern inline unsigned long native_save_fl(void)
 {
 	unsigned long flags;
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 02d6f5cf4e70..8824d01c0c35 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -61,6 +61,7 @@ obj-y			+= alternative.o i8253.o hw_breakpoint.o
 obj-y			+= tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
 obj-y			+= resource.o
+obj-y			+= irqflags.o
 
 obj-y				+= process.o
 obj-y				+= fpu/
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
new file mode 100644
index 000000000000..ddeeaac8adda
--- /dev/null
+++ b/arch/x86/kernel/irqflags.S
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/asm.h>
+#include <asm/export.h>
+#include <linux/linkage.h>
+
+/*
+ * unsigned long native_save_fl(void)
+ */
+ENTRY(native_save_fl)
+	pushf
+	pop %_ASM_AX
+	ret
+ENDPROC(native_save_fl)
+EXPORT_SYMBOL(native_save_fl)
+
+/*
+ * void native_restore_fl(unsigned long flags)
+ * %eax/%rdi: flags
+ */
+ENTRY(native_restore_fl)
+	push %_ASM_ARG1
+	popf
+	ret
+ENDPROC(native_restore_fl)
+EXPORT_SYMBOL(native_restore_fl)

From 7a6b9f4d601dfce8cb68f0dcfd834270280e31e6 Mon Sep 17 00:00:00 2001
From: x00270170 <xiaqing17@hisilicon.com>
Date: Tue, 3 Jul 2018 15:06:27 +0800
Subject: [PATCH 223/382] mmc: dw_mmc: fix card threshold control configuration

Card write threshold control is supposed to be set since controller
version 2.80a for data write in HS400 mode and data read in
HS200/HS400/SDR104 mode. However the current code returns without
configuring it in the case of data writing in HS400 mode.
Meanwhile the patch fixes that the current code goes to
'disable' when doing data reading in HS400 mode.

Fixes: 7e4bf1bc9543 ("mmc: dw_mmc: add the card write threshold for HS400 mode")
Signed-off-by: Qing Xia <xiaqing17@hisilicon.com>
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 623f4d27fa01..80dc2fd6576c 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1065,8 +1065,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data)
 	 * It's used when HS400 mode is enabled.
 	 */
 	if (data->flags & MMC_DATA_WRITE &&
-		!(host->timing != MMC_TIMING_MMC_HS400))
-		return;
+		host->timing != MMC_TIMING_MMC_HS400)
+		goto disable;
 
 	if (data->flags & MMC_DATA_WRITE)
 		enable = SDMMC_CARD_WR_THR_EN;
@@ -1074,7 +1074,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data)
 		enable = SDMMC_CARD_RD_THR_EN;
 
 	if (host->timing != MMC_TIMING_MMC_HS200 &&
-	    host->timing != MMC_TIMING_UHS_SDR104)
+	    host->timing != MMC_TIMING_UHS_SDR104 &&
+	    host->timing != MMC_TIMING_MMC_HS400)
 		goto disable;
 
 	blksz_depth = blksz / (1 << host->data_shift);

From 717adfdaf14704fd3ec7fa2c04520c0723247eac Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Mon, 2 Jul 2018 16:59:37 -0700
Subject: [PATCH 224/382] HID: debug: check length before copy_to_user()

If our length is greater than the size of the buffer, we
overflow the buffer

Cc: stable@vger.kernel.org
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 4f4e7a08a07b..4db8e140f709 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1154,6 +1154,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
 			goto out;
 		if (list->tail > list->head) {
 			len = list->tail - list->head;
+			if (len > count)
+				len = count;
 
 			if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) {
 				ret = -EFAULT;
@@ -1163,6 +1165,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
 			list->head += len;
 		} else {
 			len = HID_DEBUG_BUFSIZE - list->head;
+			if (len > count)
+				len = count;
 
 			if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) {
 				ret = -EFAULT;
@@ -1170,7 +1174,9 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer,
 			}
 			list->head = 0;
 			ret += len;
-			goto copy_rest;
+			count -= len;
+			if (count > 0)
+				goto copy_rest;
 		}
 
 	}

From 3b8d573586d1b9dee33edf6cb6f2ca05f4bca568 Mon Sep 17 00:00:00 2001
From: Jason Gerecke <killertofu@gmail.com>
Date: Tue, 26 Jun 2018 09:58:02 -0700
Subject: [PATCH 225/382] HID: wacom: Correct touch maximum XY of 2nd-gen
 Intuos

The touch sensors on the 2nd-gen Intuos tablets don't use a 4096x4096
sensor like other similar tablets (3rd-gen Bamboo, Intuos5, etc.).
The incorrect maximum XY values don't normally affect userspace since
touch input from these devices is typically relative rather than
absolute. It does, however, cause problems when absolute distances
need to be measured, e.g. for gesture recognition. Since the resolution
of the touch sensor on these devices is 10 units / mm (versus 100 for
the pen sensor), the proper maximum values can be calculated by simply
dividing by 10.

Fixes: b5fd2a3e92 ("Input: wacom - add support for three new Intuos devices")
Signed-off-by: Jason Gerecke <jason.gerecke@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/wacom_wac.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 0bb44d0088ed..ad7afa74d365 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -3365,8 +3365,14 @@ void wacom_setup_device_quirks(struct wacom *wacom)
 			if (features->type >= INTUOSHT && features->type <= BAMBOO_PT)
 				features->device_type |= WACOM_DEVICETYPE_PAD;
 
-			features->x_max = 4096;
-			features->y_max = 4096;
+			if (features->type == INTUOSHT2) {
+				features->x_max = features->x_max / 10;
+				features->y_max = features->y_max / 10;
+			}
+			else {
+				features->x_max = 4096;
+				features->y_max = 4096;
+			}
 		}
 		else if (features->pktlen == WACOM_PKGLEN_BBTOUCH) {
 			features->device_type |= WACOM_DEVICETYPE_PAD;

From 90d72ce079791399ac255c75728f3c9e747b093d Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Mon, 2 Jul 2018 19:27:13 -0700
Subject: [PATCH 226/382] vmw_balloon: fix inflation with batching

Embarrassingly, the recent fix introduced worse problem than it solved,
causing the balloon not to inflate. The VM informed the hypervisor that
the pages for lock/unlock are sitting in the wrong address, as it used
the page that is used the uninitialized page variable.

Fixes: b23220fe054e9 ("vmw_balloon: fixing double free when batching mode is off")
Cc: stable@vger.kernel.org
Reviewed-by: Xavier Deguillard <xdeguillard@vmware.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/vmw_balloon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index efd733472a35..56c6f79a5c5a 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -467,7 +467,7 @@ static int vmballoon_send_batched_lock(struct vmballoon *b,
 		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
 {
 	unsigned long status;
-	unsigned long pfn = page_to_pfn(b->page);
+	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
 
 	STATS_INC(b->stats.lock[is_2m_pages]);
 
@@ -515,7 +515,7 @@ static bool vmballoon_send_batched_unlock(struct vmballoon *b,
 		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
 {
 	unsigned long status;
-	unsigned long pfn = page_to_pfn(b->page);
+	unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
 
 	STATS_INC(b->stats.unlock[is_2m_pages]);
 

From b7a020bff31318fc8785e6f96b1d38c1625cf1fb Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Thu, 7 Jun 2018 00:31:48 +0300
Subject: [PATCH 227/382] mei: discard messages from not connected client
 during power down.

This fixes regression introduced by
commit 8d52af6795c0 ("mei: speed up the power down flow")

In power down or suspend flow a message can still be received
from the FW because the clients fake disconnection.
In normal case we interpret messages w/o destination as corrupted
and link reset is performed in order to clean the channel,
but during power down link reset is already in progress resulting
in endless loop. To resolve the issue under power down flow we
discard messages silently.

Cc: <stable@vger.kernel.org> 4.16+
Fixes: 8d52af6795c0 ("mei: speed up the power down flow")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199541
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/interrupt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index b0b8f18a85e3..6649f0d56d2f 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -310,8 +310,11 @@ int mei_irq_read_handler(struct mei_device *dev,
 	if (&cl->link == &dev->file_list) {
 		/* A message for not connected fixed address clients
 		 * should be silently discarded
+		 * On power down client may be force cleaned,
+		 * silently discard such messages
 		 */
-		if (hdr_is_fixed(mei_hdr)) {
+		if (hdr_is_fixed(mei_hdr) ||
+		    dev->dev_state == MEI_DEV_POWER_DOWN) {
 			mei_irq_discard_msg(dev, mei_hdr);
 			ret = 0;
 			goto reset_slots;

From d403397c7c0821704a2f4da2694b46e423791261 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 30 Jun 2018 10:05:09 +0100
Subject: [PATCH 228/382] drm/i915: Try GGTT mmapping whole object as partial

If the whole object is already pinned by HW for use as scanout, we will
fail to move it to the mappable region and so must resort to using a
partial VMA covering the whole object.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104513
Fixes: aa136d9d72c2 ("drm/i915: Convert partial ggtt vma to full ggtt if it spans the entire object")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180630090509.469-1-chris@chris-wilson.co.uk
(cherry picked from commit 7e7367d3bc6cf27dd7e007e7897fcebfeff1ee8b)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++-----------
 drivers/gpu/drm/i915/i915_vma.c |  2 +-
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d44ad7bc1e94..17c5097721e8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2002,7 +2002,6 @@ int i915_gem_fault(struct vm_fault *vmf)
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 	struct i915_vma *vma;
 	pgoff_t page_offset;
-	unsigned int flags;
 	int ret;
 
 	/* We don't use vmf->pgoff since that has the fake offset */
@@ -2038,27 +2037,34 @@ int i915_gem_fault(struct vm_fault *vmf)
 		goto err_unlock;
 	}
 
-	/* If the object is smaller than a couple of partial vma, it is
-	 * not worth only creating a single partial vma - we may as well
-	 * clear enough space for the full object.
-	 */
-	flags = PIN_MAPPABLE;
-	if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT)
-		flags |= PIN_NONBLOCK | PIN_NONFAULT;
 
 	/* Now pin it into the GTT as needed */
-	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags);
+	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
+				       PIN_MAPPABLE |
+				       PIN_NONBLOCK |
+				       PIN_NONFAULT);
 	if (IS_ERR(vma)) {
 		/* Use a partial view if it is bigger than available space */
 		struct i915_ggtt_view view =
 			compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+		unsigned int flags;
 
-		/* Userspace is now writing through an untracked VMA, abandon
+		flags = PIN_MAPPABLE;
+		if (view.type == I915_GGTT_VIEW_NORMAL)
+			flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+		/*
+		 * Userspace is now writing through an untracked VMA, abandon
 		 * all hope that the hardware is able to track future writes.
 		 */
 		obj->frontbuffer_ggtt_origin = ORIGIN_CPU;
 
-		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		if (IS_ERR(vma) && !view.type) {
+			flags = PIN_MAPPABLE;
+			view.type = I915_GGTT_VIEW_PARTIAL;
+			vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags);
+		}
 	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 9324d476e0a7..0531c01c3604 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -109,7 +109,7 @@ vma_create(struct drm_i915_gem_object *obj,
 						     obj->base.size >> PAGE_SHIFT));
 			vma->size = view->partial.size;
 			vma->size <<= PAGE_SHIFT;
-			GEM_BUG_ON(vma->size >= obj->base.size);
+			GEM_BUG_ON(vma->size > obj->base.size);
 		} else if (view->type == I915_GGTT_VIEW_ROTATED) {
 			vma->size = intel_rotation_info_size(&view->rotated);
 			vma->size <<= PAGE_SHIFT;

From 157bcc06094c3c5800d3f4676527047b79b618e7 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Mon, 25 Jun 2018 09:34:03 -0300
Subject: [PATCH 229/382] ARM: imx_v6_v7_defconfig: Select ULPI support

Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that
USB ULPI can be functional on some boards like imx51-babbge.

This fixes a kernel hang in 4.18-rc1 on i.mx51-babbage, caused by commit
03e6275ae381 ("usb: chipidea: Fix ULPI on imx51").

Suggested-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/configs/imx_v6_v7_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index f70507ab91ee..200ebda47e0c 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -302,6 +302,7 @@ CONFIG_USB_STORAGE=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CHIPIDEA_HOST=y
+CONFIG_USB_CHIPIDEA_ULPI=y
 CONFIG_USB_SERIAL=m
 CONFIG_USB_SERIAL_GENERIC=y
 CONFIG_USB_SERIAL_FTDI_SIO=m
@@ -338,6 +339,7 @@ CONFIG_USB_GADGETFS=m
 CONFIG_USB_FUNCTIONFS=m
 CONFIG_USB_MASS_STORAGE=m
 CONFIG_USB_G_SERIAL=m
+CONFIG_USB_ULPI_BUS=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y

From 2ceb2780b790b74bc408a949f6aedbad8afa693e Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Tue, 26 Jun 2018 08:37:09 -0300
Subject: [PATCH 230/382] ARM: imx_v4_v5_defconfig: Select ULPI support

Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that
USB ULPI can be functional on some boards like that use ULPI
interface.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/configs/imx_v4_v5_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig
index 054591dc9a00..4cd2f4a2bff4 100644
--- a/arch/arm/configs/imx_v4_v5_defconfig
+++ b/arch/arm/configs/imx_v4_v5_defconfig
@@ -141,9 +141,11 @@ CONFIG_USB_STORAGE=y
 CONFIG_USB_CHIPIDEA=y
 CONFIG_USB_CHIPIDEA_UDC=y
 CONFIG_USB_CHIPIDEA_HOST=y
+CONFIG_USB_CHIPIDEA_ULPI=y
 CONFIG_NOP_USB_XCEIV=y
 CONFIG_USB_GADGET=y
 CONFIG_USB_ETH=m
+CONFIG_USB_ULPI_BUS=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y

From 9fea4b395260175de4016b42982f45a3e6e03d0b Mon Sep 17 00:00:00 2001
From: Evan Green <evgreen@chromium.org>
Date: Mon, 2 Jul 2018 16:03:46 -0700
Subject: [PATCH 231/382] loop: Add LOOP_SET_BLOCK_SIZE in compat ioctl

This change adds LOOP_SET_BLOCK_SIZE as one of the supported ioctls
in lo_compat_ioctl. It only takes an unsigned long argument, and
in practice a 32-bit value works fine.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 21e6d1b3b393..258355c5a93a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1612,6 +1612,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 		arg = (unsigned long) compat_ptr(arg);
 	case LOOP_SET_FD:
 	case LOOP_CHANGE_FD:
+	case LOOP_SET_BLOCK_SIZE:
 		err = lo_ioctl(bdev, mode, cmd, arg);
 		break;
 	default:

From 895b66129ad8c562865b64306032bdb378f4484f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 29 Jun 2018 11:15:37 +0200
Subject: [PATCH 232/382] PM / Domains: Don't power on at attach for the multi
 PM domain case

There are no legacy behavior in drivers to consider while attaching a
device to genpd - for the multiple PM domain case.

For that reason, let's instead require the driver to runtime resume the
device, via calling pm_runtime_get_sync() for example, when it needs to
power on the corresponding PM domain.

This allows us to improve the situation during attach. Instead of always
power on the PM domain, which may be unnecessary, let's leave it in its
current state. Additionally, to avoid the PM domain to stay powered on,
let's schedule a power off work.

Fixes: 3c095f32a92b (PM / Domains: Add support for multi PM domains ...)
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index c298de8a8308..9e8484189034 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2235,7 +2235,7 @@ static void genpd_dev_pm_sync(struct device *dev)
 }
 
 static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np,
-				 unsigned int index)
+				 unsigned int index, bool power_on)
 {
 	struct of_phandle_args pd_args;
 	struct generic_pm_domain *pd;
@@ -2271,9 +2271,11 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np,
 	dev->pm_domain->detach = genpd_dev_pm_detach;
 	dev->pm_domain->sync = genpd_dev_pm_sync;
 
-	genpd_lock(pd);
-	ret = genpd_power_on(pd, 0);
-	genpd_unlock(pd);
+	if (power_on) {
+		genpd_lock(pd);
+		ret = genpd_power_on(pd, 0);
+		genpd_unlock(pd);
+	}
 
 	if (ret)
 		genpd_remove_device(pd, dev);
@@ -2307,7 +2309,7 @@ int genpd_dev_pm_attach(struct device *dev)
 				       "#power-domain-cells") != 1)
 		return 0;
 
-	return __genpd_dev_pm_attach(dev, dev->of_node, 0);
+	return __genpd_dev_pm_attach(dev, dev->of_node, 0, true);
 }
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
 
@@ -2359,14 +2361,14 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 	}
 
 	/* Try to attach the device to the PM domain at the specified index. */
-	ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index);
+	ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index, false);
 	if (ret < 1) {
 		device_unregister(genpd_dev);
 		return ret ? ERR_PTR(ret) : NULL;
 	}
 
-	pm_runtime_set_active(genpd_dev);
 	pm_runtime_enable(genpd_dev);
+	genpd_queue_power_off_work(dev_to_genpd(genpd_dev));
 
 	return genpd_dev;
 }

From 410da1e12ffed61129d61df5b7adce4d08c7f17c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 3 Jul 2018 09:53:43 -0700
Subject: [PATCH 233/382] net/smc: fix up merge error with poll changes

My networking merge (commit 4e33d7d47943: "Pull networking fixes from
David Miller") got the poll() handling conflict wrong for af_smc.

The conflict between my a11e1d432b51 ("Revert changes to convert to
->poll_mask() and aio IOCB_CMD_POLL") and Ursula Braun's 24ac3a08e658
("net/smc: rebuild nonblocking connect") should have left the call to
sock_poll_wait() in place, just without the socket lock release/retake.

And I really should have realized that.  But happily, I at least asked
Ursula to double-check the merge, and she set me right.

This also fixes an incidental whitespace issue nearby that annoyed me
while looking at this.

Pointed-out-by: Ursula Braun <ubraun@linux.ibm.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/smc/af_smc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e017b6a4452b..3c1405df936c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1345,6 +1345,8 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
 		if (sk->sk_err)
 			mask |= EPOLLERR;
 	} else {
+		if (sk->sk_state != SMC_CLOSED)
+			sock_poll_wait(file, sk_sleep(sk), wait);
 		if (sk->sk_err)
 			mask |= EPOLLERR;
 		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1370,7 +1372,6 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
 		}
 		if (smc->conn.urg_state == SMC_URG_VALID)
 			mask |= EPOLLPRI;
-
 	}
 
 	return mask;

From fe48aecb4df837540f13b5216f27ddb306aaf4b9 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 1 Jul 2018 15:31:54 +0300
Subject: [PATCH 234/382] RDMA/uverbs: Don't fail in creation of multiple flows

The conversion from offsetof() calculations to sizeof()
wrongly behaved for missed exact size and in scenario with
more than one flow.

In such scenario we got "create flow failed, flow 10: 8 bytes
left from uverb cmd" error, which is wrong because the size of
kern_spec is exactly 8 bytes, and we were not supposed to fail.

Cc: <stable@vger.kernel.org> # 3.12
Fixes: 4fae7f170416 ("RDMA/uverbs: Fix slab-out-of-bounds in ib_uverbs_ex_create_flow")
Reported-by: Ran Rozenstein <ranro@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 87ffeebc0b28..cc06e8404e9b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3586,7 +3586,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	kern_spec = kern_flow_attr->flow_specs;
 	ib_spec = flow_attr + 1;
 	for (i = 0; i < flow_attr->num_of_specs &&
-			cmd.flow_attr.size > sizeof(*kern_spec) &&
+			cmd.flow_attr.size >= sizeof(*kern_spec) &&
 			cmd.flow_attr.size >= kern_spec->size;
 	     i++) {
 		err = kern_spec_to_ib_spec(

From dbd39cf4249316c4089b8987f20850763ebbf43e Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Thu, 31 May 2018 09:24:02 -0300
Subject: [PATCH 235/382] i2c: stu300: use non-archaic spelling of failes

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-stu300.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c
index e866c481bfc3..fce52bdab2b7 100644
--- a/drivers/i2c/busses/i2c-stu300.c
+++ b/drivers/i2c/busses/i2c-stu300.c
@@ -127,7 +127,7 @@ enum stu300_error {
 
 /*
  * The number of address send athemps tried before giving up.
- * If the first one failes it seems like 5 to 8 attempts are required.
+ * If the first one fails it seems like 5 to 8 attempts are required.
  */
 #define NUM_ADDR_RESEND_ATTEMPTS 12
 

From cf4d418e653afc84c9c873236033e06be5d58f1c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 28 Mar 2018 16:09:10 +0200
Subject: [PATCH 236/382] tracing: Avoid string overflow

'err' is used as a NUL-terminated string, but using strncpy() with the length
equal to the buffer size may result in lack of the termination:

kernel/trace/trace_events_hist.c: In function 'hist_err_event':
kernel/trace/trace_events_hist.c:396:3: error: 'strncpy' specified bound 256 equals destination size [-Werror=stringop-truncation]
   strncpy(err, var, MAX_FILTER_STR_VAL);

This changes it to use the safer strscpy() instead.

Link: http://lkml.kernel.org/r/20180328140920.2842153-1-arnd@arndb.de

Cc: stable@vger.kernel.org
Fixes: f404da6e1d46 ("tracing: Add 'last error' error facility for hist triggers")
Acked-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 046c716a6536..aae18af94c94 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -393,7 +393,7 @@ static void hist_err_event(char *str, char *system, char *event, char *var)
 	else if (system)
 		snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event);
 	else
-		strncpy(err, var, MAX_FILTER_STR_VAL);
+		strscpy(err, var, MAX_FILTER_STR_VAL);
 
 	hist_err(str, err);
 }

From f90658725ba7ebb031054866aff4cda0d099a3b1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 2 Jul 2018 11:41:38 -0400
Subject: [PATCH 237/382] tracing: Make create_filter() code match the comments

The comment in create_filter() states that the passed in filter pointer
(filterp) will either be NULL or contain an error message stating why the
filter failed. But it also expects the filter pointer to point to NULL when
passed in. If it is not, the function create_filter_start() will warn and
return an error message without updating the filter pointer. This is not
what the comment states.

As we always expect the pointer to point to NULL, if it is not, trigger a
WARN_ON(), set it to NULL, and then continue the path as the rest will work
as the comment states. Also update the comment to state it must point to
NULL.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_filter.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 0dceb77d1d42..893a206bcba4 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1701,6 +1701,7 @@ static void create_filter_finish(struct filter_parse_error *pe)
  * @filter_str: filter string
  * @set_str: remember @filter_str and enable detailed error in filter
  * @filterp: out param for created filter (always updated on return)
+ *           Must be a pointer that references a NULL pointer.
  *
  * Creates a filter for @call with @filter_str.  If @set_str is %true,
  * @filter_str is copied and recorded in the new filter.
@@ -1718,6 +1719,10 @@ static int create_filter(struct trace_event_call *call,
 	struct filter_parse_error *pe = NULL;
 	int err;
 
+	/* filterp must point to NULL */
+	if (WARN_ON(*filterp))
+		*filterp = NULL;
+
 	err = create_filter_start(filter_string, set_str, &pe, filterp);
 	if (err)
 		return err;

From f26808ba7227a921e0e8549c7d3c52332b920085 Mon Sep 17 00:00:00 2001
From: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
Date: Sun, 8 Apr 2018 19:36:31 +0800
Subject: [PATCH 238/382] tracing: Optimize trace_buffer_iter() logic

Simplify and optimize the logic in trace_buffer_iter() to use a conditional
operation instead of an if conditional.

Link: http://lkml.kernel.org/r/20180408113631.3947-1-cugyly@163.com

Signed-off-by: yuan linyu <Linyu.Yuan@alcatel-sbell.com.cn>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 630c5a24b2b2..f8f86231ad90 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -583,9 +583,7 @@ static __always_inline void trace_clear_recursion(int bit)
 static inline struct ring_buffer_iter *
 trace_buffer_iter(struct trace_iterator *iter, int cpu)
 {
-	if (iter->buffer_iter && iter->buffer_iter[cpu])
-		return iter->buffer_iter[cpu];
-	return NULL;
+	return iter->buffer_iter ? iter->buffer_iter[cpu] : NULL;
 }
 
 int tracer_init(struct tracer *t, struct trace_array *tr);

From 26b68dd2f48fe7699a89f0cfbb9f4a650dc1c837 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 8 Mar 2018 21:58:43 +0100
Subject: [PATCH 239/382] tracing: Use __printf markup to silence compiler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Silence warnings (triggered at W=1) by adding relevant __printf attributes.

  CC      kernel/trace/trace.o
kernel/trace/trace.c: In function ‘__trace_array_vprintk’:
kernel/trace/trace.c:2979:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format]
  len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
  ^~~
  AR      kernel/trace/built-in.o

Link: http://lkml.kernel.org/r/20180308205843.27447-1-malat@debian.org

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0079b4c7a49..f054bd6a1c66 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2953,6 +2953,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
 }
 EXPORT_SYMBOL_GPL(trace_vbprintk);
 
+__printf(3, 0)
 static int
 __trace_array_vprintk(struct ring_buffer *buffer,
 		      unsigned long ip, const char *fmt, va_list args)
@@ -3007,12 +3008,14 @@ __trace_array_vprintk(struct ring_buffer *buffer,
 	return len;
 }
 
+__printf(3, 0)
 int trace_array_vprintk(struct trace_array *tr,
 			unsigned long ip, const char *fmt, va_list args)
 {
 	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
 }
 
+__printf(3, 0)
 int trace_array_printk(struct trace_array *tr,
 		       unsigned long ip, const char *fmt, ...)
 {
@@ -3028,6 +3031,7 @@ int trace_array_printk(struct trace_array *tr,
 	return ret;
 }
 
+__printf(3, 4)
 int trace_array_printk_buf(struct ring_buffer *buffer,
 			   unsigned long ip, const char *fmt, ...)
 {
@@ -3043,6 +3047,7 @@ int trace_array_printk_buf(struct ring_buffer *buffer,
 	return ret;
 }
 
+__printf(2, 0)
 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 {
 	return trace_array_vprintk(&global_trace, ip, fmt, args);

From 5ccba64a560fa6ca06008d4001f5d46ebeb34b41 Mon Sep 17 00:00:00 2001
From: Yisheng Xie <xieyisheng1@huawei.com>
Date: Fri, 2 Feb 2018 10:14:49 +0800
Subject: [PATCH 240/382] ftrace: Nuke clear_ftrace_function

clear_ftrace_function is not used outside of ftrace.c and is not help to
use a function, so nuke it per Steve's suggestion.

Link: http://lkml.kernel.org/r/1517537689-34947-1-git-send-email-xieyisheng1@huawei.com

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Yisheng Xie <xieyisheng1@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  2 --
 kernel/trace/ftrace.c  | 13 +------------
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8154f4920fcb..ebb77674be90 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -223,7 +223,6 @@ extern enum ftrace_tracing_type_t ftrace_tracing_type;
  */
 int register_ftrace_function(struct ftrace_ops *ops);
 int unregister_ftrace_function(struct ftrace_ops *ops);
-void clear_ftrace_function(void);
 
 extern void ftrace_stub(unsigned long a0, unsigned long a1,
 			struct ftrace_ops *op, struct pt_regs *regs);
@@ -239,7 +238,6 @@ static inline int ftrace_nr_registered_ops(void)
 {
 	return 0;
 }
-static inline void clear_ftrace_function(void) { }
 static inline void ftrace_kill(void) { }
 static inline void ftrace_free_init_mem(void) { }
 static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index efed9c1cfb7e..caf9cbf35816 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -192,17 +192,6 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 	op->saved_func(ip, parent_ip, op, regs);
 }
 
-/**
- * clear_ftrace_function - reset the ftrace function
- *
- * This NULLs the ftrace function and in essence stops
- * tracing.  There may be lag
- */
-void clear_ftrace_function(void)
-{
-	ftrace_trace_function = ftrace_stub;
-}
-
 static void ftrace_sync(struct work_struct *work)
 {
 	/*
@@ -6689,7 +6678,7 @@ void ftrace_kill(void)
 {
 	ftrace_disabled = 1;
 	ftrace_enabled = 0;
-	clear_ftrace_function();
+	ftrace_trace_function = ftrace_stub;
 }
 
 /**

From 1fe4293f4b8de75824935f8d8e9a99c7fc6873da Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Wed, 31 Jan 2018 23:48:49 +0800
Subject: [PATCH 241/382] tracing: Fix missing return symbol in function_graph
 output

The function_graph tracer does not show the interrupt return marker for the
leaf entry. On leaf entries, we see an unbalanced interrupt marker (the
interrupt was entered, but nevern left).

Before:
 1)               |  SyS_write() {
 1)               |    __fdget_pos() {
 1)   0.061 us    |      __fget_light();
 1)   0.289 us    |    }
 1)               |    vfs_write() {
 1)   0.049 us    |      rw_verify_area();
 1) + 15.424 us   |      __vfs_write();
 1)   ==========> |
 1)   6.003 us    |      smp_apic_timer_interrupt();
 1)   0.055 us    |      __fsnotify_parent();
 1)   0.073 us    |      fsnotify();
 1) + 23.665 us   |    }
 1) + 24.501 us   |  }

After:
 0)               |  SyS_write() {
 0)               |    __fdget_pos() {
 0)   0.052 us    |      __fget_light();
 0)   0.328 us    |    }
 0)               |    vfs_write() {
 0)   0.057 us    |      rw_verify_area();
 0)               |      __vfs_write() {
 0)   ==========> |
 0)   8.548 us    |      smp_apic_timer_interrupt();
 0)   <========== |
 0) + 36.507 us   |      } /* __vfs_write */
 0)   0.049 us    |      __fsnotify_parent();
 0)   0.066 us    |      fsnotify();
 0) + 50.064 us   |    }
 0) + 50.952 us   |  }

Link: http://lkml.kernel.org/r/1517413729-20411-1-git-send-email-changbin.du@intel.com

Cc: stable@vger.kernel.org
Fixes: f8b755ac8e0cc ("tracing/function-graph-tracer: Output arrows signal on hardirq call/return")
Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_functions_graph.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 23c0b0cb5fb9..169b3c44ee97 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -831,6 +831,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 	struct ftrace_graph_ret *graph_ret;
 	struct ftrace_graph_ent *call;
 	unsigned long long duration;
+	int cpu = iter->cpu;
 	int i;
 
 	graph_ret = &ret_entry->ret;
@@ -839,7 +840,6 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	if (data) {
 		struct fgraph_cpu_data *cpu_data;
-		int cpu = iter->cpu;
 
 		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 
@@ -869,6 +869,9 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 
 	trace_seq_printf(s, "%ps();\n", (void *)call->func);
 
+	print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
+			cpu, iter->ent->pid, flags);
+
 	return trace_handle_return(s);
 }
 

From 1e2c043628c7736dd56536d16c0ce009bc834ae7 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Tue, 3 Jul 2018 17:02:39 -0700
Subject: [PATCH 242/382] userfaultfd: hugetlbfs: fix
 userfaultfd_huge_must_wait() pte access

Use huge_ptep_get() to translate huge ptes to normal ptes so we can
check them with the huge_pte_* functions.  Otherwise some architectures
will check the wrong values and will not wait for userspace to bring in
the memory.

Link: http://lkml.kernel.org/r/20180626132421.78084-1-frankja@linux.ibm.com
Fixes: 369cd2121be4 ("userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges")
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 123bf7d516fc..594d192b2331 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -222,24 +222,26 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
 					 unsigned long reason)
 {
 	struct mm_struct *mm = ctx->mm;
-	pte_t *pte;
+	pte_t *ptep, pte;
 	bool ret = true;
 
 	VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem));
 
-	pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
-	if (!pte)
+	ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma));
+
+	if (!ptep)
 		goto out;
 
 	ret = false;
+	pte = huge_ptep_get(ptep);
 
 	/*
 	 * Lockless access: we're in a wait_event so it's ok if it
 	 * changes under us.
 	 */
-	if (huge_pte_none(*pte))
+	if (huge_pte_none(pte))
 		ret = true;
-	if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP))
+	if (!huge_pte_write(pte) && (reason & VM_UFFD_WP))
 		ret = true;
 out:
 	return ret;

From 520495fe96d74e05db585fc748351e0504d8f40d Mon Sep 17 00:00:00 2001
From: Cannon Matthews <cannonmatthews@google.com>
Date: Tue, 3 Jul 2018 17:02:43 -0700
Subject: [PATCH 243/382] mm: hugetlb: yield when prepping struct pages

When booting with very large numbers of gigantic (i.e.  1G) pages, the
operations in the loop of gather_bootmem_prealloc, and specifically
prep_compound_gigantic_page, takes a very long time, and can cause a
softlockup if enough pages are requested at boot.

For example booting with 3844 1G pages requires prepping
(set_compound_head, init the count) over 1 billion 4K tail pages, which
takes considerable time.

Add a cond_resched() to the outer loop in gather_bootmem_prealloc() to
prevent this lockup.

Tested: Booted with softlockup_panic=1 hugepagesz=1G hugepages=3844 and
no softlockup is reported, and the hugepages are reported as
successfully setup.

Link: http://lkml.kernel.org/r/20180627214447.260804-1-cannonmatthews@google.com
Signed-off-by: Cannon Matthews <cannonmatthews@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Peter Feiner <pfeiner@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3612fbb32e9d..039ddbc574e9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2163,6 +2163,7 @@ static void __init gather_bootmem_prealloc(void)
 		 */
 		if (hstate_is_gigantic(h))
 			adjust_managed_page_count(page, 1 << h->order);
+		cond_resched();
 	}
 }
 

From 1e8e18f694a52d703665012ca486826f64bac29d Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 3 Jul 2018 17:02:46 -0700
Subject: [PATCH 244/382] kasan: fix shadow_size calculation error in
 kasan_module_alloc

There is a special case that the size is "(N << KASAN_SHADOW_SCALE_SHIFT)
Pages plus X", the value of X is [1, KASAN_SHADOW_SCALE_SIZE-1].  The
operation "size >> KASAN_SHADOW_SCALE_SHIFT" will drop X, and the
roundup operation can not retrieve the missed one page.  For example:
size=0x28006, PAGE_SIZE=0x1000, KASAN_SHADOW_SCALE_SHIFT=3, we will get
shadow_size=0x5000, but actually we need 6 pages.

  shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, PAGE_SIZE);

This can lead to a kernel crash when kasan is enabled and the value of
mod->core_layout.size or mod->init_layout.size is like above.  Because
the shadow memory of X has not been allocated and mapped.

move_module:
  ptr = module_alloc(mod->core_layout.size);
  ...
  memset(ptr, 0, mod->core_layout.size);		//crashed

  Unable to handle kernel paging request at virtual address ffff0fffff97b000
  ......
  Call trace:
    __asan_storeN+0x174/0x1a8
    memset+0x24/0x48
    layout_and_allocate+0xcd8/0x1800
    load_module+0x190/0x23e8
    SyS_finit_module+0x148/0x180

Link: http://lkml.kernel.org/r/1529659626-12660-1-git-send-email-thunder.leizhen@huawei.com
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Dmitriy Vyukov <dvyukov@google.com>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Libin <huawei.libin@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/kasan.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index f185455b3406..c3bd5209da38 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -619,12 +619,13 @@ void kasan_kfree_large(void *ptr, unsigned long ip)
 int kasan_module_alloc(void *addr, size_t size)
 {
 	void *ret;
+	size_t scaled_size;
 	size_t shadow_size;
 	unsigned long shadow_start;
 
 	shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
-	shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT,
-			PAGE_SIZE);
+	scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT;
+	shadow_size = round_up(scaled_size, PAGE_SIZE);
 
 	if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
 		return -EINVAL;

From 5e4e290d3751607726a62f0b49e11261a0a9345e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 3 Jul 2018 17:02:49 -0700
Subject: [PATCH 245/382] ARM: disable KCOV for trusted foundations code

The ARM trusted foundations code is currently broken in linux-next when
CONFIG_KCOV_INSTRUMENT_ALL is set:

  /tmp/ccHdQsCI.s: Assembler messages:
  /tmp/ccHdQsCI.s:37: Error: .err encountered
  /tmp/ccHdQsCI.s:38: Error: .err encountered
  /tmp/ccHdQsCI.s:39: Error: .err encountered
  scripts/Makefile.build:311: recipe for target 'arch/arm/firmware/trusted_foundations.o' failed

I could not find a function attribute that lets me disable
-fsanitize-coverage=trace-pc for just one function, so this turns it off
for the entire file instead.

Link: http://lkml.kernel.org/r/20180529103636.1535457-1-arnd@arndb.de
Fixes: 758517202bd2e4 ("arm: port KCOV to arm")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Olof Johansson <olof@lixom.net>
Tested-by: Olof Johansson <olof@lixom.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/firmware/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/firmware/Makefile b/arch/arm/firmware/Makefile
index a71f16536b6c..6e41336b0bc4 100644
--- a/arch/arm/firmware/Makefile
+++ b/arch/arm/firmware/Makefile
@@ -1 +1,4 @@
 obj-$(CONFIG_TRUSTED_FOUNDATIONS)	+= trusted_foundations.o
+
+# tf_generic_smc() fails to build with -fsanitize-coverage=trace-pc
+KCOV_INSTRUMENT                := n

From fc36def997cfd6cbff3eda4f82853a5c311c5466 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Tue, 3 Jul 2018 17:02:53 -0700
Subject: [PATCH 246/382] mm: teach dump_page() to correctly output poisoned
 struct pages

If struct page is poisoned, and uninitialized access is detected via
PF_POISONED_CHECK(page) dump_page() is called to output the page.  But,
the dump_page() itself accesses struct page to determine how to print
it, and therefore gets into a recursive loop.

For example:

  dump_page()
   __dump_page()
    PageSlab(page)
     PF_POISONED_CHECK(page)
      VM_BUG_ON_PGFLAGS(PagePoisoned(page), page)
       dump_page() recursion loop.

Link: http://lkml.kernel.org/r/20180702180536.2552-1-pasha.tatashin@oracle.com
Fixes: f165b378bbdf ("mm: uninitialized struct page poisoning sanity checking")
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/debug.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/mm/debug.c b/mm/debug.c
index 56e2d9125ea5..38c926520c97 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -43,12 +43,25 @@ const struct trace_print_flags vmaflag_names[] = {
 
 void __dump_page(struct page *page, const char *reason)
 {
+	bool page_poisoned = PagePoisoned(page);
+	int mapcount;
+
+	/*
+	 * If struct page is poisoned don't access Page*() functions as that
+	 * leads to recursive loop. Page*() check for poisoned pages, and calls
+	 * dump_page() when detected.
+	 */
+	if (page_poisoned) {
+		pr_emerg("page:%px is uninitialized and poisoned", page);
+		goto hex_only;
+	}
+
 	/*
 	 * Avoid VM_BUG_ON() in page_mapcount().
 	 * page->_mapcount space in struct page is used by sl[aou]b pages to
 	 * encode own info.
 	 */
-	int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+	mapcount = PageSlab(page) ? 0 : page_mapcount(page);
 
 	pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
 		  page, page_ref_count(page), mapcount,
@@ -60,6 +73,7 @@ void __dump_page(struct page *page, const char *reason)
 
 	pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags);
 
+hex_only:
 	print_hex_dump(KERN_ALERT, "raw: ", DUMP_PREFIX_NONE, 32,
 			sizeof(unsigned long), page,
 			sizeof(struct page), false);
@@ -68,7 +82,7 @@ void __dump_page(struct page *page, const char *reason)
 		pr_alert("page dumped because: %s\n", reason);
 
 #ifdef CONFIG_MEMCG
-	if (page->mem_cgroup)
+	if (!page_poisoned && page->mem_cgroup)
 		pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
 #endif
 }

From 8bf935501a7ef1b2ec3b51c804d657d5895f221a Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 30 Jun 2018 10:53:57 +0200
Subject: [PATCH 247/382] s390: wire up io_pgetevents system call

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/syscalls/syscall.tbl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 8b210ead7956..4baefed5fefb 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -389,3 +389,4 @@
 379  common	statx			sys_statx			compat_sys_statx
 380  common	s390_sthyi		sys_s390_sthyi			compat_sys_s390_sthyi
 381  common	kexec_file_load		sys_kexec_file_load		compat_sys_kexec_file_load
+382  common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents

From 9d6d99e3ac8ccfd0945edb3c83cd912838775056 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Sat, 30 Jun 2018 10:54:15 +0200
Subject: [PATCH 248/382] s390: wire up rseq system call

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Kconfig                     | 1 +
 arch/s390/kernel/compat_wrapper.c     | 1 +
 arch/s390/kernel/entry.S              | 4 ++++
 arch/s390/kernel/signal.c             | 3 ++-
 arch/s390/kernel/syscalls/syscall.tbl | 1 +
 5 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index baed39772c84..e44bb2b2873e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -160,6 +160,7 @@ config S390
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select MODULES_USE_ELF_RELA
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 607c5e9fba3d..2ce28bf0c5ec 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -183,3 +183,4 @@ COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
 COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
 COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags);
 COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags)
+COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3891805bfcdd..150130c897c3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -357,6 +357,10 @@ ENTRY(system_call)
 	stg	%r2,__PT_R2(%r11)		# store return value
 
 .Lsysc_return:
+#ifdef CONFIG_DEBUG_RSEQ
+	lgr	%r2,%r11
+	brasl	%r14,rseq_syscall
+#endif
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
 	TSTMSK	__PT_FLAGS(%r11),_PIF_WORK
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 2d2960ab3e10..22f08245aa5d 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -498,7 +498,7 @@ void do_signal(struct pt_regs *regs)
 		}
 		/* No longer in a system call */
 		clear_pt_regs_flag(regs, PIF_SYSCALL);
-
+		rseq_signal_deliver(&ksig, regs);
 		if (is_compat_task())
 			handle_signal32(&ksig, oldset, regs);
 		else
@@ -537,4 +537,5 @@ void do_notify_resume(struct pt_regs *regs)
 {
 	clear_thread_flag(TIF_NOTIFY_RESUME);
 	tracehook_notify_resume(regs);
+	rseq_handle_notify_resume(NULL, regs);
 }
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 4baefed5fefb..022fc099b628 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -390,3 +390,4 @@
 380  common	s390_sthyi		sys_s390_sthyi			compat_sys_s390_sthyi
 381  common	kexec_file_load		sys_kexec_file_load		compat_sys_kexec_file_load
 382  common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
+383  common	rseq			sys_rseq			compat_sys_rseq

From c660f40b711980b42d8beac4b395a10645b20d5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <nicolai.haehnle@amd.com>
Date: Fri, 29 Jun 2018 13:23:25 +0200
Subject: [PATCH 249/382] drm/amdgpu: fix user fence write race condition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The buffer object backing the user fence is reserved using the non-user
fence, i.e., as soon as the non-user fence is signaled, the user fence
buffer object can be moved or even destroyed.

Therefore, emit the user fence first.

Both fences have the same cache invalidation behavior, so this should
have no user-visible effect.

Signed-off-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index f70eeed9ed76..7aaa263ad8c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -231,6 +231,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
 		fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
 
+	/* wrap the last IB with fence */
+	if (job && job->uf_addr) {
+		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
+				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
+	}
+
 	r = amdgpu_fence_emit(ring, f, fence_flags);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
@@ -243,12 +249,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
 
-	/* wrap the last IB with fence */
-	if (job && job->uf_addr) {
-		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
-				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
-	}
-
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 

From 673b4271665a12fa839a12abb50e6f6e9953c081 Mon Sep 17 00:00:00 2001
From: Jouke Witteveen <j.witteveen@gmail.com>
Date: Wed, 4 Jul 2018 12:27:15 +0200
Subject: [PATCH 250/382] ACPI / battery: Safe unregistering of hooks

A hooking API was implemented for 4.17 in fa93854f7a7ed63d followed
by hooks for Thinkpad laptops in 2801b9683f740012. The Thinkpad
drivers did not support the Thinkpad 13 and the hooking API crashes
on unsupported batteries by altering a list of hooks during unsafe
iteration. Thus, Thinkpad 13 laptops could no longer boot.

Additionally, a lock was kept in place and debugging information was
printed out of order.

Fixes: fa93854f7a7e (battery: Add the battery hooking API)
Cc: 4.17+ <stable@vger.kernel.org> # 4.17+
Signed-off-by: Jouke Witteveen <j.witteveen@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/battery.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index b0113a5802a3..d79ad844c78f 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -717,10 +717,11 @@ void battery_hook_register(struct acpi_battery_hook *hook)
 			 */
 			pr_err("extension failed to load: %s", hook->name);
 			__battery_hook_unregister(hook, 0);
-			return;
+			goto end;
 		}
 	}
 	pr_info("new extension: %s\n", hook->name);
+end:
 	mutex_unlock(&hook_mutex);
 }
 EXPORT_SYMBOL_GPL(battery_hook_register);
@@ -732,7 +733,7 @@ EXPORT_SYMBOL_GPL(battery_hook_register);
 */
 static void battery_hook_add_battery(struct acpi_battery *battery)
 {
-	struct acpi_battery_hook *hook_node;
+	struct acpi_battery_hook *hook_node, *tmp;
 
 	mutex_lock(&hook_mutex);
 	INIT_LIST_HEAD(&battery->list);
@@ -744,15 +745,15 @@ static void battery_hook_add_battery(struct acpi_battery *battery)
 	 * when a battery gets hotplugged or initialized
 	 * during the battery module initialization.
 	 */
-	list_for_each_entry(hook_node, &battery_hook_list, list) {
+	list_for_each_entry_safe(hook_node, tmp, &battery_hook_list, list) {
 		if (hook_node->add_battery(battery->bat)) {
 			/*
 			 * The notification of the extensions has failed, to
 			 * prevent further errors we will unload the extension.
 			 */
-			__battery_hook_unregister(hook_node, 0);
 			pr_err("error in extension, unloading: %s",
 					hook_node->name);
+			__battery_hook_unregister(hook_node, 0);
 		}
 	}
 	mutex_unlock(&hook_mutex);

From e33eab9ded328ccc14308afa51b5be7cbe78d30b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 4 Jul 2018 12:29:38 +0300
Subject: [PATCH 251/382] USB: serial: ch341: fix type promotion bug in
 ch341_control_in()

The "r" variable is an int and "bufsize" is an unsigned int so the
comparison is type promoted to unsigned.  If usb_control_msg() returns a
negative that is treated as a high positive value and the error handling
doesn't work.

Fixes: 2d5a9c72d0c4 ("USB: serial: ch341: fix control-message error handling")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ch341.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c
index bdd7a5ad3bf1..3bb1fff02bed 100644
--- a/drivers/usb/serial/ch341.c
+++ b/drivers/usb/serial/ch341.c
@@ -128,7 +128,7 @@ static int ch341_control_in(struct usb_device *dev,
 	r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request,
 			    USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN,
 			    value, index, buf, bufsize, DEFAULT_TIMEOUT);
-	if (r < bufsize) {
+	if (r < (int)bufsize) {
 		if (r >= 0) {
 			dev_err(&dev->dev,
 				"short control message received (%d < %u)\n",

From 367b160fe4717c14a2a978b6f9ffb75a7762d3ed Mon Sep 17 00:00:00 2001
From: Olli Salonen <olli.salonen@iki.fi>
Date: Wed, 4 Jul 2018 14:07:42 +0300
Subject: [PATCH 252/382] USB: serial: cp210x: add another USB ID for Qivicon
 ZigBee stick

There are two versions of the Qivicon Zigbee stick in circulation. This
adds the second USB ID to the cp210x driver.

Signed-off-by: Olli Salonen <olli.salonen@iki.fi>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/cp210x.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index ee0cc1d90b51..626a29d9aa58 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -149,6 +149,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8977) },	/* CEL MeshWorks DevKit Device */
 	{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
 	{ USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */
+	{ USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */
 	{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
 	{ USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
 	{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */

From f0139752f71b05c918d86101643eb00b312ab883 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Thu, 28 Jun 2018 18:44:01 +0200
Subject: [PATCH 253/382] drm/bridge/sii8620: Send AVI infoframe in all MHL
 versions

Currently AVI infoframe is sent only in MHL3. However, some MHL2 dongles
need AVI infoframe to work correctly in either packed pixel mode or
non-packed pixel mode.

Send AVI infoframe in set_infoframes() in every case. Create an
infoframe using drm_hdmi_infoframe_from_display_mode() instead of
manually filling each infoframe structure's field.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1530204243-6370-2-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 39 ++++++++++++++--------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 250effa0e6b8..bd30ccf3d195 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -14,6 +14,7 @@
 #include <drm/bridge/mhl.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_encoder.h>
 
 #include <linux/clk.h>
 #include <linux/delay.h>
@@ -72,9 +73,7 @@ struct sii8620 {
 	struct regulator_bulk_data supplies[2];
 	struct mutex lock; /* context lock, protects fields below */
 	int error;
-	int pixel_clock;
 	unsigned int use_packed_pixel:1;
-	int video_code;
 	enum sii8620_mode mode;
 	enum sii8620_sink_type sink_type;
 	u8 cbus_status;
@@ -82,7 +81,6 @@ struct sii8620 {
 	u8 xstat[MHL_XDS_SIZE];
 	u8 devcap[MHL_DCAP_SIZE];
 	u8 xdevcap[MHL_XDC_SIZE];
-	u8 avif[HDMI_INFOFRAME_SIZE(AVI)];
 	bool feature_complete;
 	bool devcap_read;
 	bool sink_detected;
@@ -1082,18 +1080,28 @@ static ssize_t mhl3_infoframe_pack(struct mhl3_infoframe *frame,
 	return frm_len;
 }
 
-static void sii8620_set_infoframes(struct sii8620 *ctx)
+static void sii8620_set_infoframes(struct sii8620 *ctx,
+				   struct drm_display_mode *mode)
 {
 	struct mhl3_infoframe mhl_frm;
 	union hdmi_infoframe frm;
 	u8 buf[31];
 	int ret;
 
+	ret = drm_hdmi_avi_infoframe_from_display_mode(&frm.avi,
+						       mode,
+						       true);
+	if (ctx->use_packed_pixel)
+		frm.avi.colorspace = HDMI_COLORSPACE_YUV422;
+
+	if (!ret)
+		ret = hdmi_avi_infoframe_pack(&frm.avi, buf, ARRAY_SIZE(buf));
+	if (ret > 0)
+		sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, buf + 3, ret - 3);
+
 	if (!sii8620_is_mhl3(ctx) || !ctx->use_packed_pixel) {
 		sii8620_write(ctx, REG_TPI_SC,
 			BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI);
-		sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, ctx->avif + 3,
-			ARRAY_SIZE(ctx->avif) - 3);
 		sii8620_write(ctx, REG_PKT_FILTER_0,
 			BIT_PKT_FILTER_0_DROP_CEA_GAMUT_PKT |
 			BIT_PKT_FILTER_0_DROP_MPEG_PKT |
@@ -1102,16 +1110,6 @@ static void sii8620_set_infoframes(struct sii8620 *ctx)
 		return;
 	}
 
-	ret = hdmi_avi_infoframe_init(&frm.avi);
-	frm.avi.colorspace = HDMI_COLORSPACE_YUV422;
-	frm.avi.active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
-	frm.avi.picture_aspect = HDMI_PICTURE_ASPECT_16_9;
-	frm.avi.colorimetry = HDMI_COLORIMETRY_ITU_709;
-	frm.avi.video_code = ctx->video_code;
-	if (!ret)
-		ret = hdmi_avi_infoframe_pack(&frm.avi, buf, ARRAY_SIZE(buf));
-	if (ret > 0)
-		sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, buf + 3, ret - 3);
 	sii8620_write(ctx, REG_PKT_FILTER_0,
 		BIT_PKT_FILTER_0_DROP_CEA_GAMUT_PKT |
 		BIT_PKT_FILTER_0_DROP_MPEG_PKT |
@@ -1131,6 +1129,9 @@ static void sii8620_set_infoframes(struct sii8620 *ctx)
 
 static void sii8620_start_video(struct sii8620 *ctx)
 {
+	struct drm_display_mode *mode =
+		&ctx->bridge.encoder->crtc->state->adjusted_mode;
+
 	if (!sii8620_is_mhl3(ctx))
 		sii8620_stop_video(ctx);
 
@@ -1167,7 +1168,7 @@ static void sii8620_start_video(struct sii8620 *ctx)
 			  MHL_XDS_LINK_RATE_6_0_GBPS, 0x40 },
 		};
 		u8 p0_ctrl = BIT_M3_P0CTRL_MHL3_P0_PORT_EN;
-		int clk = ctx->pixel_clock * (ctx->use_packed_pixel ? 2 : 3);
+		int clk = mode->clock * (ctx->use_packed_pixel ? 2 : 3);
 		int i;
 
 		for (i = 0; i < ARRAY_SIZE(clk_spec) - 1; ++i)
@@ -1196,7 +1197,7 @@ static void sii8620_start_video(struct sii8620 *ctx)
 			clk_spec[i].link_rate);
 	}
 
-	sii8620_set_infoframes(ctx);
+	sii8620_set_infoframes(ctx, mode);
 }
 
 static void sii8620_disable_hpd(struct sii8620 *ctx)
@@ -2242,8 +2243,6 @@ static bool sii8620_mode_fixup(struct drm_bridge *bridge,
 	mutex_lock(&ctx->lock);
 
 	ctx->use_packed_pixel = sii8620_is_packing_required(ctx, adjusted_mode);
-	ctx->video_code = drm_match_cea_mode(adjusted_mode);
-	ctx->pixel_clock = adjusted_mode->clock;
 
 	mutex_unlock(&ctx->lock);
 

From fdddc65ab35d575b42aab411b2dc687601eab680 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Thu, 28 Jun 2018 18:44:02 +0200
Subject: [PATCH 254/382] drm/bridge/sii8620: Fix display of packed pixel modes

Current implementation does not guarantee packed pixel modes working
with every dongle. There are some dongles, which require selecting
the output mode explicitly.

Write proper values to registers in packed_pixel mode, based on how it
is done in vendor's code. Select output color space: RGB
(no packed pixel) or YCBCR422 (packed pixel).

This reverts commit e8b92efa629dac0e70ea4145c5e70616de5f89c8
("drm/bridge/sii8620: fix display of packed pixel modes in MHL2").

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1530204243-6370-3-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index bd30ccf3d195..16fe7ea9d90c 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -1015,21 +1015,36 @@ static void sii8620_stop_video(struct sii8620 *ctx)
 
 static void sii8620_set_format(struct sii8620 *ctx)
 {
+	u8 out_fmt;
+
 	if (sii8620_is_mhl3(ctx)) {
 		sii8620_setbits(ctx, REG_M3_P0CTRL,
 				BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED,
 				ctx->use_packed_pixel ? ~0 : 0);
 	} else {
+		if (ctx->use_packed_pixel) {
+			sii8620_write_seq_static(ctx,
+				REG_VID_MODE, BIT_VID_MODE_M1080P,
+				REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1,
+				REG_MHLTX_CTL6, 0x60
+			);
+		} else {
 			sii8620_write_seq_static(ctx,
 				REG_VID_MODE, 0,
 				REG_MHL_TOP_CTL, 1,
 				REG_MHLTX_CTL6, 0xa0
 			);
+		}
 	}
 
+	if (ctx->use_packed_pixel)
+		out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL);
+	else
+		out_fmt = VAL_TPI_FORMAT(RGB, FULL);
+
 	sii8620_write_seq(ctx,
 		REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL),
-		REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL),
+		REG_TPI_OUTPUT, out_fmt,
 	);
 }
 

From 44f9a4b0dc7563669772198e5b226cab58a00167 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Thu, 28 Jun 2018 18:44:03 +0200
Subject: [PATCH 255/382] drm/bridge/sii8620: Fix link mode selection

Current link mode values do not allow to enable packed pixel modes.

Select packed pixel clock mode, if needed, every time the link mode
register gets updated.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1530204243-6370-4-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 30 ++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 16fe7ea9d90c..a6e8f4591e63 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -1165,8 +1165,14 @@ static void sii8620_start_video(struct sii8620 *ctx)
 	sii8620_set_format(ctx);
 
 	if (!sii8620_is_mhl3(ctx)) {
-		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
-			MHL_DST_LM_CLK_MODE_NORMAL | MHL_DST_LM_PATH_ENABLED);
+		u8 link_mode = MHL_DST_LM_PATH_ENABLED;
+
+		if (ctx->use_packed_pixel)
+			link_mode |= MHL_DST_LM_CLK_MODE_PACKED_PIXEL;
+		else
+			link_mode |= MHL_DST_LM_CLK_MODE_NORMAL;
+
+		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), link_mode);
 		sii8620_set_auto_zone(ctx);
 	} else {
 		static const struct {
@@ -1677,14 +1683,18 @@ static void sii8620_status_dcap_ready(struct sii8620 *ctx)
 
 static void sii8620_status_changed_path(struct sii8620 *ctx)
 {
-	if (ctx->stat[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED) {
-		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
-				      MHL_DST_LM_CLK_MODE_NORMAL
-				      | MHL_DST_LM_PATH_ENABLED);
-	} else {
-		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
-				      MHL_DST_LM_CLK_MODE_NORMAL);
-	}
+	u8 link_mode;
+
+	if (ctx->use_packed_pixel)
+		link_mode = MHL_DST_LM_CLK_MODE_PACKED_PIXEL;
+	else
+		link_mode = MHL_DST_LM_CLK_MODE_NORMAL;
+
+	if (ctx->stat[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED)
+		link_mode |= MHL_DST_LM_PATH_ENABLED;
+
+	sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
+			      link_mode);
 }
 
 static void sii8620_msc_mr_write_stat(struct sii8620 *ctx)

From 38fc4248677552ce35efc09902fdcb06b61d7ef9 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <contact@paulk.fr>
Date: Mon, 2 Jul 2018 11:16:59 +0200
Subject: [PATCH 256/382] arm64: Use aarch64elf and aarch64elfb emulation mode
 variants

The aarch64linux and aarch64linuxb emulation modes are not supported by
bare-metal toolchains and Linux using them forbids building the kernel
with these toolchains.

Since there is apparently no reason to target these emulation modes, the
more generic elf modes are used instead, allowing to build on bare-metal
toolchains as well as the already-supported ones.

Fixes: 3d6a7b99e3fa ("arm64: ensure the kernel is compiled for LP64")

Cc: stable@vger.kernel.org
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Paul Kocialkowski <contact@paulk.fr>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 45272266dafb..27389adf511a 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -61,14 +61,14 @@ KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
 AS		+= -EB
 LD		+= -EB
-LDFLAGS		+= -maarch64linuxb
+LDFLAGS		+= -maarch64elfb
 UTS_MACHINE	:= aarch64_be
 else
 KBUILD_CPPFLAGS	+= -mlittle-endian
 CHECKFLAGS	+= -D__AARCH64EL__
 AS		+= -EL
 LD		+= -EL
-LDFLAGS		+= -maarch64linux
+LDFLAGS		+= -maarch64elf
 UTS_MACHINE	:= aarch64
 endif
 

From d7ef4899d7182f9d4267b4e4a5cc3689c1a04f25 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Wed, 4 Jul 2018 21:00:14 +0530
Subject: [PATCH 257/382] sample/vfio-mdev: Change return type to vm_fault_t

convert mbochs_region_vm_fault and mbochs_dmabuf_vm_fault
to return vm_fault_t type.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mbochs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 85ac6037696f..d5d5a499160c 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -657,7 +657,7 @@ static void mbochs_put_pages(struct mdev_state *mdev_state)
 	dev_dbg(dev, "%s: %d pages released\n", __func__, count);
 }
 
-static int mbochs_region_vm_fault(struct vm_fault *vmf)
+static vm_fault_t mbochs_region_vm_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mdev_state *mdev_state = vma->vm_private_data;
@@ -695,7 +695,7 @@ static int mbochs_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int mbochs_dmabuf_vm_fault(struct vm_fault *vmf)
+static vm_fault_t mbochs_dmabuf_vm_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct mbochs_dmabuf *dmabuf = vma->vm_private_data;

From d5fad48cfb4b183d87960904c177eeeb9412b929 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 25 Jun 2018 16:49:37 +0800
Subject: [PATCH 258/382] RISC-V: Add conditional macro for zone of DMA32

The DMA32 is for 64-bit usage.

Signed-off-by: Zong Li <zong@andestech.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/mm/init.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index c77df8142be2..58a522f9bcc3 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -28,7 +28,9 @@ static void __init zone_sizes_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
 
+#ifdef CONFIG_ZONE_DMA32
 	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn));
+#endif
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 
 	free_area_init_nodes(max_zone_pfns);

From 8f79125d285d2d71ed110e875754942256efa51d Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 25 Jun 2018 16:49:38 +0800
Subject: [PATCH 259/382] RISC-V: Select GENERIC_UCMPDI2 on RV32I

On 32-bit, it need to use __ucmpdi2, otherwise, it can't find the __ucmpdi2
symbol.

Signed-off-by: Zong Li <zong@andestech.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index f12680c9b947..4764fdeb4f1f 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -107,6 +107,7 @@ config ARCH_RV32I
 	select GENERIC_LIB_ASHLDI3
 	select GENERIC_LIB_ASHRDI3
 	select GENERIC_LIB_LSHRDI3
+	select GENERIC_LIB_UCMPDI2
 
 config ARCH_RV64I
 	bool "RV64I"

From c480d8911fda96a0f37634bd4dc4e2c8a87c38da Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 25 Jun 2018 16:49:39 +0800
Subject: [PATCH 260/382] RISC-V: Add definiion of extract symbol's index and
 type for 32-bit

Use generic marco to get the index and type of symbol.

Signed-off-by: Zong Li <zong@andestech.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/uapi/asm/elf.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h
index 5cae4c30cd8e..1e0dfc36aab9 100644
--- a/arch/riscv/include/uapi/asm/elf.h
+++ b/arch/riscv/include/uapi/asm/elf.h
@@ -21,8 +21,13 @@ typedef struct user_regs_struct elf_gregset_t;
 
 typedef union __riscv_fp_state elf_fpregset_t;
 
-#define ELF_RISCV_R_SYM(r_info) ((r_info) >> 32)
-#define ELF_RISCV_R_TYPE(r_info) ((r_info) & 0xffffffff)
+#if __riscv_xlen == 64
+#define ELF_RISCV_R_SYM(r_info)		ELF64_R_SYM(r_info)
+#define ELF_RISCV_R_TYPE(r_info)	ELF64_R_TYPE(r_info)
+#else
+#define ELF_RISCV_R_SYM(r_info)		ELF32_R_SYM(r_info)
+#define ELF_RISCV_R_TYPE(r_info)	ELF32_R_TYPE(r_info)
+#endif
 
 /*
  * RISC-V relocation types

From 7df85002178e708aa749c63020fd333d9f085ced Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 25 Jun 2018 16:49:40 +0800
Subject: [PATCH 261/382] RISC-V: Change variable type for 32-bit compatible

Signed-off-by: Zong Li <zong@andestech.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/module.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 1d5e9b934b8c..e8c6aaa2aab4 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -37,7 +37,7 @@ static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v)
 static int apply_r_riscv_branch_rela(struct module *me, u32 *location,
 				     Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	u32 imm12 = (offset & 0x1000) << (31 - 12);
 	u32 imm11 = (offset & 0x800) >> (11 - 7);
 	u32 imm10_5 = (offset & 0x7e0) << (30 - 10);
@@ -50,7 +50,7 @@ static int apply_r_riscv_branch_rela(struct module *me, u32 *location,
 static int apply_r_riscv_jal_rela(struct module *me, u32 *location,
 				  Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	u32 imm20 = (offset & 0x100000) << (31 - 20);
 	u32 imm19_12 = (offset & 0xff000);
 	u32 imm11 = (offset & 0x800) << (20 - 11);
@@ -63,7 +63,7 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location,
 static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location,
 					 Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	u16 imm8 = (offset & 0x100) << (12 - 8);
 	u16 imm7_6 = (offset & 0xc0) >> (6 - 5);
 	u16 imm5 = (offset & 0x20) >> (5 - 2);
@@ -78,7 +78,7 @@ static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location,
 static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location,
 				       Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	u16 imm11 = (offset & 0x800) << (12 - 11);
 	u16 imm10 = (offset & 0x400) >> (10 - 8);
 	u16 imm9_8 = (offset & 0x300) << (12 - 11);
@@ -96,7 +96,7 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location,
 static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
 					 Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	s32 hi20;
 
 	if (offset != (s32)offset) {
@@ -178,7 +178,7 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location,
 static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
 				       Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	s32 hi20;
 
 	/* Always emit the got entry */
@@ -200,7 +200,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
 static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 				       Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	s32 fill_v = offset;
 	u32 hi20, lo12;
 
@@ -227,7 +227,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 static int apply_r_riscv_call_rela(struct module *me, u32 *location,
 				   Elf_Addr v)
 {
-	s64 offset = (void *)v - (void *)location;
+	ptrdiff_t offset = (void *)v - (void *)location;
 	s32 fill_v = offset;
 	u32 hi20, lo12;
 
@@ -347,7 +347,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 			unsigned int j;
 
 			for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) {
-				u64 hi20_loc =
+				unsigned long hi20_loc =
 					sechdrs[sechdrs[relsec].sh_info].sh_addr
 					+ rel[j].r_offset;
 				u32 hi20_type = ELF_RISCV_R_TYPE(rel[j].r_info);
@@ -360,12 +360,12 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 					Elf_Sym *hi20_sym =
 						(Elf_Sym *)sechdrs[symindex].sh_addr
 						+ ELF_RISCV_R_SYM(rel[j].r_info);
-					u64 hi20_sym_val =
+					unsigned long hi20_sym_val =
 						hi20_sym->st_value
 						+ rel[j].r_addend;
 
 					/* Calculate lo12 */
-					u64 offset = hi20_sym_val - hi20_loc;
+					size_t offset = hi20_sym_val - hi20_loc;
 					if (IS_ENABLED(CONFIG_MODULE_SECTIONS)
 					    && hi20_type == R_RISCV_GOT_HI20) {
 						offset = module_emit_got_entry(

From 781c8fe2da3d2c7c95cd7ffddbab63b80a79da4d Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Tue, 12 Jun 2018 19:26:36 +0200
Subject: [PATCH 262/382] RISC-V: fix R_RISCV_ADD32/R_RISCV_SUB32 relocations

The R_RISCV_ADD32/R_RISCV_SUB32 relocations should add/subtract the
address of the symbol (without overflow check), not its contents.

Signed-off-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/module.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 1d5e9b934b8c..6bb48315c973 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -263,14 +263,14 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location,
 static int apply_r_riscv_add32_rela(struct module *me, u32 *location,
 				    Elf_Addr v)
 {
-	*(u32 *)location += (*(u32 *)v);
+	*(u32 *)location += (u32)v;
 	return 0;
 }
 
 static int apply_r_riscv_sub32_rela(struct module *me, u32 *location,
 				    Elf_Addr v)
 {
-	*(u32 *)location -= (*(u32 *)v);
+	*(u32 *)location -= (u32)v;
 	return 0;
 }
 

From f67f10b8a6c96ab6c1d6946d269e2ca5f9998cc2 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 19 Jun 2018 15:41:34 -0600
Subject: [PATCH 263/382] riscv: remove unnecessary of_platform_populate call

The DT core will call of_platform_default_populate, so it is not
necessary for arch specific code to call it unless there are custom
match entries, auxdata or parent device. Neither of those apply here, so
remove the call.

Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: linux-riscv@lists.infradead.org
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/setup.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index ee44a48faf79..f0d2070866d4 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -220,8 +220,3 @@ void __init setup_arch(char **cmdline_p)
 	riscv_fill_hwcap();
 }
 
-static int __init riscv_device_init(void)
-{
-	return of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
-subsys_initcall_sync(riscv_device_init);

From 8606544890d7dc4f7a740963f70dc1e1d54f8e30 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@sifive.com>
Date: Fri, 22 Jun 2018 15:46:28 -0700
Subject: [PATCH 264/382] RISC-V: Don't include irq-riscv-intc.h

This file has never existed in the upstream kernel, but it's guarded by
an #ifdef that's also never existed in the upstream kernel.  As a part
of our interrupt controller refactoring this header is no longer
necessary, but this reference managed to sneak in anyway.

Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/irq.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index b74cbfbce2d0..7bcdaed15703 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -16,10 +16,6 @@
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
 
-#ifdef CONFIG_RISCV_INTC
-#include <linux/irqchip/irq-riscv-intc.h>
-#endif
-
 void __init init_IRQ(void)
 {
 	irqchip_init();

From 1db9b80980d26fe95c942e0bb8bde2ca715029ad Mon Sep 17 00:00:00 2001
From: Jim Wilson <jimw@sifive.com>
Date: Mon, 11 Jun 2018 14:48:22 -0700
Subject: [PATCH 265/382] RISC-V: Fix PTRACE_SETREGSET bug.

In riscv_gpr_set, pass regs instead of &regs to user_regset_copyin to fix
gdb segfault.

Signed-off-by: Jim Wilson <jimw@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/ptrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index ba3e80712797..9f82a7e34c64 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -50,7 +50,7 @@ static int riscv_gpr_set(struct task_struct *target,
 	struct pt_regs *regs;
 
 	regs = task_pt_regs(target);
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &regs, 0, -1);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1);
 	return ret;
 }
 

From 2893af07e507e993ad71ca6d66a7b02be741571c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 3 Jul 2018 10:22:00 +0900
Subject: [PATCH 266/382] arm64: add endianness option to LDFLAGS instead of LD

With the recent syntax extension, Kconfig is now able to evaluate the
compiler / toolchain capability.

However, accumulating flags to 'LD' is not compatible with the way
it works; 'LD' must be passed to Kconfig to call $(ld-option,...)
from Kconfig files.  If you tweak 'LD' in arch Makefile depending on
CONFIG_CPU_BIG_ENDIAN, this would end up with circular dependency
between Makefile and Kconfig.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 27389adf511a..48158c550110 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -60,15 +60,13 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
 AS		+= -EB
-LD		+= -EB
-LDFLAGS		+= -maarch64elfb
+LDFLAGS		+= -EB -maarch64elfb
 UTS_MACHINE	:= aarch64_be
 else
 KBUILD_CPPFLAGS	+= -mlittle-endian
 CHECKFLAGS	+= -D__AARCH64EL__
 AS		+= -EL
-LD		+= -EL
-LDFLAGS		+= -maarch64elf
+LDFLAGS		+= -EL -maarch64elf
 UTS_MACHINE	:= aarch64
 endif
 

From 99ec9e77511dea55d81729fc80b6c63a61bfa8e0 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Sun, 3 Jun 2018 16:40:54 +0200
Subject: [PATCH 267/382] drm/udl: fix display corruption of the last line

The displaylink hardware has such a peculiarity that it doesn't render a
command until next command is received. This produces occasional
corruption, such as when setting 22x11 font on the console, only the first
line of the cursor will be blinking if the cursor is located at some
specific columns.

When we end up with a repeating pixel, the driver has a bug that it leaves
one uninitialized byte after the command (and this byte is enough to flush
the command and render it - thus it fixes the screen corruption), however
whe we end up with a non-repeating pixel, there is no byte appended and
this results in temporary screen corruption.

This patch fixes the screen corruption by always appending a byte 0xAF at
the end of URB. It also removes the uninitialized byte.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/udl/udl_fb.c       |  5 ++++-
 drivers/gpu/drm/udl/udl_transfer.c | 11 +++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 2ebdc6d5a76e..d5583190f3e4 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -137,7 +137,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
 
 	if (cmd > (char *) urb->transfer_buffer) {
 		/* Send partial buffer remaining before exiting */
-		int len = cmd - (char *) urb->transfer_buffer;
+		int len;
+		if (cmd < (char *) urb->transfer_buffer + urb->transfer_buffer_length)
+			*cmd++ = 0xAF;
+		len = cmd - (char *) urb->transfer_buffer;
 		ret = udl_submit_urb(dev, urb, len);
 		bytes_sent += len;
 	} else
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 0c87b1ac6b68..b992644c17e6 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -153,11 +153,11 @@ static void udl_compress_hline16(
 		raw_pixels_count_byte = cmd++; /*  we'll know this later */
 		raw_pixel_start = pixel;
 
-		cmd_pixel_end = pixel + (min(MAX_CMD_PIXELS + 1,
-			min((int)(pixel_end - pixel) / bpp,
-			    (int)(cmd_buffer_end - cmd) / 2))) * bpp;
+		cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL,
+					(unsigned long)(pixel_end - pixel) / bpp,
+					(unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp;
 
-		prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp);
+		prefetch_range((void *) pixel, cmd_pixel_end - pixel);
 		pixel_val16 = get_pixel_val16(pixel, bpp);
 
 		while (pixel < cmd_pixel_end) {
@@ -193,6 +193,9 @@ static void udl_compress_hline16(
 		if (pixel > raw_pixel_start) {
 			/* finalize last RAW span */
 			*raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF;
+		} else {
+			/* undo unused byte */
+			cmd--;
 		}
 
 		*cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF;

From b8ba3578752b7aae5b6f8bd235873ea112969ff9 Mon Sep 17 00:00:00 2001
From: Stefan Mavrodiev <stefan@olimex.com>
Date: Wed, 4 Jul 2018 09:28:59 +0300
Subject: [PATCH 268/382] mmc: sunxi: Disable irq during pm_suspend

When mmc host controller enters suspend state, the clocks are
disabled, but irqs are not. For some reason the irqchip emits
false interrupts, which causes system lock loop.

Debug log is:
  ...
  sunxi-mmc 1c11000.mmc: setting clk to 52000000, rounded 51200000
  sunxi-mmc 1c11000.mmc: enabling the clock
  sunxi-mmc 1c11000.mmc: cmd 13(8000014d) arg 10000 ie 0x0000bbc6 len 0
  sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00000004 idi 00000000
  sunxi-mmc 1c11000.mmc: cmd 6(80000146) arg 3210101 ie 0x0000bbc6 len 0
  sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00000004 idi 00000000
  sunxi-mmc 1c11000.mmc: cmd 13(8000014d) arg 10000 ie 0x0000bbc6 len 0
  sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00000004 idi 00000000
  mmc1: new DDR MMC card at address 0001
  mmcblk1: mmc1:0001 AGND3R 14.6 GiB
  mmcblk1boot0: mmc1:0001 AGND3R partition 1 4.00 MiB
  mmcblk1boot1: mmc1:0001 AGND3R partition 2 4.00 MiB
  sunxi-mmc 1c11000.mmc: cmd 18(80003352) arg 0 ie 0x0000fbc2 len 409
  sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00004000 idi 00000002
   mmcblk1: p1
  sunxi-mmc 1c11000.mmc: irq: rq   (null) mi 00000000 idi 00000000
  sunxi-mmc 1c11000.mmc: irq: rq   (null) mi 00000000 idi 00000000
  sunxi-mmc 1c11000.mmc: irq: rq   (null) mi 00000000 idi 00000000
  sunxi-mmc 1c11000.mmc: irq: rq   (null) mi 00000000 idi 00000000
and so on...

This issue apears on eMMC cards, routed on MMC2 slot. The patch is
tested with A20-OLinuXino-MICRO/LIME/LIME2 boards.

Fixes: 9a8e1e8cc2c0 ("mmc: sunxi: Add runtime_pm support")
Signed-off-by: Stefan Mavrodiev <stefan@olimex.com>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sunxi-mmc.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c
index e7472590f2ed..8e7f3e35ee3d 100644
--- a/drivers/mmc/host/sunxi-mmc.c
+++ b/drivers/mmc/host/sunxi-mmc.c
@@ -1446,6 +1446,7 @@ static int sunxi_mmc_runtime_resume(struct device *dev)
 	sunxi_mmc_init_host(host);
 	sunxi_mmc_set_bus_width(host, mmc->ios.bus_width);
 	sunxi_mmc_set_clk(host, &mmc->ios);
+	enable_irq(host->irq);
 
 	return 0;
 }
@@ -1455,6 +1456,12 @@ static int sunxi_mmc_runtime_suspend(struct device *dev)
 	struct mmc_host	*mmc = dev_get_drvdata(dev);
 	struct sunxi_mmc_host *host = mmc_priv(mmc);
 
+	/*
+	 * When clocks are off, it's possible receiving
+	 * fake interrupts, which will stall the system.
+	 * Disabling the irq  will prevent this.
+	 */
+	disable_irq(host->irq);
 	sunxi_mmc_reset_host(host);
 	sunxi_mmc_disable(host);
 

From 2c83a726d6fbb5d130d8f2edd82a258adb675ac3 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Wed, 27 Jun 2018 15:58:13 +0200
Subject: [PATCH 269/382] drm/etnaviv: bring back progress check in job timeout
 handler

When the hangcheck handler was replaced by the DRM scheduler timeout
handling we dropped the forward progress check, as this might allow
clients to hog the GPU for a long time with a big job.

It turns out that even reasonably well behaved clients like the
Armada Xorg driver occasionally trip over the 500ms timeout. Bring
back the forward progress check to get rid of the userspace regression.

We would still like to fix userspace to submit smaller batches
if possible, but that is for another day.

Cc: <stable@vger.kernel.org>
Fixes: 6d7a20c07760 (drm/etnaviv: replace hangcheck with scheduler timeout)
Reported-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.h   |  3 +++
 drivers/gpu/drm/etnaviv/etnaviv_sched.c | 24 ++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index dd430f0f8ff5..90f17ff7888e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -131,6 +131,9 @@ struct etnaviv_gpu {
 	struct work_struct sync_point_work;
 	int sync_point_event;
 
+	/* hang detection */
+	u32 hangcheck_dma_addr;
+
 	void __iomem *mmio;
 	int irq;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index a74eb57af15b..50d6b88cb7aa 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -10,6 +10,7 @@
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_sched.h"
+#include "state.xml.h"
 
 static int etnaviv_job_hang_limit = 0;
 module_param_named(job_hang_limit, etnaviv_job_hang_limit, int , 0444);
@@ -85,6 +86,29 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 {
 	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
 	struct etnaviv_gpu *gpu = submit->gpu;
+	u32 dma_addr;
+	int change;
+
+	/*
+	 * If the GPU managed to complete this jobs fence, the timout is
+	 * spurious. Bail out.
+	 */
+	if (fence_completed(gpu, submit->out_fence->seqno))
+		return;
+
+	/*
+	 * If the GPU is still making forward progress on the front-end (which
+	 * should never loop) we shift out the timeout to give it a chance to
+	 * finish the job.
+	 */
+	dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS);
+	change = dma_addr - gpu->hangcheck_dma_addr;
+	if (change < 0 || change > 16) {
+		gpu->hangcheck_dma_addr = dma_addr;
+		schedule_delayed_work(&sched_job->work_tdr,
+				      sched_job->sched->timeout);
+		return;
+	}
 
 	/* block scheduler */
 	kthread_park(gpu->sched.thread);

From a6311be8f062db5fe93a08a3722fae53a58f2499 Mon Sep 17 00:00:00 2001
From: Mikita Lipski <mikita.lipski@amd.com>
Date: Wed, 4 Jul 2018 17:26:26 -0400
Subject: [PATCH 270/382] drm/amd/display: adding ycbcr420 pixel encoding for
 hdmi

[why]
HDMI EDID's VSDB contains spectial timings for specifically
YCbCr 4:2:0 colour space. In those cases we need to verify
if the mode provided is one of the special ones has to use
YCbCr 4:2:0 pixel encoding for display info.
[how]
Verify if the mode is using specific ycbcr420 colour space with
the help of DRM helper function and assign the mode to use
ycbcr420 pixel encoding.

Tested-by: Mike Lothian <mike@fireburn.co.uk>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Mikita Lipski <mikita.lipski@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3a8d6356afc2..f97dfc13a8f0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2183,6 +2183,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
 					     const struct drm_connector *connector)
 {
 	struct dc_crtc_timing *timing_out = &stream->timing;
+	const struct drm_display_info *info = &connector->display_info;
 
 	memset(timing_out, 0, sizeof(struct dc_crtc_timing));
 
@@ -2191,8 +2192,10 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
 	timing_out->v_border_top = 0;
 	timing_out->v_border_bottom = 0;
 	/* TODO: un-hardcode */
-
-	if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444)
+	if (drm_mode_is_420_only(info, mode_in)
+			&& stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A)
+		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420;
+	else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444)
 			&& stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A)
 		timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444;
 	else

From e2a46a48b94d143b7fabd9da7d45eef1a0799986 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael@kernel.org>
Date: Thu, 5 Jul 2018 12:27:53 +0200
Subject: [PATCH 271/382] MAINTAINERS: Add myself as driver core changes
 reviewer

I really need to look at driver core changes before they are applied
due to PM dependencies and they sometimes get lost in the LKML
traffic, so add myself as an official driver core reviewer.

Signed-off-by: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6cfd16790add..68aa943a672c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4456,6 +4456,7 @@ F:	Documentation/blockdev/drbd/
 
 DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+R:	"Rafael J. Wysocki" <rafael@kernel.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
 S:	Supported
 F:	Documentation/kobject.txt

From 413ff0b942481c7ac2e800abbbac5af318a65e61 Mon Sep 17 00:00:00 2001
From: Mikita Lipski <mikita.lipski@amd.com>
Date: Wed, 4 Jul 2018 17:27:56 -0400
Subject: [PATCH 272/382] drm/amd/display: add a check for display depth
 validity

[why]
HDMI 2.0 fails to validate 4K@60 timing with 10 bpc
[how]
Adding a helper function that would verify if the display depth
assigned would pass a bandwidth validation.
Drop the display depth by one level till calculated pixel clk
is lower than maximum TMDS clk.

Bugzilla: https://bugs.freedesktop.org/106959

Tested-by: Mike Lothian <mike@fireburn.co.uk>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Mikita Lipski <mikita.lipski@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f97dfc13a8f0..770c6b24be0b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2175,6 +2175,46 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing)
 	return color_space;
 }
 
+static void reduce_mode_colour_depth(struct dc_crtc_timing *timing_out)
+{
+	if (timing_out->display_color_depth <= COLOR_DEPTH_888)
+		return;
+
+	timing_out->display_color_depth--;
+}
+
+static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_out,
+						const struct drm_display_info *info)
+{
+	int normalized_clk;
+	if (timing_out->display_color_depth <= COLOR_DEPTH_888)
+		return;
+	do {
+		normalized_clk = timing_out->pix_clk_khz;
+		/* YCbCr 4:2:0 requires additional adjustment of 1/2 */
+		if (timing_out->pixel_encoding == PIXEL_ENCODING_YCBCR420)
+			normalized_clk /= 2;
+		/* Adjusting pix clock following on HDMI spec based on colour depth */
+		switch (timing_out->display_color_depth) {
+		case COLOR_DEPTH_101010:
+			normalized_clk = (normalized_clk * 30) / 24;
+			break;
+		case COLOR_DEPTH_121212:
+			normalized_clk = (normalized_clk * 36) / 24;
+			break;
+		case COLOR_DEPTH_161616:
+			normalized_clk = (normalized_clk * 48) / 24;
+			break;
+		default:
+			return;
+		}
+		if (normalized_clk <= info->max_tmds_clock)
+			return;
+		reduce_mode_colour_depth(timing_out);
+
+	} while (timing_out->display_color_depth > COLOR_DEPTH_888);
+
+}
 /*****************************************************************************/
 
 static void
@@ -2231,6 +2271,8 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream,
 
 	stream->out_transfer_func->type = TF_TYPE_PREDEFINED;
 	stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB;
+	if (stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A)
+		adjust_colour_depth_from_display_info(timing_out, info);
 }
 
 static void fill_audio_info(struct audio_info *audio_info,

From 1a381d4a0a9a0f999a13faaba22bf6b3fc80dcb9 Mon Sep 17 00:00:00 2001
From: Greg Hackmann <ghackmann@android.com>
Date: Wed, 27 Jun 2018 12:46:14 -0700
Subject: [PATCH 273/382] arm64: remove no-op -p linker flag

Linking the ARM64 defconfig kernel with LLVM lld fails with the error:

  ld.lld: error: unknown argument: -p
  Makefile:1015: recipe for target 'vmlinux' failed

Without this flag, the ARM64 defconfig kernel successfully links with
lld and boots on Dragonboard 410c.

After digging through binutils source and changelogs, it turns out that
-p is only relevant to ancient binutils installations targeting 32-bit
ARM.  binutils accepts -p for AArch64 too, but it's always been
undocumented and silently ignored.  A comment in
ld/emultempl/aarch64elf.em explains that it's "Only here for backwards
compatibility".

Since this flag is a no-op on ARM64, we can safely drop it.

Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 48158c550110..7976d2d242fa 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -10,7 +10,7 @@
 #
 # Copyright (C) 1995-2001 by Russell King
 
-LDFLAGS_vmlinux	:=-p --no-undefined -X
+LDFLAGS_vmlinux	:=--no-undefined -X
 CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 GZFLAGS		:=-9
 

From d02d21ea007b6b33cdaf15c2f84fb1fea996ecc2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 4 Jul 2018 18:17:51 -0700
Subject: [PATCH 274/382] autofs: rename 'autofs' module back to 'autofs4'

It turns out that systemd has a bug: it wants to load the autofs module
early because of some initialization ordering with udev, and it doesn't
do that correctly.  Everywhere else it does the proper "look up module
name" that does the proper alias resolution, but in that early code, it
just uses a hardcoded "autofs4" for the module name.

The result of that is that as of commit a2225d931f75 ("autofs: remove
left-over autofs4 stubs"), you get

    systemd[1]: Failed to insert module 'autofs4': No such file or directory

in the system logs, and a lack of module loading.  All this despite the
fact that we had very clearly marked 'autofs4' as an alias for this
module.

What's so ridiculous about this is that literally everything else does
the module alias handling correctly, including really old versions of
systemd (that just used 'modprobe' to do this), and even all the other
systemd module loading code.

Only that special systemd early module load code is broken, hardcoding
the module names for not just 'autofs4', but also "ipv6", "unix",
"ip_tables" and "virtio_rng".  Very annoying.

Instead of creating an _additional_ separate compatibility 'autofs4'
module, just rely on the fact that everybody else gets this right, and
just call the module 'autofs4' for compatibility reasons, with 'autofs'
as the alias name.

That will allow the systemd people to fix their bugs, adding the proper
alias handling, and maybe even fix the name of the module to be just
"autofs" (so that they can _test_ the alias handling).  And eventually,
we can revert this silly compatibility hack.

See also

    https://github.com/systemd/systemd/issues/9501
    https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=902946

for the systemd bug reports upstream and in the Debian bug tracker
respectively.

Fixes: a2225d931f75 ("autofs: remove left-over autofs4 stubs")
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Reported-by: Michael Biebl <biebl@debian.org>
Cc: Ian Kent <raven@themaw.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs/Makefile | 4 ++--
 fs/autofs/init.c   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
index 43fedde15c26..1f85d35ec8b7 100644
--- a/fs/autofs/Makefile
+++ b/fs/autofs/Makefile
@@ -2,6 +2,6 @@
 # Makefile for the linux autofs-filesystem routines.
 #
 
-obj-$(CONFIG_AUTOFS_FS) += autofs.o
+obj-$(CONFIG_AUTOFS_FS) += autofs4.o
 
-autofs-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
+autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
index cc9447e1903f..79ae07d9592f 100644
--- a/fs/autofs/init.c
+++ b/fs/autofs/init.c
@@ -23,7 +23,7 @@ static struct file_system_type autofs_fs_type = {
 	.kill_sb	= autofs_kill_sb,
 };
 MODULE_ALIAS_FS("autofs");
-MODULE_ALIAS("autofs4");
+MODULE_ALIAS("autofs");
 
 static int __init init_autofs_fs(void)
 {

From 696e420bb2a6624478105651d5368d45b502b324 Mon Sep 17 00:00:00 2001
From: Lars Persson <lars.persson@axis.com>
Date: Mon, 25 Jun 2018 14:05:25 +0200
Subject: [PATCH 275/382] cifs: Fix use after free of a mid_q_entry

With protocol version 2.0 mounts we have seen crashes with corrupt mid
entries. Either the server->pending_mid_q list becomes corrupt with a
cyclic reference in one element or a mid object fetched by the
demultiplexer thread becomes overwritten during use.

Code review identified a race between the demultiplexer thread and the
request issuing thread. The demultiplexer thread seems to be written
with the assumption that it is the sole user of the mid object until
it calls the mid callback which either wakes the issuer task or
deletes the mid.

This assumption is not true because the issuer task can be woken up
earlier by a signal. If the demultiplexer thread has proceeded as far
as setting the mid_state to MID_RESPONSE_RECEIVED then the issuer
thread will happily end up calling cifs_delete_mid while the
demultiplexer thread still is using the mid object.

Inserting a delay in the cifs demultiplexer thread widens the race
window and makes reproduction of the race very easy:

		if (server->large_buf)
			buf = server->bigbuf;

+		usleep_range(500, 4000);

		server->lstrp = jiffies;

To resolve this I think the proper solution involves putting a
reference count on the mid object. This patch makes sure that the
demultiplexer thread holds a reference until it has finished
processing the transaction.

Cc: stable@vger.kernel.org
Signed-off-by: Lars Persson <larper@axis.com>
Acked-by: Paulo Alcantara <palcantara@suse.de>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h      |  1 +
 fs/cifs/cifsproto.h     |  1 +
 fs/cifs/connect.c       |  8 +++++++-
 fs/cifs/smb1ops.c       |  1 +
 fs/cifs/smb2ops.c       |  1 +
 fs/cifs/smb2transport.c |  1 +
 fs/cifs/transport.c     | 18 +++++++++++++++++-
 7 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index bd78da59a4fd..a2962fd41c6f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1416,6 +1416,7 @@ typedef int (mid_handle_t)(struct TCP_Server_Info *server,
 /* one of these for every pending CIFS request to the server */
 struct mid_q_entry {
 	struct list_head qhead;	/* mids waiting on reply from this server */
+	struct kref refcount;
 	struct TCP_Server_Info *server;	/* server corresponding to this mid */
 	__u64 mid;		/* multiplex id */
 	__u32 pid;		/* process id */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 03018be17283..1890f534c88b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -82,6 +82,7 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
 					struct TCP_Server_Info *server);
 extern void DeleteMidQEntry(struct mid_q_entry *midEntry);
 extern void cifs_delete_mid(struct mid_q_entry *mid);
+extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
 extern int cifs_handle_standard(struct TCP_Server_Info *server,
 				struct mid_q_entry *mid);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a57da1b88bdf..5df2c0698cda 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -924,6 +924,7 @@ cifs_demultiplex_thread(void *p)
 				server->pdu_size = next_offset;
 		}
 
+		mid_entry = NULL;
 		if (server->ops->is_transform_hdr &&
 		    server->ops->receive_transform &&
 		    server->ops->is_transform_hdr(buf)) {
@@ -938,8 +939,11 @@ cifs_demultiplex_thread(void *p)
 				length = mid_entry->receive(server, mid_entry);
 		}
 
-		if (length < 0)
+		if (length < 0) {
+			if (mid_entry)
+				cifs_mid_q_entry_release(mid_entry);
 			continue;
+		}
 
 		if (server->large_buf)
 			buf = server->bigbuf;
@@ -956,6 +960,8 @@ cifs_demultiplex_thread(void *p)
 
 			if (!mid_entry->multiRsp || mid_entry->multiEnd)
 				mid_entry->callback(mid_entry);
+
+			cifs_mid_q_entry_release(mid_entry);
 		} else if (server->ops->is_oplock_break &&
 			   server->ops->is_oplock_break(buf, server)) {
 			cifs_dbg(FYI, "Received oplock break\n");
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index aff8ce8ba34d..646dcd149de1 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -107,6 +107,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer)
 		if (compare_mid(mid->mid, buf) &&
 		    mid->mid_state == MID_REQUEST_SUBMITTED &&
 		    le16_to_cpu(mid->command) == buf->Command) {
+			kref_get(&mid->refcount);
 			spin_unlock(&GlobalMid_Lock);
 			return mid;
 		}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0356b5559c71..e9216ce88796 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -203,6 +203,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
 		if ((mid->mid == wire_mid) &&
 		    (mid->mid_state == MID_REQUEST_SUBMITTED) &&
 		    (mid->command == shdr->Command)) {
+			kref_get(&mid->refcount);
 			spin_unlock(&GlobalMid_Lock);
 			return mid;
 		}
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 51b9437c3c7b..50592976dcb4 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -548,6 +548,7 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr,
 
 	temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
 	memset(temp, 0, sizeof(struct mid_q_entry));
+	kref_init(&temp->refcount);
 	temp->mid = le64_to_cpu(shdr->MessageId);
 	temp->pid = current->pid;
 	temp->command = shdr->Command; /* Always LE */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index fb57dfbfb749..208ecb830466 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -61,6 +61,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
 
 	temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS);
 	memset(temp, 0, sizeof(struct mid_q_entry));
+	kref_init(&temp->refcount);
 	temp->mid = get_mid(smb_buffer);
 	temp->pid = current->pid;
 	temp->command = cpu_to_le16(smb_buffer->Command);
@@ -82,6 +83,21 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
 	return temp;
 }
 
+static void _cifs_mid_q_entry_release(struct kref *refcount)
+{
+	struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
+					       refcount);
+
+	mempool_free(mid, cifs_mid_poolp);
+}
+
+void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
+{
+	spin_lock(&GlobalMid_Lock);
+	kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
+	spin_unlock(&GlobalMid_Lock);
+}
+
 void
 DeleteMidQEntry(struct mid_q_entry *midEntry)
 {
@@ -110,7 +126,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 		}
 	}
 #endif
-	mempool_free(midEntry, cifs_mid_poolp);
+	cifs_mid_q_entry_release(midEntry);
 }
 
 void

From 27c32b49c3dbfe1e5f57d2b61823bf9474ae0875 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <paulo@paulo.ac>
Date: Sat, 23 Jun 2018 14:52:23 -0300
Subject: [PATCH 276/382] cifs: Fix validation of signed data in smb3+

Fixes: c713c8770fa5 ("cifs: push rfc1002 generation down the stack")

We failed to validate signed data returned by the server because
__cifs_calc_signature() now expects to sign the actual data in iov but
we were also passing down the rfc1002 length.

Fix smb3_calc_signature() to calculate signature of rfc1002 length prior
to passing only the actual data iov[1-N] to __cifs_calc_signature(). In
addition, there are a few cases where no rfc1002 length is passed so we
make sure there's one (iov_len == 4).

Signed-off-by: Paulo Alcantara <palcantara@suse.de>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2transport.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 50592976dcb4..1af46ca5a951 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -408,12 +408,14 @@ generate_smb311signingkey(struct cifs_ses *ses)
 int
 smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
-	int rc = 0;
+	int rc;
 	unsigned char smb3_signature[SMB2_CMACAES_SIZE];
 	unsigned char *sigptr = smb3_signature;
 	struct kvec *iov = rqst->rq_iov;
 	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
 	struct cifs_ses *ses;
+	struct shash_desc *shash = &server->secmech.sdesccmacaes->shash;
+	struct smb_rqst drqst;
 
 	ses = smb2_find_smb_ses(server, shdr->SessionId);
 	if (!ses) {
@@ -425,8 +427,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 	memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
 
 	rc = crypto_shash_setkey(server->secmech.cmacaes,
-		ses->smb3signingkey, SMB2_CMACAES_SIZE);
-
+				 ses->smb3signingkey, SMB2_CMACAES_SIZE);
 	if (rc) {
 		cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__);
 		return rc;
@@ -437,15 +438,33 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 	 * so unlike smb2 case we do not have to check here if secmech are
 	 * initialized
 	 */
-	rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash);
+	rc = crypto_shash_init(shash);
 	if (rc) {
 		cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__);
 		return rc;
 	}
 
-	rc = __cifs_calc_signature(rqst, server, sigptr,
-				   &server->secmech.sdesccmacaes->shash);
+	/*
+	 * For SMB2+, __cifs_calc_signature() expects to sign only the actual
+	 * data, that is, iov[0] should not contain a rfc1002 length.
+	 *
+	 * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to
+	 * __cifs_calc_signature().
+	 */
+	drqst = *rqst;
+	if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) {
+		rc = crypto_shash_update(shash, iov[0].iov_base,
+					 iov[0].iov_len);
+		if (rc) {
+			cifs_dbg(VFS, "%s: Could not update with payload\n",
+				 __func__);
+			return rc;
+		}
+		drqst.rq_iov++;
+		drqst.rq_nvec--;
+	}
 
+	rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
 	if (!rc)
 		memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
 

From 8de8c4608fe9edc046c31bf82b2b7ebc1daae015 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <paulo@paulo.ac>
Date: Sat, 23 Jun 2018 14:52:24 -0300
Subject: [PATCH 277/382] cifs: Fix validation of signed data in smb2

Fixes: c713c8770fa5 ("cifs: push rfc1002 generation down the stack")

We failed to validate signed data returned by the server because
__cifs_calc_signature() now expects to sign the actual data in iov but
we were also passing down the rfc1002 length.

Fix smb3_calc_signature() to calculate signature of rfc1002 length prior
to passing only the actual data iov[1-N] to __cifs_calc_signature(). In
addition, there are a few cases where no rfc1002 length is passed so we
make sure there's one (iov_len == 4).

Signed-off-by: Paulo Alcantara <palcantara@suse.de>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2transport.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 1af46ca5a951..719d55e63d88 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -173,6 +173,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 	struct kvec *iov = rqst->rq_iov;
 	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
 	struct cifs_ses *ses;
+	struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash;
+	struct smb_rqst drqst;
 
 	ses = smb2_find_smb_ses(server, shdr->SessionId);
 	if (!ses) {
@@ -190,21 +192,39 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 	}
 
 	rc = crypto_shash_setkey(server->secmech.hmacsha256,
-		ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
+				 ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
 	if (rc) {
 		cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
 		return rc;
 	}
 
-	rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
+	rc = crypto_shash_init(shash);
 	if (rc) {
 		cifs_dbg(VFS, "%s: Could not init sha256", __func__);
 		return rc;
 	}
 
-	rc = __cifs_calc_signature(rqst, server, sigptr,
-		&server->secmech.sdeschmacsha256->shash);
+	/*
+	 * For SMB2+, __cifs_calc_signature() expects to sign only the actual
+	 * data, that is, iov[0] should not contain a rfc1002 length.
+	 *
+	 * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to
+	 * __cifs_calc_signature().
+	 */
+	drqst = *rqst;
+	if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) {
+		rc = crypto_shash_update(shash, iov[0].iov_base,
+					 iov[0].iov_len);
+		if (rc) {
+			cifs_dbg(VFS, "%s: Could not update with payload\n",
+				 __func__);
+			return rc;
+		}
+		drqst.rq_iov++;
+		drqst.rq_nvec--;
+	}
 
+	rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
 	if (!rc)
 		memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
 

From 81f39f951b8098b1c59b01ad10d06d7dc01c7019 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Thu, 28 Jun 2018 10:47:14 +1000
Subject: [PATCH 278/382] cifs: fix SMB1 breakage

SMB1 mounting broke in commit 35e2cc1ba755
("cifs: Use correct packet length in SMB2_TRANSFORM header")
Fix it and also rename smb2_rqst_len to smb_rqst_len
to make it less unobvious that the function is also called from
CIFS/SMB1

Good job by Paulo reviewing and cleaning up Ronnie's original patch.

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-by: Paulo Alcantara <palcantara@suse.de>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c   | 2 +-
 fs/cifs/smb2proto.h | 4 ++--
 fs/cifs/smbdirect.c | 5 +++--
 fs/cifs/smbdirect.h | 4 ++--
 fs/cifs/transport.c | 9 +++++----
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e9216ce88796..04578f6e306b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2522,7 +2522,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq,
 	if (!tr_hdr)
 		goto err_free_iov;
 
-	orig_len = smb2_rqst_len(old_rq, false);
+	orig_len = smb_rqst_len(server, old_rq);
 
 	/* fill the 2nd iov with a transform header */
 	fill_transform_hdr(tr_hdr, orig_len, old_rq);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 3ae208ac2a77..6e6a4f2ec890 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -113,8 +113,8 @@ extern int smb2_unlock_range(struct cifsFileInfo *cfile,
 extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
 extern void smb2_reconnect_server(struct work_struct *work);
 extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server);
-extern unsigned long
-smb2_rqst_len(struct smb_rqst *rqst, bool skip_rfc1002_marker);
+extern unsigned long smb_rqst_len(struct TCP_Server_Info *server,
+				  struct smb_rqst *rqst);
 
 /*
  * SMB2 Worker functions - most of protocol specific implementation details
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 6fd94d9ffac2..c55ea4e6201b 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2083,8 +2083,9 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
  * rqst: the data to write
  * return value: 0 if successfully write, otherwise error code
  */
-int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
+int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 {
+	struct smbd_connection *info = server->smbd_conn;
 	struct kvec vec;
 	int nvecs;
 	int size;
@@ -2118,7 +2119,7 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst)
 	 * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
 	 * ends at page boundary
 	 */
-	buflen = smb2_rqst_len(rqst, true);
+	buflen = smb_rqst_len(server, rqst);
 
 	if (buflen + sizeof(struct smbd_data_transfer) >
 		info->max_fragmented_send_size) {
diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h
index 1e419c21dc60..a11096254f29 100644
--- a/fs/cifs/smbdirect.h
+++ b/fs/cifs/smbdirect.h
@@ -292,7 +292,7 @@ void smbd_destroy(struct smbd_connection *info);
 
 /* Interface for carrying upper layer I/O through send/recv */
 int smbd_recv(struct smbd_connection *info, struct msghdr *msg);
-int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst);
+int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst);
 
 enum mr_state {
 	MR_READY,
@@ -332,7 +332,7 @@ static inline void *smbd_get_connection(
 static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; }
 static inline void smbd_destroy(struct smbd_connection *info) {}
 static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; }
-static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; }
+static inline int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) {return -1; }
 #endif
 
 #endif
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 208ecb830466..a341ec839c83 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -218,14 +218,15 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg,
 }
 
 unsigned long
-smb2_rqst_len(struct smb_rqst *rqst, bool skip_rfc1002_marker)
+smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst)
 {
 	unsigned int i;
 	struct kvec *iov;
 	int nvec;
 	unsigned long buflen = 0;
 
-	if (skip_rfc1002_marker && rqst->rq_iov[0].iov_len == 4) {
+	if (server->vals->header_preamble_size == 0 &&
+	    rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) {
 		iov = &rqst->rq_iov[1];
 		nvec = rqst->rq_nvec - 1;
 	} else {
@@ -276,7 +277,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 	__be32 rfc1002_marker;
 
 	if (cifs_rdma_enabled(server) && server->smbd_conn) {
-		rc = smbd_send(server->smbd_conn, rqst);
+		rc = smbd_send(server, rqst);
 		goto smbd_done;
 	}
 	if (ssocket == NULL)
@@ -287,7 +288,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 				(char *)&val, sizeof(val));
 
 	for (j = 0; j < num_rqst; j++)
-		send_length += smb2_rqst_len(&rqst[j], true);
+		send_length += smb_rqst_len(server, &rqst[j]);
 	rfc1002_marker = cpu_to_be32(send_length);
 
 	/* Generate a rfc1002 marker for SMB2+ */

From 6aa0c114eceec8cc61715f74a4ce91b048d7561c Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <paulo@paulo.ac>
Date: Wed, 4 Jul 2018 14:16:16 -0300
Subject: [PATCH 279/382] cifs: Fix memory leak in smb2_set_ea()

This patch fixes a memory leak when doing a setxattr(2) in SMB2+.

Signed-off-by: Paulo Alcantara <palcantara@suse.de>
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/smb2ops.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 04578f6e306b..1d5985bd760b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -856,6 +856,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea,
 			 len);
+	kfree(ea);
+
 	SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
 
 	return rc;

From f46ecbd97f508e68a7806291a139499794874f3d Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 5 Jul 2018 11:46:42 +0200
Subject: [PATCH 280/382] cifs: Fix slab-out-of-bounds in send_set_info() on
 SMB2 ACE setting

A "small" CIFS buffer is not big enough in general to hold a
setacl request for SMB2, and we end up overflowing the buffer in
send_set_info(). For instance:

 # mount.cifs //127.0.0.1/test /mnt/test -o username=test,password=test,nounix,cifsacl
 # touch /mnt/test/acltest
 # getcifsacl /mnt/test/acltest
 REVISION:0x1
 CONTROL:0x9004
 OWNER:S-1-5-21-2926364953-924364008-418108241-1000
 GROUP:S-1-22-2-1001
 ACL:S-1-5-21-2926364953-924364008-418108241-1000:ALLOWED/0x0/0x1e01ff
 ACL:S-1-22-2-1001:ALLOWED/0x0/R
 ACL:S-1-22-2-1001:ALLOWED/0x0/R
 ACL:S-1-5-21-2926364953-924364008-418108241-1000:ALLOWED/0x0/0x1e01ff
 ACL:S-1-1-0:ALLOWED/0x0/R
 # setcifsacl -a "ACL:S-1-22-2-1004:ALLOWED/0x0/R" /mnt/test/acltest

this setacl will cause the following KASAN splat:

[  330.777927] BUG: KASAN: slab-out-of-bounds in send_set_info+0x4dd/0xc20 [cifs]
[  330.779696] Write of size 696 at addr ffff88010d5e2860 by task setcifsacl/1012

[  330.781882] CPU: 1 PID: 1012 Comm: setcifsacl Not tainted 4.18.0-rc2+ #2
[  330.783140] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
[  330.784395] Call Trace:
[  330.784789]  dump_stack+0xc2/0x16b
[  330.786777]  print_address_description+0x6a/0x270
[  330.787520]  kasan_report+0x258/0x380
[  330.788845]  memcpy+0x34/0x50
[  330.789369]  send_set_info+0x4dd/0xc20 [cifs]
[  330.799511]  SMB2_set_acl+0x76/0xa0 [cifs]
[  330.801395]  set_smb2_acl+0x7ac/0xf30 [cifs]
[  330.830888]  cifs_xattr_set+0x963/0xe40 [cifs]
[  330.840367]  __vfs_setxattr+0x84/0xb0
[  330.842060]  __vfs_setxattr_noperm+0xe6/0x370
[  330.843848]  vfs_setxattr+0xc2/0xd0
[  330.845519]  setxattr+0x258/0x320
[  330.859211]  path_setxattr+0x15b/0x1b0
[  330.864392]  __x64_sys_setxattr+0xc0/0x160
[  330.866133]  do_syscall_64+0x14e/0x4b0
[  330.876631]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  330.878503] RIP: 0033:0x7ff2e507db0a
[  330.880151] Code: 48 8b 0d 89 93 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 bc 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 56 93 2c 00 f7 d8 64 89 01 48
[  330.885358] RSP: 002b:00007ffdc4903c18 EFLAGS: 00000246 ORIG_RAX: 00000000000000bc
[  330.887733] RAX: ffffffffffffffda RBX: 000055d1170de140 RCX: 00007ff2e507db0a
[  330.890067] RDX: 000055d1170de7d0 RSI: 000055d115b39184 RDI: 00007ffdc4904818
[  330.892410] RBP: 0000000000000001 R08: 0000000000000000 R09: 000055d1170de7e4
[  330.894785] R10: 00000000000002b8 R11: 0000000000000246 R12: 0000000000000007
[  330.897148] R13: 000055d1170de0c0 R14: 0000000000000008 R15: 000055d1170de550

[  330.901057] Allocated by task 1012:
[  330.902888]  kasan_kmalloc+0xa0/0xd0
[  330.904714]  kmem_cache_alloc+0xc8/0x1d0
[  330.906615]  mempool_alloc+0x11e/0x380
[  330.908496]  cifs_small_buf_get+0x35/0x60 [cifs]
[  330.910510]  smb2_plain_req_init+0x4a/0xd60 [cifs]
[  330.912551]  send_set_info+0x198/0xc20 [cifs]
[  330.914535]  SMB2_set_acl+0x76/0xa0 [cifs]
[  330.916465]  set_smb2_acl+0x7ac/0xf30 [cifs]
[  330.918453]  cifs_xattr_set+0x963/0xe40 [cifs]
[  330.920426]  __vfs_setxattr+0x84/0xb0
[  330.922284]  __vfs_setxattr_noperm+0xe6/0x370
[  330.924213]  vfs_setxattr+0xc2/0xd0
[  330.926008]  setxattr+0x258/0x320
[  330.927762]  path_setxattr+0x15b/0x1b0
[  330.929592]  __x64_sys_setxattr+0xc0/0x160
[  330.931459]  do_syscall_64+0x14e/0x4b0
[  330.933314]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

[  330.936843] Freed by task 0:
[  330.938588] (stack is not available)

[  330.941886] The buggy address belongs to the object at ffff88010d5e2800
 which belongs to the cache cifs_small_rq of size 448
[  330.946362] The buggy address is located 96 bytes inside of
 448-byte region [ffff88010d5e2800, ffff88010d5e29c0)
[  330.950722] The buggy address belongs to the page:
[  330.952789] page:ffffea0004357880 count:1 mapcount:0 mapping:ffff880108fdca80 index:0x0 compound_mapcount: 0
[  330.955665] flags: 0x17ffffc0008100(slab|head)
[  330.957760] raw: 0017ffffc0008100 dead000000000100 dead000000000200 ffff880108fdca80
[  330.960356] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
[  330.963005] page dumped because: kasan: bad access detected

[  330.967039] Memory state around the buggy address:
[  330.969255]  ffff88010d5e2880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  330.971833]  ffff88010d5e2900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  330.974397] >ffff88010d5e2980: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc
[  330.976956]                                            ^
[  330.979226]  ffff88010d5e2a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  330.981755]  ffff88010d5e2a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  330.984225] ==================================================================

Fix this by allocating a regular CIFS buffer in
smb2_plain_req_init() if the request command is SMB2_SET_INFO.

Reported-by: Jianhong Yin <jiyin@redhat.com>
Fixes: 366ed846df60 ("cifs: Use smb 2 - 3 and cifsacl mount options setacl function")
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-and-tested-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 810b85787c91..dbfb83ae6137 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -340,7 +340,10 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon,
 		return rc;
 
 	/* BB eventually switch this to SMB2 specific small buf size */
-	*request_buf = cifs_small_buf_get();
+	if (smb2_command == SMB2_SET_INFO)
+		*request_buf = cifs_buf_get();
+	else
+		*request_buf = cifs_small_buf_get();
 	if (*request_buf == NULL) {
 		/* BB should we add a retry in here if not a writepage? */
 		return -ENOMEM;
@@ -3720,7 +3723,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
 			    &rsp_iov);
-	cifs_small_buf_release(req);
+	cifs_buf_release(req);
 	rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
 
 	if (rc != 0) {

From 7ffbe65578b44fafdef577a360eb0583929f7c6e Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <paulo@paulo.ac>
Date: Thu, 5 Jul 2018 13:46:34 -0300
Subject: [PATCH 281/382] cifs: Fix infinite loop when using hard mount option

For every request we send, whether it is SMB1 or SMB2+, we attempt to
reconnect tcon (cifs_reconnect_tcon or smb2_reconnect) before carrying
out the request.

So, while server->tcpStatus != CifsNeedReconnect, we wait for the
reconnection to succeed on wait_event_interruptible_timeout(). If it
returns, that means that either the condition was evaluated to true, or
timeout elapsed, or it was interrupted by a signal.

Since we're not handling the case where the process woke up due to a
received signal (-ERESTARTSYS), the next call to
wait_event_interruptible_timeout() will _always_ fail and we end up
looping forever inside either cifs_reconnect_tcon() or smb2_reconnect().

Here's an example of how to trigger that:

$ mount.cifs //foo/share /mnt/test -o
username=foo,password=foo,vers=1.0,hard

(break connection to server before executing bellow cmd)
$ stat -f /mnt/test & sleep 140
[1] 2511

$ ps -aux -q 2511
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root      2511  0.0  0.0  12892  1008 pts/0    S    12:24   0:00 stat -f
/mnt/test

$ kill -9 2511

(wait for a while; process is stuck in the kernel)
$ ps -aux -q 2511
USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
root      2511 83.2  0.0  12892  1008 pts/0    R    12:24  30:01 stat -f
/mnt/test

By using 'hard' mount point means that cifs.ko will keep retrying
indefinitely, however we must allow the process to be killed otherwise
it would hang the system.

Signed-off-by: Paulo Alcantara <palcantara@suse.de>
Cc: stable@vger.kernel.org
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifssmb.c | 10 ++++++++--
 fs/cifs/smb2pdu.c | 18 ++++++++++++------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index d352da325de3..93408eab92e7 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -157,8 +157,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command)
 	 * greater than cifs socket timeout which is 7 seconds
 	 */
 	while (server->tcpStatus == CifsNeedReconnect) {
-		wait_event_interruptible_timeout(server->response_q,
-			(server->tcpStatus != CifsNeedReconnect), 10 * HZ);
+		rc = wait_event_interruptible_timeout(server->response_q,
+						      (server->tcpStatus != CifsNeedReconnect),
+						      10 * HZ);
+		if (rc < 0) {
+			cifs_dbg(FYI, "%s: aborting reconnect due to a received"
+				 " signal by the process\n", __func__);
+			return -ERESTARTSYS;
+		}
 
 		/* are we still trying to reconnect? */
 		if (server->tcpStatus != CifsNeedReconnect)
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index dbfb83ae6137..e31375ecaa6f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -155,7 +155,7 @@ smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd,
 static int
 smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 {
-	int rc = 0;
+	int rc;
 	struct nls_table *nls_codepage;
 	struct cifs_ses *ses;
 	struct TCP_Server_Info *server;
@@ -166,10 +166,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 	 * for those three - in the calling routine.
 	 */
 	if (tcon == NULL)
-		return rc;
+		return 0;
 
 	if (smb2_command == SMB2_TREE_CONNECT)
-		return rc;
+		return 0;
 
 	if (tcon->tidStatus == CifsExiting) {
 		/*
@@ -212,8 +212,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 			return -EAGAIN;
 		}
 
-		wait_event_interruptible_timeout(server->response_q,
-			(server->tcpStatus != CifsNeedReconnect), 10 * HZ);
+		rc = wait_event_interruptible_timeout(server->response_q,
+						      (server->tcpStatus != CifsNeedReconnect),
+						      10 * HZ);
+		if (rc < 0) {
+			cifs_dbg(FYI, "%s: aborting reconnect due to a received"
+				 " signal by the process\n", __func__);
+			return -ERESTARTSYS;
+		}
 
 		/* are we still trying to reconnect? */
 		if (server->tcpStatus != CifsNeedReconnect)
@@ -231,7 +237,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
 	}
 
 	if (!tcon->ses->need_reconnect && !tcon->need_reconnect)
-		return rc;
+		return 0;
 
 	nls_codepage = load_nls_default();
 

From 729c0c9dd55204f0c9a823ac8a7bfa83d36c7e78 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 5 Jul 2018 15:10:02 +0200
Subject: [PATCH 282/382] cifs: Fix stack out-of-bounds in
 smb{2,3}_create_lease_buf()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

smb{2,3}_create_lease_buf() store a lease key in the lease
context for later usage on a lease break.

In most paths, the key is currently sourced from data that
happens to be on the stack near local variables for oplock in
SMB2_open() callers, e.g. from open_shroot(), whereas
smb2_open_file() properly allocates space on its stack for it.

The address of those local variables holding the oplock is then
passed to create_lease_buf handlers via SMB2_open(), and 16
bytes near oplock are used. This causes a stack out-of-bounds
access as reported by KASAN on SMB2.1 and SMB3 mounts (first
out-of-bounds access is shown here):

[  111.528823] BUG: KASAN: stack-out-of-bounds in smb3_create_lease_buf+0x399/0x3b0 [cifs]
[  111.530815] Read of size 8 at addr ffff88010829f249 by task mount.cifs/985
[  111.532838] CPU: 3 PID: 985 Comm: mount.cifs Not tainted 4.18.0-rc3+ #91
[  111.534656] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
[  111.536838] Call Trace:
[  111.537528]  dump_stack+0xc2/0x16b
[  111.540890]  print_address_description+0x6a/0x270
[  111.542185]  kasan_report+0x258/0x380
[  111.544701]  smb3_create_lease_buf+0x399/0x3b0 [cifs]
[  111.546134]  SMB2_open+0x1ef8/0x4b70 [cifs]
[  111.575883]  open_shroot+0x339/0x550 [cifs]
[  111.591969]  smb3_qfs_tcon+0x32c/0x1e60 [cifs]
[  111.617405]  cifs_mount+0x4f3/0x2fc0 [cifs]
[  111.674332]  cifs_smb3_do_mount+0x263/0xf10 [cifs]
[  111.677915]  mount_fs+0x55/0x2b0
[  111.679504]  vfs_kern_mount.part.22+0xaa/0x430
[  111.684511]  do_mount+0xc40/0x2660
[  111.698301]  ksys_mount+0x80/0xd0
[  111.701541]  do_syscall_64+0x14e/0x4b0
[  111.711807]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  111.713665] RIP: 0033:0x7f372385b5fa
[  111.715311] Code: 48 8b 0d 99 78 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 66 78 2c 00 f7 d8 64 89 01 48
[  111.720330] RSP: 002b:00007ffff27049d8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5
[  111.722601] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f372385b5fa
[  111.724842] RDX: 000055c2ecdc73b2 RSI: 000055c2ecdc73f9 RDI: 00007ffff270580f
[  111.727083] RBP: 00007ffff2705804 R08: 000055c2ee976060 R09: 0000000000001000
[  111.729319] R10: 0000000000000000 R11: 0000000000000206 R12: 00007f3723f4d000
[  111.731615] R13: 000055c2ee976060 R14: 00007f3723f4f90f R15: 0000000000000000

[  111.735448] The buggy address belongs to the page:
[  111.737420] page:ffffea000420a7c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[  111.739890] flags: 0x17ffffc0000000()
[  111.741750] raw: 0017ffffc0000000 0000000000000000 dead000000000200 0000000000000000
[  111.744216] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[  111.746679] page dumped because: kasan: bad access detected

[  111.750482] Memory state around the buggy address:
[  111.752562]  ffff88010829f100: 00 f2 f2 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 00
[  111.754991]  ffff88010829f180: 00 00 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00
[  111.757401] >ffff88010829f200: 00 00 00 00 00 f1 f1 f1 f1 01 f2 f2 f2 f2 f2 f2
[  111.759801]                                               ^
[  111.762034]  ffff88010829f280: f2 02 f2 f2 f2 f2 f2 f2 f2 00 00 00 00 00 00 00
[  111.764486]  ffff88010829f300: f2 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  111.766913] ==================================================================

Lease keys are however already generated and stored in fid data
on open and create paths: pass them down to the lease context
creation handlers and use them.

Suggested-by: Aurélien Aptel <aaptel@suse.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Fixes: b8c32dbb0deb ("CIFS: Request SMB2.1 leases")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h |  2 +-
 fs/cifs/smb2file.c | 11 ++++-------
 fs/cifs/smb2ops.c  |  9 +++------
 fs/cifs/smb2pdu.c  |  7 ++++---
 fs/cifs/smb2pdu.h  |  6 ++----
 5 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a2962fd41c6f..c923c7854027 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -423,7 +423,7 @@ struct smb_version_operations {
 	void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int,
 				 bool *);
 	/* create lease context buffer for CREATE request */
-	char * (*create_lease_buf)(u8 *, u8);
+	char * (*create_lease_buf)(u8 *lease_key, u8 oplock);
 	/* parse lease context buffer and return oplock/epoch info */
 	__u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey);
 	ssize_t (*copychunk_range)(const unsigned int,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 788412675723..4ed10dd086e6 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -41,7 +41,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
 	int rc;
 	__le16 *smb2_path;
 	struct smb2_file_all_info *smb2_data = NULL;
-	__u8 smb2_oplock[17];
+	__u8 smb2_oplock;
 	struct cifs_fid *fid = oparms->fid;
 	struct network_resiliency_req nr_ioctl_req;
 
@@ -59,12 +59,9 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
 	}
 
 	oparms->desired_access |= FILE_READ_ATTRIBUTES;
-	*smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH;
+	smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH;
 
-	if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
-		memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE);
-
-	rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL,
+	rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL,
 		       NULL);
 	if (rc)
 		goto out;
@@ -101,7 +98,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
 		move_smb2_info_to_cifs(buf, smb2_data);
 	}
 
-	*oplock = *smb2_oplock;
+	*oplock = smb2_oplock;
 out:
 	kfree(smb2_data);
 	kfree(smb2_path);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 1d5985bd760b..ea92a38b2f08 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2222,8 +2222,7 @@ smb2_create_lease_buf(u8 *lease_key, u8 oplock)
 	if (!buf)
 		return NULL;
 
-	buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
-	buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
+	memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE);
 	buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
 
 	buf->ccontext.DataOffset = cpu_to_le16(offsetof
@@ -2249,8 +2248,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock)
 	if (!buf)
 		return NULL;
 
-	buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
-	buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
+	memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE);
 	buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
 
 	buf->ccontext.DataOffset = cpu_to_le16(offsetof
@@ -2287,8 +2285,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
 	if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
 		return SMB2_OPLOCK_LEVEL_NOCHANGE;
 	if (lease_key)
-		memcpy(lease_key, &lc->lcontext.LeaseKeyLow,
-		       SMB2_LEASE_KEY_SIZE);
+		memcpy(lease_key, &lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
 	return le32_to_cpu(lc->lcontext.LeaseState);
 }
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index e31375ecaa6f..3c92678cb45b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1716,12 +1716,12 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
 
 static int
 add_lease_context(struct TCP_Server_Info *server, struct kvec *iov,
-		  unsigned int *num_iovec, __u8 *oplock)
+		  unsigned int *num_iovec, u8 *lease_key, __u8 *oplock)
 {
 	struct smb2_create_req *req = iov[0].iov_base;
 	unsigned int num = *num_iovec;
 
-	iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock);
+	iov[num].iov_base = server->ops->create_lease_buf(lease_key, *oplock);
 	if (iov[num].iov_base == NULL)
 		return -ENOMEM;
 	iov[num].iov_len = server->vals->create_lease_size;
@@ -2181,7 +2181,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 	    *oplock == SMB2_OPLOCK_LEVEL_NONE)
 		req->RequestedOplockLevel = *oplock;
 	else {
-		rc = add_lease_context(server, iov, &n_iov, oplock);
+		rc = add_lease_context(server, iov, &n_iov,
+				       oparms->fid->lease_key, oplock);
 		if (rc) {
 			cifs_small_buf_release(req);
 			kfree(copy_path);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 824dddeee3f2..a671adcc44a6 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -678,16 +678,14 @@ struct create_context {
 #define SMB2_LEASE_KEY_SIZE 16
 
 struct lease_context {
-	__le64 LeaseKeyLow;
-	__le64 LeaseKeyHigh;
+	u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
 	__le32 LeaseState;
 	__le32 LeaseFlags;
 	__le64 LeaseDuration;
 } __packed;
 
 struct lease_context_v2 {
-	__le64 LeaseKeyLow;
-	__le64 LeaseKeyHigh;
+	u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
 	__le32 LeaseState;
 	__le32 LeaseFlags;
 	__le64 LeaseDuration;

From 7ec916f82c48dcfc115eee2e3e0e6d400e310fc5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 5 Jul 2018 13:29:55 -0600
Subject: [PATCH 283/382] Revert "iommu/intel-iommu: Enable
 CONFIG_DMA_DIRECT_OPS=y and clean up intel_{alloc,free}_coherent()"

This commit may cause a less than required dma mask to be used for
some allocations, which apparently leads to module load failures for
iwlwifi sometimes.

This reverts commit d657c5c73ca987214a6f9436e435b34fc60f332a.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Fabio Coatti <fabio.coatti@gmail.com>
Tested-by: Fabio Coatti <fabio.coatti@gmail.com>
---
 drivers/iommu/Kconfig       |  1 -
 drivers/iommu/intel-iommu.c | 62 +++++++++++++++++++++++++++----------
 2 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index e055d228bfb9..689ffe538370 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -142,7 +142,6 @@ config DMAR_TABLE
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
 	depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
-	select DMA_DIRECT_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
 	select NEED_DMA_MAP_STATE
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 14e4b3722428..b344a883f116 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -31,7 +31,6 @@
 #include <linux/pci.h>
 #include <linux/dmar.h>
 #include <linux/dma-mapping.h>
-#include <linux/dma-direct.h>
 #include <linux/mempool.h>
 #include <linux/memory.h>
 #include <linux/cpu.h>
@@ -3713,30 +3712,61 @@ static void *intel_alloc_coherent(struct device *dev, size_t size,
 				  dma_addr_t *dma_handle, gfp_t flags,
 				  unsigned long attrs)
 {
-	void *vaddr;
+	struct page *page = NULL;
+	int order;
 
-	vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
-	if (iommu_no_mapping(dev) || !vaddr)
-		return vaddr;
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
 
-	*dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
-			PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
-			dev->coherent_dma_mask);
-	if (!*dma_handle)
-		goto out_free_pages;
-	return vaddr;
+	if (!iommu_no_mapping(dev))
+		flags &= ~(GFP_DMA | GFP_DMA32);
+	else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
+		if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
+			flags |= GFP_DMA;
+		else
+			flags |= GFP_DMA32;
+	}
+
+	if (gfpflags_allow_blocking(flags)) {
+		unsigned int count = size >> PAGE_SHIFT;
+
+		page = dma_alloc_from_contiguous(dev, count, order, flags);
+		if (page && iommu_no_mapping(dev) &&
+		    page_to_phys(page) + size > dev->coherent_dma_mask) {
+			dma_release_from_contiguous(dev, page, count);
+			page = NULL;
+		}
+	}
+
+	if (!page)
+		page = alloc_pages(flags, order);
+	if (!page)
+		return NULL;
+	memset(page_address(page), 0, size);
+
+	*dma_handle = __intel_map_single(dev, page_to_phys(page), size,
+					 DMA_BIDIRECTIONAL,
+					 dev->coherent_dma_mask);
+	if (*dma_handle)
+		return page_address(page);
+	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+		__free_pages(page, order);
 
-out_free_pages:
-	dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
 	return NULL;
 }
 
 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
 				dma_addr_t dma_handle, unsigned long attrs)
 {
-	if (!iommu_no_mapping(dev))
-		intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
-	dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+	int order;
+	struct page *page = virt_to_page(vaddr);
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	intel_unmap(dev, dma_handle, size);
+	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
+		__free_pages(page, order);
 }
 
 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,

From 0fa3ecd87848c9c93c2c828ef4c3a8ca36ce46c7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 3 Jul 2018 17:10:19 -0700
Subject: [PATCH 284/382] Fix up non-directory creation in SGID directories

sgid directories have special semantics, making newly created files in
the directory belong to the group of the directory, and newly created
subdirectories will also become sgid.  This is historically used for
group-shared directories.

But group directories writable by non-group members should not imply
that such non-group members can magically join the group, so make sure
to clear the sgid bit on non-directories for non-members (but remember
that sgid without group execute means "mandatory locking", just to
confuse things even more).

Reported-by: Jann Horn <jannh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..8c86c809ca17 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1999,8 +1999,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
 	inode->i_uid = current_fsuid();
 	if (dir && dir->i_mode & S_ISGID) {
 		inode->i_gid = dir->i_gid;
+
+		/* Directories are special, and always inherit S_ISGID */
 		if (S_ISDIR(mode))
 			mode |= S_ISGID;
+		else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
+			 !in_group_p(inode->i_gid) &&
+			 !capable_wrt_inode_uidgid(dir, CAP_FSETID))
+			mode &= ~S_ISGID;
 	} else
 		inode->i_gid = current_fsgid();
 	inode->i_mode = mode;

From 523402fa9101090c91d2033b7ebdfdcf65880488 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 5 Jul 2018 14:37:52 -0700
Subject: [PATCH 285/382] MIPS: Fix ioremap() RAM check

We currently attempt to check whether a physical address range provided
to __ioremap() may be in use by the page allocator by examining the
value of PageReserved for each page in the region - lowmem pages not
marked reserved are presumed to be in use by the page allocator, and
requests to ioremap them fail.

The way we check this has been broken since commit 92923ca3aace ("mm:
meminit: only set page reserved in the memblock region"), because
memblock will typically not have any knowledge of non-RAM pages and
therefore those pages will not have the PageReserved flag set. Thus when
we attempt to ioremap a region outside of RAM we incorrectly fail
believing that the region is RAM that may be in use.

In most cases ioremap() on MIPS will take a fast-path to use the
unmapped kseg1 or xkphys virtual address spaces and never hit this path,
so the only way to hit it is for a MIPS32 system to attempt to ioremap()
an address range in lowmem with flags other than _CACHE_UNCACHED.
Perhaps the most straightforward way to do this is using
ioremap_uncached_accelerated(), which is how the problem was discovered.

Fix this by making use of walk_system_ram_range() to test the address
range provided to __ioremap() against only RAM pages, rather than all
lowmem pages. This means that if we have a lowmem I/O region, which is
very common for MIPS systems, we're free to ioremap() address ranges
within it. A nice bonus is that the test is no longer limited to lowmem.

The approach here matches the way x86 performed the same test after
commit c81c8a1eeede ("x86, ioremap: Speed up check for RAM pages") until
x86 moved towards a slightly more complicated check using walk_mem_res()
for unrelated reasons with commit 0e4c12b45aa8 ("x86/mm, resource: Use
PAGE_KERNEL protection for ioremap of memory pages").

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reported-by: Serge Semin <fancer.lancer@gmail.com>
Tested-by: Serge Semin <fancer.lancer@gmail.com>
Fixes: 92923ca3aace ("mm: meminit: only set page reserved in the memblock region")
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v4.2+
Patchwork: https://patchwork.linux-mips.org/patch/19786/
---
 arch/mips/mm/ioremap.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 1986e09fb457..1601d90b087b 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -9,6 +9,7 @@
 #include <linux/export.h>
 #include <asm/addrspace.h>
 #include <asm/byteorder.h>
+#include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -98,6 +99,20 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
 	return error;
 }
 
+static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages,
+			       void *arg)
+{
+	unsigned long i;
+
+	for (i = 0; i < nr_pages; i++) {
+		if (pfn_valid(start_pfn + i) &&
+		    !PageReserved(pfn_to_page(start_pfn + i)))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * Generic mapping function (not visible outside):
  */
@@ -116,8 +131,8 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr,
 
 void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags)
 {
+	unsigned long offset, pfn, last_pfn;
 	struct vm_struct * area;
-	unsigned long offset;
 	phys_addr_t last_addr;
 	void * addr;
 
@@ -137,18 +152,16 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long
 		return (void __iomem *) CKSEG1ADDR(phys_addr);
 
 	/*
-	 * Don't allow anybody to remap normal RAM that we're using..
+	 * Don't allow anybody to remap RAM that may be allocated by the page
+	 * allocator, since that could lead to races & data clobbering.
 	 */
-	if (phys_addr < virt_to_phys(high_memory)) {
-		char *t_addr, *t_end;
-		struct page *page;
-
-		t_addr = __va(phys_addr);
-		t_end = t_addr + (size - 1);
-
-		for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
-			if(!PageReserved(page))
-				return NULL;
+	pfn = PFN_DOWN(phys_addr);
+	last_pfn = PFN_DOWN(last_addr);
+	if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL,
+				  __ioremap_check_ram) == 1) {
+		WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n",
+			  &phys_addr, &last_addr);
+		return NULL;
 	}
 
 	/*

From 33cc2c9667561b224215e6dfb5bf98e8fa17914e Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 5 Jul 2018 14:58:49 -0700
Subject: [PATCH 286/382] acpi, nfit: Fix scrub idle detection

The notification of scrub completion happens within the scrub workqueue.
That can clearly race someone running scrub_show() and work_busy()
before the workqueue has a chance to flush the recently completed work.
Add a flag to reliably indicate the idle vs busy state. Without this
change applications using poll(2) to wait for scrub-completion may
falsely wakeup and read ARS as being busy even though the thread is
going idle and then hang indefinitely.

Fixes: bc6ba8085842 ("nfit, address-range-scrub: rework and simplify ARS...")
Cc: <stable@vger.kernel.org>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Vishal Verma <vishal.l.verma@intel.com>
Reported-by: Lukasz Dorau <lukasz.dorau@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 44 ++++++++++++++++++++++++++++++----------
 drivers/acpi/nfit/nfit.h |  1 +
 2 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 471402cee1f1..b8040fed2a69 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1275,7 +1275,7 @@ static ssize_t scrub_show(struct device *dev,
 
 		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				work_busy(&acpi_desc->dwork.work)
+				acpi_desc->scrub_busy
 				&& !acpi_desc->cancel ? "+\n" : "\n");
 		mutex_unlock(&acpi_desc->init_mutex);
 	}
@@ -2941,6 +2941,32 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
 	return 0;
 }
 
+static void __sched_ars(struct acpi_nfit_desc *acpi_desc, unsigned int tmo)
+{
+	lockdep_assert_held(&acpi_desc->init_mutex);
+
+	acpi_desc->scrub_busy = 1;
+	/* note this should only be set from within the workqueue */
+	if (tmo)
+		acpi_desc->scrub_tmo = tmo;
+	queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+}
+
+static void sched_ars(struct acpi_nfit_desc *acpi_desc)
+{
+	__sched_ars(acpi_desc, 0);
+}
+
+static void notify_ars_done(struct acpi_nfit_desc *acpi_desc)
+{
+	lockdep_assert_held(&acpi_desc->init_mutex);
+
+	acpi_desc->scrub_busy = 0;
+	acpi_desc->scrub_count++;
+	if (acpi_desc->scrub_count_state)
+		sysfs_notify_dirent(acpi_desc->scrub_count_state);
+}
+
 static void acpi_nfit_scrub(struct work_struct *work)
 {
 	struct acpi_nfit_desc *acpi_desc;
@@ -2951,14 +2977,10 @@ static void acpi_nfit_scrub(struct work_struct *work)
 	mutex_lock(&acpi_desc->init_mutex);
 	query_rc = acpi_nfit_query_poison(acpi_desc);
 	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
-	if (tmo) {
-		queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
-		acpi_desc->scrub_tmo = tmo;
-	} else {
-		acpi_desc->scrub_count++;
-		if (acpi_desc->scrub_count_state)
-			sysfs_notify_dirent(acpi_desc->scrub_count_state);
-	}
+	if (tmo)
+		__sched_ars(acpi_desc, tmo);
+	else
+		notify_ars_done(acpi_desc);
 	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
@@ -3039,7 +3061,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 			break;
 		}
 
-	queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+	sched_ars(acpi_desc);
 	return 0;
 }
 
@@ -3241,7 +3263,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 		}
 	}
 	if (scheduled) {
-		queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+		sched_ars(acpi_desc);
 		dev_dbg(dev, "ars_scan triggered\n");
 	}
 	mutex_unlock(&acpi_desc->init_mutex);
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 7d15856a739f..a97ff42fe311 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -203,6 +203,7 @@ struct acpi_nfit_desc {
 	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
+	unsigned int scrub_busy:1;
 	unsigned int cancel:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;

From 01b3cdfca263a17554f7b249d20a247b2a751521 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 4 Jul 2018 17:02:16 +0200
Subject: [PATCH 287/382] USB: serial: keyspan_pda: fix modem-status error
 handling

Fix broken modem-status error handling which could lead to bits of slab
data leaking to user space.

Fixes: 3b36a8fd6777 ("usb: fix uninitialized variable warning in keyspan_pda")
Cc: stable <stable@vger.kernel.org>     # 2.6.27
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/keyspan_pda.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index 5169624d8b11..38d43c4b7ce5 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -369,8 +369,10 @@ static int keyspan_pda_get_modem_info(struct usb_serial *serial,
 			     3, /* get pins */
 			     USB_TYPE_VENDOR|USB_RECIP_INTERFACE|USB_DIR_IN,
 			     0, 0, data, 1, 2000);
-	if (rc >= 0)
+	if (rc == 1)
 		*value = *data;
+	else if (rc >= 0)
+		rc = -EIO;
 
 	kfree(data);
 	return rc;

From 794744abfffef8b1f3c0c8a4896177d6d13d653d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 4 Jul 2018 17:02:17 +0200
Subject: [PATCH 288/382] USB: serial: mos7840: fix status-register error
 handling

Add missing transfer-length sanity check to the status-register
completion handler to avoid leaking bits of uninitialised slab data to
user space.

Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver")
Cc: stable <stable@vger.kernel.org>     # 2.6.19
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/mos7840.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index fdceb46d9fc6..b580b4c7fa48 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -468,6 +468,9 @@ static void mos7840_control_callback(struct urb *urb)
 	}
 
 	dev_dbg(dev, "%s urb buffer size is %d\n", __func__, urb->actual_length);
+	if (urb->actual_length < 1)
+		goto out;
+
 	dev_dbg(dev, "%s mos7840_port->MsrLsr is %d port %d\n", __func__,
 		mos7840_port->MsrLsr, mos7840_port->port_num);
 	data = urb->transfer_buffer;

From 1268ed0c474a5c8f165ef386f3310521b5e00e27 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Tue, 3 Jul 2018 16:01:55 -0700
Subject: [PATCH 289/382] x86/hyper-v: Fix the circular dependency in IPI
 enlightenment

The IPI hypercalls depend on being able to map the Linux notion of CPU ID
to the hypervisor's notion of the CPU ID. The array hv_vp_index[] provides
this mapping. Code for populating this array depends on the IPI functionality.
Break this circular dependency.

[ tglx: Use a proper define instead of '-1' with a u32 variable as pointed
  	out by Vitaly ]

Fixes: 68bb7bfb7985 ("X86/Hyper-V: Enable IPI enlightenments")
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Cc: gregkh@linuxfoundation.org
Cc: devel@linuxdriverproject.org
Cc: olaf@aepfle.de
Cc: apw@canonical.com
Cc: jasowang@redhat.com
Cc: hpa@zytor.com
Cc: sthemmin@microsoft.com
Cc: Michael.H.Kelley@microsoft.com
Cc: vkuznets@redhat.com
Link: https://lkml.kernel.org/r/20180703230155.15160-1-kys@linuxonhyperv.com
---
 arch/x86/hyperv/hv_apic.c       | 5 +++++
 arch/x86/hyperv/hv_init.c       | 5 ++++-
 arch/x86/include/asm/mshyperv.h | 5 ++++-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index f68855499391..402338365651 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -114,6 +114,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
 		ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
 		nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
 	}
+	if (nr_bank < 0)
+		goto ipi_mask_ex_done;
 	if (!nr_bank)
 		ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
 
@@ -158,6 +160,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
 
 	for_each_cpu(cur_cpu, mask) {
 		vcpu = hv_cpu_number_to_vp_number(cur_cpu);
+		if (vcpu == VP_INVAL)
+			goto ipi_mask_done;
+
 		/*
 		 * This particular version of the IPI hypercall can
 		 * only target upto 64 CPUs.
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 4c431e1c1eff..1ff420217298 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -265,7 +265,7 @@ void __init hyperv_init(void)
 {
 	u64 guest_id, required_msrs;
 	union hv_x64_msr_hypercall_contents hypercall_msr;
-	int cpuhp;
+	int cpuhp, i;
 
 	if (x86_hyper_type != X86_HYPER_MS_HYPERV)
 		return;
@@ -293,6 +293,9 @@ void __init hyperv_init(void)
 	if (!hv_vp_index)
 		return;
 
+	for (i = 0; i < num_possible_cpus(); i++)
+		hv_vp_index[i] = VP_INVAL;
+
 	hv_vp_assist_page = kcalloc(num_possible_cpus(),
 				    sizeof(*hv_vp_assist_page), GFP_KERNEL);
 	if (!hv_vp_assist_page) {
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3cd14311edfa..5a7375ed5f7c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -9,6 +9,8 @@
 #include <asm/hyperv-tlfs.h>
 #include <asm/nospec-branch.h>
 
+#define VP_INVAL	U32_MAX
+
 struct ms_hyperv_info {
 	u32 features;
 	u32 misc_features;
@@ -20,7 +22,6 @@ struct ms_hyperv_info {
 
 extern struct ms_hyperv_info ms_hyperv;
 
-
 /*
  * Generate the guest ID.
  */
@@ -281,6 +282,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
 	 */
 	for_each_cpu(cpu, cpus) {
 		vcpu = hv_cpu_number_to_vp_number(cpu);
+		if (vcpu == VP_INVAL)
+			return -1;
 		vcpu_bank = vcpu / 64;
 		vcpu_offset = vcpu % 64;
 		__set_bit(vcpu_offset, (unsigned long *)

From c6b17f1020d956f4113d478cae6171b9093817ba Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Fri, 6 Jul 2018 15:14:11 +0800
Subject: [PATCH 290/382] ALSA: hda/realtek - two more lenovo models need fixup
 of MIC_LOCATION

We have two new lenovo desktop models which need to apply the fixup of
ALC294_FIXUP_LENOVO_MIC_LOCATION, and they have the same pin cfg as
the machine with subsystem id:0x17aa3136, now use the pincfg table
to apply the fixup for them.

Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5ad6c7e5f92e..7496be4491b1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6612,7 +6612,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
-	SND_PCI_QUIRK(0x17aa, 0x3136, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -6796,6 +6795,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x1a, 0x02a11040},
 		{0x1b, 0x01014020},
 		{0x21, 0x0221101f}),
+	SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION,
+		{0x14, 0x90170110},
+		{0x19, 0x02a11020},
+		{0x1a, 0x02a11030},
+		{0x21, 0x0221101f}),
 	SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x12, 0x90a60140},
 		{0x14, 0x90170110},

From 09b1565324cba029dd0079b179e329813a1520c6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 4 Jul 2018 12:57:54 -0700
Subject: [PATCH 291/382] kbuild: update ARCH alias info for sh

In Kbuild documentation, add alias for 64-bit sh ARCH ("sh64")
to the list of ARCH aliases.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/kbuild.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index 6c9c69ec3986..ec5890b51bd6 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -88,6 +88,7 @@ In most cases the name of the architecture is the same as the
 directory name found in the arch/ directory.
 But some architectures such as x86 and sparc have aliases.
 x86: i386 for 32 bit, x86_64 for 64 bit
+sh: sh for 32 bit, sh64 for 64 bit
 sparc: sparc for 32 bit, sparc64 for 64 bit
 
 CROSS_COMPILE

From 5ba800962a80d4158b73fb91a7779df7b770c750 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 5 Jul 2018 12:12:03 -0700
Subject: [PATCH 292/382] kbuild: update ARCH alias info for sparc

The supported alias for building sparc 32-bit is "sparc32",
not "sparc", so update the alias documentation for that.
Just using "sparc" produces a 64-bit config file.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/kbuild.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index ec5890b51bd6..d605d506b334 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -89,7 +89,7 @@ directory name found in the arch/ directory.
 But some architectures such as x86 and sparc have aliases.
 x86: i386 for 32 bit, x86_64 for 64 bit
 sh: sh for 32 bit, sh64 for 64 bit
-sparc: sparc for 32 bit, sparc64 for 64 bit
+sparc: sparc32 for 32 bit, sparc64 for 64 bit
 
 CROSS_COMPILE
 --------------------------------------------------

From 3f9cdee5929b7d035e86302dcf08fbf3e80b0739 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 4 Jul 2018 12:59:16 -0700
Subject: [PATCH 293/382] kbuild: delete INSTALL_FW_PATH from kbuild
 documentation

Removed Kbuild documentation for INSTALL_FW_PATH.

The kbuild symbol INSTALL_FW_PATH was removed from Kbuild tools in
September 2017 (for 4.14) but the symbol was not deleted from
the kbuild documentation, so do that now.

Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: stable@vger.kernel.org # 4.14+
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/kbuild.txt | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index d605d506b334..1eb59cba242c 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -149,15 +149,6 @@ stripped after they are installed.  If INSTALL_MOD_STRIP is '1', then
 the default option --strip-debug will be used.  Otherwise,
 INSTALL_MOD_STRIP value will be used as the options to the strip command.
 
-INSTALL_FW_PATH
---------------------------------------------------
-INSTALL_FW_PATH specifies where to install the firmware blobs.
-The default value is:
-
-    $(INSTALL_MOD_PATH)/lib/firmware
-
-The value can be overridden in which case the default value is ignored.
-
 INSTALL_HDR_PATH
 --------------------------------------------------
 INSTALL_HDR_PATH specifies where to install user space headers when

From 452d4c8673112ee72de6042d5d95c761acc2d35a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 4 Jul 2018 15:49:56 -0700
Subject: [PATCH 294/382] kconfig: update user kconfig tools doc.

Update Documentation/kbuild/kconfig.txt, which mostly contains
user help for using the kernel config tools.

- Add mention of 'nconfig' embedded help text.
- Make the section on new config symbols readable.
- Correct how to find menuconfig search help.
- Add section on 'nconfig' usage.
- Mention that gconfig has multiple viewing modes/options.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/kconfig.txt | 51 +++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt
index 7233118f3a05..68c82914c0f3 100644
--- a/Documentation/kbuild/kconfig.txt
+++ b/Documentation/kbuild/kconfig.txt
@@ -2,9 +2,9 @@ This file contains some assistance for using "make *config".
 
 Use "make help" to list all of the possible configuration targets.
 
-The xconfig ('qconf') and menuconfig ('mconf') programs also
-have embedded help text.  Be sure to check it for navigation,
-search, and other general help text.
+The xconfig ('qconf'), menuconfig ('mconf'), and nconfig ('nconf')
+programs also have embedded help text.  Be sure to check that for
+navigation, search, and other general help text.
 
 ======================================================================
 General
@@ -17,13 +17,16 @@ this happens, using a previously working .config file and running
 for you, so you may find that you need to see what NEW kernel
 symbols have been introduced.
 
-To see a list of new config symbols when using "make oldconfig", use
+To see a list of new config symbols, use
 
 	cp user/some/old.config .config
 	make listnewconfig
 
 and the config program will list any new symbols, one per line.
 
+Alternatively, you can use the brute force method:
+
+	make oldconfig
 	scripts/diffconfig .config.old .config | less
 
 ______________________________________________________________________
@@ -160,7 +163,7 @@ Searching in menuconfig:
 		This lists all config symbols that contain "hotplug",
 		e.g., HOTPLUG_CPU, MEMORY_HOTPLUG.
 
-	For search help, enter / followed TAB-TAB-TAB (to highlight
+	For search help, enter / followed by TAB-TAB (to highlight
 	<Help>) and Enter.  This will tell you that you can also use
 	regular expressions (regexes) in the search string, so if you
 	are not interested in MEMORY_HOTPLUG, you could try
@@ -202,6 +205,39 @@ Example:
 	make MENUCONFIG_MODE=single_menu menuconfig
 
 
+======================================================================
+nconfig
+--------------------------------------------------
+
+nconfig is an alternate text-based configurator.  It lists function
+keys across the bottom of the terminal (window) that execute commands.
+You can also just use the corresponding numeric key to execute the
+commands unless you are in a data entry window.  E.g., instead of F6
+for Save, you can just press 6.
+
+Use F1 for Global help or F3 for the Short help menu.
+
+Searching in nconfig:
+
+	You can search either in the menu entry "prompt" strings
+	or in the configuration symbols.
+
+	Use / to begin a search through the menu entries.  This does
+	not support regular expressions.  Use <Down> or <Up> for
+	Next hit and Previous hit, respectively.  Use <Esc> to
+	terminate the search mode.
+
+	F8 (SymSearch) searches the configuration symbols for the
+	given string or regular expression (regex).
+
+NCONFIG_MODE
+--------------------------------------------------
+This mode shows all sub-menus in one large tree.
+
+Example:
+	make NCONFIG_MODE=single_menu nconfig
+
+
 ======================================================================
 xconfig
 --------------------------------------------------
@@ -230,8 +266,7 @@ gconfig
 
 Searching in gconfig:
 
-	None (gconfig isn't maintained as well as xconfig or menuconfig);
-	however, gconfig does have a few more viewing choices than
-	xconfig does.
+	There is no search command in gconfig.  However, gconfig does
+	have several different viewing choices, modes, and options.
 
 ###

From 00e0793f834cd233240b19532581e3205caaccd9 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 4 Jul 2018 19:47:39 -0700
Subject: [PATCH 295/382] kbuild: document the KBUILD_KCONFIG env. variable

Add usage info for the Kbuild environment variable KBUILD_KCONFIG.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Cao jin <caoj.fnst@cn.fujitsu.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/kbuild.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
index 1eb59cba242c..114c7ce7b58d 100644
--- a/Documentation/kbuild/kbuild.txt
+++ b/Documentation/kbuild/kbuild.txt
@@ -50,6 +50,11 @@ LDFLAGS_MODULE
 --------------------------------------------------
 Additional options used for $(LD) when linking modules.
 
+KBUILD_KCONFIG
+--------------------------------------------------
+Set the top-level Kconfig file to the value of this environment
+variable.  The default name is "Kconfig".
+
 KBUILD_VERBOSE
 --------------------------------------------------
 Set the kbuild verbosity. Can be assigned same values as "V=...".

From 48f6e3cf5bc6dd0ee00405342ff310c3b1fedb35 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 5 Jul 2018 11:48:21 +0900
Subject: [PATCH 296/382] kbuild: do not drop -I without parameter

The comment line for addtree says "skip if -I has no parameter".

What it actually does is "drop if -I has no parameter".  For example,
if you have the compiler flag '-I foo' (a space between), it will be
converted to 'foo'.  This completely changes the meaning.

What we want is, "do nothing" for -I without parameter so that
'-I foo' is kept as-is.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Kbuild.include | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index c8156d61678c..86321f06461e 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -214,7 +214,7 @@ hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
 # Prefix -I with $(srctree) if it is not an absolute path.
 # skip if -I has no parameter
 addtree = $(if $(patsubst -I%,%,$(1)), \
-$(if $(filter-out -I/% -I./% -I../%,$(1)),$(patsubst -I%,-I$(srctree)/%,$(1)),$(1)))
+$(if $(filter-out -I/% -I./% -I../%,$(1)),$(patsubst -I%,-I$(srctree)/%,$(1)),$(1)),$(1))
 
 # Find all -I options and call addtree
 flags = $(foreach o,$($(1)),$(if $(filter -I%,$(o)),$(call addtree,$(o)),$(o)))

From bd412d81b7ea4cfa265e3d2309a166181ec7bdab Mon Sep 17 00:00:00 2001
From: Ulf Magnusson <ulfalizer@gmail.com>
Date: Thu, 5 Jul 2018 12:33:07 +0900
Subject: [PATCH 297/382] kbuild: .PHONY is not a variable, but PHONY is

.PHONY is a target, not a variable.

Signed-off-by: Ulf Magnusson <ulfalizer@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c5ce55cbc543..5b26847909ec 100644
--- a/Makefile
+++ b/Makefile
@@ -1717,6 +1717,6 @@ endif	# skip-makefile
 PHONY += FORCE
 FORCE:
 
-# Declare the contents of the .PHONY variable as phony.  We keep that
+# Declare the contents of the PHONY variable as phony.  We keep that
 # information in a variable so we can use it in if_changed and friends.
 .PHONY: $(PHONY)

From 6916162c7308332d5a029521821f925466de311d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 5 Jul 2018 12:33:08 +0900
Subject: [PATCH 298/382] kbuild: remove duplicated comments about PHONY

The comment is the same as in the top-level Makefile.

Also, the comments contain typos:
  - the .PHONY variable  ->  the PHONY variable
  - se we can ...        ->  so we can ...

Instead of fixing the typos, just remove the duplicated comments.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.build      | 3 ---
 scripts/Makefile.clean      | 3 ---
 scripts/Makefile.modbuiltin | 4 ----
 scripts/Makefile.modinst    | 4 ----
 scripts/Makefile.modpost    | 4 ----
 scripts/Makefile.modsign    | 3 ---
 6 files changed, 21 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index e7889f486ca1..514ed63ff571 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -590,7 +590,4 @@ endif
 # We never want them to be removed automatically.
 .SECONDARY: $(targets)
 
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable se we can use it in if_changed and friends.
-
 .PHONY: $(PHONY)
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean
index 808d09f27ad4..17ef94c635cd 100644
--- a/scripts/Makefile.clean
+++ b/scripts/Makefile.clean
@@ -88,7 +88,4 @@ PHONY += $(subdir-ymn)
 $(subdir-ymn):
 	$(Q)$(MAKE) $(clean)=$@
 
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable se we can use it in if_changed and friends.
-
 .PHONY: $(PHONY)
diff --git a/scripts/Makefile.modbuiltin b/scripts/Makefile.modbuiltin
index a763b4775d06..40867a41615b 100644
--- a/scripts/Makefile.modbuiltin
+++ b/scripts/Makefile.modbuiltin
@@ -54,8 +54,4 @@ PHONY += $(subdir-ym)
 $(subdir-ym):
 	$(Q)$(MAKE) $(modbuiltin)=$@
 
-
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable se we can use it in if_changed and friends.
-
 .PHONY: $(PHONY)
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index 51ca0244fc8a..ff5ca9817a85 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -35,8 +35,4 @@ modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
 $(modules):
 	$(call cmd,modules_install,$(MODLIB)/$(modinst_dir))
 
-
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable so we can use it in if_changed and friends.
-
 .PHONY: $(PHONY)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index df4174405feb..dd92dbbbaa68 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -149,8 +149,4 @@ ifneq ($(cmd_files),)
   include $(cmd_files)
 endif
 
-
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable se we can use it in if_changed and friends.
-
 .PHONY: $(PHONY)
diff --git a/scripts/Makefile.modsign b/scripts/Makefile.modsign
index 171483bc0538..da56aa78d245 100644
--- a/scripts/Makefile.modsign
+++ b/scripts/Makefile.modsign
@@ -27,7 +27,4 @@ modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D))
 $(modules):
 	$(call cmd,sign_ko,$(MODLIB)/$(modinst_dir))
 
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable se we can use it in if_changed and friends.
-
 .PHONY: $(PHONY)

From c0addc9a5ba88350a3d9365a73290e34dc238b70 Mon Sep 17 00:00:00 2001
From: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Date: Thu, 5 Jul 2018 16:31:42 +0300
Subject: [PATCH 299/382] docs: kernel-parameters.txt: document xhci-hcd.quirks
 parameter

This parameter introduced several years ago in the XHCI host controller
driver was somehow left undocumented. Add a few lines in the kernel
parameters text.

Signed-off-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index efc7aa7a0670..533ff5c68970 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4846,3 +4846,8 @@
 	xirc2ps_cs=	[NET,PCMCIA]
 			Format:
 			<irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
+
+	xhci-hcd.quirks		[USB,KNL]
+			A hex value specifying bitmask with supplemental xhci
+			host controller quirks. Meaning of each bit can be
+			consulted in header drivers/usb/host/xhci.h.

From aaa8fee7dfc974d7f057bc9f1d8e305a114469ad Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 6 Jul 2018 15:58:10 +0200
Subject: [PATCH 300/382] usb/gadget: aspeed-vhub: add USB_LIBCOMPOSITE
 dependency

Without that option, we run into a link failure:

drivers/usb/gadget/udc/aspeed-vhub/hub.o: In function `ast_vhub_std_hub_request':
hub.c:(.text+0x5b0): undefined reference to `usb_gadget_get_string'

Fixes: 7ecca2a4080c ("usb/gadget: Add driver for Aspeed SoC virtual hub")
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/aspeed-vhub/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/udc/aspeed-vhub/Kconfig b/drivers/usb/gadget/udc/aspeed-vhub/Kconfig
index f0cdf89b8503..83ba8a2eb6af 100644
--- a/drivers/usb/gadget/udc/aspeed-vhub/Kconfig
+++ b/drivers/usb/gadget/udc/aspeed-vhub/Kconfig
@@ -2,6 +2,7 @@
 config USB_ASPEED_VHUB
 	tristate "Aspeed vHub UDC driver"
 	depends on ARCH_ASPEED || COMPILE_TEST
+	depends on USB_LIBCOMPOSITE
 	help
 	  USB peripheral controller for the Aspeed AST2500 family
 	  SoCs supporting the "vHub" functionality and USB2.0

From 313db3d6488bb03b61b99de9dbca061f1fd838e1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 4 Jul 2018 12:48:53 +0300
Subject: [PATCH 301/382] xhci: xhci-mem: off by one in
 xhci_stream_id_to_ring()

The > should be >= here so that we don't read one element beyond the end
of the ep->stream_info->stream_rings[] array.

Fixes: e9df17eb1408 ("USB: xhci: Correct assumptions about number of rings per endpoint.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 8a62eee9eee1..ef350c33dc4a 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -595,7 +595,7 @@ struct xhci_ring *xhci_stream_id_to_ring(
 	if (!ep->stream_info)
 		return NULL;
 
-	if (stream_id > ep->stream_info->num_streams)
+	if (stream_id >= ep->stream_info->num_streams)
 		return NULL;
 	return ep->stream_info->stream_rings[stream_id];
 }

From bba57eddadda936c94b5dccf73787cb9e159d0a5 Mon Sep 17 00:00:00 2001
From: Nico Sneck <snecknico@gmail.com>
Date: Mon, 2 Jul 2018 19:26:07 +0300
Subject: [PATCH 302/382] usb: quirks: add delay quirks for Corsair Strafe

Corsair Strafe appears to suffer from the same issues
as the Corsair Strafe RGB.
Apply the same quirks (control message delay and init delay)
that the RGB version has to 1b1c:1b15.

With these quirks in place the keyboard works correctly upon
booting the system, and no longer requires reattaching the device.

Signed-off-by: Nico Sneck <snecknico@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index c55def2f1320..097057d2eacf 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -378,6 +378,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Corsair K70 RGB */
 	{ USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
 
+	/* Corsair Strafe */
+	{ USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT |
+	  USB_QUIRK_DELAY_CTRL_MSG },
+
 	/* Corsair Strafe RGB */
 	{ USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT |
 	  USB_QUIRK_DELAY_CTRL_MSG },

From 568cc2f07c8ea5f71a0486464bd9703e4671045f Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 3 Jul 2018 06:24:20 +0300
Subject: [PATCH 303/382] ARM: dts: armada-38x: use the new thermal binding

Commit 2f28e4c24b10e (thermal: armada: Clarify control registers
accesses) introduced the new thermal binding. The new binding extends
the second registers field size to 8. Switch to the new binding to fix
thermal reading values. Without this change the fix for errata #132698
introduced in commit 8c0b888f661 (thermal: armada: Change sensors trim
default value) has no effect.

Cc: stable@vger.kernel.org # v4.16+
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 arch/arm/boot/dts/armada-38x.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index 18edc9bc7927..929459c42760 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -547,7 +547,7 @@ coredivclk: clock@e4250 {
 
 			thermal: thermal@e8078 {
 				compatible = "marvell,armada380-thermal";
-				reg = <0xe4078 0x4>, <0xe4074 0x4>;
+				reg = <0xe4078 0x4>, <0xe4070 0x8>;
 				status = "okay";
 			};
 

From f1e255d60ae66a9f672ff9a207ee6cd8e33d2679 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 6 Jul 2018 17:12:56 +0200
Subject: [PATCH 304/382] USB: yurex: fix out-of-bounds uaccess in read handler

In general, accessing userspace memory beyond the length of the supplied
buffer in VFS read/write handlers can lead to both kernel memory corruption
(via kernel_read()/kernel_write(), which can e.g. be triggered via
sys_splice()) and privilege escalation inside userspace.

Fix it by using simple_read_from_buffer() instead of custom logic.

Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX")
Signed-off-by: Jann Horn <jannh@google.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/yurex.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 8abb6cbbd98a..3be40eaa1ac9 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -396,8 +396,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count,
 			  loff_t *ppos)
 {
 	struct usb_yurex *dev;
-	int retval = 0;
-	int bytes_read = 0;
+	int len = 0;
 	char in_buffer[20];
 	unsigned long flags;
 
@@ -405,26 +404,16 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count,
 
 	mutex_lock(&dev->io_mutex);
 	if (!dev->interface) {		/* already disconnected */
-		retval = -ENODEV;
-		goto exit;
+		mutex_unlock(&dev->io_mutex);
+		return -ENODEV;
 	}
 
 	spin_lock_irqsave(&dev->lock, flags);
-	bytes_read = snprintf(in_buffer, 20, "%lld\n", dev->bbu);
+	len = snprintf(in_buffer, 20, "%lld\n", dev->bbu);
 	spin_unlock_irqrestore(&dev->lock, flags);
-
-	if (*ppos < bytes_read) {
-		if (copy_to_user(buffer, in_buffer + *ppos, bytes_read - *ppos))
-			retval = -EFAULT;
-		else {
-			retval = bytes_read - *ppos;
-			*ppos += bytes_read;
-		}
-	}
-
-exit:
 	mutex_unlock(&dev->io_mutex);
-	return retval;
+
+	return simple_read_from_buffer(buffer, count, ppos, in_buffer, len);
 }
 
 static ssize_t yurex_write(struct file *file, const char __user *user_buffer,

From d59d2f9995d28974877750f429e821324bd603c7 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Fri, 6 Jul 2018 13:44:35 +0800
Subject: [PATCH 305/382] staging: r8822be: Fix RTL8822be can't find any
 wireless AP

RTL8822be can't bring up properly on ASUS X530UN, and dmesg says:
[ 8.591333] r8822be: module is from the staging directory, the quality
is unknown, you have been warned.
[ 8.593122] r8822be 0000:02:00.0: enabling device (0000 -> 0003)
[ 8.669163] r8822be: Using firmware rtlwifi/rtl8822befw.bin
[ 9.289939] r8822be: rtlwifi: wireless switch is on
[ 10.056426] r8822be 0000:02:00.0 wlp2s0: renamed from wlan0
...
[ 11.952534] r8822be: halmac_init_hal failed
[ 11.955933] r8822be: halmac_init_hal failed
[ 11.956227] r8822be: halmac_init_hal failed
[ 22.007942] r8822be: halmac_init_hal failed

Jian-Hong reported it works if turn off ASPM with module parameter aspm=0.
In order to fix this problem kindly, this commit don't turn off aspm but
enlarge ASPM L1 latency to 7.

Reported-by: Jian-Hong Pan <jian-hong@endlessm.com>
Tested-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtlwifi/rtl8822be/hw.c | 2 +-
 drivers/staging/rtlwifi/wifi.h         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtlwifi/rtl8822be/hw.c b/drivers/staging/rtlwifi/rtl8822be/hw.c
index 7947edb239a1..88ba5b2fea6a 100644
--- a/drivers/staging/rtlwifi/rtl8822be/hw.c
+++ b/drivers/staging/rtlwifi/rtl8822be/hw.c
@@ -803,7 +803,7 @@ static void _rtl8822be_enable_aspm_back_door(struct ieee80211_hw *hw)
 		return;
 
 	pci_read_config_byte(rtlpci->pdev, 0x70f, &tmp);
-	pci_write_config_byte(rtlpci->pdev, 0x70f, tmp | BIT(7));
+	pci_write_config_byte(rtlpci->pdev, 0x70f, tmp | ASPM_L1_LATENCY << 3);
 
 	pci_read_config_byte(rtlpci->pdev, 0x719, &tmp);
 	pci_write_config_byte(rtlpci->pdev, 0x719, tmp | BIT(3) | BIT(4));
diff --git a/drivers/staging/rtlwifi/wifi.h b/drivers/staging/rtlwifi/wifi.h
index 012fb618840b..a45f0eb69d3f 100644
--- a/drivers/staging/rtlwifi/wifi.h
+++ b/drivers/staging/rtlwifi/wifi.h
@@ -88,6 +88,7 @@
 #define RTL_USB_MAX_RX_COUNT			100
 #define QBSS_LOAD_SIZE				5
 #define MAX_WMMELE_LENGTH			64
+#define ASPM_L1_LATENCY				7
 
 #define TOTAL_CAM_ENTRY				32
 

From a0341fc1981a950c1e902ab901e98f60e0e243f3 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Sat, 7 Jul 2018 04:16:33 +0200
Subject: [PATCH 306/382] ibmasm: don't write out of bounds in read handler

This read handler had a lot of custom logic and wrote outside the bounds of
the provided buffer. This could lead to kernel and userspace memory
corruption. Just use simple_read_from_buffer() with a stack buffer.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ibmasm/ibmasmfs.c | 27 +++------------------------
 1 file changed, 3 insertions(+), 24 deletions(-)

diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index e05c3245930a..fa840666bdd1 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -507,35 +507,14 @@ static int remote_settings_file_close(struct inode *inode, struct file *file)
 static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
 {
 	void __iomem *address = (void __iomem *)file->private_data;
-	unsigned char *page;
-	int retval;
 	int len = 0;
 	unsigned int value;
-
-	if (*offset < 0)
-		return -EINVAL;
-	if (count == 0 || count > 1024)
-		return 0;
-	if (*offset != 0)
-		return 0;
-
-	page = (unsigned char *)__get_free_page(GFP_KERNEL);
-	if (!page)
-		return -ENOMEM;
+	char lbuf[20];
 
 	value = readl(address);
-	len = sprintf(page, "%d\n", value);
+	len = snprintf(lbuf, sizeof(lbuf), "%d\n", value);
 
-	if (copy_to_user(buf, page, len)) {
-		retval = -EFAULT;
-		goto exit;
-	}
-	*offset += len;
-	retval = len;
-
-exit:
-	free_page((unsigned long)page);
-	return retval;
+	return simple_read_from_buffer(buf, count, offset, lbuf, len);
 }
 
 static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset)

From 0c1049dcb4ceec640d8bd797335bcbebdcab44d2 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Fri, 6 Jul 2018 22:15:00 +0200
Subject: [PATCH 307/382] ARM: pxa: irq: fix handling of ICMR registers in
 suspend/resume

PXA3xx platforms have 56 interrupts that are stored in two ICMR
registers. The code in pxa_irq_suspend() and pxa_irq_resume() however
does a simple division by 32 which only leads to one register being
saved at suspend and restored at resume time. The NAND interrupt
setting, for instance, is lost.

Fix this by using DIV_ROUND_UP() instead.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
---
 arch/arm/mach-pxa/irq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 9c10248fadcc..4e8c2116808e 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -185,7 +185,7 @@ static int pxa_irq_suspend(void)
 {
 	int i;
 
-	for (i = 0; i < pxa_internal_irq_nr / 32; i++) {
+	for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) {
 		void __iomem *base = irq_base(i);
 
 		saved_icmr[i] = __raw_readl(base + ICMR);
@@ -204,7 +204,7 @@ static void pxa_irq_resume(void)
 {
 	int i;
 
-	for (i = 0; i < pxa_internal_irq_nr / 32; i++) {
+	for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) {
 		void __iomem *base = irq_base(i);
 
 		__raw_writel(saved_icmr[i], base + ICMR);

From 122c5770cff2c1df1a2384b68285be2812cd72c1 Mon Sep 17 00:00:00 2001
From: Christophe Jaillet <christophe.jaillet@wanadoo.fr>
Date: Wed, 27 Jun 2018 20:56:18 -0500
Subject: [PATCH 308/382] fpga: altera-cvp: Fix an error handling path in
 'altera_cvp_probe()'

If 'fpga_mgr_create()' fails, we should release some resources, as done
in the other error handling path of the function.

Fixes: 7085e2a94f7d ("fpga: manager: change api, don't use drvdata")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Moritz Fischer <mdf@kernel.org>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/altera-cvp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c
index dd4edd8f22ce..7fa793672a7a 100644
--- a/drivers/fpga/altera-cvp.c
+++ b/drivers/fpga/altera-cvp.c
@@ -455,8 +455,10 @@ static int altera_cvp_probe(struct pci_dev *pdev,
 
 	mgr = fpga_mgr_create(&pdev->dev, conf->mgr_name,
 			      &altera_cvp_ops, conf);
-	if (!mgr)
-		return -ENOMEM;
+	if (!mgr) {
+		ret = -ENOMEM;
+		goto err_unmap;
+	}
 
 	pci_set_drvdata(pdev, mgr);
 

From 9421e45f5ff3d558cf8b75a8cc0824530caf3453 Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Fri, 6 Jul 2018 22:05:37 -0400
Subject: [PATCH 309/382] uio: use request_threaded_irq instead

Prepraing for changing to use mutex lock.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index e8f4ac9400ea..b4b2ae1e0473 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -902,8 +902,9 @@ int __uio_register_device(struct module *owner,
 		 * FDs at the time of unregister and therefore may not be
 		 * freed until they are released.
 		 */
-		ret = request_irq(info->irq, uio_interrupt,
-				  info->irq_flags, info->name, idev);
+		ret = request_threaded_irq(info->irq, NULL, uio_interrupt,
+					   info->irq_flags, info->name, idev);
+
 		if (ret)
 			goto err_request_irq;
 	}

From 543af5861f41af0a5d2432f6fb5976af50f9cee5 Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Fri, 6 Jul 2018 22:05:38 -0400
Subject: [PATCH 310/382] uio: change to use the mutex lock instead of the spin
 lock

We are hitting a regression with the following commit:

commit a93e7b331568227500186a465fee3c2cb5dffd1f
Author: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Date:   Mon May 14 13:32:23 2018 +1200

    uio: Prevent device destruction while fds are open

The problem is the addition of spin_lock_irqsave in uio_write. This
leads to hitting  uio_write -> copy_from_user -> _copy_from_user ->
might_fault and the logs filling up with sleeping warnings.

I also noticed some uio drivers allocate memory, sleep, grab mutexes
from callouts like open() and release and uio is now doing
spin_lock_irqsave while calling them.

Reported-by: Mike Christie <mchristi@redhat.com>
CC: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Reviewed-by: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c          | 32 +++++++++++++-------------------
 include/linux/uio_driver.h |  2 +-
 2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index b4b2ae1e0473..655ade4fb3b1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -433,7 +433,6 @@ static int uio_open(struct inode *inode, struct file *filep)
 	struct uio_device *idev;
 	struct uio_listener *listener;
 	int ret = 0;
-	unsigned long flags;
 
 	mutex_lock(&minor_lock);
 	idev = idr_find(&uio_idr, iminor(inode));
@@ -460,10 +459,10 @@ static int uio_open(struct inode *inode, struct file *filep)
 	listener->event_count = atomic_read(&idev->event);
 	filep->private_data = listener;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (idev->info && idev->info->open)
 		ret = idev->info->open(idev->info, inode);
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 	if (ret)
 		goto err_infoopen;
 
@@ -495,12 +494,11 @@ static int uio_release(struct inode *inode, struct file *filep)
 	int ret = 0;
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (idev->info && idev->info->release)
 		ret = idev->info->release(idev->info, inode);
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	module_put(idev->owner);
 	kfree(listener);
@@ -513,12 +511,11 @@ static __poll_t uio_poll(struct file *filep, poll_table *wait)
 	struct uio_listener *listener = filep->private_data;
 	struct uio_device *idev = listener->dev;
 	__poll_t ret = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq)
 		ret = -EIO;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	if (ret)
 		return ret;
@@ -537,12 +534,11 @@ static ssize_t uio_read(struct file *filep, char __user *buf,
 	DECLARE_WAITQUEUE(wait, current);
 	ssize_t retval = 0;
 	s32 event_count;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq)
 		retval = -EIO;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	if (retval)
 		return retval;
@@ -592,9 +588,8 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 	struct uio_device *idev = listener->dev;
 	ssize_t retval;
 	s32 irq_on;
-	unsigned long flags;
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	if (!idev->info || !idev->info->irq) {
 		retval = -EIO;
 		goto out;
@@ -618,7 +613,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 	retval = idev->info->irqcontrol(idev->info, irq_on);
 
 out:
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 	return retval ? retval : sizeof(s32);
 }
 
@@ -865,7 +860,7 @@ int __uio_register_device(struct module *owner,
 
 	idev->owner = owner;
 	idev->info = info;
-	spin_lock_init(&idev->info_lock);
+	mutex_init(&idev->info_lock);
 	init_waitqueue_head(&idev->wait);
 	atomic_set(&idev->event, 0);
 
@@ -929,7 +924,6 @@ EXPORT_SYMBOL_GPL(__uio_register_device);
 void uio_unregister_device(struct uio_info *info)
 {
 	struct uio_device *idev;
-	unsigned long flags;
 
 	if (!info || !info->uio_dev)
 		return;
@@ -943,9 +937,9 @@ void uio_unregister_device(struct uio_info *info)
 	if (info->irq && info->irq != UIO_IRQ_CUSTOM)
 		free_irq(info->irq, idev);
 
-	spin_lock_irqsave(&idev->info_lock, flags);
+	mutex_lock(&idev->info_lock);
 	idev->info = NULL;
-	spin_unlock_irqrestore(&idev->info_lock, flags);
+	mutex_unlock(&idev->info_lock);
 
 	device_unregister(&idev->dev);
 
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6c5f2074e14f..6f8b68cd460f 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -75,7 +75,7 @@ struct uio_device {
         struct fasync_struct    *async_queue;
         wait_queue_head_t       wait;
         struct uio_info         *info;
-	spinlock_t		info_lock;
+	struct mutex		info_lock;
         struct kobject          *map_dir;
         struct kobject          *portio_dir;
 };

From 57c5f4df0a5a0ee83df799991251e2ee93a5e4e9 Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Fri, 6 Jul 2018 22:05:39 -0400
Subject: [PATCH 311/382] uio: fix crash after the device is unregistered

For the target_core_user use case, after the device is unregistered
it maybe still opened in user space, then the kernel will crash, like:

[  251.163692] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
[  251.163820] IP: [<ffffffffc0736213>] show_name+0x23/0x40 [uio]
[  251.163965] PGD 8000000062694067 PUD 62696067 PMD 0
[  251.164097] Oops: 0000 [#1] SMP
...
[  251.165605]  e1000 mptscsih mptbase drm_panel_orientation_quirks dm_mirror dm_region_hash dm_log dm_mod
[  251.166014] CPU: 0 PID: 13380 Comm: tcmu-runner Kdump: loaded Not tainted 3.10.0-916.el7.test.x86_64 #1
[  251.166381] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017
[  251.166747] task: ffff971eb91db0c0 ti: ffff971e9e384000 task.ti: ffff971e9e384000
[  251.167137] RIP: 0010:[<ffffffffc0736213>]  [<ffffffffc0736213>] show_name+0x23/0x40 [uio]
[  251.167563] RSP: 0018:ffff971e9e387dc8  EFLAGS: 00010282
[  251.167978] RAX: 0000000000000000 RBX: ffff971e9e3f8000 RCX: ffff971eb8368d98
[  251.168408] RDX: ffff971e9e3f8000 RSI: ffffffffc0738084 RDI: ffff971e9e3f8000
[  251.168856] RBP: ffff971e9e387dd0 R08: ffff971eb8bc0018 R09: 0000000000000000
[  251.169296] R10: 0000000000001000 R11: ffffffffa09d444d R12: ffffffffa1076e80
[  251.169750] R13: ffff971e9e387f18 R14: 0000000000000001 R15: ffff971e9cfb1c80
[  251.170213] FS:  00007ff37d175880(0000) GS:ffff971ebb600000(0000) knlGS:0000000000000000
[  251.170693] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  251.171248] CR2: 0000000000000008 CR3: 00000000001f6000 CR4: 00000000003607f0
[  251.172071] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  251.172640] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  251.173236] Call Trace:
[  251.173789]  [<ffffffffa0c9b2d3>] dev_attr_show+0x23/0x60
[  251.174356]  [<ffffffffa0f561b2>] ? mutex_lock+0x12/0x2f
[  251.174892]  [<ffffffffa0ac6d9f>] sysfs_kf_seq_show+0xcf/0x1f0
[  251.175433]  [<ffffffffa0ac54e6>] kernfs_seq_show+0x26/0x30
[  251.175981]  [<ffffffffa0a63be0>] seq_read+0x110/0x3f0
[  251.176609]  [<ffffffffa0ac5d45>] kernfs_fop_read+0xf5/0x160
[  251.177158]  [<ffffffffa0a3d3af>] vfs_read+0x9f/0x170
[  251.177707]  [<ffffffffa0a3e27f>] SyS_read+0x7f/0xf0
[  251.178268]  [<ffffffffa0f648af>] system_call_fastpath+0x1c/0x21
[  251.178823] Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 d3 e8 7e 96 56 e0 48 8b 80 d8 02 00 00 48 89 df 48 c7 c6 84 80 73 c0 <48> 8b 50 08 31 c0 e8 e2 67 44 e0 5b 48 98 5d c3 0f 1f 00 66 2e
[  251.180115] RIP  [<ffffffffc0736213>] show_name+0x23/0x40 [uio]
[  251.180820]  RSP <ffff971e9e387dc8>
[  251.181473] CR2: 0000000000000008

CC: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
CC: Mike Christie <mchristi@redhat.com>
Reviewed-by: Hamish Martin <hamish.martin@alliedtelesis.co.nz>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c | 104 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 88 insertions(+), 16 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 655ade4fb3b1..5d421d7e8904 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -215,7 +215,20 @@ static ssize_t name_show(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
 	struct uio_device *idev = dev_get_drvdata(dev);
-	return sprintf(buf, "%s\n", idev->info->name);
+	int ret;
+
+	mutex_lock(&idev->info_lock);
+	if (!idev->info) {
+		ret = -EINVAL;
+		dev_err(dev, "the device has been unregistered\n");
+		goto out;
+	}
+
+	ret = sprintf(buf, "%s\n", idev->info->name);
+
+out:
+	mutex_unlock(&idev->info_lock);
+	return ret;
 }
 static DEVICE_ATTR_RO(name);
 
@@ -223,7 +236,20 @@ static ssize_t version_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
 	struct uio_device *idev = dev_get_drvdata(dev);
-	return sprintf(buf, "%s\n", idev->info->version);
+	int ret;
+
+	mutex_lock(&idev->info_lock);
+	if (!idev->info) {
+		ret = -EINVAL;
+		dev_err(dev, "the device has been unregistered\n");
+		goto out;
+	}
+
+	ret = sprintf(buf, "%s\n", idev->info->version);
+
+out:
+	mutex_unlock(&idev->info_lock);
+	return ret;
 }
 static DEVICE_ATTR_RO(version);
 
@@ -415,11 +441,15 @@ EXPORT_SYMBOL_GPL(uio_event_notify);
 static irqreturn_t uio_interrupt(int irq, void *dev_id)
 {
 	struct uio_device *idev = (struct uio_device *)dev_id;
-	irqreturn_t ret = idev->info->handler(irq, idev->info);
+	irqreturn_t ret;
 
+	mutex_lock(&idev->info_lock);
+
+	ret = idev->info->handler(irq, idev->info);
 	if (ret == IRQ_HANDLED)
 		uio_event_notify(idev->info);
 
+	mutex_unlock(&idev->info_lock);
 	return ret;
 }
 
@@ -460,6 +490,12 @@ static int uio_open(struct inode *inode, struct file *filep)
 	filep->private_data = listener;
 
 	mutex_lock(&idev->info_lock);
+	if (!idev->info) {
+		mutex_unlock(&idev->info_lock);
+		ret = -EINVAL;
+		goto err_alloc_listener;
+	}
+
 	if (idev->info && idev->info->open)
 		ret = idev->info->open(idev->info, inode);
 	mutex_unlock(&idev->info_lock);
@@ -590,6 +626,11 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
 	s32 irq_on;
 
 	mutex_lock(&idev->info_lock);
+	if (!idev->info) {
+		retval = -EINVAL;
+		goto out;
+	}
+
 	if (!idev->info || !idev->info->irq) {
 		retval = -EIO;
 		goto out;
@@ -635,10 +676,20 @@ static vm_fault_t uio_vma_fault(struct vm_fault *vmf)
 	struct page *page;
 	unsigned long offset;
 	void *addr;
+	int ret = 0;
+	int mi;
 
-	int mi = uio_find_mem_index(vmf->vma);
-	if (mi < 0)
-		return VM_FAULT_SIGBUS;
+	mutex_lock(&idev->info_lock);
+	if (!idev->info) {
+		ret = VM_FAULT_SIGBUS;
+		goto out;
+	}
+
+	mi = uio_find_mem_index(vmf->vma);
+	if (mi < 0) {
+		ret = VM_FAULT_SIGBUS;
+		goto out;
+	}
 
 	/*
 	 * We need to subtract mi because userspace uses offset = N*PAGE_SIZE
@@ -653,7 +704,11 @@ static vm_fault_t uio_vma_fault(struct vm_fault *vmf)
 		page = vmalloc_to_page(addr);
 	get_page(page);
 	vmf->page = page;
-	return 0;
+
+out:
+	mutex_unlock(&idev->info_lock);
+
+	return ret;
 }
 
 static const struct vm_operations_struct uio_logical_vm_ops = {
@@ -678,6 +733,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
 	struct uio_device *idev = vma->vm_private_data;
 	int mi = uio_find_mem_index(vma);
 	struct uio_mem *mem;
+
 	if (mi < 0)
 		return -EINVAL;
 	mem = idev->info->mem + mi;
@@ -719,30 +775,46 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
 
 	vma->vm_private_data = idev;
 
+	mutex_lock(&idev->info_lock);
+	if (!idev->info) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	mi = uio_find_mem_index(vma);
-	if (mi < 0)
-		return -EINVAL;
+	if (mi < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	requested_pages = vma_pages(vma);
 	actual_pages = ((idev->info->mem[mi].addr & ~PAGE_MASK)
 			+ idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT;
-	if (requested_pages > actual_pages)
-		return -EINVAL;
+	if (requested_pages > actual_pages) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	if (idev->info->mmap) {
 		ret = idev->info->mmap(idev->info, vma);
-		return ret;
+		goto out;
 	}
 
 	switch (idev->info->mem[mi].memtype) {
 		case UIO_MEM_PHYS:
-			return uio_mmap_physical(vma);
+			ret = uio_mmap_physical(vma);
+			break;
 		case UIO_MEM_LOGICAL:
 		case UIO_MEM_VIRTUAL:
-			return uio_mmap_logical(vma);
+			ret = uio_mmap_logical(vma);
+			break;
 		default:
-			return -EINVAL;
+			ret = -EINVAL;
 	}
+
+out:
+	mutex_unlock(&idev->info_lock);
+	return 0;
 }
 
 static const struct file_operations uio_fops = {
@@ -932,12 +1004,12 @@ void uio_unregister_device(struct uio_info *info)
 
 	uio_free_minor(idev);
 
+	mutex_lock(&idev->info_lock);
 	uio_dev_del_attributes(idev);
 
 	if (info->irq && info->irq != UIO_IRQ_CUSTOM)
 		free_irq(info->irq, idev);
 
-	mutex_lock(&idev->info_lock);
 	idev->info = NULL;
 	mutex_unlock(&idev->info_lock);
 

From 007a74907deeceefef9dc3ec4679fbd7921eaa51 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 26 Jun 2018 14:46:35 +0300
Subject: [PATCH 312/382] thunderbolt: Notify userspace when boot_acl is
 changed

The commit 9aaa3b8b4c56 ("thunderbolt: Add support for preboot ACL")
introduced boot_acl attribute but missed the fact that now userspace
needs to poll the attribute constantly to find out whether it has
changed or not. Fix this by sending notification to the userspace
whenever the boot_acl attribute is changed.

Fixes: 9aaa3b8b4c56 ("thunderbolt: Add support for preboot ACL")
Reported-and-tested-by: Christian Kellner <christian@kellner.me>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Christian Kellner <christian@kellner.me>
Acked-by: Yehezkel Bernat <yehezkelshb@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thunderbolt/domain.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c
index 6281266b8ec0..a923ebdeb73c 100644
--- a/drivers/thunderbolt/domain.c
+++ b/drivers/thunderbolt/domain.c
@@ -213,6 +213,10 @@ static ssize_t boot_acl_store(struct device *dev, struct device_attribute *attr,
 		goto err_free_acl;
 	}
 	ret = tb->cm_ops->set_boot_acl(tb, acl, tb->nboot_acl);
+	if (!ret) {
+		/* Notify userspace about the change */
+		kobject_uevent(&tb->dev.kobj, KOBJ_CHANGE);
+	}
 	mutex_unlock(&tb->lock);
 
 err_free_acl:

From 87ed1405ef09d29a14df43295f7b6a93b63bfe6e Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 18 Jun 2018 18:30:43 +0100
Subject: [PATCH 313/382] nvmem: Don't let a NULL cell_id for nvmem_cell_get()
 crash us

In commit ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on
Qcom chips") you can see a call like:

  devm_nvmem_cell_get(dev, NULL);

Note that the cell ID passed to the function is NULL.  This is because
the qcom-qusb2 driver is expected to work only on systems where the
PHY node is hooked up via device-tree and is nameless.

This works OK for the most part.  The first thing nvmem_cell_get()
does is to call of_nvmem_cell_get() and there it's documented that a
NULL name is fine.  The problem happens when the call to
of_nvmem_cell_get() returns -EINVAL.  In such a case we'll fall back
to nvmem_cell_get_from_list() and eventually might (if nvmem_cells
isn't an empty list) crash with something that looks like:

 strcmp
 nvmem_find_cell
 __nvmem_device_get
 nvmem_cell_get_from_list
 nvmem_cell_get
 devm_nvmem_cell_get
 qusb2_phy_probe

There are several different ways we could fix this problem:

One could argue that perhaps the qcom-qusb2 driver should be changed
to use of_nvmem_cell_get() which is allowed to have a NULL name.  In
that case, we'd need to add a patche to introduce
devm_of_nvmem_cell_get() since the qcom-qusb2 driver is using devm
managed resources.

One could also argue that perhaps we could just add a name to
qcom-qusb2.  That would be OK but I believe it effectively changes the
device tree bindings, so maybe it's a no-go.

In this patch I have chosen to fix the problem by simply not crashing
when a NULL cell_id is passed to nvmem_cell_get().

NOTE: that for the qcom-qusb2 driver the "nvmem-cells" property is
defined to be optional and thus it's expected to be a common case that
we would hit this crash and this is more than just a theoretical fix.

Fixes: ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on Qcom chips")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index b5b0cdc21d01..514d1dfc5630 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -936,6 +936,10 @@ struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id)
 			return cell;
 	}
 
+	/* NULL cell_id only allowed for device tree; invalid otherwise */
+	if (!cell_id)
+		return ERR_PTR(-EINVAL);
+
 	return nvmem_cell_get_from_list(cell_id);
 }
 EXPORT_SYMBOL_GPL(nvmem_cell_get);

From 15279df6f26cf2013d713904b4a0c957ae8abb96 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 6 Jul 2018 23:50:03 +0200
Subject: [PATCH 314/382] x86/mtrr: Don't copy out-of-bounds data in mtrr_write

Don't access the provided buffer out of bounds - this can cause a kernel
out-of-bounds read when invoked through sys_splice() or other things that
use kernel_write()/__kernel_write().

Fixes: 7f8ec5a4f01a ("x86/mtrr: Convert to use strncpy_from_user() helper")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180706215003.156702-1-jannh@google.com
---
 arch/x86/kernel/cpu/mtrr/if.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 4021d3859499..40eee6cc4124 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -106,7 +106,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 
 	memset(line, 0, LINE_SIZE);
 
-	length = strncpy_from_user(line, buf, LINE_SIZE - 1);
+	len = min_t(size_t, len, LINE_SIZE - 1);
+	length = strncpy_from_user(line, buf, len);
 	if (length < 0)
 		return length;
 

From 47a18a2dabba99c749b88867a9eef7ca48058e92 Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Sat, 7 Jul 2018 02:15:48 +0200
Subject: [PATCH 315/382] scripts: teach extract-vmlinux about LZ4 and ZSTD

Note that the LZ4 signature is different than that of modern LZ4 as we
use the "legacy" format which suffers from some downsides like inability
to disable compression.

Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/extract-vmlinux | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/extract-vmlinux b/scripts/extract-vmlinux
index 5061abcc2540..e6239f39abad 100755
--- a/scripts/extract-vmlinux
+++ b/scripts/extract-vmlinux
@@ -57,6 +57,8 @@ try_decompress '\3757zXZ\000' abcde unxz
 try_decompress 'BZh'          xy    bunzip2
 try_decompress '\135\0\0\0'   xxx   unlzma
 try_decompress '\211\114\132' xy    'lzop -d'
+try_decompress '\002!L\030'   xxx   'lz4 -d'
+try_decompress '(\265/\375'   xxx   unzstd
 
 # Bail out:
 echo "$me: Cannot find vmlinux." >&2

From 1e4b044d22517cae7047c99038abb444423243ca Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 8 Jul 2018 16:34:02 -0700
Subject: [PATCH 316/382] Linux 4.18-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d15ac32afbaf..925c55f2524f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 18
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Merciless Moray
 
 # *DOCUMENTATION*

From fa85015c0d95884c8dc42f38e2f2d6137d436b67 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 9 Jul 2018 11:01:07 +0200
Subject: [PATCH 317/382] ACPICA: Clear status of all events when entering S5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit 18996f2db918 (ACPICA: Events: Stop unconditionally
clearing ACPI IRQs during suspend/resume) the status of ACPI events
is not cleared any more when entering the ACPI S5 system state (power
off) which causes some systems to power up immediately after turing
off power in certain situations.

That is a functional regression, so address it by making the code
clear the status of all ACPI events again when entering S5 (for
system-wide suspend or hibernation the clearing of the status of all
events is not desirable, as it might cause the kernel to miss wakeup
events sometimes).

Fixes: 18996f2db918 (ACPICA: Events: Stop unconditionally clearing ACPI IRQs during suspend/resume)
Reported-by: Takashi Iwai <tiwai@suse.de>
Tested-by: Thomas Hänig <haenig@cosifan.de>
Cc: 4.17+ <stable@vger.kernel.org> # 4.17+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/hwsleep.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index fc0c2e2328cd..fe9d46d81750 100644
--- a/drivers/acpi/acpica/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -51,16 +51,23 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state)
 		return_ACPI_STATUS(status);
 	}
 
-	/*
-	 * 1) Disable all GPEs
-	 * 2) Enable all wakeup GPEs
-	 */
+	/* Disable all GPEs */
 	status = acpi_hw_disable_all_gpes();
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
+	/*
+	 * If the target sleep state is S5, clear all GPEs and fixed events too
+	 */
+	if (sleep_state == ACPI_STATE_S5) {
+		status = acpi_hw_clear_acpi_status();
+		if (ACPI_FAILURE(status)) {
+			return_ACPI_STATUS(status);
+		}
+	}
 	acpi_gbl_system_awake_and_running = FALSE;
 
+	 /* Enable all wakeup GPEs */
 	status = acpi_hw_enable_all_wakeup_gpes();
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);

From 92748beac07c471d995fbec642b63572dc01b3dc Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Wed, 4 Jul 2018 17:07:45 +0200
Subject: [PATCH 318/382] mmc: sdhci-esdhc-imx: allow 1.8V modes without
 100/200MHz pinctrl states

If pinctrl nodes for 100/200MHz are missing, the controller should
not select any mode which need signal frequencies 100MHz or higher.
To prevent such speed modes the driver currently uses the quirk flag
SDHCI_QUIRK2_NO_1_8_V. This works nicely for SD cards since 1.8V
signaling is required for all faster modes and slower modes use 3.3V
signaling only.

However, there are eMMC modes which use 1.8V signaling and run below
100MHz, e.g. DDR52 at 1.8V. With using SDHCI_QUIRK2_NO_1_8_V this
mode is prevented. When using a fixed 1.8V regulator as vqmmc-supply
the stack has no valid mode to use. In this tenuous situation the
kernel continuously prints voltage switching errors:
  mmc1: Switching to 3.3V signalling voltage failed

Avoid using SDHCI_QUIRK2_NO_1_8_V and prevent faster modes by
altering the SDHCI capability register. With that the stack is able
to select 1.8V modes even if no faster pinctrl states are available:
  # cat /sys/kernel/debug/mmc1/ios
  ...
  timing spec:    8 (mmc DDR52)
  signal voltage: 1 (1.80 V)
  ...

Link: http://lkml.kernel.org/r/20180628081331.13051-1-stefan@agner.ch
Signed-off-by: Stefan Agner <stefan@agner.ch>
Fixes: ad93220de7da ("mmc: sdhci-esdhc-imx: change pinctrl state according
to uhs mode")
Cc: <stable@vger.kernel.org> # v4.13+
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index d6aef70d34fa..4eb3d29ecde1 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -312,6 +312,15 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
 
 			if (imx_data->socdata->flags & ESDHC_FLAG_HS400)
 				val |= SDHCI_SUPPORT_HS400;
+
+			/*
+			 * Do not advertise faster UHS modes if there are no
+			 * pinctrl states for 100MHz/200MHz.
+			 */
+			if (IS_ERR_OR_NULL(imx_data->pins_100mhz) ||
+			    IS_ERR_OR_NULL(imx_data->pins_200mhz))
+				val &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50
+					 | SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_HS400);
 		}
 	}
 
@@ -1158,18 +1167,6 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
 						ESDHC_PINCTRL_STATE_100MHZ);
 		imx_data->pins_200mhz = pinctrl_lookup_state(imx_data->pinctrl,
 						ESDHC_PINCTRL_STATE_200MHZ);
-		if (IS_ERR(imx_data->pins_100mhz) ||
-				IS_ERR(imx_data->pins_200mhz)) {
-			dev_warn(mmc_dev(host->mmc),
-				"could not get ultra high speed state, work on normal mode\n");
-			/*
-			 * fall back to not supporting uhs by specifying no
-			 * 1.8v quirk
-			 */
-			host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
-		}
-	} else {
-		host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
 	}
 
 	/* call to generic mmc_of_parse to support additional capabilities */

From ef6eaf27274c0351f7059163918f3795da13199c Mon Sep 17 00:00:00 2001
From: Jason Andryuk <jandryuk@gmail.com>
Date: Fri, 22 Jun 2018 12:25:49 -0400
Subject: [PATCH 319/382] HID: i2c-hid: Fix "incomplete report" noise

Commit ac75a041048b ("HID: i2c-hid: fix size check and type usage") started
writing messages when the ret_size is <= 2 from i2c_master_recv.  However, my
device i2c-DLL07D1 returns 2 for a short period of time (~0.5s) after I stop
moving the pointing stick or touchpad.  It varies, but you get ~50 messages
each time which spams the log hard.

[  95.925055] i2c_hid i2c-DLL07D1:01: i2c_hid_get_input: incomplete report (83/2)

This has also been observed with a i2c-ALP0017.

[ 1781.266353] i2c_hid i2c-ALP0017:00: i2c_hid_get_input: incomplete report (30/2)

Only print the message when ret_size is totally invalid and less than 2 to cut
down on the log spam.

Fixes: ac75a041048b ("HID: i2c-hid: fix size check and type usage")
Reported-by: John Smith <john-s-84@gmx.net>
Cc: stable@vger.kernel.org
Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/i2c-hid/i2c-hid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index c1652bb7bd15..eae0cb3ddec6 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -484,7 +484,7 @@ static void i2c_hid_get_input(struct i2c_hid *ihid)
 		return;
 	}
 
-	if ((ret_size > size) || (ret_size <= 2)) {
+	if ((ret_size > size) || (ret_size < 2)) {
 		dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n",
 			__func__, size, ret_size);
 		return;

From 4f65245f2d178b9cba48350620d76faa4a098841 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 29 Jun 2018 17:08:44 -0500
Subject: [PATCH 320/382] HID: hiddev: fix potential Spectre v1

uref->field_index, uref->usage_index, finfo.field_index and cinfo.index can be
indirectly controlled by user-space, hence leading to a potential exploitation
of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

drivers/hid/usbhid/hiddev.c:473 hiddev_ioctl_usage() warn: potential spectre issue 'report->field' (local cap)
drivers/hid/usbhid/hiddev.c:477 hiddev_ioctl_usage() warn: potential spectre issue 'field->usage' (local cap)
drivers/hid/usbhid/hiddev.c:757 hiddev_ioctl() warn: potential spectre issue 'report->field' (local cap)
drivers/hid/usbhid/hiddev.c:801 hiddev_ioctl() warn: potential spectre issue 'hid->collection' (local cap)

Fix this by sanitizing such structure fields before using them to index
report->field, field->usage and hid->collection

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/hiddev.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index e3ce233f8bdc..23872d08308c 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -36,6 +36,7 @@
 #include <linux/hiddev.h>
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
+#include <linux/nospec.h>
 #include "usbhid.h"
 
 #ifdef CONFIG_USB_DYNAMIC_MINORS
@@ -469,10 +470,14 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 
 		if (uref->field_index >= report->maxfield)
 			goto inval;
+		uref->field_index = array_index_nospec(uref->field_index,
+						       report->maxfield);
 
 		field = report->field[uref->field_index];
 		if (uref->usage_index >= field->maxusage)
 			goto inval;
+		uref->usage_index = array_index_nospec(uref->usage_index,
+						       field->maxusage);
 
 		uref->usage_code = field->usage[uref->usage_index].hid;
 
@@ -499,6 +504,8 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 
 			if (uref->field_index >= report->maxfield)
 				goto inval;
+			uref->field_index = array_index_nospec(uref->field_index,
+							       report->maxfield);
 
 			field = report->field[uref->field_index];
 
@@ -753,6 +760,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		if (finfo.field_index >= report->maxfield)
 			break;
+		finfo.field_index = array_index_nospec(finfo.field_index,
+						       report->maxfield);
 
 		field = report->field[finfo.field_index];
 		memset(&finfo, 0, sizeof(finfo));
@@ -797,6 +806,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		if (cinfo.index >= hid->maxcollection)
 			break;
+		cinfo.index = array_index_nospec(cinfo.index,
+						 hid->maxcollection);
 
 		cinfo.type = hid->collection[cinfo.index].type;
 		cinfo.usage = hid->collection[cinfo.index].usage;

From 54836e2d03e76d80aec3399368ffaf5b7caadd1b Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 3 Jul 2018 09:55:43 +0100
Subject: [PATCH 321/382] i2c: tegra: Fix NACK error handling

On Tegra30 Cardhu the PCA9546 I2C mux is not ACK'ing I2C commands on
resume from suspend (which is caused by the reset signal for the I2C
mux not being configured correctl). However, this NACK is causing the
Tegra30 to hang on resuming from suspend which is not expected as we
detect NACKs and handle them. The hang observed appears to occur when
resetting the I2C controller to recover from the NACK.

Commit 77821b4678f9 ("i2c: tegra: proper handling of error cases") added
additional error handling for some error cases including NACK, however,
it appears that this change conflicts with an early fix by commit
f70893d08338 ("i2c: tegra: Add delay before resetting the controller
after NACK"). After commit 77821b4678f9 was made we now disable 'packet
mode' before the delay from commit f70893d08338 happens. Testing shows
that moving the delay to before disabling 'packet mode' fixes the hang
observed on Tegra30. The delay was added to give the I2C controller
chance to send a stop condition and so it makes sense to move this to
before we disable packet mode. Please note that packet mode is always
enabled for Tegra.

Fixes: 77821b4678f9 ("i2c: tegra: proper handling of error cases")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@vger.kernel.org
---
 drivers/i2c/busses/i2c-tegra.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 5fccd1f1bca8..797def5319f1 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -545,6 +545,14 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev)
 {
 	u32 cnfg;
 
+	/*
+	 * NACK interrupt is generated before the I2C controller generates
+	 * the STOP condition on the bus. So wait for 2 clock periods
+	 * before disabling the controller so that the STOP condition has
+	 * been delivered properly.
+	 */
+	udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate));
+
 	cnfg = i2c_readl(i2c_dev, I2C_CNFG);
 	if (cnfg & I2C_CNFG_PACKET_MODE_EN)
 		i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, I2C_CNFG);
@@ -706,15 +714,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev,
 	if (likely(i2c_dev->msg_err == I2C_ERR_NONE))
 		return 0;
 
-	/*
-	 * NACK interrupt is generated before the I2C controller generates
-	 * the STOP condition on the bus. So wait for 2 clock periods
-	 * before resetting the controller so that the STOP condition has
-	 * been delivered properly.
-	 */
-	if (i2c_dev->msg_err == I2C_ERR_NO_ACK)
-		udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate));
-
 	tegra_i2c_init(i2c_dev);
 	if (i2c_dev->msg_err == I2C_ERR_NO_ACK) {
 		if (msg->flags & I2C_M_IGNORE_NAK)

From bdf33113d89f70186ffb6674e925fa9b8a0266b1 Mon Sep 17 00:00:00 2001
From: Russell King - ARM Linux <linux@armlinux.org.uk>
Date: Mon, 9 Jul 2018 13:15:32 +0100
Subject: [PATCH 322/382] Update TDA998x maintainer entry

Update my TDA998x HDMI encoder MAINTAINERS entry to include the
dt-bindings header, and a keyword pattern to catch patches containing
the DT compatible.  Also change the status to "maintained" rather than
"supported".

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 96e98e206b0d..19f2d12ace6b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10213,11 +10213,13 @@ F:	sound/soc/codecs/sgtl5000*
 
 NXP TDA998X DRM DRIVER
 M:	Russell King <linux@armlinux.org.uk>
-S:	Supported
+S:	Maintained
 T:	git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-devel
 T:	git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-fixes
 F:	drivers/gpu/drm/i2c/tda998x_drv.c
 F:	include/drm/i2c/tda998x.h
+F:	include/dt-bindings/display/tda998x.h
+K:	"nxp,tda998x"
 
 NXP TFA9879 DRIVER
 M:	Peter Rosin <peda@axentia.se>

From dc81aab1be9fac2e11f31fe7538a50705eba08cf Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Fri, 6 Jul 2018 09:28:42 +0200
Subject: [PATCH 323/382] MAINTAINERS: update drm tree

Mail to dri-devel went out, linux-next was updated, but we forgot this
one here.

Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180706072842.9009-1-daniel.vetter@ffwll.ch
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 96e98e206b0d..ad0753fb5357 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -581,7 +581,7 @@ W:	https://www.infradead.org/~dhowells/kafs/
 
 AGPGART DRIVER
 M:	David Airlie <airlied@linux.ie>
-T:	git git://people.freedesktop.org/~airlied/linux (part of drm maint)
+T:	git git://anongit.freedesktop.org/drm/drm
 S:	Maintained
 F:	drivers/char/agp/
 F:	include/linux/agp*
@@ -4630,7 +4630,7 @@ F:	include/uapi/drm/vmwgfx_drm.h
 DRM DRIVERS
 M:	David Airlie <airlied@linux.ie>
 L:	dri-devel@lists.freedesktop.org
-T:	git git://people.freedesktop.org/~airlied/linux
+T:	git git://anongit.freedesktop.org/drm/drm
 B:	https://bugs.freedesktop.org/
 C:	irc://chat.freenode.net/dri-devel
 S:	Maintained

From 96f95a17c1cfe65a002e525114d96616e91a8f2d Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 9 Jul 2018 13:09:56 -0700
Subject: [PATCH 324/382] Revert "arm64: Use aarch64elf and aarch64elfb
 emulation mode variants"

This reverts commit 38fc4248677552ce35efc09902fdcb06b61d7ef9.

Distributions such as Fedora and Debian do not package the ELF linker
scripts with their toolchains, resulting in kernel build failures such
as:

  |   CHK     include/generated/compile.h
  |   LD [M]  arch/arm64/crypto/sha512-ce.o
  | aarch64-linux-gnu-ld: cannot open linker script file ldscripts/aarch64elf.xr: No such file or directory
  | make[1]: *** [scripts/Makefile.build:530: arch/arm64/crypto/sha512-ce.o] Error 1
  | make: *** [Makefile:1029: arch/arm64/crypto] Error 2

Revert back to the linux targets for now, adding a comment to the Makefile
so we don't accidentally break this in the future.

Cc: Paul Kocialkowski <contact@paulk.fr>
Cc: <stable@vger.kernel.org>
Fixes: 38fc42486775 ("arm64: Use aarch64elf and aarch64elfb emulation mode variants")
Tested-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Makefile | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7976d2d242fa..e7101b19d590 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -60,13 +60,15 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
 AS		+= -EB
-LDFLAGS		+= -EB -maarch64elfb
+# We must use the linux target here, since distributions don't tend to package
+# the ELF linker scripts with binutils, and this results in a build failure.
+LDFLAGS		+= -EB -maarch64linuxb
 UTS_MACHINE	:= aarch64_be
 else
 KBUILD_CPPFLAGS	+= -mlittle-endian
 CHECKFLAGS	+= -D__AARCH64EL__
 AS		+= -EL
-LDFLAGS		+= -EL -maarch64elf
+LDFLAGS		+= -EL -maarch64linux # See comment above
 UTS_MACHINE	:= aarch64
 endif
 

From 5b5ccbc2b041f98f26b984e013d303b7f9e6fb8e Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 9 Jul 2018 16:45:35 +0100
Subject: [PATCH 325/382] Revert "tick: Prefer a lower rating device only if
 it's CPU local device"

This reverts commit 1332a90558013ae4242e3dd7934bdcdeafb06c0d.

The original issue was not because of incorrect checking of cpumask for
both new and old tick device. It was incorrectly analysed was due to the
misunderstanding of the comment and misinterpretation of the return value
from tick_check_preferred. The main issue is with the clockevent driver
that sets the cpumask to cpu_all_mask instead of cpu_possible_mask.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Marc Zyngier <marc.zyngier@arm.com>
Link: https://lkml.kernel.org/r/1531151136-18297-1-git-send-email-sudeep.holla@arm.com
---
 kernel/time/tick-common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b7005dd21ec1..14de3727b18e 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -277,8 +277,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev,
 	 */
 	return !curdev ||
 		newdev->rating > curdev->rating ||
-	       (!cpumask_equal(curdev->cpumask, newdev->cpumask) &&
-	        !tick_check_percpu(curdev, newdev, smp_processor_id()));
+	       !cpumask_equal(curdev->cpumask, newdev->cpumask);
 }
 
 /*

From 5e18e412973d6bb1804de1d4d30a891c774b006e Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 9 Jul 2018 16:45:36 +0100
Subject: [PATCH 326/382] clocksource: arm_arch_timer: Set arch_mem_timer
 cpumask to cpu_possible_mask

Currently, arch_mem_timer cpumask is set to cpu_all_mask which should be
fine. However, cpu_possible_mask is more accurate and if there are other
clockevent source in the system which are set to cpu_possible_mask, then
having cpu_all_mask may result in issue.

E.g. on a platform with arm,sp804 timer with rating 300 and
cpu_possible_mask and this arch_mem_timer timer with rating 400 and
cpu_all_mask, tick_check_preferred may choose both preferred as the
cpumasks are not equal though they must be.

This issue was root caused incorrectly initially and a fix was merged as
commit 1332a9055801 ("tick: Prefer a lower rating device only if it's CPU
local device").

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/1531151136-18297-2-git-send-email-sudeep.holla@arm.com
---
 drivers/clocksource/arm_arch_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 57cb2f00fc07..d8c7f5750cdb 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -735,7 +735,7 @@ static void __arch_timer_setup(unsigned type,
 		clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
 		clk->name = "arch_mem_timer";
 		clk->rating = 400;
-		clk->cpumask = cpu_all_mask;
+		clk->cpumask = cpu_possible_mask;
 		if (arch_timer_mem_use_virtual) {
 			clk->set_state_shutdown = arch_timer_shutdown_virt_mem;
 			clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem;

From e96d71359e9bbea846a2111e4469a03a055dfa6f Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:50 -0400
Subject: [PATCH 327/382] rseq: Use __u64 for rseq_cs fields, validate user
 inputs

Change the rseq ABI so rseq_cs start_ip, post_commit_offset and abort_ip
fields are seen as 64-bit fields by both 32-bit and 64-bit kernels rather
that ignoring the 32 upper bits on 32-bit kernels. This ensures we have a
consistent behavior for a 32-bit binary executed on 32-bit kernels and in
compat mode on 64-bit kernels.

Validating the value of abort_ip field to be below TASK_SIZE ensures the
kernel don't return to an invalid address when returning to userspace
after an abort. I don't fully trust each architecture code to consistently
deal with invalid return addresses.

Validating the value of the start_ip and post_commit_offset fields
prevents overflow on arithmetic performed on those values, used to
check whether abort_ip is within the rseq critical section.

If validation fails, the process is killed with a segmentation fault.

When the signature encountered before abort_ip does not match the expected
signature, return -EINVAL rather than -EPERM to be consistent with other
input validation return codes from rseq_get_rseq_cs().

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-2-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/rseq.h |  6 +++---
 kernel/rseq.c             | 14 ++++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index d620fa43756c..519ad6e176d1 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -52,10 +52,10 @@ struct rseq_cs {
 	__u32 version;
 	/* enum rseq_cs_flags */
 	__u32 flags;
-	LINUX_FIELD_u32_u64(start_ip);
+	__u64 start_ip;
 	/* Offset from start_ip. */
-	LINUX_FIELD_u32_u64(post_commit_offset);
-	LINUX_FIELD_u32_u64(abort_ip);
+	__u64 post_commit_offset;
+	__u64 abort_ip;
 } __attribute__((aligned(4 * sizeof(__u64))));
 
 /*
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 22b6acf1ad63..16b38c5342f9 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -130,14 +130,20 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 	urseq_cs = (struct rseq_cs __user *)ptr;
 	if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
 		return -EFAULT;
-	if (rseq_cs->version > 0)
-		return -EINVAL;
 
+	if (rseq_cs->start_ip >= TASK_SIZE ||
+	    rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
+	    rseq_cs->abort_ip >= TASK_SIZE ||
+	    rseq_cs->version > 0)
+		return -EINVAL;
+	/* Check for overflow. */
+	if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
+		return -EINVAL;
 	/* Ensure that abort_ip is not in the critical section. */
 	if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
 		return -EINVAL;
 
-	usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32));
+	usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
 	ret = get_user(sig, usig);
 	if (ret)
 		return ret;
@@ -146,7 +152,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 		printk_ratelimited(KERN_WARNING
 			"Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
 			sig, current->rseq_sig, current->pid, usig);
-		return -EPERM;
+		return -EINVAL;
 	}
 	return 0;
 }

From 8f28177014925f968baf45fc833c25848faf8c1c Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:51 -0400
Subject: [PATCH 328/382] rseq: Use get_user/put_user rather than
 __get_user/__put_user

__get_user()/__put_user() is used to read values for address ranges that
were already checked with access_ok() on rseq registration.

It has been recognized that __get_user/__put_user are optimizing the
wrong thing. Replace them by get_user/put_user across rseq instead.

If those end up showing up in benchmarks, the proper approach would be to
use user_access_begin() / unsafe_{get,put}_user() / user_access_end()
anyway.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lkml.kernel.org/r/20180709195155.7654-3-mathieu.desnoyers@efficios.com
---
 kernel/rseq.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/rseq.c b/kernel/rseq.c
index 16b38c5342f9..2c8463acb50d 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -85,9 +85,9 @@ static int rseq_update_cpu_id(struct task_struct *t)
 {
 	u32 cpu_id = raw_smp_processor_id();
 
-	if (__put_user(cpu_id, &t->rseq->cpu_id_start))
+	if (put_user(cpu_id, &t->rseq->cpu_id_start))
 		return -EFAULT;
-	if (__put_user(cpu_id, &t->rseq->cpu_id))
+	if (put_user(cpu_id, &t->rseq->cpu_id))
 		return -EFAULT;
 	trace_rseq_update(t);
 	return 0;
@@ -100,14 +100,14 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
 	/*
 	 * Reset cpu_id_start to its initial state (0).
 	 */
-	if (__put_user(cpu_id_start, &t->rseq->cpu_id_start))
+	if (put_user(cpu_id_start, &t->rseq->cpu_id_start))
 		return -EFAULT;
 	/*
 	 * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming
 	 * in after unregistration can figure out that rseq needs to be
 	 * registered again.
 	 */
-	if (__put_user(cpu_id, &t->rseq->cpu_id))
+	if (put_user(cpu_id, &t->rseq->cpu_id))
 		return -EFAULT;
 	return 0;
 }
@@ -120,7 +120,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 	u32 sig;
 	int ret;
 
-	ret = __get_user(ptr, &t->rseq->rseq_cs);
+	ret = get_user(ptr, &t->rseq->rseq_cs);
 	if (ret)
 		return ret;
 	if (!ptr) {
@@ -163,7 +163,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
 	int ret;
 
 	/* Get thread flags. */
-	ret = __get_user(flags, &t->rseq->flags);
+	ret = get_user(flags, &t->rseq->flags);
 	if (ret)
 		return ret;
 
@@ -203,7 +203,7 @@ static int clear_rseq_cs(struct task_struct *t)
 	 *
 	 * Set rseq_cs to NULL with single-copy atomicity.
 	 */
-	return __put_user(0UL, &t->rseq->rseq_cs);
+	return put_user(0UL, &t->rseq->rseq_cs);
 }
 
 /*

From 0fb9a1abc8c97f858997e962694eb36b4517144e Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:52 -0400
Subject: [PATCH 329/382] rseq: uapi: Update uapi comments

Update rseq uapi header comments to reflect that user-space need to do
thread-local loads/stores from/to the struct rseq fields.

As a consequence of this added requirement, the kernel does not need
to perform loads/stores with single-copy atomicity.

Update the comment associated to the "flags" fields to describe
more accurately that it's only useful to facilitate single-stepping
through rseq critical sections with debuggers.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-4-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/rseq.h | 69 ++++++++++++++++++++-------------------
 kernel/rseq.c             |  2 +-
 2 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 519ad6e176d1..bf4188c13bec 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -67,28 +67,30 @@ struct rseq_cs {
 struct rseq {
 	/*
 	 * Restartable sequences cpu_id_start field. Updated by the
-	 * kernel, and read by user-space with single-copy atomicity
-	 * semantics. Aligned on 32-bit. Always contains a value in the
-	 * range of possible CPUs, although the value may not be the
-	 * actual current CPU (e.g. if rseq is not initialized). This
-	 * CPU number value should always be compared against the value
-	 * of the cpu_id field before performing a rseq commit or
-	 * returning a value read from a data structure indexed using
-	 * the cpu_id_start value.
+	 * kernel. Read by user-space with single-copy atomicity
+	 * semantics. This field should only be read by the thread which
+	 * registered this data structure. Aligned on 32-bit. Always
+	 * contains a value in the range of possible CPUs, although the
+	 * value may not be the actual current CPU (e.g. if rseq is not
+	 * initialized). This CPU number value should always be compared
+	 * against the value of the cpu_id field before performing a rseq
+	 * commit or returning a value read from a data structure indexed
+	 * using the cpu_id_start value.
 	 */
 	__u32 cpu_id_start;
 	/*
-	 * Restartable sequences cpu_id field. Updated by the kernel,
-	 * and read by user-space with single-copy atomicity semantics.
-	 * Aligned on 32-bit. Values RSEQ_CPU_ID_UNINITIALIZED and
-	 * RSEQ_CPU_ID_REGISTRATION_FAILED have a special semantic: the
-	 * former means "rseq uninitialized", and latter means "rseq
-	 * initialization failed". This value is meant to be read within
-	 * rseq critical sections and compared with the cpu_id_start
-	 * value previously read, before performing the commit instruction,
-	 * or read and compared with the cpu_id_start value before returning
-	 * a value loaded from a data structure indexed using the
-	 * cpu_id_start value.
+	 * Restartable sequences cpu_id field. Updated by the kernel.
+	 * Read by user-space with single-copy atomicity semantics. This
+	 * field should only be read by the thread which registered this
+	 * data structure. Aligned on 32-bit. Values
+	 * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
+	 * have a special semantic: the former means "rseq uninitialized",
+	 * and latter means "rseq initialization failed". This value is
+	 * meant to be read within rseq critical sections and compared
+	 * with the cpu_id_start value previously read, before performing
+	 * the commit instruction, or read and compared with the
+	 * cpu_id_start value before returning a value loaded from a data
+	 * structure indexed using the cpu_id_start value.
 	 */
 	__u32 cpu_id;
 	/*
@@ -105,27 +107,28 @@ struct rseq {
 	 * targeted by the rseq_cs. Also needs to be set to NULL by user-space
 	 * before reclaiming memory that contains the targeted struct rseq_cs.
 	 *
-	 * Read and set by the kernel with single-copy atomicity semantics.
-	 * Set by user-space with single-copy atomicity semantics. Aligned
-	 * on 64-bit.
+	 * Read and set by the kernel. Set by user-space with single-copy
+	 * atomicity semantics. This field should only be updated by the
+	 * thread which registered this data structure. Aligned on 64-bit.
 	 */
 	LINUX_FIELD_u32_u64(rseq_cs);
 	/*
-	 * - RSEQ_DISABLE flag:
+	 * Restartable sequences flags field.
+	 *
+	 * This field should only be updated by the thread which
+	 * registered this data structure. Read by the kernel.
+	 * Mainly used for single-stepping through rseq critical sections
+	 * with debuggers.
 	 *
-	 * Fallback fast-track flag for single-stepping.
-	 * Set by user-space if lack of progress is detected.
-	 * Cleared by user-space after rseq finish.
-	 * Read by the kernel.
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
-	 *     Inhibit instruction sequence block restart and event
-	 *     counter increment on preemption for this thread.
+	 *     Inhibit instruction sequence block restart on preemption
+	 *     for this thread.
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
-	 *     Inhibit instruction sequence block restart and event
-	 *     counter increment on signal delivery for this thread.
+	 *     Inhibit instruction sequence block restart on signal
+	 *     delivery for this thread.
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
-	 *     Inhibit instruction sequence block restart and event
-	 *     counter increment on migration for this thread.
+	 *     Inhibit instruction sequence block restart on migration for
+	 *     this thread.
 	 */
 	__u32 flags;
 } __attribute__((aligned(4 * sizeof(__u64))));
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 2c8463acb50d..2a7748675be7 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -201,7 +201,7 @@ static int clear_rseq_cs(struct task_struct *t)
 	 * of code outside of the rseq assembly block. This performs
 	 * a lazy clear of the rseq_cs field.
 	 *
-	 * Set rseq_cs to NULL with single-copy atomicity.
+	 * Set rseq_cs to NULL.
 	 */
 	return put_user(0UL, &t->rseq->rseq_cs);
 }

From ec9c82e03a744e5698bd95eab872855861a821fa Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:53 -0400
Subject: [PATCH 330/382] rseq: uapi: Declare rseq_cs field as union, update
 includes

Declaring the rseq_cs field as a union between __u64 and two __u32
allows both 32-bit and 64-bit kernels to read the full __u64, and
therefore validate that a 32-bit user-space cleared the upper 32
bits, thus ensuring a consistent behavior between native 32-bit
kernels and 32-bit compat tasks on 64-bit kernels.

Check that the rseq_cs value read is < TASK_SIZE.

The asm/byteorder.h header needs to be included by rseq.h, now
that it is not using linux/types_32_64.h anymore.

Considering that only __32 and __u64 types are declared in linux/rseq.h,
the linux/types.h header should always be included for both kernel and
user-space code: including stdint.h is just for u64 and u32, which are
not used in this header at all.

Use copy_from_user()/clear_user() to interact with a 64-bit field,
because arm32 does not implement 64-bit __get_user, and ppc32 does not
64-bit get_user. Considering that the rseq_cs pointer does not need to
be loaded/stored with single-copy atomicity from the kernel anymore, we
can simply use copy_from_user()/clear_user().

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-5-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/rseq.h           | 27 +++++++++++++++++++--------
 kernel/rseq.c                       | 15 +++++++++------
 tools/testing/selftests/rseq/rseq.h | 11 ++++++++++-
 3 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index bf4188c13bec..9a402fdb60e9 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -10,13 +10,8 @@
  * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
  */
 
-#ifdef __KERNEL__
-# include <linux/types.h>
-#else
-# include <stdint.h>
-#endif
-
-#include <linux/types_32_64.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
 
 enum rseq_cpu_id_state {
 	RSEQ_CPU_ID_UNINITIALIZED		= -1,
@@ -111,7 +106,23 @@ struct rseq {
 	 * atomicity semantics. This field should only be updated by the
 	 * thread which registered this data structure. Aligned on 64-bit.
 	 */
-	LINUX_FIELD_u32_u64(rseq_cs);
+	union {
+		__u64 ptr64;
+#ifdef __LP64__
+		__u64 ptr;
+#else
+		struct {
+#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN)
+			__u32 padding;		/* Initialized to zero. */
+			__u32 ptr32;
+#else /* LITTLE */
+			__u32 ptr32;
+			__u32 padding;		/* Initialized to zero. */
+#endif /* ENDIAN */
+		} ptr;
+#endif
+	} rseq_cs;
+
 	/*
 	 * Restartable sequences flags field.
 	 *
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 2a7748675be7..c6242d8594dc 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -115,19 +115,20 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
 static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 {
 	struct rseq_cs __user *urseq_cs;
-	unsigned long ptr;
+	u64 ptr;
 	u32 __user *usig;
 	u32 sig;
 	int ret;
 
-	ret = get_user(ptr, &t->rseq->rseq_cs);
-	if (ret)
-		return ret;
+	if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
+		return -EFAULT;
 	if (!ptr) {
 		memset(rseq_cs, 0, sizeof(*rseq_cs));
 		return 0;
 	}
-	urseq_cs = (struct rseq_cs __user *)ptr;
+	if (ptr >= TASK_SIZE)
+		return -EINVAL;
+	urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
 	if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
 		return -EFAULT;
 
@@ -203,7 +204,9 @@ static int clear_rseq_cs(struct task_struct *t)
 	 *
 	 * Set rseq_cs to NULL.
 	 */
-	return put_user(0UL, &t->rseq->rseq_cs);
+	if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
+		return -EFAULT;
+	return 0;
 }
 
 /*
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index a4684112676c..f2073cfa4448 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -133,6 +133,15 @@ static inline uint32_t rseq_current_cpu(void)
 	return cpu;
 }
 
+static inline void rseq_clear_rseq_cs(void)
+{
+#ifdef __LP64__
+	__rseq_abi.rseq_cs.ptr = 0;
+#else
+	__rseq_abi.rseq_cs.ptr.ptr32 = 0;
+#endif
+}
+
 /*
  * rseq_prepare_unload() should be invoked by each thread using rseq_finish*()
  * at least once between their last rseq_finish*() and library unload of the
@@ -143,7 +152,7 @@ static inline uint32_t rseq_current_cpu(void)
  */
 static inline void rseq_prepare_unload(void)
 {
-	__rseq_abi.rseq_cs = 0;
+	rseq_clear_rseq_cs();
 }
 
 #endif  /* RSEQ_H_ */

From 4f4c0acdf4652a964da869d578a3c8bf6df14ce2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:54 -0400
Subject: [PATCH 331/382] rseq: Remove unused types_32_64.h uapi header

This header was introduced in the 4.18 merge window, and rseq does
not need it anymore. Nuke it before the final release.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-6-mathieu.desnoyers@efficios.com
---
 include/uapi/linux/types_32_64.h | 50 --------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 include/uapi/linux/types_32_64.h

diff --git a/include/uapi/linux/types_32_64.h b/include/uapi/linux/types_32_64.h
deleted file mode 100644
index 0a87ace34a57..000000000000
--- a/include/uapi/linux/types_32_64.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_TYPES_32_64_H
-#define _UAPI_LINUX_TYPES_32_64_H
-
-/*
- * linux/types_32_64.h
- *
- * Integer type declaration for pointers across 32-bit and 64-bit systems.
- *
- * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- */
-
-#ifdef __KERNEL__
-# include <linux/types.h>
-#else
-# include <stdint.h>
-#endif
-
-#include <asm/byteorder.h>
-
-#ifdef __BYTE_ORDER
-# if (__BYTE_ORDER == __BIG_ENDIAN)
-#  define LINUX_BYTE_ORDER_BIG_ENDIAN
-# else
-#  define LINUX_BYTE_ORDER_LITTLE_ENDIAN
-# endif
-#else
-# ifdef __BIG_ENDIAN
-#  define LINUX_BYTE_ORDER_BIG_ENDIAN
-# else
-#  define LINUX_BYTE_ORDER_LITTLE_ENDIAN
-# endif
-#endif
-
-#ifdef __LP64__
-# define LINUX_FIELD_u32_u64(field)			__u64 field
-# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v)	field = (intptr_t)v
-#else
-# ifdef LINUX_BYTE_ORDER_BIG_ENDIAN
-#  define LINUX_FIELD_u32_u64(field)	__u32 field ## _padding, field
-#  define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v)	\
-	field ## _padding = 0, field = (intptr_t)v
-# else
-#  define LINUX_FIELD_u32_u64(field)	__u32 field, field ## _padding
-#  define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v)	\
-	field = (intptr_t)v, field ## _padding = 0
-# endif
-#endif
-
-#endif /* _UAPI_LINUX_TYPES_32_64_H */

From 8a46580128a02bdc18d7dcc0cba19d3cea4fb9c4 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Mon, 9 Jul 2018 15:51:55 -0400
Subject: [PATCH 332/382] rseq/selftests: cleanup: Update comment above
 rseq_prepare_unload

rseq as it was merged does not have rseq_finish_*() in the user-space
selftests anymore. Update the rseq_prepare_unload() helper comment to
adapt to this reality.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-api@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Paul Turner <pjt@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Lameter <cl@linux.com>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180709195155.7654-7-mathieu.desnoyers@efficios.com
---
 tools/testing/selftests/rseq/rseq.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index f2073cfa4448..86ce22417e0d 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -143,12 +143,13 @@ static inline void rseq_clear_rseq_cs(void)
 }
 
 /*
- * rseq_prepare_unload() should be invoked by each thread using rseq_finish*()
- * at least once between their last rseq_finish*() and library unload of the
- * library defining the rseq critical section (struct rseq_cs). This also
- * applies to use of rseq in code generated by JIT: rseq_prepare_unload()
- * should be invoked at least once by each thread using rseq_finish*() before
- * reclaim of the memory holding the struct rseq_cs.
+ * rseq_prepare_unload() should be invoked by each thread executing a rseq
+ * critical section at least once between their last critical section and
+ * library unload of the library defining the rseq critical section
+ * (struct rseq_cs). This also applies to use of rseq in code generated by
+ * JIT: rseq_prepare_unload() should be invoked at least once by each
+ * thread executing a rseq critical section before reclaim of the memory
+ * holding the struct rseq_cs.
  */
 static inline void rseq_prepare_unload(void)
 {

From cea394772d3c41d04cb71a032f6ed878392bd134 Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Mon, 18 Jun 2018 14:33:03 +0100
Subject: [PATCH 333/382] ARM: 8775/1: NOMMU: Use instr_sync instead of plain
 isb in common code

Greg reported that commit 3c24121039c9d ("ARM: 8756/1: NOMMU: Postpone
MPU activation till __after_proc_init") is causing breakage for the
old Versatile platform in no-MMU mode (with out-of-tree patches):

  AS      arch/arm/kernel/head-nommu.o
arch/arm/kernel/head-nommu.S: Assembler messages:
arch/arm/kernel/head-nommu.S:180: Error: selected processor does not support `isb' in ARM mode
scripts/Makefile.build:417: recipe for target 'arch/arm/kernel/head-nommu.o' failed
make[2]: *** [arch/arm/kernel/head-nommu.o] Error 1
Makefile:1034: recipe for target 'arch/arm/kernel' failed
make[1]: *** [arch/arm/kernel] Error 2

Since the code is common for all NOMMU builds usage of the isb was a
bad idea (please, note that isb also used in MPU related code which is
fine because MPU has dependency on CPU_V7/CPU_V7M), instead use more
robust instr_sync assembler macro.

Fixes: 3c24121039c9 ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init")
Reported-by: Greg Ungerer <gerg@kernel.org>
Tested-by: Greg Ungerer <gerg@kernel.org>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/head-nommu.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index dd546d65a383..7a9b86978ee1 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -177,7 +177,7 @@ M_CLASS(streq	r3, [r12, #PMSAv8_MAIR1])
 	bic	r0, r0, #CR_I
 #endif
 	mcr	p15, 0, r0, c1, c0, 0		@ write control reg
-	isb
+	instr_sync
 #elif defined (CONFIG_CPU_V7M)
 #ifdef CONFIG_ARM_MPU
 	ldreq	r3, [r12, MPU_CTRL]

From e296701800f30d260a66f8aa1971b5b1bc3d2f81 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 11 Jul 2018 11:02:35 +0200
Subject: [PATCH 334/382] efi/x86: Fix mixed mode reboot loop by removing
 pointless call to PciIo->Attributes()

Hans de Goede reported that his mixed EFI mode Bay Trail tablet
would not boot at all any more, but enter a reboot loop without
any logs printed by the kernel.

Unbreak 64-bit Linux/x86 on 32-bit UEFI:

When it was first introduced, the EFI stub code that copies the
contents of PCI option ROMs originally only intended to do so if
the EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM attribute was *not* set.

The reason was that the UEFI spec permits PCI option ROM images
to be provided by the platform directly, rather than via the ROM
BAR, and in this case, the OS can only access them at runtime if
they are preserved at boot time by copying them from the areas
described by PciIo->RomImage and PciIo->RomSize.

However, it implemented this check erroneously, as can be seen in
commit:

  dd5fc854de5fd ("EFI: Stash ROMs if they're not in the PCI BAR")

which introduced:

    if (!attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM)
            continue;

and given that the numeric value of EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM
is 0x4000, this condition never becomes true, and so the option ROMs
were copied unconditionally.

This was spotted and 'fixed' by commit:

  886d751a2ea99a160 ("x86, efi: correct precedence of operators in setup_efi_pci")

but inadvertently inverted the logic at the same time, defeating
the purpose of the code, since it now only preserves option ROM
images that can be read from the ROM BAR as well.

Unsurprisingly, this broke some systems, and so the check was removed
entirely in the following commit:

  739701888f5d ("x86, efi: remove attribute check from setup_efi_pci")

It is debatable whether this check should have been included in the
first place, since the option ROM image provided to the UEFI driver by
the firmware may be different from the one that is actually present in
the card's flash ROM, and so whatever PciIo->RomImage points at should
be preferred regardless of whether the attribute is set.

As this was the only use of the attributes field, we can remove
the call to PciIo->Attributes() entirely, which is especially
nice because its prototype involves uint64_t type by-value
arguments which the EFI mixed mode has trouble dealing with.

Any mixed mode system with PCI is likely to be affected.

Tested-by: Wilfried Klaebe <linux-kernel@lebenslange-mailadresse.de>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180711090235.9327-2-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/compressed/eboot.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index e57665b4ba1c..e98522ea6f09 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -114,18 +114,12 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 	struct pci_setup_rom *rom = NULL;
 	efi_status_t status;
 	unsigned long size;
-	uint64_t attributes, romsize;
+	uint64_t romsize;
 	void *romimage;
 
-	status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
-				EfiPciIoAttributeOperationGet, 0ULL,
-				&attributes);
-	if (status != EFI_SUCCESS)
-		return status;
-
 	/*
-	 * Some firmware images contain EFI function pointers at the place where the
-	 * romimage and romsize fields are supposed to be. Typically the EFI
+	 * Some firmware images contain EFI function pointers at the place where
+	 * the romimage and romsize fields are supposed to be. Typically the EFI
 	 * code is mapped at high addresses, translating to an unrealistically
 	 * large romsize. The UEFI spec limits the size of option ROMs to 16
 	 * MiB so we reject any ROMs over 16 MiB in size to catch this.

From 877cc639686b68c7de179a485544f4761e376b30 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 8 May 2018 21:27:43 +0000
Subject: [PATCH 335/382] perf tools: Generate a Python script compatible with
 Python 2 and 3

When generating a Python script with "perf script -g python", produce
one that is compatible with Python 2 and 3. The difference between the
two generated scripts is:

  --- python2-perf-script.py	2018-05-08 15:35:00.865889705 -0400
  +++ python3-perf-script.py	2018-05-08 15:34:49.019789564 -0400
  @@ -7,6 +7,8 @@
   # be retrieved using Python functions of the form common_*(context).
   # See the perf-script-python Documentation for the list of available functions.

  +from __future__ import print_function
  +
   import os
   import sys

  @@ -18,10 +20,10 @@

   def trace_begin():
  -	print "in trace_begin"
  +	print("in trace_begin")

   def trace_end():
  -	print "in trace_end"
  +	print("in trace_end")

   def raw_syscalls__sys_enter(event_name, context, common_cpu,
   	common_secs, common_nsecs, common_pid, common_comm,
  @@ -29,26 +31,26 @@
   		print_header(event_name, common_cpu, common_secs, common_nsecs,
   			common_pid, common_comm)

  -		print "id=%d, args=%s" % \
  -		(id, args)
  +		print("id=%d, args=%s" % \
  +		(id, args))

  -		print 'Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'
  +		print('Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')

   		for node in common_callchain:
   			if 'sym' in node:
  -				print "\t[%x] %s" % (node['ip'], node['sym']['name'])
  +				print("\t[%x] %s" % (node['ip'], node['sym']['name']))
   			else:
  -				print "	[%x]" % (node['ip'])
  +				print("	[%x]" % (node['ip']))

  -		print "\n"
  +		print()

   def trace_unhandled(event_name, context, event_fields_dict, perf_sample_dict):
  -		print get_dict_as_string(event_fields_dict)
  -		print 'Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'
  +		print(get_dict_as_string(event_fields_dict))
  +		print('Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')

   def print_header(event_name, cpu, secs, nsecs, pid, comm):
  -	print "%-20s %5u %05u.%09u %8u %-20s " % \
  -	(event_name, cpu, secs, nsecs, pid, comm),
  +	print("%-20s %5u %05u.%09u %8u %-20s " % \
  +	(event_name, cpu, secs, nsecs, pid, comm), end="")

   def get_dict_as_string(a_dict, delimiter=' '):
   	return delimiter.join(['%s=%s'%(k,str(v))for k,v in sorted(a_dict.items())])

Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Herton Krzesinski <herton@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/0100016341a7278a-d178c724-2b0f-49ca-be93-80a7d51aaa0d-000000@email.amazonses.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripting-engines/trace-event-python.c    | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 46e9e19ab1ac..8b2eb6dbff4d 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1627,6 +1627,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "# See the perf-script-python Documentation for the list "
 		"of available functions.\n\n");
 
+	fprintf(ofp, "from __future__ import print_function\n\n");
 	fprintf(ofp, "import os\n");
 	fprintf(ofp, "import sys\n\n");
 
@@ -1636,10 +1637,10 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 	fprintf(ofp, "from Core import *\n\n\n");
 
 	fprintf(ofp, "def trace_begin():\n");
-	fprintf(ofp, "\tprint \"in trace_begin\"\n\n");
+	fprintf(ofp, "\tprint(\"in trace_begin\")\n\n");
 
 	fprintf(ofp, "def trace_end():\n");
-	fprintf(ofp, "\tprint \"in trace_end\"\n\n");
+	fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
 
 	while ((event = trace_find_next_event(pevent, event))) {
 		fprintf(ofp, "def %s__%s(", event->system, event->name);
@@ -1675,7 +1676,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 			"common_secs, common_nsecs,\n\t\t\t"
 			"common_pid, common_comm)\n\n");
 
-		fprintf(ofp, "\t\tprint \"");
+		fprintf(ofp, "\t\tprint(\"");
 
 		not_first = 0;
 		count = 0;
@@ -1736,31 +1737,31 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
 				fprintf(ofp, "%s", f->name);
 		}
 
-		fprintf(ofp, ")\n\n");
+		fprintf(ofp, "))\n\n");
 
-		fprintf(ofp, "\t\tprint 'Sample: {'+"
-			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+		fprintf(ofp, "\t\tprint('Sample: {'+"
+			"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
 
 		fprintf(ofp, "\t\tfor node in common_callchain:");
 		fprintf(ofp, "\n\t\t\tif 'sym' in node:");
-		fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
 		fprintf(ofp, "\n\t\t\telse:");
-		fprintf(ofp, "\n\t\t\t\tprint \"\t[%%x]\" %% (node['ip'])\n\n");
-		fprintf(ofp, "\t\tprint \"\\n\"\n\n");
+		fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
+		fprintf(ofp, "\t\tprint()\n\n");
 
 	}
 
 	fprintf(ofp, "def trace_unhandled(event_name, context, "
 		"event_fields_dict, perf_sample_dict):\n");
 
-	fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n");
-	fprintf(ofp, "\t\tprint 'Sample: {'+"
-		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n");
+	fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n");
+	fprintf(ofp, "\t\tprint('Sample: {'+"
+		"get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
 
 	fprintf(ofp, "def print_header("
 		"event_name, cpu, secs, nsecs, pid, comm):\n"
-		"\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
-		"(event_name, cpu, secs, nsecs, pid, comm),\n\n");
+		"\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+		"(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n");
 
 	fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
 		"\treturn delimiter.join"

From 770d2f86c0051d4f2c0ab9d74d68434cb383241d Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 8 May 2018 21:27:45 +0000
Subject: [PATCH 336/382] perf scripts python: Add Python 3 support to Core.py

Support both Python 2 and Python 3 in Core.py. This should have no
functional change.

Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Herton Krzesinski <herton@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/0100016341a72ebe-e572899e-f445-4765-98f0-c314935727f9-000000@email.amazonses.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../Perf-Trace-Util/lib/Perf/Trace/Core.py    | 38 ++++++++-----------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
index 38dfb720fb6f..54ace2f6bc36 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -31,10 +31,8 @@ def flag_str(event_name, field_name, value):
     string = ""
 
     if flag_fields[event_name][field_name]:
-	print_delim = 0
-        keys = flag_fields[event_name][field_name]['values'].keys()
-        keys.sort()
-        for idx in keys:
+        print_delim = 0
+        for idx in sorted(flag_fields[event_name][field_name]['values']):
             if not value and not idx:
                 string += flag_fields[event_name][field_name]['values'][idx]
                 break
@@ -51,14 +49,12 @@ def symbol_str(event_name, field_name, value):
     string = ""
 
     if symbolic_fields[event_name][field_name]:
-        keys = symbolic_fields[event_name][field_name]['values'].keys()
-        keys.sort()
-        for idx in keys:
+        for idx in sorted(symbolic_fields[event_name][field_name]['values']):
             if not value and not idx:
-		string = symbolic_fields[event_name][field_name]['values'][idx]
+                string = symbolic_fields[event_name][field_name]['values'][idx]
                 break
-	    if (value == idx):
-		string = symbolic_fields[event_name][field_name]['values'][idx]
+            if (value == idx):
+                string = symbolic_fields[event_name][field_name]['values'][idx]
                 break
 
     return string
@@ -74,19 +70,17 @@ def trace_flag_str(value):
     string = ""
     print_delim = 0
 
-    keys = trace_flags.keys()
+    for idx in trace_flags:
+        if not value and not idx:
+            string += "NONE"
+            break
 
-    for idx in keys:
-	if not value and not idx:
-	    string += "NONE"
-	    break
-
-	if idx and (value & idx) == idx:
-	    if print_delim:
-		string += " | ";
-	    string += trace_flags[idx]
-	    print_delim = 1
-	    value &= ~idx
+        if idx and (value & idx) == idx:
+            if print_delim:
+                string += " | ";
+            string += trace_flags[idx]
+            print_delim = 1
+            value &= ~idx
 
     return string
 

From 2ab89262ff8895b8476b97345507c676fe3081fa Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 8 May 2018 21:27:45 +0000
Subject: [PATCH 337/382] perf scripts python: Add Python 3 support to
 SchedGui.py

Fix a single syntax error in SchedGui.py to support both Python 2 and
Python 3. This should have no functional change.

Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Herton Krzesinski <herton@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/0100016341a72d26-75729663-fe55-4309-8c9b-302e065ed2f1-000000@email.amazonses.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
index fdd92f699055..cac7b2542ee8 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -11,7 +11,7 @@
 try:
 	import wx
 except ImportError:
-	raise ImportError, "You need to install the wxpython lib for this script"
+	raise ImportError("You need to install the wxpython lib for this script")
 
 
 class RootFrame(wx.Frame):

From c45b168effb12719f6dfc8d2c20ba8c057e8c16b Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 8 May 2018 21:27:46 +0000
Subject: [PATCH 338/382] perf scripts python: Add Python 3 support to Util.py

Support both Python 2 and Python 3 in Util.py. The dict class no longer
has a ``has_key`` method and print is now a function rather than a
statement. This should have no functional change.

Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Herton Krzesinski <herton@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/0100016341a730c6-8db8b9b1-da2d-4ee3-96bf-47e0ae9796bd-000000@email.amazonses.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../python/Perf-Trace-Util/lib/Perf/Trace/Util.py     | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
index f6c84966e4f8..7384dcb628c4 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -5,6 +5,7 @@
 # This software may be distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
 # Foundation.
+from __future__ import print_function
 
 import errno, os
 
@@ -33,7 +34,7 @@ def nsecs_str(nsecs):
     return str
 
 def add_stats(dict, key, value):
-	if not dict.has_key(key):
+	if key not in dict:
 		dict[key] = (value, value, value, 1)
 	else:
 		min, max, avg, count = dict[key]
@@ -72,10 +73,10 @@ try:
 except:
 	if not audit_package_warned:
 		audit_package_warned = True
-		print "Install the audit-libs-python package to get syscall names.\n" \
-                    "For example:\n  # apt-get install python-audit (Ubuntu)" \
-                    "\n  # yum install audit-libs-python (Fedora)" \
-                    "\n  etc.\n"
+		print("Install the audit-libs-python package to get syscall names.\n"
+                    "For example:\n  # apt-get install python-audit (Ubuntu)"
+                    "\n  # yum install audit-libs-python (Fedora)"
+                    "\n  etc.\n")
 
 def syscall_name(id):
 	try:

From 8c1c1ab2d2a77cb50841822168e56d11c4ebfd6e Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 8 May 2018 21:27:47 +0000
Subject: [PATCH 339/382] perf scripts python: Add Python 3 support to
 sched-migration.py

Support both Python 2 and Python 3 in the sched-migration.py script.
This should have no functional change.

Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Herton Krzesinski <herton@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/0100016341a737a5-44ec436f-3440-4cac-a03f-ddfa589bf308-000000@email.amazonses.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/sched-migration.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
index de66cb3b72c9..3473e7f66081 100644
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -9,13 +9,17 @@
 # This software is distributed under the terms of the GNU General
 # Public License ("GPL") version 2 as published by the Free Software
 # Foundation.
-
+from __future__ import print_function
 
 import os
 import sys
 
 from collections import defaultdict
-from UserList import UserList
+try:
+    from UserList import UserList
+except ImportError:
+    # Python 3: UserList moved to the collections package
+    from collections import UserList
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
 	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -300,7 +304,7 @@ class TimeSliceList(UserList):
 		if i == -1:
 			return
 
-		for i in xrange(i, len(self.data)):
+		for i in range(i, len(self.data)):
 			timeslice = self.data[i]
 			if timeslice.start > end:
 				return
@@ -336,8 +340,8 @@ class SchedEventProxy:
 		on_cpu_task = self.current_tsk[headers.cpu]
 
 		if on_cpu_task != -1 and on_cpu_task != prev_pid:
-			print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
-				(headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
+			print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
+				headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
 
 		threads[prev_pid] = prev_comm
 		threads[next_pid] = next_comm

From 12aa6c7389a321b4b5d921f89c3f83b9750598f7 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jeremy@jcline.org>
Date: Tue, 8 May 2018 21:27:48 +0000
Subject: [PATCH 340/382] perf scripts python: Add Python 3 support to
 EventClass.py

Support both Python 2 and Python 3 in EventClass.py. ``print`` is now a
function rather than a statement. This should have no functional change.

Signed-off-by: Jeremy Cline <jeremy@jcline.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Herton Krzesinski <herton@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/0100016341a73aac-e0734bdc-dcab-4c61-8333-d8be97524aa0-000000@email.amazonses.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py       | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
index 81a56cd2b3c1..21a7a1298094 100755
--- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
@@ -8,6 +8,7 @@
 # PerfEvent is the base class for all perf event sample, PebsEvent
 # is a HW base Intel x86 PEBS event, and user could add more SW/HW
 # event classes based on requirements.
+from __future__ import print_function
 
 import struct
 
@@ -44,7 +45,8 @@ class PerfEvent(object):
                 PerfEvent.event_num += 1
 
         def show(self):
-                print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso)
+                print("PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" %
+                      (self.name, self.symbol, self.comm, self.dso))
 
 #
 # Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer

From 508ef3e737bbb9858a30e7bb153507429ea89644 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Fri, 29 Jun 2018 12:46:33 -0500
Subject: [PATCH 341/382] perf test shell: Replace '|&' with '2>&1 |' to work
 with more shells

Since we do not specify bash (and/or zsh) as a requirement, use the
standard error redirection that is more widely supported.

BEFORE:

 $ sudo perf test -v 62
 62: Check open filename arg using perf trace + vfs_getname:
 --- start ---
 test child forked, pid 27305
 ./tests/shell/trace+probe_vfs_getname.sh: 20: ./tests/shell/trace+probe_vfs_getname.sh: Syntax error: "&" unexpected
 test child finished with -2
 ---- end ----
 Check open filename arg using perf trace + vfs_getname: Skip

AFTER:

 $ sudo perf test -v 62
 64: Check open filename arg using perf trace + vfs_getname               :
 --- start ---
 test child forked, pid 23008
 Added new event:
   probe:vfs_getname    (on getname_flags:72 with pathname=result->name:string)

 You can now use it in all perf tools, such as:

         perf record -e probe:vfs_getname -aR sleep 1

      0.361 ( 0.008 ms): touch/23032 openat(dfd: CWD, filename: /tmp/temporary_file.VEh0n, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 4
 test child finished with 0
 ---- end ----
 Check open filename arg using perf trace + vfs_getname: Ok

Similar to commit 35435cd06081, with the same title.

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20180629124633.0a9f4bea54b8d2c28f265de2@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/trace+probe_vfs_getname.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 55ad9793d544..4ce276efe6b4 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -17,7 +17,7 @@ skip_if_no_perf_probe || exit 2
 file=$(mktemp /tmp/temporary_file.XXXXX)
 
 trace_open_vfs_getname() {
-	evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+	evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
 	perf trace -e $evts touch $file 2>&1 | \
 	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }

From 98c6c8a1d00fba66625311730f80c6ceaa9c1d19 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Fri, 29 Jun 2018 12:46:43 -0500
Subject: [PATCH 342/382] perf test shell: Make perf's inet_pton test more
 portable

Debian based systems such as Ubuntu have dash as their default shell.
Even if the normal or root user's shell is bash, certain scripts still
call /bin/sh, which points to dash, so we fix this perf test by
rewriting it in a more portable way.

BEFORE:

 $ sudo perf test -v 64
 64: probe libc's inet_pton & backtrace it with ping       :
 --- start ---
 test child forked, pid 31942
 ./tests/shell/record+probe_libc_inet_pton.sh: 18: ./tests/shell/record+probe_libc_inet_pton.sh: expected[0]=ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\): not found
 ./tests/shell/record+probe_libc_inet_pton.sh: 19: ./tests/shell/record+probe_libc_inet_pton.sh: expected[1]=.*inet_pton\+0x[[:xdigit:]]+[[:space:]]\(/lib/x86_64-linux-gnu/libc-2.27.so|inlined\)$: not found
 ./tests/shell/record+probe_libc_inet_pton.sh: 29: ./tests/shell/record+probe_libc_inet_pton.sh: expected[2]=getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\(/lib/x86_64-linux-gnu/libc-2.27.so\)$: not found
 ./tests/shell/record+probe_libc_inet_pton.sh: 30: ./tests/shell/record+probe_libc_inet_pton.sh: expected[3]=.*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$: not found
 ping 31963 [004] 83577.670613: probe_libc:inet_pton: (7fe15f87f4b0)
 ./tests/shell/record+probe_libc_inet_pton.sh: 39: ./tests/shell/record+probe_libc_inet_pton.sh: Bad substitution
 ./tests/shell/record+probe_libc_inet_pton.sh: 41: ./tests/shell/record+probe_libc_inet_pton.sh: Bad substitution
 test child finished with -2
 ---- end ----
 probe libc's inet_pton & backtrace it with ping: Skip

AFTER:

 $ sudo perf test -v 64
 64: probe libc's inet_pton & backtrace it with ping       :
 --- start ---
 test child forked, pid 32277
 ping 32295 [001] 83679.690020: probe_libc:inet_pton: (7ff244f504b0)
 7ff244f504b0 __GI___inet_pton+0x0 (/lib/x86_64-linux-gnu/libc-2.27.so)
 7ff244f14ce4 getaddrinfo+0x124 (/lib/x86_64-linux-gnu/libc-2.27.so)
 556ac036b57d _init+0xb75 (/bin/ping)
 test child finished with 0
 ---- end ----
 probe libc's inet_pton & backtrace it with ping: Ok

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20180629124643.2089b3ce59960eba34e87b27@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../shell/record+probe_libc_inet_pton.sh      | 37 +++++++++++--------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 263057039693..94e513e62b34 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -14,35 +14,40 @@ libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1
 nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254
 
 trace_libc_inet_pton_backtrace() {
-	idx=0
-	expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)"
-	expected[1]=".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
+
+	expected=`mktemp -u /tmp/expected.XXX`
+
+	echo "ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" > $expected
+	echo ".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
 	case "$(uname -m)" in
 	s390x)
 		eventattr='call-graph=dwarf,max-stack=4'
-		expected[2]="gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
-		expected[3]="(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$"
-		expected[4]="main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
+		echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+		echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+		echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	*)
 		eventattr='max-stack=3'
-		expected[2]="getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$"
-		expected[3]=".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$"
+		echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+		echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	esac
 
-	file=`mktemp -u /tmp/perf.data.XXX`
+	perf_data=`mktemp -u /tmp/perf.data.XXX`
+	perf_script=`mktemp -u /tmp/perf.script.XXX`
+	perf record -e probe_libc:inet_pton/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1
+	perf script -i $perf_data > $perf_script
 
-	perf record -e probe_libc:inet_pton/$eventattr/ -o $file ping -6 -c 1 ::1 > /dev/null 2>&1
-	perf script -i $file | while read line ; do
+	exec 3<$perf_script
+	exec 4<$expected
+	while read line <&3 && read -r pattern <&4; do
+		[ -z "$pattern" ] && break
 		echo $line
-		echo "$line" | egrep -q "${expected[$idx]}"
+		echo "$line" | egrep -q "$pattern"
 		if [ $? -ne 0 ] ; then
-			printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line"
+			printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line"
 			exit 1
 		fi
-		let idx+=1
-		[ -z "${expected[$idx]}" ] && break
 	done
 
 	# If any statements are executed from this point onwards,
@@ -58,6 +63,6 @@ skip_if_no_perf_probe && \
 perf probe -q $libc inet_pton && \
 trace_libc_inet_pton_backtrace
 err=$?
-rm -f ${file}
+rm -f ${perf_data} ${perf_script} ${expected}
 perf probe -q -d probe_libc:inet_pton
 exit $err

From f6432b9f65001651412dbc3589d251534822d4ab Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Fri, 29 Jun 2018 12:46:52 -0500
Subject: [PATCH 343/382] perf llvm-utils: Remove bashism from kernel include
 fetch script

Like system(), popen() calls /bin/sh, which may/may not be bash.

Script when run on dash and encounters the line, yields:

 exit: Illegal number: -1

checkbashisms report on script content:

 possible bashism (exit|return with negative status code):
 exit -1

Remove the bashism and use the more portable non-zero failure
status code 1.

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20180629124652.8d0af7e2281fd3fd8262cacc@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/llvm-utils.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 976e658e38dc..5e94857dfca2 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -266,16 +266,16 @@ static const char *kinc_fetch_script =
 "#!/usr/bin/env sh\n"
 "if ! test -d \"$KBUILD_DIR\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
 "then\n"
-"	exit -1\n"
+"	exit 1\n"
 "fi\n"
 "TMPDIR=`mktemp -d`\n"
 "if test -z \"$TMPDIR\"\n"
 "then\n"
-"    exit -1\n"
+"    exit 1\n"
 "fi\n"
 "cat << EOF > $TMPDIR/Makefile\n"
 "obj-y := dummy.o\n"

From db8fec583f250557ddd6def1505a6c466c9747aa Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@arm.com>
Date: Fri, 29 Jun 2018 12:46:58 -0500
Subject: [PATCH 344/382] perf test shell: Prevent temporary editor files from
 being considered test scripts

Allows a perf shell test developer to concurrently edit and run their
test scripts, avoiding perf test attempts to execute their editor
temporary files, such as seen here:

 $ sudo taskset -c 0 ./perf test -vvvvvvvv -F 63
 63: 0VIM 8.0                                              :
 --- start ---
 sh: 1: ./tests/shell/.record+probe_libc_inet_pton.sh.swp: Permission denied
 ---- end ----
 0VIM 8.0: FAILED!

Signed-off-by: Kim Phillips <kim.phillips@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20180629124658.15a506b41fc4539c08eb9426@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 2bde505e2e7e..dd850a26d579 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -422,7 +422,7 @@ static const char *shell_test__description(char *description, size_t size,
 
 #define for_each_shell_test(dir, base, ent)	\
 	while ((ent = readdir(dir)) != NULL)	\
-		if (!is_directory(base, ent))
+		if (!is_directory(base, ent) && ent->d_name[0] != '.')
 
 static const char *shell_tests__dir(char *path, size_t size)
 {

From a09603f851045b031e990d2d663958ccb49db525 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 2 Jul 2018 15:42:01 +0200
Subject: [PATCH 345/382] perf tools: Fix compilation errors on gcc8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We are getting following warnings on gcc8 that break compilation:

  $ make
    CC       jvmti/jvmti_agent.o
  jvmti/jvmti_agent.c: In function ‘jvmti_open’:
  jvmti/jvmti_agent.c:252:35: error: ‘/jit-’ directive output may be truncated \
    writing 5 bytes into a region of size between 1 and 4096 [-Werror=format-truncation=]
    snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());

There's no point in checking the result of snprintf call in
jvmti_open, the following open call will fail in case the
name is mangled or too long.

Using tools/lib/ function scnprintf that touches the return value from
the snprintf() calls and thus get rid of those warnings.

  $ make DEBUG=1
    CC       arch/x86/util/perf_regs.o
  arch/x86/util/perf_regs.c: In function ‘arch_sdt_arg_parse_op’:
  arch/x86/util/perf_regs.c:229:4: error: ‘strncpy’ output truncated before terminating nul
  copying 2 bytes from a string of the same length [-Werror=stringop-truncation]
    strncpy(prefix, "+0", 2);
    ^~~~~~~~~~~~~~~~~~~~~~~~

Using scnprintf instead of the strncpy (which we know is safe in here)
to get rid of that warning.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180702134202.17745-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/util/perf_regs.c | 2 +-
 tools/perf/jvmti/jvmti_agent.c       | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 4b2caf6d48e7..fead6b3b4206 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -226,7 +226,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
 		else if (rm[2].rm_so != rm[2].rm_eo)
 			prefix[0] = '+';
 		else
-			strncpy(prefix, "+0", 2);
+			scnprintf(prefix, sizeof(prefix), "+0");
 	}
 
 	/* Rename register */
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index 0c6d1002b524..ac1bcdc17dae 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -35,6 +35,7 @@
 #include <sys/mman.h>
 #include <syscall.h> /* for gettid() */
 #include <err.h>
+#include <linux/kernel.h>
 
 #include "jvmti_agent.h"
 #include "../util/jitdump.h"
@@ -249,7 +250,7 @@ void *jvmti_open(void)
 	/*
 	 * jitdump file name
 	 */
-	snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
 
 	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
 	if (fd == -1)

From c818cc063089e78bc31845d1eb6a1f0bedee1eae Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 2 Jul 2018 15:42:02 +0200
Subject: [PATCH 346/382] perf stat: Fix --interval_clear option

Currently we display extra header line, like:

  # perf stat -I 1000 -a --interval-clear
  #           time             counts unit events
         insn per cycle branch-misses of all branches
       2.964917103        3855.349912      cpu-clock (msec)          #    3.855 CPUs utilized
       2.964917103             23,993      context-switches          #    0.006 M/sec
       2.964917103              1,301      cpu-migrations            #    0.329 K/sec
       ...

Fixing the condition and getting proper:

  # perf stat -I 1000 -a --interval-clear
  #           time             counts unit events
       2.359048938        1432.492228      cpu-clock (msec)          #    1.432 CPUs utilized
       2.359048938              7,613      context-switches          #    0.002 M/sec
       2.359048938                419      cpu-migrations            #    0.133 K/sec
       ...

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: 9660e08ee8cb ("perf stat: Add --interval-clear option")
Link: http://lkml.kernel.org/r/20180702134202.17745-2-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 22547a490e1f..05be023c3f0e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1742,7 +1742,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 		}
 	}
 
-	if ((num_print_interval == 0 && metric_only) || interval_clear)
+	if ((num_print_interval == 0 || interval_clear) && metric_only)
 		print_metric_headers(" ", true);
 	if (++num_print_interval == 25)
 		num_print_interval = 0;

From db0ba84c04ef2cf293aaada5ae97531127844d9d Mon Sep 17 00:00:00 2001
From: Janne Huttunen <janne.huttunen@nokia.com>
Date: Mon, 9 Jul 2018 13:59:50 +0300
Subject: [PATCH 347/382] perf script python: Fix dict reference counting

The dictionaries are attached to the parameter tuple that steals the
references and takes care of releasing them when appropriate.  The code
should not decrement the reference counts explicitly.  E.g. if libpython
has been built with reference debugging enabled, the superfluous DECREFs
will trigger this error when running perf script:

  Fatal Python error: Objects/tupleobject.c:238 object at
  0x7f10f2041b40 has negative ref count -1
  Aborted (core dumped)

If the reference debugging is not enabled, the superfluous DECREFs might
cause the dict objects to be silently released while they are still in
use. This may trigger various other assertions or just cause perf
crashes and/or weird and unexpected data changes in the stored Python
objects.

Signed-off-by: Janne Huttunen <janne.huttunen@nokia.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jaroslav Skarvada <jskarvad@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1531133990-17485-1-git-send-email-janne.huttunen@nokia.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/scripting-engines/trace-event-python.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 8b2eb6dbff4d..bc32e57d17be 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -908,14 +908,11 @@ static void python_process_tracepoint(struct perf_sample *sample,
 	if (_PyTuple_Resize(&t, n) == -1)
 		Py_FatalError("error resizing Python tuple");
 
-	if (!dict) {
+	if (!dict)
 		call_object(handler, t, handler_name);
-	} else {
+	else
 		call_object(handler, t, default_handler_name);
-		Py_DECREF(dict);
-	}
 
-	Py_XDECREF(all_entries_dict);
 	Py_DECREF(t);
 }
 
@@ -1235,7 +1232,6 @@ static void python_process_general_event(struct perf_sample *sample,
 
 	call_object(handler, t, handler_name);
 
-	Py_DECREF(dict);
 	Py_DECREF(t);
 }
 

From 32aa928a7b817140c84987b726d5014911808fa4 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Tue, 10 Jul 2018 11:46:12 -0400
Subject: [PATCH 348/382] perf tools: Use python-config --includes rather than
 --cflags

Builds started failing in Fedora on Python 3.7 with:

    `.gnu.debuglto_.debug_macro' referenced in section
    `.gnu.debuglto_.debug_macro' of
    util/scripting-engines/trace-event-python.o: defined in discarded
    section

In Fedora, Python 3.7 added -flto to the list of --cflags and since it
was only applied to util/scripting-engines/trace-event-python.c and
scripts/python/Perf-Trace-Util/Context.c, linking failed.

It's not the first time the addition of flags has broken builds: commit
c6707fdef7e2 ("perf tools: Fix up build in hardnened environments")
appears to have fixed a similar problem. "python-config --includes"
provides the proper -I flags and doesn't introduce additional CFLAGS.

Signed-off-by: Jeremy Cline <jcline@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180710154612.6285-1-jcline@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index b5ac356ba323..f5a3b402589e 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -207,8 +207,7 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
-  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
+  PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 

From 70dbcc2254fa2a9add74a122b9dac954c4736e01 Mon Sep 17 00:00:00 2001
From: Tony Battersby <tonyb@cybernetics.com>
Date: Wed, 11 Jul 2018 10:46:03 -0400
Subject: [PATCH 349/382] bsg: fix bogus EINVAL on non-data commands

Fix a regression introduced in Linux kernel 4.17 where sending a SCSI
command that does not transfer data (such as TEST UNIT READY) via
/dev/bsg/* results in EINVAL.

Fixes: 17cb960f29c2 ("bsg: split handling of SCSI CDBs vs transport requeues")
Cc: <stable@vger.kernel.org> # 4.17+
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bsg.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index 66602c489956..3da540faf673 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -267,8 +267,6 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
 	} else if (hdr->din_xfer_len) {
 		ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp),
 				hdr->din_xfer_len, GFP_KERNEL);
-	} else {
-		ret = blk_rq_map_user(q, rq, NULL, NULL, 0, GFP_KERNEL);
 	}
 
 	if (ret)

From 2fd8eb4ad87104c54800ef3cea498c92eb15c78a Mon Sep 17 00:00:00 2001
From: Yandong Zhao <yandong77520@gmail.com>
Date: Wed, 11 Jul 2018 19:06:28 +0800
Subject: [PATCH 350/382] arm64: neon: Fix function may_use_simd() return error
 status

It does not matter if the caller of may_use_simd() migrates to
another cpu after the call, but it is still important that the
kernel_neon_busy percpu instance that is read matches the cpu the
task is running on at the time of the read.

This means that raw_cpu_read() is not sufficient.  kernel_neon_busy
may appear true if the caller migrates during the execution of
raw_cpu_read() and the next task to be scheduled in on the initial
cpu calls kernel_neon_begin().

This patch replaces raw_cpu_read() with this_cpu_read() to protect
against this race.

Cc: <stable@vger.kernel.org>
Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON")
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Yandong Zhao <yandong77520@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/simd.h | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index fa8b3fe932e6..6495cc51246f 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -29,20 +29,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy);
 static __must_check inline bool may_use_simd(void)
 {
 	/*
-	 * The raw_cpu_read() is racy if called with preemption enabled.
-	 * This is not a bug: kernel_neon_busy is only set when
-	 * preemption is disabled, so we cannot migrate to another CPU
-	 * while it is set, nor can we migrate to a CPU where it is set.
-	 * So, if we find it clear on some CPU then we're guaranteed to
-	 * find it clear on any CPU we could migrate to.
-	 *
-	 * If we are in between kernel_neon_begin()...kernel_neon_end(),
-	 * the flag will be set, but preemption is also disabled, so we
-	 * can't migrate to another CPU and spuriously see it become
-	 * false.
+	 * kernel_neon_busy is only set while preemption is disabled,
+	 * and is clear whenever preemption is enabled. Since
+	 * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy
+	 * cannot change under our feet -- if it's set we cannot be
+	 * migrated, and if it's clear we cannot be migrated to a CPU
+	 * where it is set.
 	 */
 	return !in_irq() && !irqs_disabled() && !in_nmi() &&
-		!raw_cpu_read(kernel_neon_busy);
+		!this_cpu_read(kernel_neon_busy);
 }
 
 #else /* ! CONFIG_KERNEL_MODE_NEON */

From ee6581ceba7f8314b81b2f2a81f1cf3f67c679e2 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 11 Jul 2018 10:10:11 -0700
Subject: [PATCH 351/382] nfit: fix unchecked dereference in acpi_nfit_ctl

Incremental patch to fix the unchecked dereference in acpi_nfit_ctl.
Reported by Dan Carpenter:

"acpi/nfit: fix cmd_rc for acpi_nfit_ctl to
always return a value" from Jun 28, 2018, leads to the following
Smatch complaint:

    drivers/acpi/nfit/core.c:578 acpi_nfit_ctl()
     warn: variable dereferenced before check 'cmd_rc' (see line 411)

drivers/acpi/nfit/core.c
   410
   411		*cmd_rc = -EINVAL;
                ^^^^^^^^^^^^^^^^^^
Patch adds unchecked dereference.

Fixes: c1985cefd844 ("acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always return a value")

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/acpi/nfit/core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index b8040fed2a69..7c479002e798 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -408,7 +408,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	const guid_t *guid;
 	int rc, i;
 
-	*cmd_rc = -EINVAL;
+	if (cmd_rc)
+		*cmd_rc = -EINVAL;
 	func = cmd;
 	if (cmd == ND_CMD_CALL) {
 		call_pkg = buf;
@@ -519,7 +520,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		 * If we return an error (like elsewhere) then caller wouldn't
 		 * be able to rely upon data returned to make calculation.
 		 */
-		*cmd_rc = 0;
+		if (cmd_rc)
+			*cmd_rc = 0;
 		return 0;
 	}
 

From 498e8bf51c633cc4496343e6113f340f8e9301ae Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Fri, 6 Jul 2018 23:50:06 +0300
Subject: [PATCH 352/382] sample: vfio-mdev: avoid deadlock in mdev_access()

mdev_access() calls mbochs_get_page() with mdev_state->ops_lock held,
while mbochs_get_page() locks the mutex by itself.
It leads to unavoidable deadlock.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mbochs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index d5d5a499160c..2535c3677c7b 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -178,6 +178,8 @@ static const char *vbe_name(u32 index)
 	return "(invalid)";
 }
 
+static struct page *__mbochs_get_page(struct mdev_state *mdev_state,
+				      pgoff_t pgoff);
 static struct page *mbochs_get_page(struct mdev_state *mdev_state,
 				    pgoff_t pgoff);
 
@@ -394,7 +396,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count,
 		   MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) {
 		pos -= MBOCHS_MMIO_BAR_OFFSET;
 		poff = pos & ~PAGE_MASK;
-		pg = mbochs_get_page(mdev_state, pos >> PAGE_SHIFT);
+		pg = __mbochs_get_page(mdev_state, pos >> PAGE_SHIFT);
 		map = kmap(pg);
 		if (is_write)
 			memcpy(map + poff, buf, count);

From 0fc8c3581dd42bc8f530314ca86db2d861485731 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 9 Jul 2018 16:19:06 +0200
Subject: [PATCH 353/382] tracing/kprobe: Release kprobe print_fmt properly

We don't release tk->tp.call.print_fmt when destroying
local uprobe. Also there's missing print_fmt kfree in
create_local_trace_kprobe error path.

Link: http://lkml.kernel.org/r/20180709141906.2390-1-jolsa@kernel.org

Cc: stable@vger.kernel.org
Fixes: e12f03d7031a ("perf/core: Implement the 'perf_kprobe' PMU")
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_kprobe.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index daa81571b22a..21f718472942 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1480,8 +1480,10 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
 	}
 
 	ret = __register_trace_kprobe(tk);
-	if (ret < 0)
+	if (ret < 0) {
+		kfree(tk->tp.call.print_fmt);
 		goto error;
+	}
 
 	return &tk->tp.call;
 error:
@@ -1501,6 +1503,8 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call)
 	}
 
 	__unregister_trace_kprobe(tk);
+
+	kfree(tk->tp.call.print_fmt);
 	free_trace_kprobe(tk);
 }
 #endif /* CONFIG_PERF_EVENTS */

From d63c46734c545ad0488761059004a65c46efdde3 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalheib1@gmail.com>
Date: Tue, 10 Jul 2018 11:56:50 +0300
Subject: [PATCH 354/382] RDMA/mlx5: Fix memory leak in mlx5_ib_create_srq()
 error path

Fix memory leak in the error path of mlx5_ib_create_srq() by making sure
to free the allocated srq.

Fixes: c2b37f76485f ("IB/mlx5: Fix integer overflows in mlx5_ib_create_srq")
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/srq.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 0af7b7905550..f5de5adc9b1a 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -266,18 +266,24 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 
 	desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
 		    srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
-	if (desc_size == 0 || srq->msrq.max_gs > desc_size)
-		return ERR_PTR(-EINVAL);
+	if (desc_size == 0 || srq->msrq.max_gs > desc_size) {
+		err = -EINVAL;
+		goto err_srq;
+	}
 	desc_size = roundup_pow_of_two(desc_size);
 	desc_size = max_t(size_t, 32, desc_size);
-	if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg))
-		return ERR_PTR(-EINVAL);
+	if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) {
+		err = -EINVAL;
+		goto err_srq;
+	}
 	srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
 		sizeof(struct mlx5_wqe_data_seg);
 	srq->msrq.wqe_shift = ilog2(desc_size);
 	buf_size = srq->msrq.max * desc_size;
-	if (buf_size < desc_size)
-		return ERR_PTR(-EINVAL);
+	if (buf_size < desc_size) {
+		err = -EINVAL;
+		goto err_srq;
+	}
 	in.type = init_attr->srq_type;
 
 	if (pd->uobject)

From b4c7e2bd2eb4764afe3af9409ff3b1b87116fa30 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 10 Jul 2018 08:22:40 +0100
Subject: [PATCH 355/382] ARM: 8780/1: ftrace: Only set kernel memory back to
 read-only after boot

Dynamic ftrace requires modifying the code segments that are usually
set to read-only. To do this, a per arch function is called both before
and after the ftrace modifications are performed. The "before" function
will set kernel code text to read-write to allow for ftrace to make the
modifications, and the "after" function will set the kernel code text
back to "read-only" to keep the kernel code text protected.

The issue happens when dynamic ftrace is tested at boot up. The test is
done before the kernel code text has been set to read-only. But the
"before" and "after" calls are still performed. The "after" call will
change the kernel code text to read-only prematurely, and other boot
code that expects this code to be read-write will fail.

The solution is to add a variable that is set when the kernel code text
is expected to be converted to read-only, and make the ftrace "before"
and "after" calls do nothing if that variable is not yet set. This is
similar to the x86 solution from commit 162396309745 ("ftrace, x86:
make kernel text writable only for conversions").

Link: http://lkml.kernel.org/r/20180620212906.24b7b66e@vmware.local.home

Reported-by: Stefan Agner <stefan@agner.ch>
Tested-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/init.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index c186474422f3..0cc8e04295a4 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -736,20 +736,29 @@ static int __mark_rodata_ro(void *unused)
 	return 0;
 }
 
+static int kernel_set_to_readonly __read_mostly;
+
 void mark_rodata_ro(void)
 {
+	kernel_set_to_readonly = 1;
 	stop_machine(__mark_rodata_ro, NULL, NULL);
 	debug_checkwx();
 }
 
 void set_kernel_text_rw(void)
 {
+	if (!kernel_set_to_readonly)
+		return;
+
 	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false,
 				current->active_mm);
 }
 
 void set_kernel_text_ro(void)
 {
+	if (!kernel_set_to_readonly)
+		return;
+
 	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
 				current->active_mm);
 }

From dad59262b79b0588cfeeda19fd88307e3e91c0ea Mon Sep 17 00:00:00 2001
From: Alastair Bridgewater <alastair.bridgewater@gmail.com>
Date: Wed, 11 Jul 2018 18:09:45 -0400
Subject: [PATCH 356/382] ALSA: hda/ca0132: Add Recon3Di quirk for Gigabyte
 G1.Sniper Z97

These motherboards have Sound Core3D and apparently "support"
Recon3Di.  Added to the quirk list as QUIRK_R3DI.

Issue report, PCI Subsystem ID, and testing by a contributor on
IRC who wished to remain anonymous.

Signed-off-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>
Reviewed-by: Connor McAdams <conmanx360@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_ca0132.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 4ff5320378e2..f5203f681b6e 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -1048,6 +1048,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = {
 	SND_PCI_QUIRK(0x1102, 0x0010, "Sound Blaster Z", QUIRK_SBZ),
 	SND_PCI_QUIRK(0x1102, 0x0023, "Sound Blaster Z", QUIRK_SBZ),
 	SND_PCI_QUIRK(0x1458, 0xA016, "Recon3Di", QUIRK_R3DI),
+	SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI),
 	SND_PCI_QUIRK(0x1458, 0xA036, "Recon3Di", QUIRK_R3DI),
 	{}
 };

From c5a59d2477abf04e1b77152bef49383fd212da8d Mon Sep 17 00:00:00 2001
From: Alastair Bridgewater <alastair.bridgewater@gmail.com>
Date: Wed, 11 Jul 2018 18:09:46 -0400
Subject: [PATCH 357/382] ALSA: hda/ca0132: Update a pci quirk device name

The PCI subsystem in question for this quirk rule has been
identified as a Gigabyte GA-Z170X-Gaming 7 motherboard.  Set the
device name appropriately.

Signed-off-by: Alastair Bridgewater <alastair.bridgewater@gmail.com>
Reviewed-by: Connor McAdams <conmanx360@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_ca0132.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index f5203f681b6e..321e95c409c1 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -1049,7 +1049,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = {
 	SND_PCI_QUIRK(0x1102, 0x0023, "Sound Blaster Z", QUIRK_SBZ),
 	SND_PCI_QUIRK(0x1458, 0xA016, "Recon3Di", QUIRK_R3DI),
 	SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI),
-	SND_PCI_QUIRK(0x1458, 0xA036, "Recon3Di", QUIRK_R3DI),
+	SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI),
 	{}
 };
 

From e69b5d308da72cbf4e7911c3979f9a46d28532af Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 2 Jul 2018 12:00:18 +0200
Subject: [PATCH 358/382] xen: remove global bit from __default_kernel_pte_mask
 for pv guests

When removing the global bit from __supported_pte_mask do the same for
__default_kernel_pte_mask in order to avoid the WARN_ONCE() in
check_pgprot() when setting a kernel pte before having called
init_mem_mapping().

Cc: <stable@vger.kernel.org> # 4.17
Reported-by: Michael Young <m.a.young@durham.ac.uk>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 8d4e2e1ae60b..4816b6f82a9a 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1229,6 +1229,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	/* Prevent unwanted bits from being set in PTEs. */
 	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	__default_kernel_pte_mask &= ~_PAGE_GLOBAL;
 
 	/*
 	 * Prevent page tables from being allocated in highmem, even

From 2f8b5b21830aea95989a6e67d8a971297272a086 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 10 Jul 2018 14:47:25 -0500
Subject: [PATCH 359/382] ARM: DRA7/OMAP5: Enable ACTLR[0] (Enable invalidates
 of BTB) for secondary cores

Call secure services to enable ACTLR[0] (Enable invalidates of BTB with
ICIALLU) when branch hardening is enabled for kernel.

On GP devices OMAP5/DRA7, there is no possibility to update secure
side since "secure world" is ROM and there are no override mechanisms
possible. On HS devices, appropriate PPA should do the workarounds as
well.

However, the configuration is only done for secondary core, since it is
expected that firmware/bootloader will have enabled the required
configuration for the primary boot core (note: bootloaders typically
will NOT enable secondary processors, since it has no need to do so).

Signed-off-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap-smp.c | 41 ++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 69df3620eca5..1c73694c871a 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -109,6 +109,45 @@ void omap5_erratum_workaround_801819(void)
 static inline void omap5_erratum_workaround_801819(void) { }
 #endif
 
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+/*
+ * Configure ACR and enable ACTLR[0] (Enable invalidates of BTB with
+ * ICIALLU) to activate the workaround for secondary Core.
+ * NOTE: it is assumed that the primary core's configuration is done
+ * by the boot loader (kernel will detect a misconfiguration and complain
+ * if this is not done).
+ *
+ * In General Purpose(GP) devices, ACR bit settings can only be done
+ * by ROM code in "secure world" using the smc call and there is no
+ * option to update the "firmware" on such devices. This also works for
+ * High security(HS) devices, as a backup option in case the
+ * "update" is not done in the "security firmware".
+ */
+static void omap5_secondary_harden_predictor(void)
+{
+	u32 acr, acr_mask;
+
+	asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr));
+
+	/*
+	 * ACTLR[0] (Enable invalidates of BTB with ICIALLU)
+	 */
+	acr_mask = BIT(0);
+
+	/* Do we already have it done.. if yes, skip expensive smc */
+	if ((acr & acr_mask) == acr_mask)
+		return;
+
+	acr |= acr_mask;
+	omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr);
+
+	pr_debug("%s: ARM ACR setup for CVE_2017_5715 applied on CPU%d\n",
+		 __func__, smp_processor_id());
+}
+#else
+static inline void omap5_secondary_harden_predictor(void) { }
+#endif
+
 static void omap4_secondary_init(unsigned int cpu)
 {
 	/*
@@ -131,6 +170,8 @@ static void omap4_secondary_init(unsigned int cpu)
 		set_cntfreq();
 		/* Configure ACR to disable streaming WA for 801819 */
 		omap5_erratum_workaround_801819();
+		/* Enable ACR to allow for ICUALLU workaround */
+		omap5_secondary_harden_predictor();
 	}
 
 	/*

From 923847413f7316b5ced3491769b3fefa6c56a79a Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Wed, 11 Jul 2018 12:54:54 -0500
Subject: [PATCH 360/382] ARM: dts: am3517.dtsi:  Disable reference to OMAP3
 OTG controller

The AM3517 has a different OTG controller location than the OMAP3,
which is included from omap3.dtsi.  This results in a hwmod error.
Since the AM3517 has a different OTG controller address, this patch
disabes one that is isn't available.

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am3517.dtsi | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi
index 4b6062b631b1..23ea381d363f 100644
--- a/arch/arm/boot/dts/am3517.dtsi
+++ b/arch/arm/boot/dts/am3517.dtsi
@@ -91,6 +91,11 @@ hecc: can@5c050000 {
 	};
 };
 
+/* Table Table 5-79 of the TRM shows 480ab000 is reserved */
+&usb_otg_hs {
+	status = "disabled";
+};
+
 &iva {
 	status = "disabled";
 };

From 9feeb638cde083c737e295c0547f1b4f28e99583 Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Tue, 5 Jun 2018 19:00:22 +0200
Subject: [PATCH 361/382] tools build: fix # escaping in .cmd files for future
 Make

In 2016 GNU Make made a backwards incompatible change to the way '#'
characters were handled in Makefiles when used inside functions or
macros:

http://git.savannah.gnu.org/cgit/make.git/commit/?id=c6966b323811c37acedff05b57

Due to this change, when attempting to run `make prepare' I get a
spurious make syntax error:

    /home/earnest/linux/tools/objtool/.fixdep.o.cmd:1: *** missing separator.  Stop.

When inspecting `.fixdep.o.cmd' it includes two lines which use
unescaped comment characters at the top:

    \# cannot find fixdep (/home/earnest/linux/tools/objtool//fixdep)
    \# using basic dep data

This is because `tools/build/Build.include' prints these '\#'
characters:

    printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
    printf '\# using basic dep data\n\n' >> $(dot-target).cmd;           \

This completes commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd files
for future Make").

Link: https://bugzilla.kernel.org/show_bug.cgi?id=197847
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: stable@vger.kernel.org
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 tools/build/Build.include | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/build/Build.include b/tools/build/Build.include
index a4bbb984941d..d9048f145f97 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -63,8 +63,8 @@ dep-cmd = $(if $(wildcard $(fixdep)),
            $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp;           \
            rm -f $(depfile);                                                    \
            mv -f $(dot-target).tmp $(dot-target).cmd,                           \
-           printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
-           printf '\# using basic dep data\n\n' >> $(dot-target).cmd;           \
+           printf '$(pound) cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
+           printf '$(pound) using basic dep data\n\n' >> $(dot-target).cmd;           \
            cat $(depfile) >> $(dot-target).cmd;                                 \
            printf '\n%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
 

From 6fdbd824fd7a3876aac43d32fdf1f30b9ef72ce4 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 9 Jul 2018 17:45:56 -0700
Subject: [PATCH 362/382] tools: build: Fixup host c flags

Commit 0c3b7e42616f ("tools build: Add support for host programs format")
introduced host_c_flags which referenced CHOSTFLAGS. The actual name of the
variable is HOSTCFLAGS. Fix this up.

Fixes: 0c3b7e42616f ("tools build: Add support for host programs format")
Signed-off-by: Laura Abbott <labbott@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 tools/build/Build.include   | 2 +-
 tools/perf/pmu-events/Build | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/build/Build.include b/tools/build/Build.include
index d9048f145f97..950c1504ca37 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -98,4 +98,4 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX
 ###
 ## HOSTCC C flags
 
-host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CHOSTFLAGS) -D"BUILD_STR(s)=\#s" $(CHOSTFLAGS_$(basetarget).o) $(CHOSTFLAGS_$(obj))
+host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 17783913d330..215ba30b8534 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -1,7 +1,7 @@
 hostprogs := jevents
 
 jevents-y	+= json.o jsmn.o jevents.o
-CHOSTFLAGS_jevents.o	= -I$(srctree)/tools/include
+HOSTCFLAGS_jevents.o	= -I$(srctree)/tools/include
 pmu-events-y	+= pmu-events.o
 JDIR		=  pmu-events/arch/$(SRCARCH)
 JSON		=  $(shell [ -d $(JDIR) ] &&				\

From 8b247a92ebd0cda7dec49a6f771d9c4950f3d3ad Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 9 Jul 2018 17:45:57 -0700
Subject: [PATCH 363/382] tools: build: Use HOSTLDFLAGS with fixdep

The final link of fixdep uses LDFLAGS but not the existing HOSTLDFLAGS.
Fix this.

Signed-off-by: Laura Abbott <labbott@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 tools/build/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/build/Makefile b/tools/build/Makefile
index 5eb4b5ad79cb..5edf65e684ab 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -43,7 +43,7 @@ $(OUTPUT)fixdep-in.o: FORCE
 	$(Q)$(MAKE) $(build)=fixdep
 
 $(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o
-	$(QUIET_LINK)$(HOSTCC) $(LDFLAGS) -o $@ $<
+	$(QUIET_LINK)$(HOSTCC) $(HOSTLDFLAGS) -o $@ $<
 
 FORCE:
 

From e23ba825db245724fec08d7285bc0272a57d38d4 Mon Sep 17 00:00:00 2001
From: Constantine Shulyupin <const@makelinux.com>
Date: Wed, 11 Jul 2018 21:36:42 +0300
Subject: [PATCH 364/382] scripts/tags.sh: add __ro_after_init

Signed-off-by: Constantine Shulyupin <const@MakeLinux.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/tags.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/tags.sh b/scripts/tags.sh
index 66f08bb1cce9..412a70cce558 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -245,7 +245,7 @@ exuberant()
 {
 	setup_regex exuberant asm c
 	all_target_sources | xargs $1 -a                        \
-	-I __initdata,__exitdata,__initconst,			\
+	-I __initdata,__exitdata,__initconst,__ro_after_init	\
 	-I __initdata_memblock					\
 	-I __refdata,__attribute,__maybe_unused,__always_unused \
 	-I __acquires,__releases,__deprecated			\

From 6d79a7b424a5630a6fcab31fd7c38af4ea9c9a0f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 12 Jul 2018 19:38:36 +0900
Subject: [PATCH 365/382] kbuild: suppress warnings from 'getconf LFS_*'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Suppress warnings for systems that do not recognize LFS_*.

 getconf: no such configuration parameter `LFS_CFLAGS'
 getconf: no such configuration parameter `LFS_LDFLAGS'
 getconf: no such configuration parameter `LFS_LIBS'

Fixes: d7f14c66c273 ("kbuild: Enable Large File Support for hostprogs")
Reported-by: Chen Feng <puck.chen@hisilicon.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 5b26847909ec..4a457302e3d4 100644
--- a/Makefile
+++ b/Makefile
@@ -353,9 +353,9 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
 	  else if [ -x /bin/bash ]; then echo /bin/bash; \
 	  else echo sh; fi ; fi)
 
-HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS)
-HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS)
-HOST_LFS_LIBS := $(shell getconf LFS_LIBS)
+HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS 2>/dev/null)
+HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS 2>/dev/null)
+HOST_LFS_LIBS := $(shell getconf LFS_LIBS 2>/dev/null)
 
 HOSTCC       = gcc
 HOSTCXX      = g++

From abe41184abac487264a4904bfcff2d5500dccce6 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 10 Jul 2018 23:42:15 +0200
Subject: [PATCH 366/382] i2c: recovery: if possible send STOP with recovery
 pulses

I2C clients may misunderstand recovery pulses if they can't read SDA to
bail out early. In the worst case, as a write operation. To avoid that
and if we can write SDA, try to send STOP to avoid the
misinterpretation.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@kernel.org
---
 drivers/i2c/i2c-core-base.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 31d16ada6e7d..301285c54603 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -198,7 +198,16 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap)
 
 		val = !val;
 		bri->set_scl(adap, val);
-		ndelay(RECOVERY_NDELAY);
+
+		/*
+		 * If we can set SDA, we will always create STOP here to ensure
+		 * the additional pulses will do no harm. This is achieved by
+		 * letting SDA follow SCL half a cycle later.
+		 */
+		ndelay(RECOVERY_NDELAY / 2);
+		if (bri->set_sda)
+			bri->set_sda(adap, val);
+		ndelay(RECOVERY_NDELAY / 2);
 	}
 
 	/* check if recovery actually succeeded */

From f8494fa3dd10b52eab47a9666a8bc34719a129aa Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Mon, 25 Jun 2018 17:08:22 -0700
Subject: [PATCH 367/382] tracing: Reorder display of TGID to be after PID

Currently ftrace displays data in trace output like so:

                                       _-----=> irqs-off
                                      / _----=> need-resched
                                     | / _---=> hardirq/softirq
                                     || / _--=> preempt-depth
                                     ||| /     delay
            TASK-PID   CPU    TGID   ||||    TIMESTAMP  FUNCTION
               | |       |      |    ||||       |         |
            bash-1091  [000] ( 1091) d..2    28.313544: sched_switch:

However Android's trace visualization tools expect a slightly different
format due to an out-of-tree patch patch that was been carried for a
decade, notice that the TGID and CPU fields are reversed:

                                       _-----=> irqs-off
                                      / _----=> need-resched
                                     | / _---=> hardirq/softirq
                                     || / _--=> preempt-depth
                                     ||| /     delay
            TASK-PID    TGID   CPU   ||||    TIMESTAMP  FUNCTION
               | |        |      |   ||||       |         |
            bash-1091  ( 1091) [002] d..2    64.965177: sched_switch:

From kernel v4.13 onwards, during which TGID was introduced, tracing
with systrace on all Android kernels will break (most Android kernels
have been on 4.9 with Android patches, so this issues hasn't been seen
yet). From v4.13 onwards things will break.

The chrome browser's tracing tools also embed the systrace viewer which
uses the legacy TGID format and updates to that are known to be
difficult to make.

Considering this, I suggest we make this change to the upstream kernel
and backport it to all Android kernels. I believe this feature is merged
recently enough into the upstream kernel that it shouldn't be a problem.
Also logically, IMO it makes more sense to group the TGID with the
TASK-PID and the CPU after these.

Link: http://lkml.kernel.org/r/20180626000822.113931-1-joel@joelfernandes.org

Cc: jreck@google.com
Cc: tkjos@google.com
Cc: stable@vger.kernel.org
Fixes: 441dae8f2f29 ("tracing: Add support for display of tgid in trace output")
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c        | 8 ++++----
 kernel/trace/trace_output.c | 5 +++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f054bd6a1c66..87cf25171fb8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3365,8 +3365,8 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
 
 	print_event_info(buf, m);
 
-	seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
-	seq_printf(m, "#              | |       |    %s     |         |\n",	 tgid ? "  |      " : "");
+	seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
+	seq_printf(m, "#              | |     %s    |       |         |\n",	 tgid ? "  |      " : "");
 }
 
 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
@@ -3386,9 +3386,9 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
 		   tgid ? tgid_space : space);
 	seq_printf(m, "#                          %s||| /     delay\n",
 		   tgid ? tgid_space : space);
-	seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
+	seq_printf(m, "#           TASK-PID %sCPU#  ||||    TIMESTAMP  FUNCTION\n",
 		   tgid ? "   TGID   " : space);
-	seq_printf(m, "#              | |       | %s||||       |         |\n",
+	seq_printf(m, "#              | |   %s  |   ||||       |         |\n",
 		   tgid ? "     |    " : space);
 }
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 90db994ac900..1c8e30fda46a 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -594,8 +594,7 @@ int trace_print_context(struct trace_iterator *iter)
 
 	trace_find_cmdline(entry->pid, comm);
 
-	trace_seq_printf(s, "%16s-%-5d [%03d] ",
-			       comm, entry->pid, iter->cpu);
+	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 
 	if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
 		unsigned int tgid = trace_find_tgid(entry->pid);
@@ -606,6 +605,8 @@ int trace_print_context(struct trace_iterator *iter)
 			trace_seq_printf(s, "(%5d) ", tgid);
 	}
 
+	trace_seq_printf(s, "[%03d] ", iter->cpu);
+
 	if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
 		trace_print_lat_fmt(s, entry);
 

From 0ce0bba4e5e0eb9b753bb821785de5d23c494392 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 12 Jul 2018 17:40:34 +0200
Subject: [PATCH 368/382] xen: setup pv irq ops vector earlier

Setting pv_irq_ops for Xen PV domains should be done as early as
possible in order to support e.g. very early printk() usage.

The same applies to xen_vcpu_info_reset(0), as it is needed for the
pv irq ops.

Move the call of xen_setup_machphys_mapping() after initializing the
pv functions as it contains a WARN_ON(), too.

Remove the no longer necessary conditional in xen_init_irq_ops()
from PVH V1 times to make clear this is a PV only function.

Cc: <stable@vger.kernel.org> # 4.14
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 24 +++++++++++-------------
 arch/x86/xen/irq.c          |  4 +---
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 4816b6f82a9a..439a94bf89ad 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1207,12 +1207,20 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	xen_setup_features();
 
-	xen_setup_machphys_mapping();
-
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_init_ops.patch = paravirt_patch_default;
 	pv_cpu_ops = xen_cpu_ops;
+	xen_init_irq_ops();
+
+	/*
+	 * Setup xen_vcpu early because it is needed for
+	 * local_irq_disable(), irqs_disabled(), e.g. in printk().
+	 *
+	 * Don't do the full vcpu_info placement stuff until we have
+	 * the cpu_possible_mask and a non-dummy shared_info.
+	 */
+	xen_vcpu_info_reset(0);
 
 	x86_platform.get_nmi_reason = xen_get_nmi_reason;
 
@@ -1225,6 +1233,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	 * Set up some pagetable state before starting to set any ptes.
 	 */
 
+	xen_setup_machphys_mapping();
 	xen_init_mmu_ops();
 
 	/* Prevent unwanted bits from being set in PTEs. */
@@ -1250,20 +1259,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	get_cpu_cap(&boot_cpu_data);
 	x86_configure_nx();
 
-	xen_init_irq_ops();
-
 	/* Let's presume PV guests always boot on vCPU with id 0. */
 	per_cpu(xen_vcpu_id, 0) = 0;
 
-	/*
-	 * Setup xen_vcpu early because idt_setup_early_handler needs it for
-	 * local_irq_disable(), irqs_disabled().
-	 *
-	 * Don't do the full vcpu_info placement stuff until we have
-	 * the cpu_possible_mask and a non-dummy shared_info.
-	 */
-	xen_vcpu_info_reset(0);
-
 	idt_setup_early_handler();
 
 	xen_init_capabilities();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 74179852e46c..7515a19fd324 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -128,8 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
 
 void __init xen_init_irq_ops(void)
 {
-	/* For PVH we use default pv_irq_ops settings. */
-	if (!xen_feature(XENFEAT_hvm_callback_vector))
-		pv_irq_ops = xen_irq_ops;
+	pv_irq_ops = xen_irq_ops;
 	x86_init.irqs.intr_init = xen_init_IRQ;
 }

From fd6792bb022e43faa0c4a45b6f25285e21206f9d Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Thu, 12 Jul 2018 12:22:44 +0200
Subject: [PATCH 369/382] rtc: fix alarm read and set offset

The offset needs to be added after reading the alarm value.

It also needs to be subtracted after the now < alarm test.

Tested-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 6d4012dd6922..bac1eeb3d312 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -265,8 +265,10 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 			return err;
 
 		/* full-function RTCs won't have such missing fields */
-		if (rtc_valid_tm(&alarm->time) == 0)
+		if (rtc_valid_tm(&alarm->time) == 0) {
+			rtc_add_offset(rtc, &alarm->time);
 			return 0;
+		}
 
 		/* get the "after" timestamp, to detect wrapped fields */
 		err = rtc_read_time(rtc, &now);
@@ -409,7 +411,6 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	if (err)
 		return err;
 
-	rtc_subtract_offset(rtc, &alarm->time);
 	scheduled = rtc_tm_to_time64(&alarm->time);
 
 	/* Make sure we're not setting alarms in the past */
@@ -426,6 +427,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	 * over right here, before we set the alarm.
 	 */
 
+	rtc_subtract_offset(rtc, &alarm->time);
+
 	if (!rtc->ops)
 		err = -ENODEV;
 	else if (!rtc->ops->set_alarm)
@@ -467,7 +470,6 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 
 	mutex_unlock(&rtc->ops_lock);
 
-	rtc_add_offset(rtc, &alarm->time);
 	return err;
 }
 EXPORT_SYMBOL_GPL(rtc_set_alarm);

From e181ae0c5db9544de9c53239eb22bc012ce75033 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin <pasha.tatashin@oracle.com>
Date: Sat, 14 Jul 2018 09:15:07 -0400
Subject: [PATCH 370/382] mm: zero unavailable pages before memmap init

We must zero struct pages for memory that is not backed by physical
memory, or kernel does not have access to.

Recently, there was a change which zeroed all memmap for all holes in
e820.  Unfortunately, it introduced a bug that is discussed here:

  https://www.spinics.net/lists/linux-mm/msg156764.html

Linus, also saw this bug on his machine, and confirmed that reverting
commit 124049decbb1 ("x86/e820: put !E820_TYPE_RAM regions into
memblock.reserved") fixes the issue.

The problem is that we incorrectly zero some struct pages after they
were setup.

The fix is to zero unavailable struct pages prior to initializing of
struct pages.

A more detailed fix should come later that would avoid double zeroing
cases: one in __init_single_page(), the other one in
zero_resv_unavail().

Fixes: 124049decbb1 ("x86/e820: put !E820_TYPE_RAM regions into memblock.reserved")
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1521100f1e63..5d800d61ddb7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6847,6 +6847,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 	/* Initialise every node */
 	mminit_verify_pageflags_layout();
 	setup_nr_node_ids();
+	zero_resv_unavail();
 	for_each_online_node(nid) {
 		pg_data_t *pgdat = NODE_DATA(nid);
 		free_area_init_node(nid, NULL,
@@ -6857,7 +6858,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
 			node_set_state(nid, N_MEMORY);
 		check_for_memory(pgdat, nid);
 	}
-	zero_resv_unavail();
 }
 
 static int __init cmdline_parse_core(char *p, unsigned long *core,
@@ -7033,9 +7033,9 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
 
 void __init free_area_init(unsigned long *zones_size)
 {
+	zero_resv_unavail();
 	free_area_init_node(0, zones_size,
 			__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
-	zero_resv_unavail();
 }
 
 static int page_alloc_cpu_dead(unsigned int cpu)

From bce73e4842390f7b7309c8e253e139db71288ac3 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 13 Jul 2018 16:58:52 -0700
Subject: [PATCH 371/382] mm: do not drop unused pages when userfaultd is
 running

KVM guests on s390 can notify the host of unused pages.  This can result
in pte_unused callbacks to be true for KVM guest memory.

If a page is unused (checked with pte_unused) we might drop this page
instead of paging it.  This can have side-effects on userfaultd, when
the page in question was already migrated:

The next access of that page will trigger a fault and a user fault
instead of faulting in a new and empty zero page.  As QEMU does not
expect a userfault on an already migrated page this migration will fail.

The most straightforward solution is to ignore the pte_unused hint if a
userfault context is active for this VMA.

Link: http://lkml.kernel.org/r/20180703171854.63981-1-borntraeger@de.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/rmap.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 6db729dc4c50..eb477809a5c0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -64,6 +64,7 @@
 #include <linux/backing-dev.h>
 #include <linux/page_idle.h>
 #include <linux/memremap.h>
+#include <linux/userfaultfd_k.h>
 
 #include <asm/tlbflush.h>
 
@@ -1481,11 +1482,16 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				set_pte_at(mm, address, pvmw.pte, pteval);
 			}
 
-		} else if (pte_unused(pteval)) {
+		} else if (pte_unused(pteval) && !userfaultfd_armed(vma)) {
 			/*
 			 * The guest indicated that the page content is of no
 			 * interest anymore. Simply discard the pte, vmscan
 			 * will take care of the rest.
+			 * A future reference will then fault in a new zero
+			 * page. When userfaultfd is active, we must not drop
+			 * this page though, as its main user (postcopy
+			 * migration) will not expect userfaults on already
+			 * copied pages.
 			 */
 			dec_mm_counter(mm, mm_counter(page));
 			/* We have to invalidate as we cleared the pte */

From e70cc2bd579e8a9d6d153762f0fe294d0e652ff0 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 13 Jul 2018 16:58:56 -0700
Subject: [PATCH 372/382] fs/proc/task_mmu.c: fix Locked field in
 /proc/pid/smaps*

Thomas reports:
 "While looking around in /proc on my v4.14.52 system I noticed that all
  processes got a lot of "Locked" memory in /proc/*/smaps. A lot more
  memory than a regular user can usually lock with mlock().

  Commit 493b0e9d945f (in v4.14-rc1) seems to have changed the behavior
  of "Locked".

  Before that commit the code was like this. Notice the VM_LOCKED check.

           (vma->vm_flags & VM_LOCKED) ?
                (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);

  After that commit Locked is now the same as Pss:

	  (unsigned long)(mss->pss >> (10 + PSS_SHIFT)));

  This looks like a mistake."

Indeed, the commit has added mss->pss_locked with the correct value that
depends on VM_LOCKED, but forgot to actually use it.  Fix it.

Link: http://lkml.kernel.org/r/ebf6c7fb-fec3-6a26-544f-710ed193c154@suse.cz
Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Thomas Lindroth <thomas.lindroth@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Daniel Colascione <dancol@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e9679016271f..dfd73a4616ce 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -831,7 +831,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 		SEQ_PUT_DEC(" kB\nSwap:           ", mss->swap);
 		SEQ_PUT_DEC(" kB\nSwapPss:        ",
 						mss->swap_pss >> PSS_SHIFT);
-		SEQ_PUT_DEC(" kB\nLocked:         ", mss->pss >> PSS_SHIFT);
+		SEQ_PUT_DEC(" kB\nLocked:         ",
+						mss->pss_locked >> PSS_SHIFT);
 		seq_puts(m, " kB\n");
 	}
 	if (!rollup_mode) {

From 02f51d45937f7bc7f4dee21e9f85b2d5eac37104 Mon Sep 17 00:00:00 2001
From: Tomas Bortoli <tomasbortoli@gmail.com>
Date: Fri, 13 Jul 2018 16:58:59 -0700
Subject: [PATCH 373/382] autofs: fix slab out of bounds read in
 getname_kernel()

The autofs subsystem does not check that the "path" parameter is present
for all cases where it is required when it is passed in via the "param"
struct.

In particular it isn't checked for the AUTOFS_DEV_IOCTL_OPENMOUNT_CMD
ioctl command.

To solve it, modify validate_dev_ioctl(function to check that a path has
been provided for ioctl commands that require it.

Link: http://lkml.kernel.org/r/153060031527.26631.18306637892746301555.stgit@pluto.themaw.net
Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
Signed-off-by: Ian Kent <raven@themaw.net>
Reported-by: syzbot+60c837b428dc84e83a93@syzkaller.appspotmail.com
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs/dev-ioctl.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index ea4ca1445ab7..86eafda4a652 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -135,6 +135,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
 				cmd);
 			goto out;
 		}
+	} else {
+		unsigned int inr = _IOC_NR(cmd);
+
+		if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ||
+		    inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD ||
+		    inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) {
+			err = -EINVAL;
+			goto out;
+		}
 	}
 
 	err = 0;
@@ -271,7 +280,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
 	dev_t devid;
 	int err, fd;
 
-	/* param->path has already been checked */
+	/* param->path has been checked in validate_dev_ioctl() */
+
 	if (!param->openmount.devid)
 		return -EINVAL;
 
@@ -433,10 +443,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
 	dev_t devid;
 	int err = -ENOENT;
 
-	if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
-		err = -EINVAL;
-		goto out;
-	}
+	/* param->path has been checked in validate_dev_ioctl() */
 
 	devid = sbi->sb->s_dev;
 
@@ -521,10 +528,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
 	unsigned int devid, magic;
 	int err = -ENOENT;
 
-	if (param->size <= AUTOFS_DEV_IOCTL_SIZE) {
-		err = -EINVAL;
-		goto out;
-	}
+	/* param->path has been checked in validate_dev_ioctl() */
 
 	name = param->path;
 	type = param->ismountpoint.in.type;

From a90744bac57c3c07d0d4422af62f3e44549ade30 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 13 Jul 2018 16:59:03 -0700
Subject: [PATCH 374/382] mm: allow arch to supply p??_free_tlb functions

The mmu_gather APIs keep track of the invalidated address range
including the span covered by invalidated page table pages.  Ranges
covered by page tables but not ptes (and therefore no TLBs) still need
to be invalidated because some architectures (x86) can cache
intermediate page table entries, and invalidate those with normal TLB
invalidation instructions to be almost-backward-compatible.

Architectures which don't cache intermediate page table entries, or
which invalidate these caches separately from TLB invalidation, do not
require TLB invalidation range expanded over page tables.

Allow architectures to supply their own p??_free_tlb functions, which
can avoid the __tlb_adjust_range.

Link: http://lkml.kernel.org/r/20180703013131.2807-1-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Aneesh Kumar K. V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/tlb.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index faddde44de8c..3063125197ad 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -265,33 +265,41 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
  * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
  */
 
+#ifndef pte_free_tlb
 #define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
 		__pte_free_tlb(tlb, ptep, address);		\
 	} while (0)
+#endif
 
+#ifndef pmd_free_tlb
 #define pmd_free_tlb(tlb, pmdp, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
 		__pmd_free_tlb(tlb, pmdp, address);		\
 	} while (0)
+#endif
 
 #ifndef __ARCH_HAS_4LEVEL_HACK
+#ifndef pud_free_tlb
 #define pud_free_tlb(tlb, pudp, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
 		__pud_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
+#endif
 
 #ifndef __ARCH_HAS_5LEVEL_HACK
+#ifndef p4d_free_tlb
 #define p4d_free_tlb(tlb, pudp, address)			\
 	do {							\
 		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
 		__p4d_free_tlb(tlb, pudp, address);		\
 	} while (0)
 #endif
+#endif
 
 #define tlb_migrate_finish(mm) do {} while (0)
 

From c290fba8c4ce6530cd941ea14db5a4ac2f77183f Mon Sep 17 00:00:00 2001
From: piaojun <piaojun@huawei.com>
Date: Fri, 13 Jul 2018 16:59:06 -0700
Subject: [PATCH 375/382] net/9p/client.c: put refcount of trans_mod in error
 case in parse_opts()

In my testing, the second mount will fail after umounting successfully.
The reason is that we put refcount of trans_mod in the correct case
rather than the error case in parse_opts() at last.  That will cause the
refcount decrease to -1, and when we try to get trans_mod again in
try_module_get(), we could only increase refcount to 0 which will cause
failure as follows:

parse_opts
  v9fs_get_trans_by_name
    try_module_get : return NULL to caller which cause error

So we should put refcount of trans_mod in error case.

Link: http://lkml.kernel.org/r/5B3F39A0.2030509@huawei.com
Fixes: 9421c3e64137ec ("net/9p/client.c: fix potential refcnt problem of trans module")
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Dominique Martinet <dominique.martinet@cea.fr>
Tested-by: Dominique Martinet <dominique.martinet@cea.fr>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Ron Minnich <rminnich@sandia.gov>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/9p/client.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 18c5271910dc..5c1343195292 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -225,7 +225,8 @@ static int parse_opts(char *opts, struct p9_client *clnt)
 	}
 
 free_and_return:
-	v9fs_put_trans(clnt->trans_mod);
+	if (ret)
+		v9fs_put_trans(clnt->trans_mod);
 	kfree(tmp_options);
 	return ret;
 }

From fa8cbda88db12e632a8987c94b66f5caf25bcec4 Mon Sep 17 00:00:00 2001
From: Philipp Rudo <prudo@linux.ibm.com>
Date: Fri, 13 Jul 2018 16:59:09 -0700
Subject: [PATCH 376/382] x86/purgatory: add missing FORCE to Makefile target

- Build the kernel without the fix
- Add some flag to the purgatories KBUILD_CFLAGS,I used
  -fno-asynchronous-unwind-tables
- Re-build the kernel

When you look at makes output you see that sha256.o is not re-build in the
last step.  Also readelf -S still shows the .eh_frame section for
sha256.o.

With the fix sha256.o is rebuilt in the last step.

Without FORCE make does not detect changes only made to the command line
options.  So object files might not be re-built even when they should be.
Fix this by adding FORCE where it is missing.

Link: http://lkml.kernel.org/r/20180704110044.29279-2-prudo@linux.ibm.com
Fixes: df6f2801f511 ("kernel/kexec_file.c: move purgatories sha256 to common code")
Signed-off-by: Philipp Rudo <prudo@linux.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>	[4.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/purgatory/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2e9ee023e6bc..81a8e33115ad 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
 targets += $(purgatory-y)
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
-$(obj)/sha256.o: $(srctree)/lib/sha256.c
+$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
 LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib

From 24962af7e1041b7e50c1bc71d8d10dc678c556b5 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Fri, 13 Jul 2018 16:59:13 -0700
Subject: [PATCH 377/382] fs, elf: make sure to page align bss in
 load_elf_library

The current code does not make sure to page align bss before calling
vm_brk(), and this can lead to a VM_BUG_ON() in __mm_populate() due to
the requested lenght not being correctly aligned.

Let us make sure to align it properly.

Kees: only applicable to CONFIG_USELIB kernels: 32-bit and configured
for libc5.

Link: http://lkml.kernel.org/r/20180705145539.9627-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reported-by: syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com
Tested-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0ac456b52bdd..816cc921cf36 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1259,9 +1259,8 @@ static int load_elf_library(struct file *file)
 		goto out_free_ph;
 	}
 
-	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
-			    ELF_MIN_ALIGN - 1);
-	bss = eppnt->p_memsz + eppnt->p_vaddr;
+	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
+	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
 	if (bss > len) {
 		error = vm_brk(len, bss - len);
 		if (error)

From e3d301cae0092062cbcd6b4e7ceebbab9d87e263 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 13 Jul 2018 16:59:16 -0700
Subject: [PATCH 378/382] mm/memblock.c: do not complain about top-down
 allocations for !MEMORY_HOTREMOVE

Mike Rapoport is converting architectures from bootmem to nobootmem
allocator.  While doing so for m68k Geert has noticed that he gets a
scary looking warning:

  WARNING: CPU: 0 PID: 0 at mm/memblock.c:230
  memblock_find_in_range_node+0x11c/0x1be
  memblock: bottom-up allocation failed, memory hotunplug may be affected
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper Not tainted
  4.18.0-rc3-atari-01343-gf2fb5f2e09a97a3c-dirty #7
  Call Trace: __warn+0xa8/0xc2
    kernel_pg_dir+0x0/0x1000
    netdev_lower_get_next+0x2/0x22
    warn_slowpath_fmt+0x2e/0x36
    memblock_find_in_range_node+0x11c/0x1be
    memblock_find_in_range_node+0x11c/0x1be
    memblock_find_in_range_node+0x0/0x1be
    vprintk_func+0x66/0x6e
    memblock_virt_alloc_internal+0xd0/0x156
    netdev_lower_get_next+0x2/0x22
    netdev_lower_get_next+0x2/0x22
    kernel_pg_dir+0x0/0x1000
    memblock_virt_alloc_try_nid_nopanic+0x58/0x7a
    netdev_lower_get_next+0x2/0x22
    kernel_pg_dir+0x0/0x1000
    kernel_pg_dir+0x0/0x1000
    EXPTBL+0x234/0x400
    EXPTBL+0x234/0x400
    alloc_node_mem_map+0x4a/0x66
    netdev_lower_get_next+0x2/0x22
    free_area_init_node+0xe2/0x29e
    EXPTBL+0x234/0x400
    paging_init+0x430/0x462
    kernel_pg_dir+0x0/0x1000
    printk+0x0/0x1a
    EXPTBL+0x234/0x400
    setup_arch+0x1b8/0x22c
    start_kernel+0x4a/0x40a
    _sinittext+0x344/0x9e8

The warning is basically saying that a top-down allocation can break
memory hotremove because memblock allocation is not movable.  But m68k
doesn't even support MEMORY_HOTREMOVE so there is no point to warn about
it.

Make the warning conditional only to configurations that care.

Link: http://lkml.kernel.org/r/20180706061750.GH32658@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Sam Creasey <sammy@sammy.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memblock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index 03d48d8835ba..11e46f83e1ad 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -227,7 +227,8 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 		 * so we use WARN_ONCE() here to see the stack trace if
 		 * fail happens.
 		 */
-		WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n");
+		WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
+			  "memblock: bottom-up allocation failed, memory hotremove may be affected\n");
 	}
 
 	return __memblock_find_range_top_down(start, end, size, align, nid,

From bb177a732c4369bb58a1fe1df8f552b6f0f7db5f Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 13 Jul 2018 16:59:20 -0700
Subject: [PATCH 379/382] mm: do not bug_on on incorrect length in
 __mm_populate()

syzbot has noticed that a specially crafted library can easily hit
VM_BUG_ON in __mm_populate

  kernel BUG at mm/gup.c:1242!
  invalid opcode: 0000 [#1] SMP
  CPU: 2 PID: 9667 Comm: a.out Not tainted 4.18.0-rc3 #644
  Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017
  RIP: 0010:__mm_populate+0x1e2/0x1f0
  Code: 55 d0 65 48 33 14 25 28 00 00 00 89 d8 75 21 48 83 c4 20 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 75 18 f1 ff 0f 0b e8 6e 18 f1 ff <0f> 0b 31 db eb c9 e8 93 06 e0 ff 0f 1f 00 55 48 89 e5 53 48 89 fb
  Call Trace:
     vm_brk_flags+0xc3/0x100
     vm_brk+0x1f/0x30
     load_elf_library+0x281/0x2e0
     __ia32_sys_uselib+0x170/0x1e0
     do_fast_syscall_32+0xca/0x420
     entry_SYSENTER_compat+0x70/0x7f

The reason is that the length of the new brk is not page aligned when we
try to populate the it.  There is no reason to bug on that though.
do_brk_flags already aligns the length properly so the mapping is
expanded as it should.  All we need is to tell mm_populate about it.
Besides that there is absolutely no reason to to bug_on in the first
place.  The worst thing that could happen is that the last page wouldn't
get populated and that is far from putting system into an inconsistent
state.

Fix the issue by moving the length sanitization code from do_brk_flags
up to vm_brk_flags.  The only other caller of do_brk_flags is brk
syscall entry and it makes sure to provide the proper length so t here
is no need for sanitation and so we can use do_brk_flags without it.

Also remove the bogus BUG_ONs.

[osalvador@techadventures.net: fix up vm_brk_flags s@request@len@]
Link: http://lkml.kernel.org/r/20180706090217.GI32658@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: syzbot <syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com>
Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c  |  2 --
 mm/mmap.c | 29 ++++++++++++-----------------
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index b70d7ba7cc13..fc5f98069f4e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1238,8 +1238,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 	int locked = 0;
 	long ret = 0;
 
-	VM_BUG_ON(start & ~PAGE_MASK);
-	VM_BUG_ON(len != PAGE_ALIGN(len));
 	end = start + len;
 
 	for (nstart = start; nstart < end; nstart = nend) {
diff --git a/mm/mmap.c b/mm/mmap.c
index d1eb87ef4b1a..5801b5f0a634 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -186,8 +186,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	return next;
 }
 
-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf);
-
+static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags,
+		struct list_head *uf);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
 	unsigned long retval;
@@ -245,7 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 		goto out;
 
 	/* Ok, looks good - let it rip. */
-	if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0)
+	if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
 		goto out;
 
 set_brk:
@@ -2929,21 +2929,14 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
  *  anonymous maps.  eventually we may be able to do some
  *  brk-specific accounting here.
  */
-static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf)
+static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
-	unsigned long len;
 	struct rb_node **rb_link, *rb_parent;
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
 
-	len = PAGE_ALIGN(request);
-	if (len < request)
-		return -ENOMEM;
-	if (!len)
-		return 0;
-
 	/* Until we need other flags, refuse anything except VM_EXEC. */
 	if ((flags & (~VM_EXEC)) != 0)
 		return -EINVAL;
@@ -3015,18 +3008,20 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
 	return 0;
 }
 
-static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf)
-{
-	return do_brk_flags(addr, len, 0, uf);
-}
-
-int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags)
+int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
+	unsigned long len;
 	int ret;
 	bool populate;
 	LIST_HEAD(uf);
 
+	len = PAGE_ALIGN(request);
+	if (len < request)
+		return -ENOMEM;
+	if (!len)
+		return 0;
+
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
 

From ffe075132af8b7967089c361e506d4fa747efd14 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 13 Jul 2018 16:59:23 -0700
Subject: [PATCH 380/382] checkpatch: fix duplicate invalid vsprintf pointer
 extension '%p<foo>' messages

Multiline statements with invalid %p<foo> uses produce multiple
warnings.  Fix that.

e.g.:

$ cat t_block.c
void foo(void)
{
	MY_DEBUG(drv->foo,
		 "%pk",
		 foo->boo);
}

$ ./scripts/checkpatch.pl -f t_block.c
WARNING: Missing or malformed SPDX-License-Identifier tag in line 1
#1: FILE: t_block.c:1:
+void foo(void)

WARNING: Invalid vsprintf pointer extension '%pk'
#3: FILE: t_block.c:3:
+	MY_DEBUG(drv->foo,
+		 "%pk",
+		 foo->boo);

WARNING: Invalid vsprintf pointer extension '%pk'
#3: FILE: t_block.c:3:
+	MY_DEBUG(drv->foo,
+		 "%pk",
+		 foo->boo);

total: 0 errors, 3 warnings, 6 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
      mechanically convert to the typical style using --fix or --fix-inplace.

t_block.c has style problems, please review.

NOTE: If any of the errors are false positives, please report
      them to the maintainer, see CHECKPATCH in MAINTAINERS.

Link: http://lkml.kernel.org/r/9e8341bbe4c9877d159cb512bb701043cbfbb10b.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: "Tobin C. Harding" <me@tobin.cc>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a9c05506e325..447857ffaf6b 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5813,14 +5813,14 @@ sub process {
 		    defined $stat &&
 		    $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s &&
 		    $1 !~ /^_*volatile_*$/) {
-			my $specifier;
-			my $extension;
-			my $bad_specifier = "";
 			my $stat_real;
 
 			my $lc = $stat =~ tr@\n@@;
 			$lc = $lc + $linenr;
 		        for (my $count = $linenr; $count <= $lc; $count++) {
+				my $specifier;
+				my $extension;
+				my $bad_specifier = "";
 				my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0));
 				$fmt =~ s/%%//g;
 

From fe10e398e860955bac4d28ec031b701d358465e4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 13 Jul 2018 16:59:27 -0700
Subject: [PATCH 381/382] reiserfs: fix buffer overflow with long warning
 messages

ReiserFS prepares log messages into a 1024-byte buffer with no bounds
checks.  Long messages, such as the "unknown mount option" warning when
userspace passes a crafted mount options string, overflow this buffer.
This causes KASAN to report a global-out-of-bounds write.

Fix it by truncating messages to the buffer size.

Link: http://lkml.kernel.org/r/20180707203621.30922-1-ebiggers3@gmail.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzbot+b890b3335a4d8c608963@syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/reiserfs/prints.c | 141 +++++++++++++++++++++++++------------------
 1 file changed, 81 insertions(+), 60 deletions(-)

diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c
index 7e288d97adcb..9fed1c05f1f4 100644
--- a/fs/reiserfs/prints.c
+++ b/fs/reiserfs/prints.c
@@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key)
 }
 
 /* %k */
-static void sprintf_le_key(char *buf, struct reiserfs_key *key)
+static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key)
 {
 	if (key)
-		sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id),
-			le32_to_cpu(key->k_objectid), le_offset(key),
-			le_type(key));
+		return scnprintf(buf, size, "[%d %d %s %s]",
+				 le32_to_cpu(key->k_dir_id),
+				 le32_to_cpu(key->k_objectid), le_offset(key),
+				 le_type(key));
 	else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 }
 
 /* %K */
-static void sprintf_cpu_key(char *buf, struct cpu_key *key)
+static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key)
 {
 	if (key)
-		sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id,
-			key->on_disk_key.k_objectid, reiserfs_cpu_offset(key),
-			cpu_type(key));
+		return scnprintf(buf, size, "[%d %d %s %s]",
+				 key->on_disk_key.k_dir_id,
+				 key->on_disk_key.k_objectid,
+				 reiserfs_cpu_offset(key), cpu_type(key));
 	else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 }
 
-static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh)
+static int scnprintf_de_head(char *buf, size_t size,
+			     struct reiserfs_de_head *deh)
 {
 	if (deh)
-		sprintf(buf,
-			"[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
-			deh_offset(deh), deh_dir_id(deh), deh_objectid(deh),
-			deh_location(deh), deh_state(deh));
+		return scnprintf(buf, size,
+				 "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]",
+				 deh_offset(deh), deh_dir_id(deh),
+				 deh_objectid(deh), deh_location(deh),
+				 deh_state(deh));
 	else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 
 }
 
-static void sprintf_item_head(char *buf, struct item_head *ih)
+static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih)
 {
 	if (ih) {
-		strcpy(buf,
-		       (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*");
-		sprintf_le_key(buf + strlen(buf), &(ih->ih_key));
-		sprintf(buf + strlen(buf), ", item_len %d, item_location %d, "
-			"free_space(entry_count) %d",
-			ih_item_len(ih), ih_location(ih), ih_free_space(ih));
+		char *p = buf;
+		char * const end = buf + size;
+
+		p += scnprintf(p, end - p, "%s",
+			       (ih_version(ih) == KEY_FORMAT_3_6) ?
+			       "*3.6* " : "*3.5*");
+
+		p += scnprintf_le_key(p, end - p, &ih->ih_key);
+
+		p += scnprintf(p, end - p,
+			       ", item_len %d, item_location %d, free_space(entry_count) %d",
+			       ih_item_len(ih), ih_location(ih),
+			       ih_free_space(ih));
+		return p - buf;
 	} else
-		sprintf(buf, "[NULL]");
+		return scnprintf(buf, size, "[NULL]");
 }
 
-static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de)
+static int scnprintf_direntry(char *buf, size_t size,
+			      struct reiserfs_dir_entry *de)
 {
 	char name[20];
 
 	memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen);
 	name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0;
-	sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid);
+	return scnprintf(buf, size, "\"%s\"==>[%d %d]",
+			 name, de->de_dir_id, de->de_objectid);
 }
 
-static void sprintf_block_head(char *buf, struct buffer_head *bh)
+static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh)
 {
-	sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ",
-		B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
+	return scnprintf(buf, size,
+			 "level=%d, nr_items=%d, free_space=%d rdkey ",
+			 B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh));
 }
 
-static void sprintf_buffer_head(char *buf, struct buffer_head *bh)
+static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh)
 {
-	sprintf(buf,
-		"dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
-		bh->b_bdev, bh->b_size,
-		(unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)),
-		bh->b_state, bh->b_page,
-		buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
-		buffer_dirty(bh) ? "DIRTY" : "CLEAN",
-		buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
+	return scnprintf(buf, size,
+			 "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)",
+			 bh->b_bdev, bh->b_size,
+			 (unsigned long long)bh->b_blocknr,
+			 atomic_read(&(bh->b_count)),
+			 bh->b_state, bh->b_page,
+			 buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE",
+			 buffer_dirty(bh) ? "DIRTY" : "CLEAN",
+			 buffer_locked(bh) ? "LOCKED" : "UNLOCKED");
 }
 
-static void sprintf_disk_child(char *buf, struct disk_child *dc)
+static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc)
 {
-	sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc),
-		dc_size(dc));
+	return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]",
+			 dc_block_number(dc), dc_size(dc));
 }
 
 static char *is_there_reiserfs_struct(char *fmt, int *what)
@@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args)
 	char *fmt1 = fmt_buf;
 	char *k;
 	char *p = error_buf;
+	char * const end = &error_buf[sizeof(error_buf)];
 	int what;
 
 	spin_lock(&error_lock);
 
-	strcpy(fmt1, fmt);
+	if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) {
+		strscpy(error_buf, "format string too long", end - error_buf);
+		goto out_unlock;
+	}
 
 	while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) {
 		*k = 0;
 
-		p += vsprintf(p, fmt1, args);
+		p += vscnprintf(p, end - p, fmt1, args);
 
 		switch (what) {
 		case 'k':
-			sprintf_le_key(p, va_arg(args, struct reiserfs_key *));
+			p += scnprintf_le_key(p, end - p,
+					      va_arg(args, struct reiserfs_key *));
 			break;
 		case 'K':
-			sprintf_cpu_key(p, va_arg(args, struct cpu_key *));
+			p += scnprintf_cpu_key(p, end - p,
+					       va_arg(args, struct cpu_key *));
 			break;
 		case 'h':
-			sprintf_item_head(p, va_arg(args, struct item_head *));
+			p += scnprintf_item_head(p, end - p,
+						 va_arg(args, struct item_head *));
 			break;
 		case 't':
-			sprintf_direntry(p,
-					 va_arg(args,
-						struct reiserfs_dir_entry *));
+			p += scnprintf_direntry(p, end - p,
+						va_arg(args, struct reiserfs_dir_entry *));
 			break;
 		case 'y':
-			sprintf_disk_child(p,
-					   va_arg(args, struct disk_child *));
+			p += scnprintf_disk_child(p, end - p,
+						  va_arg(args, struct disk_child *));
 			break;
 		case 'z':
-			sprintf_block_head(p,
-					   va_arg(args, struct buffer_head *));
+			p += scnprintf_block_head(p, end - p,
+						  va_arg(args, struct buffer_head *));
 			break;
 		case 'b':
-			sprintf_buffer_head(p,
-					    va_arg(args, struct buffer_head *));
+			p += scnprintf_buffer_head(p, end - p,
+						   va_arg(args, struct buffer_head *));
 			break;
 		case 'a':
-			sprintf_de_head(p,
-					va_arg(args,
-					       struct reiserfs_de_head *));
+			p += scnprintf_de_head(p, end - p,
+					       va_arg(args, struct reiserfs_de_head *));
 			break;
 		}
 
-		p += strlen(p);
 		fmt1 = k + 2;
 	}
-	vsprintf(p, fmt1, args);
+	p += vscnprintf(p, end - p, fmt1, args);
+out_unlock:
 	spin_unlock(&error_lock);
 
 }

From 9d3cce1e8b8561fed5f383d22a4d6949db4eadbe Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 15 Jul 2018 12:49:31 -0700
Subject: [PATCH 382/382] Linux 4.18-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2d80fbbe51a8..a89d8a0d3ee1 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 18
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Merciless Moray
 
 # *DOCUMENTATION*