From 327eaf738ff97d19491362e30497954105d60414 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 12 Jun 2018 23:34:57 -0400 Subject: [PATCH 001/382] ext4: add warn_on_error mount option This is very handy when debugging bugs handling maliciously corrupted file systems. Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 + fs/ext4/super.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fa52b7dd4542..856b6a54d82b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1108,6 +1108,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c1c5c8775ae7..c8b7b8302e90 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -405,6 +405,9 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) static void ext4_handle_error(struct super_block *sb) { + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + if (sb_rdonly(sb)) return; @@ -740,6 +743,9 @@ __acquires(bitlock) va_end(args); } + if (test_opt(sb, WARN_ON_ERROR)) + WARN_ON_ONCE(1); + if (test_opt(sb, ERRORS_CONT)) { ext4_commit_super(sb, 0); return; @@ -1377,7 +1383,8 @@ enum { Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax, - Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, + Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, + Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, @@ -1444,6 +1451,8 @@ static const match_table_t tokens = { {Opt_dax, "dax"}, {Opt_stripe, "stripe=%u"}, {Opt_delalloc, "delalloc"}, + {Opt_warn_on_error, "warn_on_error"}, + {Opt_nowarn_on_error, "nowarn_on_error"}, {Opt_lazytime, "lazytime"}, {Opt_nolazytime, "nolazytime"}, {Opt_debug_want_extra_isize, "debug_want_extra_isize=%u"}, @@ -1608,6 +1617,8 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, MOPT_EXT4_ONLY | MOPT_CLEAR}, + {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, + {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, From 5369a762c882c0b6e9599e4ebbb3a9ba9eee7e2d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 00:23:11 -0400 Subject: [PATCH 002/382] ext4: add corruption check in ext4_xattr_set_entry() In theory this should have been caught earlier when the xattr list was verified, but in case it got missed, it's simple enough to add check to make sure we don't overrun the xattr buffer. This addresses CVE-2018-10879. https://bugzilla.kernel.org/show_bug.cgi?id=200001 Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@kernel.org --- fs/ext4/xattr.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fc4ced59c565..230ba79715f6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1560,7 +1560,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, handle_t *handle, struct inode *inode, bool is_block) { - struct ext4_xattr_entry *last; + struct ext4_xattr_entry *last, *next; struct ext4_xattr_entry *here = s->here; size_t min_offs = s->end - s->base, name_len = strlen(i->name); int in_inode = i->in_inode; @@ -1595,7 +1595,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Compute min_offs and last. */ last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + for (; !IS_LAST_ENTRY(last); last = next) { + next = EXT4_XATTR_NEXT(last); + if ((void *)next >= s->end) { + EXT4_ERROR_INODE(inode, "corrupted xattr entries"); + ret = -EFSCORRUPTED; + goto out; + } if (!last->e_value_inum && last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) From 513f86d73855ce556ea9522b6bfd79f87356dc3a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 00:51:28 -0400 Subject: [PATCH 003/382] ext4: always verify the magic number in xattr blocks If there an inode points to a block which is also some other type of metadata block (such as a block allocation bitmap), the buffer_verified flag can be set when it was validated as that other metadata block type; however, it would make a really terrible external attribute block. The reason why we use the verified flag is to avoid constantly reverifying the block. However, it doesn't take much overhead to make sure the magic number of the xattr block is correct, and this will avoid potential crashes. This addresses CVE-2018-10879. https://bugzilla.kernel.org/show_bug.cgi?id=200001 Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Cc: stable@kernel.org --- fs/ext4/xattr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 230ba79715f6..0263692979ec 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -230,12 +230,12 @@ __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, { int error = -EFSCORRUPTED; - if (buffer_verified(bh)) - return 0; - if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) goto errout; + if (buffer_verified(bh)) + return 0; + error = -EFSBADCRC; if (!ext4_xattr_block_csum_verify(inode, bh)) goto errout; From 819b23f1c501b17b9694325471789e6b5cc2d0d2 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 23:00:48 -0400 Subject: [PATCH 004/382] ext4: always check block group bounds in ext4_init_block_bitmap() Regardless of whether the flex_bg feature is set, we should always check to make sure the bits we are setting in the block bitmap are within the block group bounds. https://bugzilla.kernel.org/show_bug.cgi?id=199865 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/balloc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b00481c475cb..8a2e202ade8a 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -184,7 +184,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, unsigned int bit, bit_max; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t start, tmp; - int flex_bg = 0; J_ASSERT_BH(bh, buffer_locked(bh)); @@ -207,22 +206,19 @@ static int ext4_init_block_bitmap(struct super_block *sb, start = ext4_group_first_block_no(sb, block_group); - if (ext4_has_feature_flex_bg(sb)) - flex_bg = 1; - /* Set bits for block and inode bitmaps, and inode table */ tmp = ext4_block_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_bitmap(sb, gdp); - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); tmp = ext4_inode_table(sb, gdp); for (; tmp < ext4_inode_table(sb, gdp) + sbi->s_itb_per_group; tmp++) { - if (!flex_bg || ext4_block_in_group(sb, tmp, block_group)) + if (ext4_block_in_group(sb, tmp, block_group)) ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data); } From 77260807d1170a8cf35dbb06e07461a655f67eee Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 13 Jun 2018 23:08:26 -0400 Subject: [PATCH 005/382] ext4: make sure bitmaps and the inode table don't overlap with bg descriptors It's really bad when the allocation bitmaps and the inode table overlap with the block group descriptors, since it causes random corruption of the bg descriptors. So we really want to head those off at the pass. https://bugzilla.kernel.org/show_bug.cgi?id=199865 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c8b7b8302e90..c61675d62195 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2348,6 +2348,7 @@ static int ext4_check_descriptors(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; + ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0) + 1; ext4_fsblk_t block_bitmap; ext4_fsblk_t inode_bitmap; ext4_fsblk_t inode_table; @@ -2380,6 +2381,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (block_bitmap >= sb_block + 1 && + block_bitmap <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Block bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " @@ -2394,6 +2403,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_bitmap >= sb_block + 1 && + inode_bitmap <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode bitmap for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " @@ -2408,6 +2425,14 @@ static int ext4_check_descriptors(struct super_block *sb, if (!sb_rdonly(sb)) return 0; } + if (inode_table >= sb_block + 1 && + inode_table <= last_bg_block) { + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Inode table for group %u overlaps " + "block group descriptors", i); + if (!sb_rdonly(sb)) + return 0; + } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " From 8844618d8aa7a9973e7b527d038a2a589665002c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 Jun 2018 00:58:00 -0400 Subject: [PATCH 006/382] ext4: only look at the bg_flags field if it is valid The bg_flags field in the block group descripts is only valid if the uninit_bg or metadata_csum feature is enabled. We were not consistently looking at this field; fix this. Also block group #0 must never have uninitialized allocation bitmaps, or need to be zeroed, since that's where the root inode, and other special inodes are set up. Check for these conditions and mark the file system as corrupted if they are detected. This addresses CVE-2018-10876. https://bugzilla.kernel.org/show_bug.cgi?id=199403 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/balloc.c | 11 ++++++++++- fs/ext4/ialloc.c | 14 ++++++++++++-- fs/ext4/mballoc.c | 6 ++++-- fs/ext4/super.c | 11 ++++++++++- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 8a2e202ade8a..e68cefe08261 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -438,7 +438,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) goto verify; } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Block bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4d6e007f3569..da6c10c1e37a 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -150,7 +150,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) } ext4_lock_group(sb, block_group); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) { + if (block_group == 0) { + ext4_unlock_group(sb, block_group); + unlock_buffer(bh); + ext4_error(sb, "Inode bitmap for bg 0 marked " + "uninitialized"); + err = -EFSCORRUPTED; + goto out; + } memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, bh->b_data); @@ -994,7 +1003,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, /* recheck and clear flag under lock if we still need to */ ext4_lock_group(sb, group); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, group, gdp)); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 243c42fdc155..402c769c51ea 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2444,7 +2444,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, * initialize bb_free to be able to skip * empty groups without initialization */ - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { meta_group_info[i]->bb_free = ext4_free_clusters_after_init(sb, group, desc); } else { @@ -3010,7 +3011,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, #endif ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_group_clusters_set(sb, gdp, ext4_free_clusters_after_init(sb, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c61675d62195..4d34430d75f6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3139,13 +3139,22 @@ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb) ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count; struct ext4_group_desc *gdp = NULL; + if (!ext4_has_group_desc_csum(sb)) + return ngroups; + for (group = 0; group < ngroups; group++) { gdp = ext4_get_group_desc(sb, group, NULL); if (!gdp) continue; - if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))) + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) + continue; + if (group != 0) break; + ext4_error(sb, "Inode table for bg 0 marked as " + "needing zeroing"); + if (sb_rdonly(sb)) + return ngroups; } return group; From bc890a60247171294acc0bd67d211fa4b88d40ba Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 Jun 2018 12:55:10 -0400 Subject: [PATCH 007/382] ext4: verify the depth of extent tree in ext4_find_extent() If there is a corupted file system where the claimed depth of the extent tree is -1, this can cause a massive buffer overrun leading to sadness. This addresses CVE-2018-10877. https://bugzilla.kernel.org/show_bug.cgi?id=199417 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4_extents.h | 1 + fs/ext4/extents.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 98fb0c119c68..adf6668b596f 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -91,6 +91,7 @@ struct ext4_extent_header { }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) +#define EXT4_MAX_EXTENT_DEPTH 5 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ (sizeof(struct ext4_extent_header) + \ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c969275ce3ee..08226f72b7ee 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -869,6 +869,12 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, eh = ext_inode_hdr(inode); depth = ext_depth(inode); + if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) { + EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d", + depth); + ret = -EFSCORRUPTED; + goto err; + } if (path) { ext4_ext_drop_refs(path); From bdbd6ce01a70f02e9373a584d0ae9538dcf0a121 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 15 Jun 2018 12:27:16 -0400 Subject: [PATCH 008/382] ext4: include the illegal physical block in the bad map ext4_error msg Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2ea07efbe016..c2f4ccb880c4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -402,9 +402,9 @@ static int __check_block_validity(struct inode *inode, const char *func, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, - "lblock %lu mapped to illegal pblock " + "lblock %lu mapped to illegal pblock %llu " "(length %d)", (unsigned long) map->m_lblk, - map->m_len); + map->m_pblk, map->m_len); return -EFSCORRUPTED; } return 0; From 6e8ab72a812396996035a37e5ca4b3b99b5d214b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 15 Jun 2018 12:28:16 -0400 Subject: [PATCH 009/382] ext4: clear i_data in ext4_inode_info when removing inline data When converting from an inode from storing the data in-line to a data block, ext4_destroy_inline_data_nolock() was only clearing the on-disk copy of the i_blocks[] array. It was not clearing copy of the i_blocks[] in ext4_inode_info, in i_data[], which is the copy actually used by ext4_map_blocks(). This didn't matter much if we are using extents, since the extents header would be invalid and thus the extents could would re-initialize the extents tree. But if we are using indirect blocks, the previous contents of the i_blocks array will be treated as block numbers, with potentially catastrophic results to the file system integrity and/or user data. This gets worse if the file system is using a 1k block size and s_first_data is zero, but even without this, the file system can get quite badly corrupted. This addresses CVE-2018-10881. https://bugzilla.kernel.org/show_bug.cgi?id=200015 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/inline.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 44b4fcdc3755..d79115d8d716 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -437,6 +437,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, memset((void *)ext4_raw_inode(&is.iloc)->i_block, 0, EXT4_MIN_INLINE_DATA_SIZE); + memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); if (ext4_has_feature_extents(inode->i_sb)) { if (S_ISDIR(inode->i_mode) || From 8cdb5240ec5928b20490a2bb34cb87e9a5f40226 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 15:40:48 -0400 Subject: [PATCH 010/382] ext4: never move the system.data xattr out of the inode body When expanding the extra isize space, we must never move the system.data xattr out of the inode body. For performance reasons, it doesn't make any sense, and the inline data implementation assumes that system.data xattr is never in the external xattr block. This addresses CVE-2018-10880 https://bugzilla.kernel.org/show_bug.cgi?id=200005 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/xattr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 0263692979ec..72377b77fbd7 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2657,6 +2657,11 @@ static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode, last = IFIRST(header); /* Find the entry best suited to be pushed into EA block */ for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { + /* never move system.data out of the inode */ + if ((last->e_name_len == 4) && + (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) && + !memcmp(last->e_name, "data", 4)) + continue; total_size = EXT4_XATTR_LEN(last->e_name_len); if (!last->e_value_inum) total_size += EXT4_XATTR_SIZE( From e09463f220ca9a1a1ecfda84fcda658f99a1f12a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 20:21:45 -0400 Subject: [PATCH 011/382] jbd2: don't mark block as modified if the handle is out of credits Do not set the b_modified flag in block's journal head should not until after we're sure that jbd2_journal_dirty_metadat() will not abort with an error due to there not being enough space reserved in the jbd2 handle. Otherwise, future attempts to modify the buffer may lead a large number of spurious errors and warnings. This addresses CVE-2018-10883. https://bugzilla.kernel.org/show_bug.cgi?id=200071 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/jbd2/transaction.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 51dd68e67b0f..c0b66a7a795b 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1361,6 +1361,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == transaction && jh->b_jlist != BJ_Metadata) { jbd_lock_bh_state(bh); + if (jh->b_transaction == transaction && + jh->b_jlist != BJ_Metadata) + pr_err("JBD2: assertion failure: h_type=%u " + "h_line_no=%u block_no=%llu jlist=%u\n", + handle->h_type, handle->h_line_no, + (unsigned long long) bh->b_blocknr, + jh->b_jlist); J_ASSERT_JH(jh, jh->b_transaction != transaction || jh->b_jlist == BJ_Metadata); jbd_unlock_bh_state(bh); @@ -1380,11 +1387,11 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * of the transaction. This needs to be done * once a transaction -bzzz */ - jh->b_modified = 1; if (handle->h_buffer_credits <= 0) { ret = -ENOSPC; goto out_unlock_bh; } + jh->b_modified = 1; handle->h_buffer_credits--; } From 8bc1379b82b8e809eef77a9fedbb75c6c297be19 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 16 Jun 2018 23:41:59 -0400 Subject: [PATCH 012/382] ext4: avoid running out of journal credits when appending to an inline file Use a separate journal transaction if it turns out that we need to convert an inline file to use an data block. Otherwise we could end up failing due to not having journal credits. This addresses CVE-2018-10883. https://bugzilla.kernel.org/show_bug.cgi?id=200071 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 3 --- fs/ext4/inline.c | 38 +------------------------------------- fs/ext4/xattr.c | 19 ++----------------- 3 files changed, 3 insertions(+), 57 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 856b6a54d82b..859d6433dcc1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3013,9 +3013,6 @@ extern int ext4_inline_data_fiemap(struct inode *inode, struct iomap; extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap); -extern int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed); extern int ext4_inline_data_truncate(struct inode *inode, int *has_inline); extern int ext4_convert_inline_data(struct inode *inode); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index d79115d8d716..851bc552d849 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -887,11 +887,11 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, flags |= AOP_FLAG_NOFS; if (ret == -ENOSPC) { + ext4_journal_stop(handle); ret = ext4_da_convert_inline_data_to_extent(mapping, inode, flags, fsdata); - ext4_journal_stop(handle); if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry_journal; @@ -1891,42 +1891,6 @@ int ext4_inline_data_fiemap(struct inode *inode, return (error < 0 ? error : 0); } -/* - * Called during xattr set, and if we can sparse space 'needed', - * just create the extent tree evict the data to the outer block. - * - * We use jbd2 instead of page cache to move data to the 1st block - * so that the whole transaction can be committed as a whole and - * the data isn't lost because of the delayed page cache write. - */ -int ext4_try_to_evict_inline_data(handle_t *handle, - struct inode *inode, - int needed) -{ - int error; - struct ext4_xattr_entry *entry; - struct ext4_inode *raw_inode; - struct ext4_iloc iloc; - - error = ext4_get_inode_loc(inode, &iloc); - if (error) - return error; - - raw_inode = ext4_raw_inode(&iloc); - entry = (struct ext4_xattr_entry *)((void *)raw_inode + - EXT4_I(inode)->i_inline_off); - if (EXT4_XATTR_LEN(entry->e_name_len) + - EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) { - error = -ENOSPC; - goto out; - } - - error = ext4_convert_inline_data_nolock(handle, inode, &iloc); -out: - brelse(iloc.bh); - return error; -} - int ext4_inline_data_truncate(struct inode *inode, int *has_inline) { handle_t *handle; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 72377b77fbd7..723df14f4084 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2212,23 +2212,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); - if (error) { - if (error == -ENOSPC && - ext4_has_inline_data(inode)) { - error = ext4_try_to_evict_inline_data(handle, inode, - EXT4_XATTR_LEN(strlen(i->name) + - EXT4_XATTR_SIZE(i->value_len))); - if (error) - return error; - error = ext4_xattr_ibody_find(inode, i, is); - if (error) - return error; - error = ext4_xattr_set_entry(i, s, handle, inode, - false /* is_block */); - } - if (error) - return error; - } + if (error) + return error; header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); From c37e9e013469521d9adb932d17a1795c139b36db Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 17 Jun 2018 00:41:14 -0400 Subject: [PATCH 013/382] ext4: add more inode number paranoia checks If there is a directory entry pointing to a system inode (such as a journal inode), complain and declare the file system to be corrupted. Also, if the superblock's first inode number field is too small, refuse to mount the file system. This addresses CVE-2018-10882. https://bugzilla.kernel.org/show_bug.cgi?id=200069 Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/ext4.h | 5 ----- fs/ext4/inode.c | 3 ++- fs/ext4/super.c | 5 +++++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 859d6433dcc1..4bd69649a048 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1502,11 +1502,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode) static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || - ino == EXT4_USR_QUOTA_INO || - ino == EXT4_GRP_QUOTA_INO || - ino == EXT4_BOOT_LOADER_INO || - ino == EXT4_JOURNAL_INO || - ino == EXT4_RESIZE_INO || (ino >= EXT4_FIRST_INO(sb) && ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c2f4ccb880c4..7d6c10017bdf 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4506,7 +4506,8 @@ static int __ext4_get_inode_loc(struct inode *inode, int inodes_per_block, inode_offset; iloc->bh = NULL; - if (!ext4_valid_inum(sb, inode->i_ino)) + if (inode->i_ino < EXT4_ROOT_INO || + inode->i_ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) return -EFSCORRUPTED; iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4d34430d75f6..1f955c128e0d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3858,6 +3858,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } else { sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + goto failed_mount; + } if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { From bfe0a5f47ada40d7984de67e59a7d3390b9b9ecc Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 17 Jun 2018 18:11:20 -0400 Subject: [PATCH 014/382] ext4: add more mount time checks of the superblock The kernel's ext4 mount-time checks were more permissive than e2fsprogs's libext2fs checks when opening a file system. The superblock is considered too insane for debugfs or e2fsck to operate on it, the kernel has no business trying to mount it. This will make file system fuzzing tools work harder, but the failure cases that they find will be more useful and be easier to evaluate. Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1f955c128e0d..b37b00befd65 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3793,6 +3793,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) le32_to_cpu(es->s_log_block_size)); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { ext4_msg(sb, KERN_ERR, @@ -3939,13 +3946,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; - } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = @@ -3966,10 +3966,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } } else { if (clustersize != blocksize) { - ext4_warning(sb, "fragment/cluster size (%d) != " - "block size (%d)", clustersize, - blocksize); - clustersize = blocksize; + ext4_msg(sb, KERN_ERR, + "fragment/cluster size (%d) != " + "block size (%d)", clustersize, blocksize); + goto failed_mount; } if (sbi->s_blocks_per_group > blocksize * 8) { ext4_msg(sb, KERN_ERR, @@ -4023,6 +4023,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_blocks_count(es)); goto failed_mount; } + if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && + (sbi->s_cluster_ratio == 1)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block is 0 with a 1k block and cluster size"); + goto failed_mount; + } + blocks_count = (ext4_blocks_count(es) - le32_to_cpu(es->s_first_data_block) + EXT4_BLOCKS_PER_GROUP(sb) - 1); @@ -4058,6 +4065,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ret = -ENOMEM; goto failed_mount; } + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + ret = -EINVAL; + goto failed_mount; + } bgl_lock_init(sbi->s_blockgroup_lock); From ba062ebb2cd561d404e0fba8ee4b3f5ebce7cbfc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jun 2018 09:13:39 -0700 Subject: [PATCH 015/382] netfilter: nf_queue: augment nfqa_cfg_policy Three attributes are currently not verified, thus can trigger KMSAN warnings such as : BUG: KMSAN: uninit-value in __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] BUG: KMSAN: uninit-value in __fswab32 include/uapi/linux/swab.h:59 [inline] BUG: KMSAN: uninit-value in nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268 CPU: 1 PID: 4521 Comm: syz-executor120 Not tainted 4.17.0+ #5 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x188/0x2a0 mm/kmsan/kmsan.c:1117 __msan_warning_32+0x70/0xc0 mm/kmsan/kmsan_instr.c:620 __arch_swab32 arch/x86/include/uapi/asm/swab.h:10 [inline] __fswab32 include/uapi/linux/swab.h:59 [inline] nfqnl_recv_config+0x939/0x17d0 net/netfilter/nfnetlink_queue.c:1268 nfnetlink_rcv_msg+0xb2e/0xc80 net/netfilter/nfnetlink.c:212 netlink_rcv_skb+0x37e/0x600 net/netlink/af_netlink.c:2448 nfnetlink_rcv+0x2fe/0x680 net/netfilter/nfnetlink.c:513 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline] netlink_unicast+0x1680/0x1750 net/netlink/af_netlink.c:1336 netlink_sendmsg+0x104f/0x1350 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x43fd59 RSP: 002b:00007ffde0e30d28 EFLAGS: 00000213 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 000000000043fd59 RDX: 0000000000000000 RSI: 0000000020000080 RDI: 0000000000000003 RBP: 00000000006ca018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000213 R12: 0000000000401680 R13: 0000000000401710 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315 kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan.c:322 slab_post_alloc_hook mm/slab.h:446 [inline] slab_alloc_node mm/slub.c:2753 [inline] __kmalloc_node_track_caller+0xb35/0x11b0 mm/slub.c:4395 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cb/0x9e0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:988 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1182 [inline] netlink_sendmsg+0x76e/0x1350 net/netlink/af_netlink.c:1876 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg net/socket.c:639 [inline] ___sys_sendmsg+0xec8/0x1320 net/socket.c:2117 __sys_sendmsg net/socket.c:2155 [inline] __do_sys_sendmsg net/socket.c:2164 [inline] __se_sys_sendmsg net/socket.c:2162 [inline] __x64_sys_sendmsg+0x331/0x460 net/socket.c:2162 do_syscall_64+0x15b/0x230 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: fdb694a01f1f ("netfilter: Add fail-open support") Fixes: 829e17a1a602 ("[NETFILTER]: nfnetlink_queue: allow changing queue length through netlink") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 4ccd2988f9db..ea4ba551abb2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1243,6 +1243,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl, static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, + [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 }, + [NFQA_CFG_MASK] = { .type = NLA_U32 }, + [NFQA_CFG_FLAGS] = { .type = NLA_U32 }, }; static const struct nf_queue_handler nfqh = { From 9ce7bc036ae4cfe3393232c86e9e1fea2153c237 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 13 Jun 2018 10:11:56 -0700 Subject: [PATCH 016/382] netfilter: ipv6: nf_defrag: reduce struct net memory waste It is a waste of memory to use a full "struct netns_sysctl_ipv6" while only one pointer is really used, considering netns_sysctl_ipv6 keeps growing. Also, since "struct netns_frags" has cache line alignment, it is better to move the frags_hdr pointer outside, otherwise we spend a full cache line for this pointer. This saves 192 bytes of memory per netns. Fixes: c038a767cd69 ("ipv6: add a new namespace for nf_conntrack_reasm") Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/net/net_namespace.h | 1 + include/net/netns/ipv6.h | 1 - net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 47e35cce3b64..a71264d75d7f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -128,6 +128,7 @@ struct net { #endif #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag nf_frag; + struct ctl_table_header *nf_frag_frags_hdr; #endif struct sock *nfnl; struct sock *nfnl_stash; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index c978a31b0f84..762ac9931b62 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -109,7 +109,6 @@ struct netns_ipv6 { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag { - struct netns_sysctl_ipv6 sysctl; struct netns_frags frags; }; #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5e0332014c17..a452d99c9f52 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->nf_frag.sysctl.frags_hdr = hdr; + net->nf_frag_frags_hdr = hdr; return 0; err_reg: @@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) { struct ctl_table *table; - table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; - unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + table = net->nf_frag_frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag_frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } From ad9852af97587b8abe8102f9ddcb05c9769656f6 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 13 Jun 2018 12:26:13 +0800 Subject: [PATCH 017/382] netfilter: nf_ct_helper: Fix possible panic after nf_conntrack_helper_unregister The helper module would be unloaded after nf_conntrack_helper_unregister, so it may cause a possible panic caused by race. nf_ct_iterate_destroy(unhelp, me) reset the helper of conntrack as NULL, but maybe someone has gotten the helper pointer during this period. Then it would panic, when it accesses the helper and the module was unloaded. Take an example as following: CPU0 CPU1 ctnetlink_dump_helpinfo helper = rcu_dereference(help->helper); unhelp set helper as NULL unload helper module helper->to_nlattr(skb, ct); As above, the cpu0 tries to access the helper and its module is unloaded, then the panic happens. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 551a1eddf0fa..a75b11c39312 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); + + /* Maybe someone has gotten the helper already when unhelp above. + * So need to wait it. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); From bfc9dfdcb6e9493de5d4fe0d3ed3ce57672f8d07 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 13 Jun 2018 08:39:49 -0700 Subject: [PATCH 018/382] MD: cleanup resources in failure We need destroy the memory pool in failure Signed-off-by: Shaohua Li --- drivers/md/md.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 29b0cd9ec951..994aed2f9dff 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5547,7 +5547,8 @@ int md_run(struct mddev *mddev) else pr_warn("md: personality for level %s is not loaded!\n", mddev->clevel); - return -EINVAL; + err = -EINVAL; + goto abort; } spin_unlock(&pers_lock); if (mddev->level != pers->level) { @@ -5560,7 +5561,8 @@ int md_run(struct mddev *mddev) pers->start_reshape == NULL) { /* This personality cannot handle reshaping... */ module_put(pers->owner); - return -EINVAL; + err = -EINVAL; + goto abort; } if (pers->sync_request) { @@ -5629,7 +5631,7 @@ int md_run(struct mddev *mddev) mddev->private = NULL; module_put(pers->owner); bitmap_destroy(mddev); - return err; + goto abort; } if (mddev->queue) { bool nonrot = true; From fae2a63737e5973f1426bc139935a0f42e232844 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 8 Jun 2018 18:26:33 +0800 Subject: [PATCH 019/382] libahci: Fix possible Spectre-v1 pmp indexing in ahci_led_store() Currently smatch warns of possible Spectre-V1 issue in ahci_led_store(): drivers/ata/libahci.c:1150 ahci_led_store() warn: potential spectre issue 'pp->em_priv' (local cap) Userspace controls @pmp from following callchain: em_message->store() ->ata_scsi_em_message_store() -->ap->ops->em_store() --->ahci_led_store() After the mask+shift @pmp is effectively an 8b value, which is used to index into an array of length 8, so sanitize the array index. Signed-off-by: John Garry Signed-off-by: Tejun Heo --- drivers/ata/libahci.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 965842a08743..09620c2ffa0f 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -1146,10 +1147,12 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, /* get the slot number from the message */ pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; - if (pmp < EM_MAX_SLOTS) + if (pmp < EM_MAX_SLOTS) { + pmp = array_index_nospec(pmp, EM_MAX_SLOTS); emp = &pp->em_priv[pmp]; - else + } else { return -EINVAL; + } /* mask off the activity bits if we are in sw_activity * mode, user should turn off sw_activity before setting From 95ffcf471d05ec7c91993c91dea912f99dccfc26 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jun 2018 06:56:34 +0000 Subject: [PATCH 020/382] ata: ahci_mvebu: ahci_mvebu_stop_engine() can be static Fixes the following sparse warning: drivers/ata/ahci_mvebu.c:85:5: warning: symbol 'ahci_mvebu_stop_engine' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Tejun Heo --- drivers/ata/ahci_mvebu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index 0045dacd814b..72d90b4c3aae 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -82,7 +82,7 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv) * * Return: 0 on success; Error code otherwise. */ -int ahci_mvebu_stop_engine(struct ata_port *ap) +static int ahci_mvebu_stop_engine(struct ata_port *ap) { void __iomem *port_mmio = ahci_port_base(ap); u32 tmp, port_fbs; From 08ca1b52f69b4dfa8703d54e26e2c6e11aa453eb Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 18 Jun 2018 16:39:50 -0600 Subject: [PATCH 021/382] vfio/pci: Make IGD support a configurable option Allow the code which provides extensions to support direct assignment of Intel IGD (GVT-d) to be compiled out of the kernel if desired. The config option for this was previously automatically enabled on X86, therefore the default remains Y. This simply provides the option to disable it even for X86. Signed-off-by: Alex Williamson --- drivers/vfio/pci/Kconfig | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 24ee2605b9f0..42dc1d3d71cf 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -28,5 +28,13 @@ config VFIO_PCI_INTX def_bool y if !S390 config VFIO_PCI_IGD - depends on VFIO_PCI - def_bool y if X86 + bool "VFIO PCI extensions for Intel graphics (GVT-d)" + depends on VFIO_PCI && X86 + default y + help + Support for Intel IGD specific extensions to enable direct + assignment to virtual machines. This includes exposing an IGD + specific firmware table and read-only copies of the host bridge + and LPC bridge config space. + + To enable Intel IGD assignment through vfio-pci, say Y. From c9bd0946da243a8eb86b44ff613e2c813f9b683b Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Tue, 5 Jun 2018 18:59:57 +0200 Subject: [PATCH 022/382] dmaengine: ti: omap-dma: Fix OMAP1510 incorrect residue_granularity Commit 0198d7bb8a0c ("ASoC: omap-mcbsp: Convert to use the sdma-pcm instead of omap-pcm") resulted in broken audio playback on OMAP1510 (discovered on Amstrad Delta). When running on OMAP1510, omap-pcm used to obtain DMA offset from snd_dmaengine_pcm_pointer_no_residue() based on DMA interrupt triggered software calculations instead of snd_dmaengine_pcm_pointer() which depended on residue value calculated from omap_dma_get_src_pos(). Similar code path is still available in now used sound/soc/soc-generic-dmaengine-pcm.c but it is not triggered. It was verified already before that omap_get_dma_src_pos() from arch/arm/plat-omap/dma.c didn't work correctly for OMAP1510 - see commit 1bdd7419910c ("ASoC: OMAP: fix OMAP1510 broken PCM pointer callback") for details. Apparently the same applies to its successor, omap_dma_get_src_pos() from drivers/dma/ti/omap-dma.c. On the other hand, snd_dmaengine_pcm_pointer_no_residue() is described as depreciated and discouraged for use in new drivers because of its unreliable accuracy. However, it seems the only working option for OPAM1510 now, as long as a software calculated residue is not implemented as OMAP1510 fallback in omap-dma. Using snd_dmaengine_pcm_pointer_no_residue() code path instead of snd_dmaengine_pcm_pointer() in sound/soc/soc-generic-dmaengine-pcm.c can be triggered in two ways: - by passing pcm->flags |= SND_DMAENGINE_PCM_FLAG_NO_RESIDUE from sound/soc/omap/sdma-pcm.c, - by passing dma_caps.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR from DMA engine. Let's do the latter. Signed-off-by: Janusz Krzysztofik Acked-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- drivers/dma/ti/omap-dma.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c index 9b5ca8691f27..a4a931ddf6f6 100644 --- a/drivers/dma/ti/omap-dma.c +++ b/drivers/dma/ti/omap-dma.c @@ -1485,7 +1485,11 @@ static int omap_dma_probe(struct platform_device *pdev) od->ddev.src_addr_widths = OMAP_DMA_BUSWIDTHS; od->ddev.dst_addr_widths = OMAP_DMA_BUSWIDTHS; od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + if (__dma_omap15xx(od->plat->dma_attr)) + od->ddev.residue_granularity = + DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + else + od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; od->ddev.max_burst = SZ_16M - 1; /* CCEN: 24bit unsigned */ od->ddev.dev = &pdev->dev; INIT_LIST_HEAD(&od->ddev.channels); From 356073bcf6dbcc15cd8fb22d10cf8f35f4525271 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Thu, 14 Jun 2018 09:37:46 -0400 Subject: [PATCH 023/382] MAINTAINERS: Update email-id of Sinan Kaya I'm no longer with QCOM. I am still interested in maintaining or reviewing PCI/DMA engine patches. Update email-id to an active one. Signed-off-by: Sinan Kaya Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9d5eeff51b5f..209ddcdc5b7a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11821,7 +11821,7 @@ S: Supported F: arch/hexagon/ QUALCOMM HIDMA DRIVER -M: Sinan Kaya +M: Sinan Kaya L: linux-arm-kernel@lists.infradead.org L: linux-arm-msm@vger.kernel.org L: dmaengine@vger.kernel.org From 6362f0a290023bafd7f991089e81dd9278f154b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Jun 2018 10:12:48 -0600 Subject: [PATCH 024/382] libata: add command iterator helpers Now that we have the internal tag as a special (higher) value tag, it gets a bit tricky to iterate the internal commands as some loops will exceed ATA_MAX_QUEUE. Add explicit helpers for iterating pending commands, both inflight and internal. Signed-off-by: Jens Axboe Signed-off-by: Tejun Heo --- include/linux/libata.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/libata.h b/include/linux/libata.h index 8b8946dd63b9..a2257e380789 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1495,6 +1495,29 @@ static inline bool ata_tag_valid(unsigned int tag) return tag < ATA_MAX_QUEUE || ata_tag_internal(tag); } +#define __ata_qc_for_each(ap, qc, tag, max_tag, fn) \ + for ((tag) = 0; (tag) < (max_tag) && \ + ({ qc = fn((ap), (tag)); 1; }); (tag)++) \ + +/* + * Internal use only, iterate commands ignoring error handling and + * status of 'qc'. + */ +#define ata_qc_for_each_raw(ap, qc, tag) \ + __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, __ata_qc_from_tag) + +/* + * Iterate all potential commands that can be queued + */ +#define ata_qc_for_each(ap, qc, tag) \ + __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, ata_qc_from_tag) + +/* + * Like ata_qc_for_each, but with the internal tag included + */ +#define ata_qc_for_each_with_internal(ap, qc, tag) \ + __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE + 1, ata_qc_from_tag) + /* * device helpers */ From 258c4e5c65b21bdbe9735f49ea584b3059c810e4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Jun 2018 10:12:49 -0600 Subject: [PATCH 025/382] libata: convert eh to command iterators Signed-off-by: Jens Axboe Signed-off-by: Tejun Heo --- drivers/ata/libata-eh.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index d5412145d76d..01306c018398 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -614,8 +614,7 @@ void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) { struct ata_queued_cmd *qc; - for (i = 0; i < ATA_MAX_QUEUE; i++) { - qc = __ata_qc_from_tag(ap, i); + ata_qc_for_each_raw(ap, qc, i) { if (qc->flags & ATA_QCFLAG_ACTIVE && qc->scsicmd == scmd) break; @@ -818,14 +817,13 @@ EXPORT_SYMBOL_GPL(ata_port_wait_eh); static int ata_eh_nr_in_flight(struct ata_port *ap) { + struct ata_queued_cmd *qc; unsigned int tag; int nr = 0; /* count only non-internal commands */ - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - if (ata_tag_internal(tag)) - continue; - if (ata_qc_from_tag(ap, tag)) + ata_qc_for_each(ap, qc, tag) { + if (qc) nr++; } @@ -847,13 +845,13 @@ void ata_eh_fastdrain_timerfn(struct timer_list *t) goto out_unlock; if (cnt == ap->fastdrain_cnt) { + struct ata_queued_cmd *qc; unsigned int tag; /* No progress during the last interval, tag all * in-flight qcs as timed out and freeze the port. */ - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); + ata_qc_for_each(ap, qc, tag) { if (qc) qc->err_mask |= AC_ERR_TIMEOUT; } @@ -999,6 +997,7 @@ void ata_port_schedule_eh(struct ata_port *ap) static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) { + struct ata_queued_cmd *qc; int tag, nr_aborted = 0; WARN_ON(!ap->ops->error_handler); @@ -1007,9 +1006,7 @@ static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) ata_eh_set_pending(ap, 0); /* include internal tag in iteration */ - for (tag = 0; tag <= ATA_MAX_QUEUE; tag++) { - struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); - + ata_qc_for_each_with_internal(ap, qc, tag) { if (qc && (!link || qc->dev->link == link)) { qc->flags |= ATA_QCFLAG_FAILED; ata_qc_complete(qc); @@ -1712,9 +1709,7 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) return; /* has LLDD analyzed already? */ - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - qc = __ata_qc_from_tag(ap, tag); - + ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_FAILED)) continue; @@ -2136,6 +2131,7 @@ static void ata_eh_link_autopsy(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; + struct ata_queued_cmd *qc; struct ata_device *dev; unsigned int all_err_mask = 0, eflags = 0; int tag, nr_failed = 0, nr_quiet = 0; @@ -2168,9 +2164,7 @@ static void ata_eh_link_autopsy(struct ata_link *link) all_err_mask |= ehc->i.err_mask; - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - + ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_FAILED) || ata_dev_phys_link(qc->dev) != link) continue; @@ -2436,6 +2430,7 @@ static void ata_eh_link_report(struct ata_link *link) { struct ata_port *ap = link->ap; struct ata_eh_context *ehc = &link->eh_context; + struct ata_queued_cmd *qc; const char *frozen, *desc; char tries_buf[6] = ""; int tag, nr_failed = 0; @@ -2447,9 +2442,7 @@ static void ata_eh_link_report(struct ata_link *link) if (ehc->i.desc[0] != '\0') desc = ehc->i.desc; - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - + ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_FAILED) || ata_dev_phys_link(qc->dev) != link || ((qc->flags & ATA_QCFLAG_QUIET) && @@ -2511,8 +2504,7 @@ static void ata_eh_link_report(struct ata_link *link) ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); #endif - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); + ata_qc_for_each_raw(ap, qc, tag) { struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; char data_buf[20] = ""; char cdb_buf[70] = ""; @@ -3992,12 +3984,11 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, */ void ata_eh_finish(struct ata_port *ap) { + struct ata_queued_cmd *qc; int tag; /* retry or finish qcs */ - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - + ata_qc_for_each_raw(ap, qc, tag) { if (!(qc->flags & ATA_QCFLAG_FAILED)) continue; From d3543b4d1b48afd931ed4afb6f861e6122657b6f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Jun 2018 10:12:50 -0600 Subject: [PATCH 026/382] sata_fsl: convert to command iterator We need to iterate all commands, including the internal one, for ATAPI error handling. Fixes: 28361c403683 ("libata: add extra internal command") Signed-off-by: Jens Axboe Signed-off-by: Tejun Heo --- drivers/ata/sata_fsl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index b8d9cfc60374..bb5ec5f71e73 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1229,8 +1229,7 @@ static void sata_fsl_host_intr(struct ata_port *ap) /* Workaround for data length mismatch errata */ if (unlikely(hstatus & INT_ON_DATA_LENGTH_MISMATCH)) { - for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { - qc = ata_qc_from_tag(ap, tag); + ata_qc_for_each_with_internal(ap, qc, tag) { if (qc && ata_is_atapi(qc->tf.protocol)) { u32 hcontrol; /* Set HControl[27] to clear error registers */ From eb36333de4bfba57fa6f8f88052e53180d54708e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Jun 2018 10:12:51 -0600 Subject: [PATCH 027/382] sata_fsl: remove dead code in tag retrieval We can never pass in the internal tag to this helper, it'll always be the hardware tag. So there's no need to check and do an internal translation of that tag. Signed-off-by: Jens Axboe Signed-off-by: Tejun Heo --- drivers/ata/sata_fsl.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index bb5ec5f71e73..4dc528bf8e85 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -395,12 +395,6 @@ static inline unsigned int sata_fsl_tag(unsigned int tag, { /* We let libATA core do actual (queue) tag allocation */ - /* all non NCQ/queued commands should have tag#0 */ - if (ata_tag_internal(tag)) { - DPRINTK("mapping internal cmds to tag#0\n"); - return 0; - } - if (unlikely(tag >= SATA_FSL_QUEUE_DEPTH)) { DPRINTK("tag %d invalid : out of range\n", tag); return 0; From 63ce3c384db26494615e3c8972bcd419ed71f4c4 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Tue, 19 Jun 2018 17:58:24 +0200 Subject: [PATCH 028/382] scsi: target: Fix truncated PR-in ReadKeys response SPC5r17 states that the contents of the ADDITIONAL LENGTH field are not altered based on the allocation length, so always calculate and pack the full key list length even if the list itself is truncated. According to Maged: Yes it fixes the "Storage Spaces Persistent Reservation" test in the Windows 2016 Server Failover Cluster validation suites when having many connections that result in more than 8 registrations. I tested your patch on 4.17 with iblock. This behaviour can be tested using the libiscsi PrinReadKeys.Truncate test. Cc: stable@vger.kernel.org Signed-off-by: David Disseldorp Reviewed-by: Mike Christie Tested-by: Maged Mokhtar Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/target/target_core_pr.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 01ac306131c1..10db5656fd5d 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -3727,11 +3727,16 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd) * Check for overflow of 8byte PRI READ_KEYS payload and * next reservation key list descriptor. */ - if ((add_len + 8) > (cmd->data_length - 8)) - break; - - put_unaligned_be64(pr_reg->pr_res_key, &buf[off]); - off += 8; + if (off + 8 <= cmd->data_length) { + put_unaligned_be64(pr_reg->pr_res_key, &buf[off]); + off += 8; + } + /* + * SPC5r17: 6.16.2 READ KEYS service action + * The ADDITIONAL LENGTH field indicates the number of bytes in + * the Reservation key list. The contents of the ADDITIONAL + * LENGTH field are not altered based on the allocation length + */ add_len += 8; } spin_unlock(&dev->t10_pr.registration_lock); From ca95ef7c98674a8eb5d411fc0835783051c0662b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 12 Jun 2018 15:03:02 +0300 Subject: [PATCH 029/382] rtc: mrst: fix error code in probe() We should be returning "retval". The "mrst_rtc.rtc" variable is a valid pointer. Fixes: 32b41f93dcaf ("rtc: mrst: switch to devm functions") Signed-off-by: Dan Carpenter Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-mrst.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-mrst.c b/drivers/rtc/rtc-mrst.c index 097a4d4e2aba..1925aaf09093 100644 --- a/drivers/rtc/rtc-mrst.c +++ b/drivers/rtc/rtc-mrst.c @@ -367,10 +367,8 @@ static int vrtc_mrst_do_probe(struct device *dev, struct resource *iomem, } retval = rtc_register_device(mrst_rtc.rtc); - if (retval) { - retval = PTR_ERR(mrst_rtc.rtc); + if (retval) goto cleanup0; - } dev_dbg(dev, "initialised\n"); return 0; From 12f8c553a503d98b519cca650b188bf51ebdbdbf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 18 Apr 2018 20:52:31 +0900 Subject: [PATCH 030/382] clk: sunxi-ng: replace lib-y with obj-y We had commit 06e226c7fb23 ("clk: sunxi-ng: Move all clock types to a library") and commit 799c43415442 ("kbuild: thin archives make default for all archs") in the same development cycle, from different trees. With migration to the thin archive, the entire drivers/clk/sunxi-ng/lib.a is linked to the vmlinux. This does not break build, but we do not get any size saving. However, we do not need to go back to the individual Kconfig options. The default configuration pulls in all (or most) of the CCU parts anyway. Also, once we enable CONFIG_LD_DEAD_CODE_DATA_ELIMINATION, we can simply list all files with obj-y, and the linker will drop all unused functions by itself. After the long discussion [1], people there agreed to fix this, but nobody sent a patch after all. I am doing it now. I lifted up CONFIG_SUNXI_CCU to drivers/clk/Makefile because everything in drivers/clk/sunxi-ng/ depends on SUNXI_CCU. [1] https://patchwork.kernel.org/patch/9796521/ Signed-off-by: Masahiro Yamada Acked-by: Stephen Boyd Signed-off-by: Maxime Ripard Acked-by: Chen-Yu Tsai Signed-off-by: Stephen Boyd --- drivers/clk/Makefile | 2 +- drivers/clk/sunxi-ng/Makefile | 39 ++++++++++++++--------------------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index ae40cbe770f0..0bb25dd009d1 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -96,7 +96,7 @@ obj-$(CONFIG_ARCH_SPRD) += sprd/ obj-$(CONFIG_ARCH_STI) += st/ obj-$(CONFIG_ARCH_STRATIX10) += socfpga/ obj-$(CONFIG_ARCH_SUNXI) += sunxi/ -obj-$(CONFIG_ARCH_SUNXI) += sunxi-ng/ +obj-$(CONFIG_SUNXI_CCU) += sunxi-ng/ obj-$(CONFIG_ARCH_TEGRA) += tegra/ obj-y += ti/ obj-$(CONFIG_CLK_UNIPHIER) += uniphier/ diff --git a/drivers/clk/sunxi-ng/Makefile b/drivers/clk/sunxi-ng/Makefile index acaa14cfa25c..49454700f2e5 100644 --- a/drivers/clk/sunxi-ng/Makefile +++ b/drivers/clk/sunxi-ng/Makefile @@ -1,24 +1,24 @@ # SPDX-License-Identifier: GPL-2.0 # Common objects -lib-$(CONFIG_SUNXI_CCU) += ccu_common.o -lib-$(CONFIG_SUNXI_CCU) += ccu_mmc_timing.o -lib-$(CONFIG_SUNXI_CCU) += ccu_reset.o +obj-y += ccu_common.o +obj-y += ccu_mmc_timing.o +obj-y += ccu_reset.o # Base clock types -lib-$(CONFIG_SUNXI_CCU) += ccu_div.o -lib-$(CONFIG_SUNXI_CCU) += ccu_frac.o -lib-$(CONFIG_SUNXI_CCU) += ccu_gate.o -lib-$(CONFIG_SUNXI_CCU) += ccu_mux.o -lib-$(CONFIG_SUNXI_CCU) += ccu_mult.o -lib-$(CONFIG_SUNXI_CCU) += ccu_phase.o -lib-$(CONFIG_SUNXI_CCU) += ccu_sdm.o +obj-y += ccu_div.o +obj-y += ccu_frac.o +obj-y += ccu_gate.o +obj-y += ccu_mux.o +obj-y += ccu_mult.o +obj-y += ccu_phase.o +obj-y += ccu_sdm.o # Multi-factor clocks -lib-$(CONFIG_SUNXI_CCU) += ccu_nk.o -lib-$(CONFIG_SUNXI_CCU) += ccu_nkm.o -lib-$(CONFIG_SUNXI_CCU) += ccu_nkmp.o -lib-$(CONFIG_SUNXI_CCU) += ccu_nm.o -lib-$(CONFIG_SUNXI_CCU) += ccu_mp.o +obj-y += ccu_nk.o +obj-y += ccu_nkm.o +obj-y += ccu_nkmp.o +obj-y += ccu_nm.o +obj-y += ccu_mp.o # SoC support obj-$(CONFIG_SUN50I_A64_CCU) += ccu-sun50i-a64.o @@ -38,12 +38,3 @@ obj-$(CONFIG_SUN8I_R40_CCU) += ccu-sun8i-r40.o obj-$(CONFIG_SUN9I_A80_CCU) += ccu-sun9i-a80.o obj-$(CONFIG_SUN9I_A80_CCU) += ccu-sun9i-a80-de.o obj-$(CONFIG_SUN9I_A80_CCU) += ccu-sun9i-a80-usb.o - -# The lib-y file goals is supposed to work only in arch/*/lib or lib/. In our -# case, we want to use that goal, but even though lib.a will be properly -# generated, it will not be linked in, eventually resulting in a linker error -# for missing symbols. -# -# We can work around that by explicitly adding lib.a to the obj-y goal. This is -# an undocumented behaviour, but works well for now. -obj-$(CONFIG_SUNXI_CCU) += lib.a From 957f9a13df6c70aac31a1dade5e417c286d6d258 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 20 Jun 2018 11:42:45 -0700 Subject: [PATCH 031/382] tools: bpftool: remove duplicated error message on prog load do_pin_fd() will already print out an error message if something goes wrong. Printing another error is unnecessary and will break JSON output, since error messages are full objects: $ bpftool -jp prog load tracex1_kern.o /sys/fs/bpf/a { "error": "can't pin the object (/sys/fs/bpf/a): File exists" },{ "error": "failed to pin program" } Fixes: 49a086c201a9 ("bpftool: implement prog load command") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 05f42a46d6ed..12b694fe0404 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -694,10 +694,8 @@ static int do_load(int argc, char **argv) return -1; } - if (do_pin_fd(prog_fd, argv[1])) { - p_err("failed to pin program"); + if (do_pin_fd(prog_fd, argv[1])) return -1; - } if (json_output) jsonw_null(json_wtr); From bfee71fb7376081349117fdc89f685a9e14a58c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 20 Jun 2018 11:42:46 -0700 Subject: [PATCH 032/382] tools: bpftool: remember to close the libbpf object after prog load Remembering to close all descriptors and free memory may not seem important in a user space tool like bpftool, but if we were to run in batch mode the consumed resources start to add up quickly. Make sure program load closes the libbpf object (which unloads and frees it). Fixes: 49a086c201a9 ("bpftool: implement prog load command") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 12b694fe0404..959aa53ab678 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -695,12 +695,18 @@ static int do_load(int argc, char **argv) } if (do_pin_fd(prog_fd, argv[1])) - return -1; + goto err_close_obj; if (json_output) jsonw_null(json_wtr); + bpf_object__close(obj); + return 0; + +err_close_obj: + bpf_object__close(obj); + return -1; } static int do_help(int argc, char **argv) From 81e167c2a216e7b54e6add9d2badcda267fe33b1 Mon Sep 17 00:00:00 2001 From: Jeffrin Jose T Date: Thu, 21 Jun 2018 22:30:20 +0530 Subject: [PATCH 033/382] selftests: bpf: notification about privilege required to run test_kmod.sh testing script The test_kmod.sh script require root privilege for the successful execution of the test. This patch is to notify the user about the privilege the script demands for the successful execution of the test. Signed-off-by: Jeffrin Jose T (Rajagiri SET) Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_kmod.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh index 35669ccd4d23..9df0d2ac45f8 100755 --- a/tools/testing/selftests/bpf/test_kmod.sh +++ b/tools/testing/selftests/bpf/test_kmod.sh @@ -1,6 +1,15 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ "$(id -u)" != "0" ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + SRC_TREE=../../../../ test_run() From 3e1a61b30c6a3a31314570c0c418a6bf84b056b1 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 21 Jun 2018 10:02:20 -0700 Subject: [PATCH 034/382] tools/bpf: fix test_sockmap failure On one of our production test machine, when running bpf selftest test_sockmap, I got the following error: # sudo ./test_sockmap libbpf: failed to create map (name: 'sock_map'): Operation not permitted libbpf: failed to load object 'test_sockmap_kern.o' libbpf: Can't get the 0th fd from program sk_skb1: only -1 instances ...... load_bpf_file: (-1) Operation not permitted ERROR: (-1) load bpf failed The error is due to not-big-enough rlimit struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; The test already includes "bpf_rlimit.h", which sets current and max rlimit to RLIM_INFINITY. Let us just use it. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_sockmap.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 05c8cb71724a..9e78df207919 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -1413,18 +1413,12 @@ static int test_suite(void) int main(int argc, char **argv) { - struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; int iov_count = 1, length = 1024, rate = 1; struct sockmap_options options = {0}; int opt, longindex, err, cg_fd = 0; char *bpf_file = BPF_SOCKMAP_FILENAME; int test = PING_PONG; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (argc < 2) return test_suite(); From 2068db53b6337f8c6c7bcd07dea0b5e82b1a1a02 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 20 Jun 2018 07:19:01 -0500 Subject: [PATCH 035/382] ipmi: Cleanup oops on initialization failure Commit 93c303d2045b3 "ipmi_si: Clean up shutdown a bit" didn't copy the behavior of the cleanup in one spot, it needed to check for a non-NULL interface before cleaning it up. Reported-by: Meelis Roos Signed-off-by: Corey Minyard Tested-by: Meelis Roos --- drivers/char/ipmi/ipmi_si_intf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index ad353be871bf..90ec010bffbd 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2088,8 +2088,10 @@ static int try_smi_init(struct smi_info *new_smi) return 0; out_err: - ipmi_unregister_smi(new_smi->intf); - new_smi->intf = NULL; + if (new_smi->intf) { + ipmi_unregister_smi(new_smi->intf); + new_smi->intf = NULL; + } kfree(init_name); From dc0f0a026d33819bb82d5c26ab2fca838e2004be Mon Sep 17 00:00:00 2001 From: Haiyue Wang Date: Sat, 23 Jun 2018 21:51:13 +0800 Subject: [PATCH 036/382] ipmi: kcs_bmc: fix IRQ exception if the channel is not open When kcs_bmc_handle_event calls kcs_force_abort function to handle the not open (no user running) KCS channel transaction, the returned status value -ENODEV causes the low level IRQ handler indicating that the irq was not for him by returning IRQ_NONE. After some time, this IRQ will be treated to be spurious one, and the exception dump happens. irq 30: nobody cared (try booting with the "irqpoll" option) CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.15-npcm750 #1 Hardware name: NPCMX50 Chip family [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x8c/0xa0) [] (dump_stack) from [] (__report_bad_irq+0x3c/0xdc) [] (__report_bad_irq) from [] (note_interrupt+0x29c/0x2ec) [] (note_interrupt) from [] (handle_irq_event_percpu+0x5c/0x68) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x48/0x6c) [] (handle_irq_event) from [] (handle_fasteoi_irq+0xc8/0x198) [] (handle_fasteoi_irq) from [] (__handle_domain_irq+0x90/0xe8) [] (__handle_domain_irq) from [] (gic_handle_irq+0x58/0x9c) [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x90) Exception stack(0xc0a01de8 to 0xc0a01e30) 1de0: 00002080 c0a6fbc0 00000000 00000000 00000000 c096d294 1e00: 00000000 00000001 dc406400 f03ff100 00000082 c0a01e94 c0a6fbc0 c0a01e38 1e20: 00200102 c01015bc 60000113 ffffffff [] (__irq_svc) from [] (__do_softirq+0xbc/0x358) [] (__do_softirq) from [] (irq_exit+0xb8/0xec) [] (irq_exit) from [] (__handle_domain_irq+0x94/0xe8) [] (__handle_domain_irq) from [] (gic_handle_irq+0x58/0x9c) [] (gic_handle_irq) from [] (__irq_svc+0x6c/0x90) Exception stack(0xc0a01ef8 to 0xc0a01f40) 1ee0: 00000000 000003ae 1f00: dcc0f338 c0111060 c0a00000 c0a0cc44 c0a0cbe4 c0a1c22b c07bc218 00000001 1f20: dcffca40 c0a01f54 c0a01f58 c0a01f48 c0103524 c0103528 60000013 ffffffff [] (__irq_svc) from [] (arch_cpu_idle+0x48/0x4c) [] (arch_cpu_idle) from [] (default_idle_call+0x30/0x3c) [] (default_idle_call) from [] (do_idle+0xc8/0x134) [] (do_idle) from [] (cpu_startup_entry+0x28/0x2c) [] (cpu_startup_entry) from [] (rest_init+0x84/0x88) [] (rest_init) from [] (start_kernel+0x388/0x394) [] (start_kernel) from [<0000807c>] (0x807c) handlers: [] npcm7xx_kcs_irq Disabling IRQ #30 It needs to change the returned status from -ENODEV to 0. The -ENODEV was originally used to tell the low level IRQ handler that no user was running, but not consider the IRQ handling desgin. And multiple KCS channels share one IRQ handler, it needs to check the IBF flag before doing force abort. If the IBF is set, after handling, return 0 to low level IRQ handler to indicate that the IRQ is handled. Signed-off-by: Haiyue Wang Signed-off-by: Corey Minyard --- drivers/char/ipmi/kcs_bmc.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/char/ipmi/kcs_bmc.c b/drivers/char/ipmi/kcs_bmc.c index fbfc05e3f3d1..bb882ab161fe 100644 --- a/drivers/char/ipmi/kcs_bmc.c +++ b/drivers/char/ipmi/kcs_bmc.c @@ -210,34 +210,23 @@ static void kcs_bmc_handle_cmd(struct kcs_bmc *kcs_bmc) int kcs_bmc_handle_event(struct kcs_bmc *kcs_bmc) { unsigned long flags; - int ret = 0; + int ret = -ENODATA; u8 status; spin_lock_irqsave(&kcs_bmc->lock, flags); - if (!kcs_bmc->running) { - kcs_force_abort(kcs_bmc); - ret = -ENODEV; - goto out_unlock; + status = read_status(kcs_bmc); + if (status & KCS_STATUS_IBF) { + if (!kcs_bmc->running) + kcs_force_abort(kcs_bmc); + else if (status & KCS_STATUS_CMD_DAT) + kcs_bmc_handle_cmd(kcs_bmc); + else + kcs_bmc_handle_data(kcs_bmc); + + ret = 0; } - status = read_status(kcs_bmc) & (KCS_STATUS_IBF | KCS_STATUS_CMD_DAT); - - switch (status) { - case KCS_STATUS_IBF | KCS_STATUS_CMD_DAT: - kcs_bmc_handle_cmd(kcs_bmc); - break; - - case KCS_STATUS_IBF: - kcs_bmc_handle_data(kcs_bmc); - break; - - default: - ret = -ENODATA; - break; - } - -out_unlock: spin_unlock_irqrestore(&kcs_bmc->lock, flags); return ret; From 49a6ec5b807ea4ad7ebe1f58080ebb8497cb2d2c Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 17 Jun 2018 13:53:09 +0200 Subject: [PATCH 037/382] ARM: dts: am437x: make edt-ft5x06 a wakeup source The touchscreen driver no longer configures the device as wakeup source by default. A "wakeup-source" property is needed. Signed-off-by: Daniel Mack Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am437x-sk-evm.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/boot/dts/am437x-sk-evm.dts b/arch/arm/boot/dts/am437x-sk-evm.dts index 440351ad0b80..d4be3fd0b6f4 100644 --- a/arch/arm/boot/dts/am437x-sk-evm.dts +++ b/arch/arm/boot/dts/am437x-sk-evm.dts @@ -610,6 +610,8 @@ edt-ft5306@38 { touchscreen-size-x = <480>; touchscreen-size-y = <272>; + + wakeup-source; }; tlv320aic3106: tlv320aic3106@1b { From 891f6a726cacbb87e5b06076693ffab53bd378d7 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 21 Jun 2018 14:49:38 +0200 Subject: [PATCH 038/382] s390: Correct register corruption in critical section cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the critical section cleanup we must not mess with r1. For march=z9 or older, larl + ex (instead of exrl) are used with r1 as a temporary register. This can clobber r1 in several interrupt handlers. Fix this by using r11 as a temp register. r11 is being saved by all callers of cleanup_critical. Fixes: 6dd85fbb87 ("s390: move expoline assembler macros to a header") Cc: stable@vger.kernel.org #v4.16 Reported-by: Oliver Kurz Reported-by: Petr Tesařík Signed-off-by: Christian Borntraeger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f03402efab4b..3891805bfcdd 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1265,7 +1265,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_EX %r14 +0: BR_EX %r14,%r11 .align 8 .Lcleanup_table: @@ -1301,7 +1301,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_EX %r14 + BR_EX %r14,%r11 #endif .Lcleanup_system_call: From 68d676a089625daed9d61df56ac63b5cd7189efe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 22 Jun 2018 11:56:56 -0700 Subject: [PATCH 039/382] nfp: bpf: don't stop offload if replace failed Stopping offload completely if replace of program failed dates back to days of transparent offload. Back then we wanted to silently fall back to the in-driver processing. Today we mark programs for offload when they are loaded into the kernel, so the transparent offload is no longer a reality. Flags check in the driver will only allow replace of a driver program with another driver program or an offload program with another offload program. When driver program is replaced stopping offload is a no-op, because driver program isn't offloaded. When replacing offloaded program if the offload fails the entire operation will fail all the way back to user space and we should continue using the old program. IOW when replacing a driver program stopping offload is unnecessary and when replacing offloaded program - it's a bug, old program should continue to run. In practice this bug would mean that if offload operation was to fail (either due to FW communication error, kernel OOM or new program being offloaded but for a different netdev) driver would continue reporting that previous XDP program is offloaded but in fact no program will be loaded in hardware. The failure is fairly unlikely (found by inspection, when working on the code) but it's unpleasant. Backport note: even though the bug was introduced in commit cafa92ac2553 ("nfp: bpf: add support for XDP_FLAGS_HW_MODE"), this fix depends on commit 441a33031fe5 ("net: xdp: don't allow device-bound programs in driver mode"), so this fix is sufficient only in v4.15 or newer. Kernels v4.13.x and v4.14.x do need to stop offload if it was transparent/opportunistic, i.e. if XDP_FLAGS_HW_MODE was not set on running program. Fixes: cafa92ac2553 ("nfp: bpf: add support for XDP_FLAGS_HW_MODE") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index fcdfb8e7fdea..7184af94aa53 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -81,10 +81,10 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn, ret = nfp_net_bpf_offload(nn, prog, running, extack); /* Stop offload if replace not possible */ - if (ret && prog) - nfp_bpf_xdp_offload(app, nn, NULL, extack); + if (ret) + return ret; - nn->dp.bpf_offload_xdp = prog && !ret; + nn->dp.bpf_offload_xdp = !!prog; return ret; } From 560b423dd9af4272a1f3685c2d6b073fdc4af7c7 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Mon, 18 Jun 2018 06:52:58 +0900 Subject: [PATCH 040/382] openrisc: Call destructor during __pte_free_tlb This fixes an issue uncovered when a recent change to add the "page table" flag was merged. During bootup we see many errors like the following: BUG: Bad page state in process mkdir pfn:00bae page:c1ff15c0 count:0 mapcount:-1024 mapping:00000000 index:0x0 flags: 0x0() raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 46 Comm: mkdir Tainted: G B 4.17.0-simple-smp-07461-g1d40a5ea01d5-dirty #993 Call trace: [<(ptrval)>] show_stack+0x44/0x54 [<(ptrval)>] dump_stack+0xb0/0xe8 [<(ptrval)>] bad_page+0x138/0x174 [<(ptrval)>] ? cpumask_next+0x24/0x34 [<(ptrval)>] free_pages_check_bad+0x6c/0xd0 [<(ptrval)>] free_pcppages_bulk+0x174/0x42c [<(ptrval)>] free_unref_page_commit.isra.17+0xb8/0xc8 [<(ptrval)>] free_unref_page_list+0x10c/0x190 [<(ptrval)>] ? set_reset_devices+0x0/0x2c [<(ptrval)>] release_pages+0x3a0/0x414 [<(ptrval)>] tlb_flush_mmu_free+0x5c/0x90 [<(ptrval)>] tlb_flush_mmu+0x90/0xa4 [<(ptrval)>] arch_tlb_finish_mmu+0x50/0x94 [<(ptrval)>] tlb_finish_mmu+0x30/0x64 [<(ptrval)>] exit_mmap+0x110/0x1e0 [<(ptrval)>] mmput+0x50/0xf0 [<(ptrval)>] do_exit+0x274/0xa94 [<(ptrval)>] do_group_exit+0x50/0x110 [<(ptrval)>] __wake_up_parent+0x0/0x38 [<(ptrval)>] _syscall_return+0x0/0x4 During the __pte_free_tlb path openrisc fails to call the page destructor which would clear the new bits that were introduced. To fix this we are calling the destructor. It seem openrisc was the only architecture missing this, all other architectures either call the destructor like we are doing here or use pte_free. Note: failing to call the destructor was also messing up the zone stats (and will be cause other problems if you were using SPLIT_PTE_PTLOCKS, which we are not yet). Fixes: 1d40a5ea01d53 ("mm: mark pages in use for page tables") Acked-by: Matthew Wilcox Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/pgalloc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 3e1a46615120..8999b9226512 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -98,8 +98,12 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte) __free_page(pte); } +#define __pte_free_tlb(tlb, pte, addr) \ +do { \ + pgtable_page_dtor(pte); \ + tlb_remove_page((tlb), (pte)); \ +} while (0) -#define __pte_free_tlb(tlb, pte, addr) tlb_remove_page((tlb), (pte)) #define pmd_pgtable(pmd) pmd_page(pmd) #define check_pgt_cache() do { } while (0) From 8f732850df1b2b4d8d719f7e606dfb3050e7ea11 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 31 May 2018 13:49:29 +0200 Subject: [PATCH 041/382] HID: core: allow concurrent registration of drivers Detected on the Dell XPS 9365. The laptop has 2 devices that benefit from the hid-generic auto-unbinding. When those 2 devices are presented to the userspace, udev loads both wacom and hid-multitouch. When this happens, the code in __hid_bus_reprobe_drivers() is called concurrently and the second device gets reprobed twice. An other bug in the power_supply subsystem prevent to remove the wacom driver if it just finished its initialization, which basically kills the wacom node. [jkosina@suse.cz: reformat changelog a bit] Fixes c17a7476e4c4 ("HID: core: rewrite the hid-generic automatic unbind") Cc: stable@vger.kernel.org # v4.17 Tested-by: Mario Limonciello Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 5 ++++- include/linux/hid.h | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 355dc7e49562..a460ec147aee 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1949,6 +1949,8 @@ static int hid_device_probe(struct device *dev) } hdev->io_started = false; + clear_bit(ffs(HID_STAT_REPROBED), &hdev->status); + if (!hdev->driver) { id = hid_match_device(hdev, hdrv); if (id == NULL) { @@ -2212,7 +2214,8 @@ static int __hid_bus_reprobe_drivers(struct device *dev, void *data) struct hid_device *hdev = to_hid_device(dev); if (hdev->driver == hdrv && - !hdrv->match(hdev, hid_ignore_special_drivers)) + !hdrv->match(hdev, hid_ignore_special_drivers) && + !test_and_set_bit(ffs(HID_STAT_REPROBED), &hdev->status)) return device_reprobe(dev); return 0; diff --git a/include/linux/hid.h b/include/linux/hid.h index 41a3d5775394..773bcb1d4044 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -511,6 +511,7 @@ struct hid_output_fifo { #define HID_STAT_ADDED BIT(0) #define HID_STAT_PARSED BIT(1) #define HID_STAT_DUP_DETECTED BIT(2) +#define HID_STAT_REPROBED BIT(3) struct hid_input { struct list_head list; @@ -579,7 +580,7 @@ struct hid_device { /* device report descriptor */ bool battery_avoid_query; #endif - unsigned int status; /* see STAT flags above */ + unsigned long status; /* see STAT flags above */ unsigned claimed; /* Claimed by hidinput, hiddev? */ unsigned quirks; /* Various quirks the device can pull on us */ bool io_started; /* If IO has started */ From 7b72717a20bba8bdd01b14c0460be7d15061cd6b Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 21 Jun 2018 07:43:21 -0700 Subject: [PATCH 042/382] iw_cxgb4: correctly enforce the max reg_mr depth The code was mistakenly using the length of the page array memory instead of the depth of the page array. This would cause MR creation to fail in some cases. Fixes: 8376b86de7d3 ("iw_cxgb4: Support the new memory registration API") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 1445918e3239..7b76e6f81aeb 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -774,7 +774,7 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) { struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - if (unlikely(mhp->mpl_len == mhp->max_mpl_len)) + if (unlikely(mhp->mpl_len == mhp->attr.pbl_size)) return -ENOMEM; mhp->mpl[mhp->mpl_len++] = addr; From 940efcc8889f0d15567eb07fc9fd69b06e366aa5 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:42 +0300 Subject: [PATCH 043/382] RDMA/uverbs: Protect from attempts to create flows on unsupported QP Flows can be created on UD and RAW_PACKET QP types. Attempts to provide other QP types as an input causes to various unpredictable failures. The reason is that in order to support all various types (e.g. XRC), we are supposed to use real_qp handle and not qp handle and expect to driver/FW to fail such (XRC) flows. The simpler and safer variant is to ban all QP types except UD and RAW_PACKET, instead of relying on driver/FW. Cc: # 3.11 Fixes: 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs") Cc: syzkaller Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 3e90b6a1d9d2..89c4ce2da78b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3559,6 +3559,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_uobj; } + if (qp->qp_type != IB_QPT_UD && qp->qp_type != IB_QPT_RAW_PACKET) { + err = -EINVAL; + goto err_put; + } + flow_attr = kzalloc(struct_size(flow_attr, flows, cmd.flow_attr.num_of_specs), GFP_KERNEL); if (!flow_attr) { From 4fae7f170416f970e5655f7e945ce69286b1c4ff Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 24 Jun 2018 11:23:53 +0300 Subject: [PATCH 044/382] RDMA/uverbs: Fix slab-out-of-bounds in ib_uverbs_ex_create_flow The check of cmd.flow_attr.size should check into account the size of the reserved field (2 bytes), otherwise user can provide a size which will cause a slab-out-of-bounds warning below. ================================================================== BUG: KASAN: slab-out-of-bounds in ib_uverbs_ex_create_flow+0x1740/0x1d00 Read of size 2 at addr ffff880068dff1a6 by task syz-executor775/269 CPU: 0 PID: 269 Comm: syz-executor775 Not tainted 4.18.0-rc1+ #245 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.0-0-g63451fca13-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xef/0x17e print_address_description+0x83/0x3b0 kasan_report+0x18d/0x4d0 ib_uverbs_ex_create_flow+0x1740/0x1d00 ib_uverbs_write+0x923/0x1010 __vfs_write+0x10d/0x720 vfs_write+0x1b0/0x550 ksys_write+0xc6/0x1a0 do_syscall_64+0xa7/0x590 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x433899 Code: fd ff 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 3b 91 fd ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007ffc2724db58 EFLAGS: 00000217 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000020006880 RCX: 0000000000433899 RDX: 00000000000000e0 RSI: 0000000020002480 RDI: 0000000000000003 RBP: 00000000006d7018 R08: 00000000004002f8 R09: 00000000004002f8 R10: 00000000004002f8 R11: 0000000000000217 R12: 0000000000000000 R13: 000000000040cd20 R14: 000000000040cdb0 R15: 0000000000000006 Allocated by task 269: kasan_kmalloc+0xa0/0xd0 __kmalloc+0x1a9/0x510 ib_uverbs_ex_create_flow+0x26c/0x1d00 ib_uverbs_write+0x923/0x1010 __vfs_write+0x10d/0x720 vfs_write+0x1b0/0x550 ksys_write+0xc6/0x1a0 do_syscall_64+0xa7/0x590 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 0: __kasan_slab_free+0x12e/0x180 kfree+0x159/0x630 detach_buf+0x559/0x7a0 virtqueue_get_buf_ctx+0x3cc/0xab0 virtblk_done+0x1eb/0x3d0 vring_interrupt+0x16d/0x2b0 __handle_irq_event_percpu+0x10a/0x980 handle_irq_event_percpu+0x77/0x190 handle_irq_event+0xc6/0x1a0 handle_edge_irq+0x211/0xd80 handle_irq+0x3d/0x60 do_IRQ+0x9b/0x220 The buggy address belongs to the object at ffff880068dff180 which belongs to the cache kmalloc-64 of size 64 The buggy address is located 38 bytes inside of 64-byte region [ffff880068dff180, ffff880068dff1c0) The buggy address belongs to the page: page:ffffea0001a37fc0 count:1 mapcount:0 mapping:ffff88006c401780 index:0x0 flags: 0x4000000000000100(slab) raw: 4000000000000100 ffffea0001a31100 0000001100000011 ffff88006c401780 raw: 0000000000000000 00000000802a002a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff880068dff080: fb fb fb fb fc fc fc fc fb fb fb fb fb fb fb fb ffff880068dff100: fc fc fc fc fb fb fb fb fb fb fb fb fc fc fc fc >ffff880068dff180: 00 00 00 00 07 fc fc fc fc fc fc fc fb fb fb fb ^ ffff880068dff200: fb fb fb fb fc fc fc fc 00 00 00 00 00 00 fc fc ffff880068dff280: fc fc fc fc 00 00 00 00 00 00 00 00 fc fc fc fc ================================================================== Cc: # 3.12 Fixes: f88482743872 ("IB/core: clarify overflow/underflow checks on ib_create/destroy_flow") Cc: syzkaller Reported-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 89c4ce2da78b..87ffeebc0b28 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3488,8 +3488,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, struct ib_flow_attr *flow_attr; struct ib_qp *qp; struct ib_uflow_resources *uflow_res; + struct ib_uverbs_flow_spec_hdr *kern_spec; int err = 0; - void *kern_spec; void *ib_spec; int i; @@ -3538,8 +3538,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, if (!kern_flow_attr) return -ENOMEM; - memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); - err = ib_copy_from_udata(kern_flow_attr + 1, ucore, + *kern_flow_attr = cmd.flow_attr; + err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore, cmd.flow_attr.size); if (err) goto err_free_attr; @@ -3583,21 +3583,22 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, flow_attr->flags = kern_flow_attr->flags; flow_attr->size = sizeof(*flow_attr); - kern_spec = kern_flow_attr + 1; + kern_spec = kern_flow_attr->flow_specs; ib_spec = flow_attr + 1; for (i = 0; i < flow_attr->num_of_specs && - cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && - cmd.flow_attr.size >= - ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { - err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec, - uflow_res); + cmd.flow_attr.size > sizeof(*kern_spec) && + cmd.flow_attr.size >= kern_spec->size; + i++) { + err = kern_spec_to_ib_spec( + file->ucontext, (struct ib_uverbs_flow_spec *)kern_spec, + ib_spec, uflow_res); if (err) goto err_free; flow_attr->size += ((union ib_flow_spec *) ib_spec)->size; - cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; - kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size; + cmd.flow_attr.size -= kern_spec->size; + kern_spec = ((void *)kern_spec) + kern_spec->size; ib_spec += ((union ib_flow_spec *) ib_spec)->size; } if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { From 0613de37373bbbc747d434f643620472bd13303b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 2 Jun 2018 10:52:56 +0300 Subject: [PATCH 045/382] clk: davinci: cfgchip: testing the wrong variable There is a copy and paste bug here. We should be testing "usb1" instead of "usb0". Fixes: 58e1e2d2cd89 ("clk: davinci: cfgchip: Add TI DA8XX USB PHY clocks") Signed-off-by: Dan Carpenter Signed-off-by: David Lechner --- drivers/clk/davinci/da8xx-cfgchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/davinci/da8xx-cfgchip.c b/drivers/clk/davinci/da8xx-cfgchip.c index aae62a5b8734..d1bbee19ed0f 100644 --- a/drivers/clk/davinci/da8xx-cfgchip.c +++ b/drivers/clk/davinci/da8xx-cfgchip.c @@ -672,7 +672,7 @@ static int of_da8xx_usb_phy_clk_init(struct device *dev, struct regmap *regmap) usb1 = da8xx_cfgchip_register_usb1_clk48(dev, regmap); if (IS_ERR(usb1)) { - if (PTR_ERR(usb0) == -EPROBE_DEFER) + if (PTR_ERR(usb1) == -EPROBE_DEFER) return -EPROBE_DEFER; dev_warn(dev, "Failed to register usb1_clk48 (%ld)\n", From 176cbf5f62e7a0aa0180af4826da9e7da1359988 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 25 Jun 2018 19:25:50 +0200 Subject: [PATCH 046/382] clk: davinci: fix a typo (which leads to build failures) This should have been DM365, not DM356. Fixes: 4eff0bebf4ed5 ("clk: davinci: Fix link errors when not all SoCs are enabled") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: David Lechner --- drivers/clk/davinci/psc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/davinci/psc.h b/drivers/clk/davinci/psc.h index 6a42529d31a9..cc5614567a70 100644 --- a/drivers/clk/davinci/psc.h +++ b/drivers/clk/davinci/psc.h @@ -107,7 +107,7 @@ extern const struct davinci_psc_init_data of_da850_psc1_init_data; #ifdef CONFIG_ARCH_DAVINCI_DM355 extern const struct davinci_psc_init_data dm355_psc_init_data; #endif -#ifdef CONFIG_ARCH_DAVINCI_DM356 +#ifdef CONFIG_ARCH_DAVINCI_DM365 extern const struct davinci_psc_init_data dm365_psc_init_data; #endif #ifdef CONFIG_ARCH_DAVINCI_DM644x From 2203d8a76ea3b962559acfe3bb80eddfdfbc9cee Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 25 Jun 2018 08:44:51 +0200 Subject: [PATCH 047/382] sample/mdev/mbochs: remove mbochs_kmap_atomic_dmabuf Atomic mapping interface for dmabufs will be removed. Signed-off-by: Gerd Hoffmann Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 2960e26c6ea4..aa25cda21d22 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -803,15 +803,6 @@ static void mbochs_release_dmabuf(struct dma_buf *buf) mutex_unlock(&mdev_state->ops_lock); } -static void *mbochs_kmap_atomic_dmabuf(struct dma_buf *buf, - unsigned long page_num) -{ - struct mbochs_dmabuf *dmabuf = buf->priv; - struct page *page = dmabuf->pages[page_num]; - - return kmap_atomic(page); -} - static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num) { struct mbochs_dmabuf *dmabuf = buf->priv; @@ -824,7 +815,6 @@ static struct dma_buf_ops mbochs_dmabuf_ops = { .map_dma_buf = mbochs_map_dmabuf, .unmap_dma_buf = mbochs_unmap_dmabuf, .release = mbochs_release_dmabuf, - .map_atomic = mbochs_kmap_atomic_dmabuf, .map = mbochs_kmap_dmabuf, .mmap = mbochs_mmap_dmabuf, }; From 7733e05b34f5d855c5590d816b41ddfee972d188 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 25 Jun 2018 08:44:52 +0200 Subject: [PATCH 048/382] sample/mdev/mbochs: add mbochs_kunmap_dmabuf There is no default implementation for dma_buf_ops->unmap. So add a function unmapping the page, otherwise we'll leak them. Signed-off-by: Gerd Hoffmann Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index aa25cda21d22..85ac6037696f 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -811,11 +811,18 @@ static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num) return kmap(page); } +static void mbochs_kunmap_dmabuf(struct dma_buf *buf, unsigned long page_num, + void *vaddr) +{ + kunmap(vaddr); +} + static struct dma_buf_ops mbochs_dmabuf_ops = { .map_dma_buf = mbochs_map_dmabuf, .unmap_dma_buf = mbochs_unmap_dmabuf, .release = mbochs_release_dmabuf, .map = mbochs_kmap_dmabuf, + .unmap = mbochs_kunmap_dmabuf, .mmap = mbochs_mmap_dmabuf, }; From 0ee1f4734967af8321ecebaf9c74221ace34f2d5 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 25 Jun 2018 09:26:27 +0200 Subject: [PATCH 049/382] r8152: napi hangup fix after disconnect When unplugging an r8152 adapter while the interface is UP, the NIC becomes unusable. usb->disconnect (aka rtl8152_disconnect) deletes napi. Then, rtl8152_disconnect calls unregister_netdev and that invokes netdev->ndo_stop (aka rtl8152_close). rtl8152_close tries to napi_disable, but the napi is already deleted by disconnect above. So the first while loop in napi_disable never finishes. This results in complete deadlock of the network layer as there is rtnl_mutex held by unregister_netdev. So avoid the call to napi_disable in rtl8152_close when the device is already gone. The other calls to usb_kill_urb, cancel_delayed_work_sync, netif_stop_queue etc. seem to be fine. The urb and netdev is not destroyed yet. Signed-off-by: Jiri Slaby Cc: linux-usb@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 86f7196f9d91..2a58607a6aea 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3962,7 +3962,8 @@ static int rtl8152_close(struct net_device *netdev) #ifdef CONFIG_PM_SLEEP unregister_pm_notifier(&tp->pm_notifier); #endif - napi_disable(&tp->napi); + if (!test_bit(RTL8152_UNPLUG, &tp->flags)) + napi_disable(&tp->napi); clear_bit(WORK_ENABLE, &tp->flags); usb_kill_urb(tp->intr_urb); cancel_delayed_work_sync(&tp->schedule); From fdb5c4531c1e0e50e609df83f736b6f3a02896e2 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 19 Jun 2018 00:04:24 +0100 Subject: [PATCH 050/382] bpf: fix attach type BPF_LIRC_MODE2 dependency wrt CONFIG_CGROUP_BPF If the kernel is compiled with CONFIG_CGROUP_BPF not enabled, it is not possible to attach, detach or query IR BPF programs to /dev/lircN devices, making them impossible to use. For embedded devices, it should be possible to use IR decoding without cgroups or CONFIG_CGROUP_BPF enabled. This change requires some refactoring, since bpf_prog_{attach,detach,query} functions are now always compiled, but their code paths for cgroups need moving out. Rather than a #ifdef CONFIG_CGROUP_BPF in kernel/bpf/syscall.c, moving them to kernel/bpf/cgroup.c and kernel/bpf/sockmap.c does not require #ifdefs since that is already conditionally compiled. Fixes: f4364dcfc86d ("media: rc: introduce BPF_PROG_LIRC_MODE2") Signed-off-by: Sean Young Signed-off-by: Daniel Borkmann --- drivers/media/rc/bpf-lirc.c | 14 +----- include/linux/bpf-cgroup.h | 26 ++++++++++ include/linux/bpf.h | 8 +++ include/linux/bpf_lirc.h | 5 +- kernel/bpf/cgroup.c | 54 ++++++++++++++++++++ kernel/bpf/sockmap.c | 18 +++++++ kernel/bpf/syscall.c | 99 ++++++++----------------------------- 7 files changed, 132 insertions(+), 92 deletions(-) diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 40826bba06b6..fcfab6635f9c 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -207,29 +207,19 @@ void lirc_bpf_free(struct rc_dev *rcdev) bpf_prog_array_free(rcdev->raw->progs); } -int lirc_prog_attach(const union bpf_attr *attr) +int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) { - struct bpf_prog *prog; struct rc_dev *rcdev; int ret; if (attr->attach_flags) return -EINVAL; - prog = bpf_prog_get_type(attr->attach_bpf_fd, - BPF_PROG_TYPE_LIRC_MODE2); - if (IS_ERR(prog)) - return PTR_ERR(prog); - rcdev = rc_dev_get_from_fd(attr->target_fd); - if (IS_ERR(rcdev)) { - bpf_prog_put(prog); + if (IS_ERR(rcdev)) return PTR_ERR(rcdev); - } ret = lirc_bpf_attach(rcdev, prog); - if (ret) - bpf_prog_put(prog); put_device(&rcdev->dev); diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 975fb4cf1bb7..79795c5fa7c3 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -188,12 +188,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, \ __ret; \ }) +int cgroup_bpf_prog_attach(const union bpf_attr *attr, + enum bpf_prog_type ptype, struct bpf_prog *prog); +int cgroup_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype); +int cgroup_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); #else +struct bpf_prog; struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } +static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, + enum bpf_prog_type ptype, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) +{ + return -EINVAL; +} + +static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} + #define cgroup_bpf_enabled (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7df32a3200f7..8827e797ff97 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -696,6 +696,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); +int sockmap_get_from_fd(const union bpf_attr *attr, int type, + struct bpf_prog *prog); #else static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) { @@ -714,6 +716,12 @@ static inline int sock_map_prog(struct bpf_map *map, { return -EOPNOTSUPP; } + +static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type, + struct bpf_prog *prog) +{ + return -EINVAL; +} #endif #if defined(CONFIG_XDP_SOCKETS) diff --git a/include/linux/bpf_lirc.h b/include/linux/bpf_lirc.h index 5f8a4283092d..9d9ff755ec29 100644 --- a/include/linux/bpf_lirc.h +++ b/include/linux/bpf_lirc.h @@ -5,11 +5,12 @@ #include #ifdef CONFIG_BPF_LIRC_MODE2 -int lirc_prog_attach(const union bpf_attr *attr); +int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int lirc_prog_detach(const union bpf_attr *attr); int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else -static inline int lirc_prog_attach(const union bpf_attr *attr) +static inline int lirc_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog) { return -EINVAL; } diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index f7c00bd6f8e4..3d83ee7df381 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, return ret; } +int cgroup_bpf_prog_attach(const union bpf_attr *attr, + enum bpf_prog_type ptype, struct bpf_prog *prog) +{ + struct cgroup *cgrp; + int ret; + + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, + attr->attach_flags); + cgroup_put(cgrp); + return ret; +} + +int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) +{ + struct bpf_prog *prog; + struct cgroup *cgrp; + int ret; + + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + prog = NULL; + + ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); + if (prog) + bpf_prog_put(prog); + + cgroup_put(cgrp); + return ret; +} + +int cgroup_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct cgroup *cgrp; + int ret; + + cgrp = cgroup_get_from_fd(attr->query.target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + ret = cgroup_bpf_query(cgrp, attr, uattr); + + cgroup_put(cgrp); + return ret; +} + /** * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering * @sk: The socket sending or receiving traffic diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 52a91d816c0e..81d0c55a77aa 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1915,6 +1915,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type) return 0; } +int sockmap_get_from_fd(const union bpf_attr *attr, int type, + struct bpf_prog *prog) +{ + int ufd = attr->target_fd; + struct bpf_map *map; + struct fd f; + int err; + + f = fdget(ufd); + map = __bpf_map_get(f); + if (IS_ERR(map)) + return PTR_ERR(map); + + err = sock_map_prog(map, prog, attr->attach_type); + fdput(f); + return err; +} + static void *sock_map_lookup(struct bpf_map *map, void *key) { return NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 35dc466641f2..d10ecd78105f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1483,8 +1483,6 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) return err; } -#ifdef CONFIG_CGROUP_BPF - static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, enum bpf_attach_type attach_type) { @@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, #define BPF_PROG_ATTACH_LAST_FIELD attach_flags -static int sockmap_get_from_fd(const union bpf_attr *attr, - int type, bool attach) -{ - struct bpf_prog *prog = NULL; - int ufd = attr->target_fd; - struct bpf_map *map; - struct fd f; - int err; - - f = fdget(ufd); - map = __bpf_map_get(f); - if (IS_ERR(map)) - return PTR_ERR(map); - - if (attach) { - prog = bpf_prog_get_type(attr->attach_bpf_fd, type); - if (IS_ERR(prog)) { - fdput(f); - return PTR_ERR(prog); - } - } - - err = sock_map_prog(map, prog, attr->attach_type); - if (err) { - fdput(f); - if (prog) - bpf_prog_put(prog); - return err; - } - - fdput(f); - return 0; -} - #define BPF_F_ATTACH_MASK \ (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) @@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr) { enum bpf_prog_type ptype; struct bpf_prog *prog; - struct cgroup *cgrp; int ret; if (!capable(CAP_NET_ADMIN)) @@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr) ptype = BPF_PROG_TYPE_CGROUP_DEVICE; break; case BPF_SK_MSG_VERDICT: - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true); + ptype = BPF_PROG_TYPE_SK_MSG; + break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true); + ptype = BPF_PROG_TYPE_SK_SKB; + break; case BPF_LIRC_MODE2: - return lirc_prog_attach(attr); + ptype = BPF_PROG_TYPE_LIRC_MODE2; + break; default: return -EINVAL; } @@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr) return -EINVAL; } - cgrp = cgroup_get_from_fd(attr->target_fd); - if (IS_ERR(cgrp)) { - bpf_prog_put(prog); - return PTR_ERR(cgrp); + switch (ptype) { + case BPF_PROG_TYPE_SK_SKB: + case BPF_PROG_TYPE_SK_MSG: + ret = sockmap_get_from_fd(attr, ptype, prog); + break; + case BPF_PROG_TYPE_LIRC_MODE2: + ret = lirc_prog_attach(attr, prog); + break; + default: + ret = cgroup_bpf_prog_attach(attr, ptype, prog); } - ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, - attr->attach_flags); if (ret) bpf_prog_put(prog); - cgroup_put(cgrp); - return ret; } @@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { enum bpf_prog_type ptype; - struct bpf_prog *prog; - struct cgroup *cgrp; - int ret; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr) ptype = BPF_PROG_TYPE_CGROUP_DEVICE; break; case BPF_SK_MSG_VERDICT: - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false); + return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL); case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: - return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false); + return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL); case BPF_LIRC_MODE2: return lirc_prog_detach(attr); default: return -EINVAL; } - cgrp = cgroup_get_from_fd(attr->target_fd); - if (IS_ERR(cgrp)) - return PTR_ERR(cgrp); - - prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); - if (IS_ERR(prog)) - prog = NULL; - - ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); - if (prog) - bpf_prog_put(prog); - cgroup_put(cgrp); - return ret; + return cgroup_bpf_prog_detach(attr, ptype); } #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt @@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr) static int bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { - struct cgroup *cgrp; - int ret; - if (!capable(CAP_NET_ADMIN)) return -EPERM; if (CHECK_ATTR(BPF_PROG_QUERY)) @@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr, default: return -EINVAL; } - cgrp = cgroup_get_from_fd(attr->query.target_fd); - if (IS_ERR(cgrp)) - return PTR_ERR(cgrp); - ret = cgroup_bpf_query(cgrp, attr, uattr); - cgroup_put(cgrp); - return ret; + + return cgroup_bpf_prog_query(attr, uattr); } -#endif /* CONFIG_CGROUP_BPF */ #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration @@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_OBJ_GET: err = bpf_obj_get(&attr); break; -#ifdef CONFIG_CGROUP_BPF case BPF_PROG_ATTACH: err = bpf_prog_attach(&attr); break; @@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_PROG_QUERY: err = bpf_prog_query(&attr, uattr); break; -#endif case BPF_PROG_TEST_RUN: err = bpf_prog_test_run(&attr, uattr); break; From 5fadfc61934a33d362ca47c7f70786105d5681c1 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Mon, 25 Jun 2018 16:56:05 +0200 Subject: [PATCH 051/382] selftests: bpf: add missing NET_SCHED to config CONFIG_NET_SCHED wasn't enabled in arm64's defconfig only for x86. So bpf/test_tunnel.sh tests fails with: RTNETLINK answers: Operation not supported RTNETLINK answers: Operation not supported We have an error talking to the kernel, -1 Enable NET_SCHED and more tests pass. Fixes: 3bce593ac06b ("selftests: bpf: config: add config fragments") Signed-off-by: Anders Roxell Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 7eb613ffef55..b4994a94968b 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -6,6 +6,7 @@ CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCHED=y CONFIG_NET_SCH_INGRESS=y CONFIG_NET_IPIP=y CONFIG_IPV6=y From b0e4b8bcd48bde31690f066044767475b2e4852b Mon Sep 17 00:00:00 2001 From: Jeffrin Jose T Date: Sat, 23 Jun 2018 00:24:17 +0530 Subject: [PATCH 052/382] selftests: bpf: notification about privilege required to run test_lirc_mode2.sh testing script The test_lirc_mode2.sh script require root privilege for the successful execution of the test. This patch is to notify the user about the privilege the script demands for the successful execution of the test. Signed-off-by: Jeffrin Jose T (Rajagiri SET) Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_lirc_mode2.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index ce2e15e4f976..677686198df3 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -1,6 +1,15 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + GREEN='\033[0;92m' RED='\033[0;31m' NC='\033[0m' # No Color From dd349c3ffd93d15d01f4f5ace73870ca45ea249d Mon Sep 17 00:00:00 2001 From: Jeffrin Jose T Date: Sat, 23 Jun 2018 03:10:32 +0530 Subject: [PATCH 053/382] selftests: bpf: notification about privilege required to run test_lwt_seg6local.sh testing script This test needs root privilege for it's successful execution. This patch is atleast used to notify the user about the privilege the script demands for the smooth execution of the test. Signed-off-by: Jeffrin Jose T (Rajagiri SET) Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_lwt_seg6local.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh index 1c77994b5e71..270fa8f49573 100755 --- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh +++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh @@ -21,6 +21,15 @@ # An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this # datagram can be read on NS6 when binding to fb00::6. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + TMP_FILE="/tmp/selftest_lwt_seg6local.txt" cleanup() From 2259b7a64d71f27311a19fd7a5bed47413d75985 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 25 Jun 2018 15:07:12 +0100 Subject: [PATCH 054/382] net: lan78xx: Allow for VLAN headers in timeout calcs The frame abort timeout being set by lan78xx_set_rx_max_frame_length didn't account for any VLAN headers, resulting in very low throughput if used with tagged VLANs. Use VLAN_ETH_HLEN instead of ETH_HLEN to correct for this. Signed-off-by: Dave Stevenson Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 8dff87ec6d99..1a1d3cc8e308 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2298,7 +2298,7 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) if ((ll_mtu % dev->maxpacket) == 0) return -EDOM; - ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + ETH_HLEN); + ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN); netdev->mtu = new_mtu; @@ -2587,7 +2587,8 @@ static int lan78xx_reset(struct lan78xx_net *dev) buf |= FCT_TX_CTL_EN_; ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf); - ret = lan78xx_set_rx_max_frame_length(dev, dev->net->mtu + ETH_HLEN); + ret = lan78xx_set_rx_max_frame_length(dev, + dev->net->mtu + VLAN_ETH_HLEN); ret = lan78xx_read_reg(dev, MAC_RX, &buf); buf |= MAC_RX_RXEN_; From 4a27327b156e1e543c839a9358ba885564729ae7 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 25 Jun 2018 15:07:13 +0100 Subject: [PATCH 055/382] net: lan78xx: Add support for VLAN filtering. HW_VLAN_CTAG_FILTER was partially implemented, but not advertised to Linux. Complete the implementation of this. Signed-off-by: Dave Stevenson Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 1a1d3cc8e308..bd41a36924db 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2363,7 +2363,7 @@ static int lan78xx_set_features(struct net_device *netdev, pdata->rfe_ctl &= ~(RFE_CTL_ICMP_COE_ | RFE_CTL_IGMP_COE_); } - if (features & NETIF_F_HW_VLAN_CTAG_RX) + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) pdata->rfe_ctl |= RFE_CTL_VLAN_FILTER_; else pdata->rfe_ctl &= ~RFE_CTL_VLAN_FILTER_; @@ -2976,6 +2976,9 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) if (DEFAULT_TSO_CSUM_ENABLE) dev->net->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG; + if (DEFAULT_VLAN_FILTER_ENABLE) + dev->net->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->net->hw_features = dev->net->features; ret = lan78xx_setup_irq_domain(dev); From ec21ecf0aad27956dc64475e5acd78f3575df462 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 25 Jun 2018 15:07:14 +0100 Subject: [PATCH 056/382] net: lan78xx: Add support for VLAN tag stripping. The chip supports stripping the VLAN tag and reporting it in metadata. Complete the support for this. Signed-off-by: Dave Stevenson Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index bd41a36924db..28da296f8cf7 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -64,6 +64,7 @@ #define DEFAULT_RX_CSUM_ENABLE (true) #define DEFAULT_TSO_CSUM_ENABLE (true) #define DEFAULT_VLAN_FILTER_ENABLE (true) +#define DEFAULT_VLAN_RX_OFFLOAD (true) #define TX_OVERHEAD (8) #define RXW_PADDING 2 @@ -2363,6 +2364,11 @@ static int lan78xx_set_features(struct net_device *netdev, pdata->rfe_ctl &= ~(RFE_CTL_ICMP_COE_ | RFE_CTL_IGMP_COE_); } + if (features & NETIF_F_HW_VLAN_CTAG_RX) + pdata->rfe_ctl |= RFE_CTL_VLAN_STRIP_; + else + pdata->rfe_ctl &= ~RFE_CTL_VLAN_STRIP_; + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) pdata->rfe_ctl |= RFE_CTL_VLAN_FILTER_; else @@ -2976,6 +2982,9 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) if (DEFAULT_TSO_CSUM_ENABLE) dev->net->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG; + if (DEFAULT_VLAN_RX_OFFLOAD) + dev->net->features |= NETIF_F_HW_VLAN_CTAG_RX; + if (DEFAULT_VLAN_FILTER_ENABLE) dev->net->features |= NETIF_F_HW_VLAN_CTAG_FILTER; @@ -3052,6 +3061,16 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev, } } +static void lan78xx_rx_vlan_offload(struct lan78xx_net *dev, + struct sk_buff *skb, + u32 rx_cmd_a, u32 rx_cmd_b) +{ + if ((dev->net->features & NETIF_F_HW_VLAN_CTAG_RX) && + (rx_cmd_a & RX_CMD_A_FVTG_)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + (rx_cmd_b & 0xffff)); +} + static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb) { int status; @@ -3116,6 +3135,8 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb) if (skb->len == size) { lan78xx_rx_csum_offload(dev, skb, rx_cmd_a, rx_cmd_b); + lan78xx_rx_vlan_offload(dev, skb, + rx_cmd_a, rx_cmd_b); skb_trim(skb, skb->len - 4); /* remove fcs */ skb->truesize = size + sizeof(struct sk_buff); @@ -3134,6 +3155,7 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb) skb_set_tail_pointer(skb2, size); lan78xx_rx_csum_offload(dev, skb2, rx_cmd_a, rx_cmd_b); + lan78xx_rx_vlan_offload(dev, skb2, rx_cmd_a, rx_cmd_b); skb_trim(skb2, skb2->len - 4); /* remove fcs */ skb2->truesize = size + sizeof(struct sk_buff); From 9343ac87f2a4e09bf6e27b5f31e72e9e3a82abff Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Mon, 25 Jun 2018 15:07:15 +0100 Subject: [PATCH 057/382] net: lan78xx: Use s/w csum check on VLANs without tag stripping Observations of VLANs dropping packets due to invalid checksums when not offloading VLAN tag receive. With VLAN tag stripping enabled no issue is observed. Drop back to s/w checksums if VLAN offload is disabled. Signed-off-by: Dave Stevenson Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 28da296f8cf7..2e4130746c40 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -3052,8 +3052,13 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev, struct sk_buff *skb, u32 rx_cmd_a, u32 rx_cmd_b) { + /* HW Checksum offload appears to be flawed if used when not stripping + * VLAN headers. Drop back to S/W checksums under these conditions. + */ if (!(dev->net->features & NETIF_F_RXCSUM) || - unlikely(rx_cmd_a & RX_CMD_A_ICSM_)) { + unlikely(rx_cmd_a & RX_CMD_A_ICSM_) || + ((rx_cmd_a & RX_CMD_A_FVTG_) && + !(dev->net->features & NETIF_F_HW_VLAN_CTAG_RX))) { skb->ip_summed = CHECKSUM_NONE; } else { skb->csum = ntohs((u16)(rx_cmd_b >> RX_CMD_B_CSUM_SHIFT_)); From dffd22aed2aa1e804bccf19b30a421e89ee2ae61 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Jun 2018 18:33:45 +0200 Subject: [PATCH 058/382] netfilter: nf_log: fix uninit read in nf_log_proc_dostring When proc_dostring() is called with a non-zero offset in strict mode, it doesn't just write to the ->data buffer, it also reads. Make sure it doesn't read uninitialized data. Fixes: c6ac37d8d884 ("netfilter: nf_log: fix error on write NONE to [...]") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 426457047578..2c47f9ec3511 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -424,6 +424,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, if (write) { struct ctl_table tmp = *table; + /* proc_dostring() can append to existing strings, so we need to + * initialize it as an empty string. + */ + buf[0] = '\0'; tmp.data = buf; r = proc_dostring(&tmp, write, buffer, lenp, ppos); if (r) From ce00bf07cc95a57cd20b208e02b3c2604e532ae8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 17:22:00 +0200 Subject: [PATCH 059/382] netfilter: nf_log: don't hold nf_log_mutex during user access The old code would indefinitely block other users of nf_log_mutex if a userspace access in proc_dostring() blocked e.g. due to a userfaultfd region. Fix it by moving proc_dostring() out of the locked region. This is a followup to commit 266d07cb1c9a ("netfilter: nf_log: fix sleeping function called from invalid context"), which changed this code from using rcu_read_lock() to taking nf_log_mutex. Fixes: 266d07cb1c9a ("netfilter: nf_log: fix sleeping function calle[...]") Signed-off-by: Jann Horn Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_log.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 2c47f9ec3511..a61d6df6e5f6 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -446,14 +446,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { + struct ctl_table tmp = *table; + + tmp.data = buf; mutex_lock(&nf_log_mutex); logger = nft_log_dereference(net->nf.nf_loggers[tindex]); if (!logger) - table->data = "NONE"; + strlcpy(buf, "NONE", sizeof(buf)); else - table->data = logger->name; - r = proc_dostring(table, write, buffer, lenp, ppos); + strlcpy(buf, logger->name, sizeof(buf)); mutex_unlock(&nf_log_mutex); + r = proc_dostring(&tmp, write, buffer, lenp, ppos); } return r; From 59b433c825569ce251371485f0e29fca888b549d Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Fri, 22 Jun 2018 06:55:47 -0700 Subject: [PATCH 060/382] scsi: aacraid: Fix PD performance regression over incorrect qd being set The driver fails to set the correct queue depth for native devices, due to failing to set the device type prior to calling aac_set_safw_target_qd(). This results in slave configure setting the queue depth to 1. This causes around 30% performance degradation. Fixed by setting the dev type before trying to set queue depth. Reported-by: Steve Best Fixes: 0bcb45fb20c21 ("scsi: aacraid: Add helper function to set queue depth") cc: stable@vger.kernel.org Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aachba.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index a9831bd37a73..a57f3a7d4748 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1974,7 +1974,6 @@ static void aac_set_safw_attr_all_targets(struct aac_dev *dev) u32 lun_count, nexus; u32 i, bus, target; u8 expose_flag, attribs; - u8 devtype; lun_count = aac_get_safw_phys_lun_count(dev); @@ -1992,23 +1991,23 @@ static void aac_set_safw_attr_all_targets(struct aac_dev *dev) continue; if (expose_flag != 0) { - devtype = AAC_DEVTYPE_RAID_MEMBER; - goto update_devtype; + dev->hba_map[bus][target].devtype = + AAC_DEVTYPE_RAID_MEMBER; + continue; } if (nexus != 0 && (attribs & 8)) { - devtype = AAC_DEVTYPE_NATIVE_RAW; + dev->hba_map[bus][target].devtype = + AAC_DEVTYPE_NATIVE_RAW; dev->hba_map[bus][target].rmw_nexus = nexus; } else - devtype = AAC_DEVTYPE_ARC_RAW; + dev->hba_map[bus][target].devtype = + AAC_DEVTYPE_ARC_RAW; dev->hba_map[bus][target].scan_counter = dev->scan_counter; aac_set_safw_target_qd(dev, bus, target); - -update_devtype: - dev->hba_map[bus][target].devtype = devtype; } } From b36e4523d4d56e2595e28f16f6ccf1cd6a9fc452 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 20 Jun 2018 23:32:26 +0200 Subject: [PATCH 061/382] netfilter: nf_conncount: fix garbage collection confirm race Yi-Hung Wei and Justin Pettit found a race in the garbage collection scheme used by nf_conncount. When doing list walk, we lookup the tuple in the conntrack table. If the lookup fails we remove this tuple from our list because the conntrack entry is gone. This is the common cause, but turns out its not the only one. The list entry could have been created just before by another cpu, i.e. the conntrack entry might not yet have been inserted into the global hash. The avoid this, we introduce a timestamp and the owning cpu. If the entry appears to be stale, evict only if: 1. The current cpu is the one that added the entry, or, 2. The timestamp is older than two jiffies The second constraint allows GC to be taken over by other cpu too (e.g. because a cpu was offlined or napi got moved to another cpu). We can't pretend the 'doubtful' entry wasn't in our list. Instead, when we don't find an entry indicate via IS_ERR that entry was removed ('did not exist' or withheld ('might-be-unconfirmed'). This most likely also fixes a xt_connlimit imbalance earlier reported by Dmitry Andrianov. Cc: Dmitry Andrianov Reported-by: Justin Pettit Reported-by: Yi-Hung Wei Signed-off-by: Florian Westphal Acked-by: Yi-Hung Wei Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conncount.c | 52 ++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index d8383609fe28..510039862aa9 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -47,6 +47,8 @@ struct nf_conncount_tuple { struct hlist_node node; struct nf_conntrack_tuple tuple; struct nf_conntrack_zone zone; + int cpu; + u32 jiffies32; }; struct nf_conncount_rb { @@ -91,11 +93,42 @@ bool nf_conncount_add(struct hlist_head *head, return false; conn->tuple = *tuple; conn->zone = *zone; + conn->cpu = raw_smp_processor_id(); + conn->jiffies32 = (u32)jiffies; hlist_add_head(&conn->node, head); return true; } EXPORT_SYMBOL_GPL(nf_conncount_add); +static const struct nf_conntrack_tuple_hash * +find_or_evict(struct net *net, struct nf_conncount_tuple *conn) +{ + const struct nf_conntrack_tuple_hash *found; + unsigned long a, b; + int cpu = raw_smp_processor_id(); + __s32 age; + + found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); + if (found) + return found; + b = conn->jiffies32; + a = (u32)jiffies; + + /* conn might have been added just before by another cpu and + * might still be unconfirmed. In this case, nf_conntrack_find() + * returns no result. Thus only evict if this cpu added the + * stale entry or if the entry is older than two jiffies. + */ + age = a - b; + if (conn->cpu == cpu || age >= 2) { + hlist_del(&conn->node); + kmem_cache_free(conncount_conn_cachep, conn); + return ERR_PTR(-ENOENT); + } + + return ERR_PTR(-EAGAIN); +} + unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone, @@ -103,18 +136,27 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, { const struct nf_conntrack_tuple_hash *found; struct nf_conncount_tuple *conn; - struct hlist_node *n; struct nf_conn *found_ct; + struct hlist_node *n; unsigned int length = 0; *addit = tuple ? true : false; /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); - if (found == NULL) { - hlist_del(&conn->node); - kmem_cache_free(conncount_conn_cachep, conn); + found = find_or_evict(net, conn); + if (IS_ERR(found)) { + /* Not found, but might be about to be confirmed */ + if (PTR_ERR(found) == -EAGAIN) { + length++; + if (!tuple) + continue; + + if (nf_ct_tuple_equal(&conn->tuple, tuple) && + nf_ct_zone_id(&conn->zone, conn->zone.dir) == + nf_ct_zone_id(zone, zone->dir)) + *addit = false; + } continue; } From 26b5b874aff5659a7e26e5b1997e3df2c41fa7fd Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 25 Jun 2018 16:25:44 +0200 Subject: [PATCH 062/382] scsi: sg: mitigate read/write abuse As Al Viro noted in commit 128394eff343 ("sg_write()/bsg_write() is not fit to be called under KERNEL_DS"), sg improperly accesses userspace memory outside the provided buffer, permitting kernel memory corruption via splice(). But it doesn't just do it on ->write(), also on ->read(). As a band-aid, make sure that the ->read() and ->write() handlers can not be called in weird contexts (kernel context or credentials different from file opener), like for ib_safe_file_access(). If someone needs to use these interfaces from different security contexts, a new interface should be written that goes through the ->ioctl() handler. I've mostly copypasted ib_safe_file_access() over as sg_safe_file_access() because I couldn't find a good common header - please tell me if you know a better way. [mkp: s/_safe_/_check_/] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Signed-off-by: Jann Horn Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 53ae52dbff84..cd2fdac000c9 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -51,6 +51,7 @@ static int sg_version_num = 30536; /* 2 digits for each component */ #include #include #include +#include /* for sg_check_file_access() */ #include "scsi.h" #include @@ -209,6 +210,33 @@ static void sg_device_destroy(struct kref *kref); sdev_prefix_printk(prefix, (sdp)->device, \ (sdp)->disk->disk_name, fmt, ##a) +/* + * The SCSI interfaces that use read() and write() as an asynchronous variant of + * ioctl(..., SG_IO, ...) are fundamentally unsafe, since there are lots of ways + * to trigger read() and write() calls from various contexts with elevated + * privileges. This can lead to kernel memory corruption (e.g. if these + * interfaces are called through splice()) and privilege escalation inside + * userspace (e.g. if a process with access to such a device passes a file + * descriptor to a SUID binary as stdin/stdout/stderr). + * + * This function provides protection for the legacy API by restricting the + * calling context. + */ +static int sg_check_file_access(struct file *filp, const char *caller) +{ + if (filp->f_cred != current_real_cred()) { + pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", + caller, task_tgid_vnr(current), current->comm); + return -EPERM; + } + if (uaccess_kernel()) { + pr_err_once("%s: process %d (%s) called from kernel context, this is not allowed.\n", + caller, task_tgid_vnr(current), current->comm); + return -EACCES; + } + return 0; +} + static int sg_allow_access(struct file *filp, unsigned char *cmd) { struct sg_fd *sfp = filp->private_data; @@ -393,6 +421,14 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) struct sg_header *old_hdr = NULL; int retval = 0; + /* + * This could cause a response to be stranded. Close the associated + * file descriptor to free up any resources being held. + */ + retval = sg_check_file_access(filp, __func__); + if (retval) + return retval; + if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, @@ -580,9 +616,11 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) struct sg_header old_hdr; sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; + int retval; - if (unlikely(uaccess_kernel())) - return -EINVAL; + retval = sg_check_file_access(filp, __func__); + if (retval) + return retval; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; From b697d7d8c741f27b728a878fc55852b06d0f6f5e Mon Sep 17 00:00:00 2001 From: "Michael J. Ruhl" Date: Wed, 20 Jun 2018 09:29:08 -0700 Subject: [PATCH 063/382] IB/hfi1: Fix incorrect mixing of ERR_PTR and NULL return values The __get_txreq() function can return a pointer, ERR_PTR(-EBUSY), or NULL. All of the relevant call sites look for IS_ERR, so the NULL return would lead to a NULL pointer exception. Do not use the ERR_PTR mechanism for this function. Update all call sites to handle the return value correctly. Clean up error paths to reflect return value. Fixes: 45842abbb292 ("staging/rdma/hfi1: move txreq header code") Cc: # 4.9.x+ Reported-by: Dan Carpenter Reviewed-by: Mike Marciniszyn Reviewed-by: Kamenee Arumugam Signed-off-by: Michael J. Ruhl Signed-off-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/rc.c | 2 +- drivers/infiniband/hw/hfi1/uc.c | 4 ++-- drivers/infiniband/hw/hfi1/ud.c | 4 ++-- drivers/infiniband/hw/hfi1/verbs_txreq.c | 4 ++-- drivers/infiniband/hw/hfi1/verbs_txreq.h | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1a1a47ac53c6..f15c93102081 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -271,7 +271,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) lockdep_assert_held(&qp->s_lock); ps->s_txreq = get_txreq(ps->dev, qp); - if (IS_ERR(ps->s_txreq)) + if (!ps->s_txreq) goto bail_no_tx; if (priv->hdr_type == HFI1_PKT_TYPE_9B) { diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index b7b671017e59..e254dcec6f64 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -72,7 +72,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int middle = 0; ps->s_txreq = get_txreq(ps->dev, qp); - if (IS_ERR(ps->s_txreq)) + if (!ps->s_txreq) goto bail_no_tx; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 1ab332f1866e..70d39fc450a1 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -503,7 +503,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) u32 lid; ps->s_txreq = get_txreq(ps->dev, qp); - if (IS_ERR(ps->s_txreq)) + if (!ps->s_txreq) goto bail_no_tx; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index 873e48ea923f..c4ab2d5b4502 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 - 2017 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -94,7 +94,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) __must_hold(&qp->s_lock) { - struct verbs_txreq *tx = ERR_PTR(-EBUSY); + struct verbs_txreq *tx = NULL; write_seqlock(&dev->txwait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 729244c3086c..1c19bbc764b2 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -83,7 +83,7 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, if (unlikely(!tx)) { /* call slow path to get the lock */ tx = __get_txreq(dev, qp); - if (IS_ERR(tx)) + if (!tx) return tx; } tx->qp = qp; From 8ffd569aaa818f2624ca821d9a246342fa8b8c50 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 4 Jun 2018 19:46:53 +0300 Subject: [PATCH 064/382] net/mlx5e: Don't attempt to dereference the ppriv struct if not being eswitch manager The check for cpu hit statistics was not returning immediate false for any non vport rep netdev and hence we crashed (say on mlx5 probed VFs) if user-space tool was calling into any possible netdev in the system. Fix that by doing a proper check before dereferencing. Fixes: 1d447a39142e ('net/mlx5e: Extendable vport representor netdev private data') Signed-off-by: Or Gerlitz Reported-by: Eli Cohen Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 57987f6546e8..378ad74518ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -837,8 +837,12 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_eswitch_rep *rep; + if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table)) + return false; + + rep = rpriv->rep; if (rep && rep->vport != FDB_UPLINK_VPORT) return true; From 0efc8562491b7d36f6bbc4fbc8f3348cb6641e9c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 31 May 2018 11:16:18 +0300 Subject: [PATCH 065/382] net/mlx5: E-Switch, Avoid setup attempt if not being e-switch manager In smartnic env, the host (PF) driver might not be an e-switch manager, hence the FW will err on driver attempts to deal with setting/unsetting the eswitch and as a result the overall setup of sriov will fail. Fix that by avoiding the operation if e-switch management is not allowed for this driver instance. While here, move to use the correct name for the esw manager capability name. Fixes: 81848731ff40 ('net/mlx5: E-Switch, Add SR-IOV (FDB) support') Signed-off-by: Or Gerlitz Reported-by: Guy Kushnir Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 7 ++++++- include/linux/mlx5/eswitch.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 2 +- 7 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 378ad74518ec..60236f73373c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -839,7 +839,7 @@ static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; - if (!MLX5_CAP_GEN(priv->mdev, eswitch_flow_table)) + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) return false; rep = rpriv->rep; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index f63dfbcd29fe..103fd6a0cc65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1604,7 +1604,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (!ESW_ALLOWED(esw)) return 0; - if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + if (!MLX5_ESWITCH_MANAGER(esw->dev) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 49a75d31185e..f1a86cea86a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" #include "fs_core.h" @@ -2652,7 +2653,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + if (MLX5_ESWITCH_MANAGER(dev)) { if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { err = init_fdb_root_ns(steering); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index afd9f4fa22f4..41ad24f0de2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" @@ -159,13 +160,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) } if (MLX5_CAP_GEN(dev, vport_group_manager) && - MLX5_CAP_GEN(dev, eswitch_flow_table)) { + MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); if (err) return err; } - if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { + if (MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 2a8b529ce6dd..a0674962f02c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -88,6 +88,9 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return -EBUSY; } + if (!MLX5_ESWITCH_MANAGER(dev)) + goto enable_vfs_hca; + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); if (err) { mlx5_core_warn(dev, @@ -95,6 +98,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return err; } +enable_vfs_hca: for (vf = 0; vf < num_vfs; vf++) { err = mlx5_core_enable_hca(dev, vf + 1); if (err) { @@ -140,7 +144,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) } out: - mlx5_eswitch_disable_sriov(dev->priv.eswitch); + if (MLX5_ESWITCH_MANAGER(dev)) + mlx5_eswitch_disable_sriov(dev->priv.eswitch); if (mlx5_wait_for_vf_pages(dev)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index d3c9db492b30..fab5121ffb8f 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -8,6 +8,8 @@ #include +#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) + enum { SRIOV_NONE, SRIOV_LEGACY, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 27134c4fcb76..ac281f5ec9b8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -922,7 +922,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vnic_env_queue_counters[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 eswitch_flow_table[0x1]; + u8 eswitch_manager[0x1]; u8 device_memory[0x1]; u8 mcam_reg[0x1]; u8 pcam_reg[0x1]; From 733d3e5497070d05971352ca5087bac83c197c3d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 31 May 2018 11:32:56 +0300 Subject: [PATCH 066/382] net/mlx5e: Avoid dealing with vport representors if not being e-switch manager In smartnic env, the host (PF) driver might not be an e-switch manager, hence the switchdev mode representors are running on the embedded cpu (EC) and not at the host. As such, we should avoid dealing with vport representors if not being esw manager. While here, make sure to disallow eswitch switchdev related setups through devlink if we are not esw managers. Fixes: cb67b832921c ('net/mlx5e: Introduce SRIOV VF representors') Signed-off-by: Or Gerlitz Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 56c1b6f5593e..dae4156a710d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2846,7 +2846,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_activate_channels(&priv->channels); netif_tx_start_all_queues(priv->netdev); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2857,7 +2857,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -4597,7 +4597,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_set_netdev_dev_addr(netdev); #if IS_ENABLED(CONFIG_MLX5_ESWITCH) - if (MLX5_VPORT_MANAGER(mdev)) + if (MLX5_ESWITCH_MANAGER(mdev)) netdev->switchdev_ops = &mlx5e_switchdev_ops; #endif @@ -4753,7 +4753,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) @@ -4788,7 +4788,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_VPORT_MANAGER(priv->mdev)) + if (MLX5_ESWITCH_MANAGER(priv->mdev)) mlx5e_unregister_vport_reps(priv); mlx5e_disable_async_events(priv); @@ -4972,7 +4972,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return NULL; #ifdef CONFIG_MLX5_ESWITCH - if (MLX5_VPORT_MANAGER(mdev)) { + if (MLX5_ESWITCH_MANAGER(mdev)) { rpriv = mlx5e_alloc_nic_rep_priv(mdev); if (!rpriv) { mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 60236f73373c..2b8040a3cdbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -823,7 +823,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep; - if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + if (!MLX5_ESWITCH_MANAGER(priv->mdev)) return false; rep = rpriv->rep; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index cecd201f0b73..91f1209886ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1079,8 +1079,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return -EOPNOTSUPP; - if (!MLX5_CAP_GEN(dev, vport_group_manager)) - return -EOPNOTSUPP; + if(!MLX5_ESWITCH_MANAGER(dev)) + return -EPERM; if (dev->priv.eswitch->mode == SRIOV_NONE) return -EOPNOTSUPP; From aff2252a2ad3844ca47bf2f18af071101baace40 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 31 May 2018 11:40:17 +0300 Subject: [PATCH 067/382] IB/mlx5: Avoid dealing with vport representors if not being e-switch manager In smartnic env, the host (PF) driver might not be an e-switch manager, hence the switchdev mode representors are running on the embedded cpu (EC) and not at the host. As such, we should avoid dealing with vport representors if not being esw manager. Fixes: b5ca15ad7e61 ('IB/mlx5: Add proper representors support') Signed-off-by: Or Gerlitz Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e52dd21519b4..634be96dcb86 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6107,7 +6107,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), MLX5_CAP_GEN(mdev, num_vhca_ports)); - if (MLX5_VPORT_MANAGER(mdev) && + if (MLX5_ESWITCH_MANAGER(mdev) && mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); From a8d70a054a718b63058b3d3ac58b6181815e4289 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 31 May 2018 12:24:48 +0300 Subject: [PATCH 068/382] net/mlx5: E-Switch, Disallow vlan/spoofcheck setup if not being esw manager In smartnic env, if the host (PF) driver is not an e-switch manager, we are not allowed to apply eswitch ports setups such as vlan (VST), spoof-checks, min/max rate or state. Make sure we are eswitch manager when coming to issue these callbacks and err otherwise. Also fix the definition of ESW_ALLOWED to rely on eswitch_manager capability and on the vport_group_manger. Operations on the VF nic vport context, such as setting a mac or reading the vport counters are allowed to the PF in this scheme. The modify nic vport guid code was modified to omit checking the nic_vport_node_guid_modify eswitch capability. The reason for doing so is that modifying node guid requires vport group manager capability, and there's no need to check further capabilities. 1. set_vf_vlan - disallowed 2. set_vf_spoofchk - disallowed 3. set_vf_mac - allowed 4. get_vf_config - allowed 5. set_vf_trust - disallowed 6. set_vf_rate - disallowed 7. get_vf_stat - allowed 8. set_vf_link_state - disallowed Fixes: f942380c1239 ('net/mlx5: E-Switch, Vport ingress/egress ACLs rules for spoofchk') Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Tested-by: Or Gerlitz --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +++++------- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 -- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 103fd6a0cc65..b79d74860a30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1594,17 +1594,15 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ -#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev)) +#define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) + int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; int i, enabled_events; - if (!ESW_ALLOWED(esw)) - return 0; - - if (!MLX5_ESWITCH_MANAGER(esw->dev) || + if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; @@ -1806,7 +1804,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u64 node_guid; int err = 0; - if (!ESW_ALLOWED(esw)) + if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; @@ -1883,7 +1881,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, { struct mlx5_vport *evport; - if (!ESW_ALLOWED(esw)) + if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 719cecb182c6..7eecd5b07bb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -549,8 +549,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, return -EINVAL; if (!MLX5_CAP_GEN(mdev, vport_group_manager)) return -EACCES; - if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify)) - return -EOPNOTSUPP; in = kvzalloc(inlen, GFP_KERNEL); if (!in) From f811980444ec59ad62f9e041adbb576a821132c7 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 13 Jun 2018 10:27:34 +0300 Subject: [PATCH 069/382] net/mlx5: Fix required capability for manipulating MPFS Manipulating of the MPFS requires eswitch manager capabilities. Fixes: eeb66cdb6826 ('net/mlx5: Separate between E-Switch and MPFS') Signed-off-by: Eli Cohen Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 7cb67122e8b5..98359559c77e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "mlx5_core.h" #include "lib/mpfs.h" @@ -98,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev) int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); struct mlx5_mpfs *mpfs; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); @@ -122,7 +123,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return; WARN_ON(!hlist_empty(mpfs->hash)); @@ -137,7 +138,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) u32 index; int err; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; mutex_lock(&mpfs->lock); @@ -179,7 +180,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) int err = 0; u32 index; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev)) return 0; mutex_lock(&mpfs->lock); From d14fcb8d877caf1b8d6bd65d444bf62b21f2070c Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 22 May 2018 14:14:02 +0300 Subject: [PATCH 070/382] net/mlx5: Fix wrong size allocation for QoS ETC TC regitster The driver allocates wrong size (due to wrong struct name) when issuing a query/set request to NIC's register. Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate") Signed-off-by: Shay Agroskin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index fa9d0760dd36..31a9cbd85689 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -701,7 +701,7 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc); static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(qtct_reg)]; + u32 out[MLX5_ST_SZ_DW(qetc_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) return -EOPNOTSUPP; @@ -713,7 +713,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)]; if (!MLX5_CAP_GEN(mdev, ets)) return -EOPNOTSUPP; From 603b7bcff824740500ddfa001d7a7168b0b38542 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Fri, 25 May 2018 20:25:59 +0300 Subject: [PATCH 071/382] net/mlx5: Fix incorrect raw command length parsing The NULL character was not set correctly for the string containing the command length, this caused failures reading the output of the command due to a random length. The fix is to initialize the output length string. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 487388aed98f..2db5fe571594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1276,7 +1276,7 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, { struct mlx5_core_dev *dev = filp->private_data; struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; - char outlen_str[8]; + char outlen_str[8] = {0}; int outlen; void *ptr; int err; @@ -1291,8 +1291,6 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf, if (copy_from_user(outlen_str, buf, count)) return -EFAULT; - outlen_str[7] = 0; - err = sscanf(outlen_str, "%d", &outlen); if (err < 0) return err; From d412c31dae053bf30a1bc15582a9990df297a660 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 12 Jun 2018 16:14:31 +0300 Subject: [PATCH 072/382] net/mlx5: Fix command interface race in polling mode The command interface can work in two modes: Events and Polling. In the general case, each time we invoke a command, a work is queued to handle it. When working in events, the interrupt handler completes the command execution. On the other hand, when working in polling mode, the work itself completes it. Due to a bug in the work handler, a command could have been completed by the interrupt handler, while the work handler hasn't finished yet, causing the it to complete once again if the command interface mode was changed from Events to polling after the interrupt handler was called. mlx5_unload_one() mlx5_stop_eqs() // Destroy the EQ before cmd EQ ...cmd_work_handler() write_doorbell() --> EVENT_TYPE_CMD mlx5_cmd_comp_handler() // First free free_ent(cmd, ent->idx) complete(&ent->done) <-- mlx5_stop_eqs //cmd was complete // move to polling before destroying the last cmd EQ mlx5_cmd_use_polling() cmd->mode = POLL; --> cmd_work_handler (continues) if (cmd->mode == POLL) mlx5_cmd_comp_handler() // Double free The solution is to store the cmd->mode before writing the doorbell. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2db5fe571594..384c1fa49081 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -807,6 +807,7 @@ static void cmd_work_handler(struct work_struct *work) unsigned long flags; bool poll_cmd = ent->polling; int alloc_ret; + int cmd_mode; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); @@ -853,6 +854,7 @@ static void cmd_work_handler(struct work_struct *work) set_signature(ent, !cmd->checksum_disabled); dump_command(dev, ent, 1); ent->ts1 = ktime_get_ns(); + cmd_mode = cmd->mode; if (ent->callback) schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); @@ -877,7 +879,7 @@ static void cmd_work_handler(struct work_struct *work) iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); mmiowb(); /* if not in polling don't use ent after this point */ - if (cmd->mode == CMD_MODE_POLLING || poll_cmd) { + if (cmd_mode == CMD_MODE_POLLING || poll_cmd) { poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); From c809195f5523dd4d09403bbb1c9732d548aa0d1e Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 25 Jun 2018 06:41:25 -0700 Subject: [PATCH 073/382] rds: clean up loopback rds_connections on netns deletion The RDS core module creates rds_connections based on callbacks from rds_loop_transport when sending/receiving packets to local addresses. These connections will need to be cleaned up when they are created from a netns that is not init_net, and that netns is deleted. Add the changes aligned with the changes from commit ebeeb1ad9b8a ("rds: tcp: use rds_destroy_pending() to synchronize netns/module teardown and rds connection/workq management") for rds_loop_transport Reported-and-tested-by: syzbot+4c20b3866171ce8441d2@syzkaller.appspotmail.com Acked-by: Santosh Shilimkar Signed-off-by: Sowmini Varadhan Signed-off-by: David S. Miller --- net/rds/connection.c | 11 ++++++++- net/rds/loop.c | 56 ++++++++++++++++++++++++++++++++++++++++++++ net/rds/loop.h | 2 ++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index abef75da89a7..cfb05953b0e5 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -659,11 +659,19 @@ static void rds_conn_info(struct socket *sock, unsigned int len, int rds_conn_init(void) { + int ret; + + ret = rds_loop_net_init(); /* register pernet callback */ + if (ret) + return ret; + rds_conn_slab = kmem_cache_create("rds_connection", sizeof(struct rds_connection), 0, 0, NULL); - if (!rds_conn_slab) + if (!rds_conn_slab) { + rds_loop_net_exit(); return -ENOMEM; + } rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); rds_info_register_func(RDS_INFO_SEND_MESSAGES, @@ -676,6 +684,7 @@ int rds_conn_init(void) void rds_conn_exit(void) { + rds_loop_net_exit(); /* unregister pernet callback */ rds_loop_exit(); WARN_ON(!hlist_empty(rds_conn_hash)); diff --git a/net/rds/loop.c b/net/rds/loop.c index dac6218a460e..feea1f96ee2a 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include "rds_single_path.h" #include "rds.h" @@ -40,6 +42,17 @@ static DEFINE_SPINLOCK(loop_conns_lock); static LIST_HEAD(loop_conns); +static atomic_t rds_loop_unloading = ATOMIC_INIT(0); + +static void rds_loop_set_unloading(void) +{ + atomic_set(&rds_loop_unloading, 1); +} + +static bool rds_loop_is_unloading(struct rds_connection *conn) +{ + return atomic_read(&rds_loop_unloading) != 0; +} /* * This 'loopback' transport is a special case for flows that originate @@ -165,6 +178,8 @@ void rds_loop_exit(void) struct rds_loop_connection *lc, *_lc; LIST_HEAD(tmp_list); + rds_loop_set_unloading(); + synchronize_rcu(); /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&loop_conns_lock); list_splice(&loop_conns, &tmp_list); @@ -177,6 +192,46 @@ void rds_loop_exit(void) } } +static void rds_loop_kill_conns(struct net *net) +{ + struct rds_loop_connection *lc, *_lc; + LIST_HEAD(tmp_list); + + spin_lock_irq(&loop_conns_lock); + list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node) { + struct net *c_net = read_pnet(&lc->conn->c_net); + + if (net != c_net) + continue; + list_move_tail(&lc->loop_node, &tmp_list); + } + spin_unlock_irq(&loop_conns_lock); + + list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) { + WARN_ON(lc->conn->c_passive); + rds_conn_destroy(lc->conn); + } +} + +static void __net_exit rds_loop_exit_net(struct net *net) +{ + rds_loop_kill_conns(net); +} + +static struct pernet_operations rds_loop_net_ops = { + .exit = rds_loop_exit_net, +}; + +int rds_loop_net_init(void) +{ + return register_pernet_device(&rds_loop_net_ops); +} + +void rds_loop_net_exit(void) +{ + unregister_pernet_device(&rds_loop_net_ops); +} + /* * This is missing .xmit_* because loop doesn't go through generic * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and @@ -194,4 +249,5 @@ struct rds_transport rds_loop_transport = { .inc_free = rds_loop_inc_free, .t_name = "loopback", .t_type = RDS_TRANS_LOOP, + .t_unloading = rds_loop_is_unloading, }; diff --git a/net/rds/loop.h b/net/rds/loop.h index 469fa4b2da4f..bbc8cdd030df 100644 --- a/net/rds/loop.h +++ b/net/rds/loop.h @@ -5,6 +5,8 @@ /* loop.c */ extern struct rds_transport rds_loop_transport; +int rds_loop_net_init(void); +void rds_loop_net_exit(void); void rds_loop_exit(void); #endif From 7c8f4e6dc30996bff806285730a0bb4e714d3d52 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 26 Jun 2018 01:39:32 +0200 Subject: [PATCH 074/382] fib_rules: match rules based on suppress_* properties too Two rules with different values of suppress_prefix or suppress_ifgroup are not the same. This fixes an -EEXIST when running: $ ip -4 rule add table main suppress_prefixlength 0 Signed-off-by: Jason A. Donenfeld Fixes: f9d4b0c1e969 ("fib_rules: move common handling of newrule delrule msgs into fib_nl2rule") Signed-off-by: David S. Miller --- net/core/fib_rules.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 126ffc5bc630..bc8425d81022 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -416,6 +416,14 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops, if (rule->mark && r->mark != rule->mark) continue; + if (rule->suppress_ifgroup != -1 && + r->suppress_ifgroup != rule->suppress_ifgroup) + continue; + + if (rule->suppress_prefixlen != -1 && + r->suppress_prefixlen != rule->suppress_prefixlen) + continue; + if (rule->mark_mask && r->mark_mask != rule->mark_mask) continue; From a64119415ff248efa61301783bc26551df5dabf6 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Mon, 25 Jun 2018 20:36:27 -0700 Subject: [PATCH 075/382] nfp: flower: fix mpls ether type detection Previously it was not possible to distinguish between mpls ether types and other ether types. This leads to incorrect classification of offloaded filters that match on mpls ether type. For example the following two filters overlap: # tc filter add dev eth0 parent ffff: \ protocol 0x8847 flower \ action mirred egress redirect dev eth1 # tc filter add dev eth0 parent ffff: \ protocol 0x0800 flower \ action mirred egress redirect dev eth2 The driver now correctly includes the mac_mpls layer where HW stores mpls fields, when it detects an mpls ether type. It also sets the MPLS_Q bit to indicate that the filter should match mpls packets. Fixes: bb055c198d9b ("nfp: add mpls match offloading support") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/match.c | 14 ++++++++++++++ .../net/ethernet/netronome/nfp/flower/offload.c | 8 ++++++++ 2 files changed, 22 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 91935405f586..84f7a5dbea9d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -123,6 +123,20 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame, NFP_FLOWER_MASK_MPLS_Q; frame->mpls_lse = cpu_to_be32(t_mpls); + } else if (dissector_uses_key(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC)) { + /* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q + * bit, which indicates an mpls ether type but without any + * mpls fields. + */ + struct flow_dissector_key_basic *key_basic; + + key_basic = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_BASIC, + flow->key); + if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) || + key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) + frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q); } } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index c42e64f32333..477f584f6d28 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -264,6 +264,14 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, case cpu_to_be16(ETH_P_ARP): return -EOPNOTSUPP; + case cpu_to_be16(ETH_P_MPLS_UC): + case cpu_to_be16(ETH_P_MPLS_MC): + if (!(key_layer & NFP_FLOWER_LAYER_MAC)) { + key_layer |= NFP_FLOWER_LAYER_MAC; + key_size += sizeof(struct nfp_flower_mac_mpls); + } + break; + /* Will be included in layer 2. */ case cpu_to_be16(ETH_P_8021Q): break; From 951a8ee6def39e25d0e60b9394e5a249ba8b2390 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Mon, 25 Jun 2018 20:36:28 -0700 Subject: [PATCH 076/382] nfp: reject binding to shared blocks TC shared blocks allow multiple qdiscs to be grouped together and filters shared between them. Currently the chains of filters attached to a block are only flushed when the block is removed. If a qdisc is removed from a block but the block still exists, flow del messages are not passed to the callback registered for that qdisc. For the NFP, this presents the possibility of rules still existing in hw when they should be removed. Prevent binding to shared blocks until the kernel can send per qdisc del messages when block unbinds occur. tcf_block_shared() was not used outside of the core until now, so also add an empty implementation for builds with CONFIG_NET_CLS=n. Fixes: 4861738775d7 ("net: sched: introduce shared filter blocks infrastructure") Signed-off-by: John Hurley Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 3 +++ drivers/net/ethernet/netronome/nfp/flower/offload.c | 3 +++ include/net/pkt_cls.h | 5 +++++ 3 files changed, 11 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index fcdfb8e7fdea..6b15e3b11956 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -202,6 +202,9 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev, if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + if (tcf_block_shared(f->block)) + return -EOPNOTSUPP; + switch (f->command) { case TC_BLOCK_BIND: return tcf_block_cb_register(f->block, diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 477f584f6d28..525057bee0ed 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -631,6 +631,9 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev, if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + if (tcf_block_shared(f->block)) + return -EOPNOTSUPP; + switch (f->command) { case TC_BLOCK_BIND: return tcf_block_cb_register(f->block, diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a3c1a2c47cd4..20b059574e60 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -111,6 +111,11 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, { } +static inline bool tcf_block_shared(struct tcf_block *block) +{ + return false; +} + static inline struct Qdisc *tcf_block_q(struct tcf_block *block) { return NULL; From 2d2595719a97c876f35b1e60e5768e58753b268c Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Tue, 26 Jun 2018 09:16:31 +0800 Subject: [PATCH 077/382] nfp: cast sizeof() to int when comparing with error code sizeof() will return unsigned value so in the error check negative error code will be always larger than sizeof(). Fixes: a0d8e02c35ff ("nfp: add support for reading nffw info") Signed-off-by: Chengguang Xu Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c index cd34097b79f1..37a6d7822a38 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nffw.c @@ -232,7 +232,7 @@ struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp) err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res), nfp_resource_address(state->res), fwinf, sizeof(*fwinf)); - if (err < sizeof(*fwinf)) + if (err < (int)sizeof(*fwinf)) goto err_release; if (!nffw_res_flg_init_get(fwinf)) From 45a0faaba9c8c5ba1e31a08a391aed0bad327167 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 27 Jun 2018 10:07:45 -0300 Subject: [PATCH 078/382] drm/etnaviv: Check for platform_device_register_simple() failure platform_device_register_simple() may fail, so we should better check its return value and propagate it in the case of error. Cc: Fixes: 246774d17fc0 ("drm/etnaviv: remove the need for a gpu-subsystem DT node") Signed-off-by: Fabio Estevam Reviewed-by: Philipp Zabel Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index e5013a999147..19abd3ca6618 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -631,8 +631,11 @@ static struct platform_driver etnaviv_platform_driver = { }, }; +static struct platform_device *etnaviv_drm; + static int __init etnaviv_init(void) { + struct platform_device *pdev; int ret; struct device_node *np; @@ -644,7 +647,7 @@ static int __init etnaviv_init(void) ret = platform_driver_register(&etnaviv_platform_driver); if (ret != 0) - platform_driver_unregister(&etnaviv_gpu_driver); + goto unregister_gpu_driver; /* * If the DT contains at least one available GPU device, instantiate @@ -653,12 +656,24 @@ static int __init etnaviv_init(void) for_each_compatible_node(np, NULL, "vivante,gc") { if (!of_device_is_available(np)) continue; - - platform_device_register_simple("etnaviv", -1, NULL, 0); + pdev = platform_device_register_simple("etnaviv", -1, + NULL, 0); + if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); + of_node_put(np); + goto unregister_platform_driver; + } + etnaviv_drm = pdev; of_node_put(np); break; } + return 0; + +unregister_platform_driver: + platform_driver_unregister(&etnaviv_platform_driver); +unregister_gpu_driver: + platform_driver_unregister(&etnaviv_gpu_driver); return ret; } module_init(etnaviv_init); From bf6ba3aeb2962e5ee4a78e7535af579ecba630bb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 27 Jun 2018 10:07:46 -0300 Subject: [PATCH 079/382] drm/etnaviv: Fix driver unregistering Russell King reported: "When removing and reloading the etnaviv module, the following splat occurs: sysfs: cannot create duplicate filename '/devices/platform/etnaviv' CPU: 0 PID: 1471 Comm: modprobe Not tainted 4.17.0+ #1608 Hardware name: Marvell Dove (Cubox) Backtrace: [] (dump_backtrace) from [] (show_stack+0x18/0x1c) r6:ef033e38 r5:ee07b340 r4:edb9d000 r3:00000000 [] (show_stack) from [] (dump_stack+0x20/0x28) [] (dump_stack) from [] (sysfs_warn_dup+0x5c/0x70) [] (sysfs_warn_dup) from [] (sysfs_create_dir_ns+0x90/0x98) ..." Commit 246774d17fc0 ("drm/etnaviv: remove the need for a gpu-subsystem DT node") introduced DRM registration via platform_device_register_simple(), but missed to call platform_device_unregister() inside etnaviv_exit(). Fix the problem by calling platform_device_unregister() inside etnaviv_exit(). While at it, also rearrange the function calls in the exit path to make them happen in the opposite order of registration. Tested on a imx6-sabresd board. Cc: Fixes: 246774d17fc0 ("drm/etnaviv: remove the need for a gpu-subsystem DT node") Reported-by: Russell King Signed-off-by: Fabio Estevam Reviewed-by: Philipp Zabel Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index 19abd3ca6618..540b59fb4103 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -680,8 +680,9 @@ module_init(etnaviv_init); static void __exit etnaviv_exit(void) { - platform_driver_unregister(&etnaviv_gpu_driver); + platform_device_unregister(etnaviv_drm); platform_driver_unregister(&etnaviv_platform_driver); + platform_driver_unregister(&etnaviv_gpu_driver); } module_exit(etnaviv_exit); From 82be2ab159a3a0ae4024b946a31f12b221f6c8ff Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 27 Jun 2018 17:47:37 +0800 Subject: [PATCH 080/382] hinic: reset irq affinity before freeing irq Following warning is seen when rmmod hinic. This is because affinity value is not reset before calling free_irq(). This patch fixes it. [ 55.181232] WARNING: CPU: 38 PID: 19589 at kernel/irq/manage.c:1608 __free_irq+0x2aa/0x2c0 Fixes: 352f58b0d9f2 ("net-next/hinic: Set Rxq irq to specific cpu for NUMA") Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index e2e5cdc7119c..4c0f7eda1166 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -439,6 +439,7 @@ static void rx_free_irq(struct hinic_rxq *rxq) { struct hinic_rq *rq = rxq->rq; + irq_set_affinity_hint(rq->irq, NULL); free_irq(rq->irq, rxq); rx_del_napi(rxq); } From 88e85a7daf8e21f2d6cb054374d480c540725cde Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jun 2018 12:55:35 +0900 Subject: [PATCH 081/382] bpfilter: check compiler capability in Kconfig With the brand-new syntax extension of Kconfig, we can directly check the compiler capability in the configuration phase. If the cc-can-link.sh fails, the BPFILTER_UMH is automatically hidden by the dependency. I also deleted 'default n', which is no-op. Signed-off-by: Masahiro Yamada Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Makefile | 5 ----- net/Makefile | 4 ---- net/bpfilter/Kconfig | 2 +- scripts/cc-can-link.sh | 2 +- 4 files changed, 2 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index c9132594860b..e1bd4c3627bc 100644 --- a/Makefile +++ b/Makefile @@ -507,11 +507,6 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLA KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif -ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/cc-can-link.sh $(CC)), y) - CC_CAN_LINK := y - export CC_CAN_LINK -endif - # The expansion should be delayed until arch/$(SRCARCH)/Makefile is included. # Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile. # CC_VERSION_TEXT is referenced from Kconfig (so it needs export), diff --git a/net/Makefile b/net/Makefile index 13ec0d5415c7..bdaf53925acd 100644 --- a/net/Makefile +++ b/net/Makefile @@ -20,11 +20,7 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_NET) += ipv6/ -ifneq ($(CC_CAN_LINK),y) -$(warning CC cannot link executables. Skipping bpfilter.) -else obj-$(CONFIG_BPFILTER) += bpfilter/ -endif obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig index a948b072c28f..76deb6615883 100644 --- a/net/bpfilter/Kconfig +++ b/net/bpfilter/Kconfig @@ -1,6 +1,5 @@ menuconfig BPFILTER bool "BPF based packet filtering framework (BPFILTER)" - default n depends on NET && BPF && INET help This builds experimental bpfilter framework that is aiming to @@ -9,6 +8,7 @@ menuconfig BPFILTER if BPFILTER config BPFILTER_UMH tristate "bpfilter kernel module with user mode helper" + depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC)) default m help This builds bpfilter kernel module with embedded user mode helper diff --git a/scripts/cc-can-link.sh b/scripts/cc-can-link.sh index 208eb2825dab..6efcead31989 100755 --- a/scripts/cc-can-link.sh +++ b/scripts/cc-can-link.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1 && echo "y" +cat << "END" | $@ -x c - -o /dev/null >/dev/null 2>&1 #include int main(void) { From fec9d3b1dc4c481f20f5d2f5aef3ad1cb7504186 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 26 Jun 2018 10:44:01 +0200 Subject: [PATCH 082/382] net: macb: initialize bp->queues[0].bp for at91rm9200 The macb driver currently crashes on at91rm9200 with the following trace: Unable to handle kernel NULL pointer dereference at virtual address 00000014 [...] [] (macb_rx_desc) from [] (at91ether_open+0x2e8/0x3f8) [] (at91ether_open) from [] (__dev_open+0x120/0x13c) [] (__dev_open) from [] (__dev_change_flags+0x17c/0x1a8) [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x4c) [] (dev_change_flags) from [] (ip_auto_config+0x220/0x10b0) [] (ip_auto_config) from [] (do_one_initcall+0x78/0x18c) [] (do_one_initcall) from [] (kernel_init_freeable+0x184/0x1c4) [] (kernel_init_freeable) from [] (kernel_init+0x8/0xe8) [] (kernel_init) from [] (ret_from_fork+0x14/0x34) Solve that by initializing bp->queues[0].bp in at91ether_init (as is done in macb_init). Fixes: ae1f2a56d273 ("net: macb: Added support for many RX queues") Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 3e93df5d4e3b..96cc03a6d942 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3726,6 +3726,8 @@ static int at91ether_init(struct platform_device *pdev) int err; u32 reg; + bp->queues[0].bp = bp; + dev->netdev_ops = &at91ether_netdev_ops; dev->ethtool_ops = &macb_ethtool_ops; From ad088ec480768850db019a5cc543685e868a513d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 26 Jun 2018 17:39:48 +0200 Subject: [PATCH 083/382] ixgbe: split XDP_TX tail and XDP_REDIRECT map flushing The driver was combining the XDP_TX tail flush and XDP_REDIRECT map flushing (xdp_do_flush_map). This is suboptimal, these two flush operations should be kept separate. Fixes: 11393cc9b9be ("xdp: Add batching support to redirect map") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3e87dbbc9024..62e57b05a0ae 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2186,9 +2186,10 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, return skb; } -#define IXGBE_XDP_PASS 0 -#define IXGBE_XDP_CONSUMED 1 -#define IXGBE_XDP_TX 2 +#define IXGBE_XDP_PASS 0 +#define IXGBE_XDP_CONSUMED BIT(0) +#define IXGBE_XDP_TX BIT(1) +#define IXGBE_XDP_REDIR BIT(2) static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, struct xdp_frame *xdpf); @@ -2225,7 +2226,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, case XDP_REDIRECT: err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); if (!err) - result = IXGBE_XDP_TX; + result = IXGBE_XDP_REDIR; else result = IXGBE_XDP_CONSUMED; break; @@ -2285,7 +2286,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); - bool xdp_xmit = false; + unsigned int xdp_xmit = 0; struct xdp_buff xdp; xdp.rxq = &rx_ring->xdp_rxq; @@ -2328,8 +2329,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, } if (IS_ERR(skb)) { - if (PTR_ERR(skb) == -IXGBE_XDP_TX) { - xdp_xmit = true; + unsigned int xdp_res = -PTR_ERR(skb); + + if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) { + xdp_xmit |= xdp_res; ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size); } else { rx_buffer->pagecnt_bias++; @@ -2401,7 +2404,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, total_rx_packets++; } - if (xdp_xmit) { + if (xdp_xmit & IXGBE_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_xmit & IXGBE_XDP_TX) { struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; /* Force memory writes to complete before letting h/w @@ -2409,8 +2415,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ wmb(); writel(ring->next_to_use, ring->tail); - - xdp_do_flush_map(); } u64_stats_update_begin(&rx_ring->syncp); From 2e6893123830d04edc366e0ce59d46e622e140c1 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 26 Jun 2018 17:39:53 +0200 Subject: [PATCH 084/382] i40e: split XDP_TX tail and XDP_REDIRECT map flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver was combining the XDP_TX tail flush and XDP_REDIRECT map flushing (xdp_do_flush_map). This is suboptimal, these two flush operations should be kept separate. It looks like the mistake was copy-pasted from ixgbe. Fixes: d9314c474d4f ("i40e: add support for XDP_REDIRECT") Signed-off-by: Jesper Dangaard Brouer Acked-by: Björn Töpel Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ed6dbcfd4e96..b151ae316546 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2199,9 +2199,10 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring, return true; } -#define I40E_XDP_PASS 0 -#define I40E_XDP_CONSUMED 1 -#define I40E_XDP_TX 2 +#define I40E_XDP_PASS 0 +#define I40E_XDP_CONSUMED BIT(0) +#define I40E_XDP_TX BIT(1) +#define I40E_XDP_REDIR BIT(2) static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, struct i40e_ring *xdp_ring); @@ -2248,7 +2249,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring, break; case XDP_REDIRECT: err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED; + result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED; break; default: bpf_warn_invalid_xdp_action(act); @@ -2311,7 +2312,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct sk_buff *skb = rx_ring->skb; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); - bool failure = false, xdp_xmit = false; + unsigned int xdp_xmit = 0; + bool failure = false; struct xdp_buff xdp; xdp.rxq = &rx_ring->xdp_rxq; @@ -2372,8 +2374,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) } if (IS_ERR(skb)) { - if (PTR_ERR(skb) == -I40E_XDP_TX) { - xdp_xmit = true; + unsigned int xdp_res = -PTR_ERR(skb); + + if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) { + xdp_xmit |= xdp_res; i40e_rx_buffer_flip(rx_ring, rx_buffer, size); } else { rx_buffer->pagecnt_bias++; @@ -2427,12 +2431,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) total_rx_packets++; } - if (xdp_xmit) { + if (xdp_xmit & I40E_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_xmit & I40E_XDP_TX) { struct i40e_ring *xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index]; i40e_xdp_ring_update_tail(xdp_ring); - xdp_do_flush_map(); } rx_ring->skb = skb; From 2471c75efed32529698c26da499954f0253cb401 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 26 Jun 2018 17:39:58 +0200 Subject: [PATCH 085/382] virtio_net: split XDP_TX kick and XDP_REDIRECT map flushing The driver was combining XDP_TX virtqueue_kick and XDP_REDIRECT map flushing (xdp_do_flush_map). This is suboptimal, these two flush operations should be kept separate. The suboptimal behavior was introduced in commit 9267c430c6b6 ("virtio-net: add missing virtqueue kick when flushing packets"). Fixes: 9267c430c6b6 ("virtio-net: add missing virtqueue kick when flushing packets") Signed-off-by: Jesper Dangaard Brouer Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b6c9a2af3732..53085c63277b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -53,6 +53,10 @@ module_param(napi_tx, bool, 0644); /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ #define VIRTIO_XDP_HEADROOM 256 +/* Separating two types of XDP xmit */ +#define VIRTIO_XDP_TX BIT(0) +#define VIRTIO_XDP_REDIR BIT(1) + /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- @@ -582,7 +586,7 @@ static struct sk_buff *receive_small(struct net_device *dev, struct receive_queue *rq, void *buf, void *ctx, unsigned int len, - bool *xdp_xmit) + unsigned int *xdp_xmit) { struct sk_buff *skb; struct bpf_prog *xdp_prog; @@ -654,14 +658,14 @@ static struct sk_buff *receive_small(struct net_device *dev, trace_xdp_exception(vi->dev, xdp_prog, act); goto err_xdp; } - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_TX; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: err = xdp_do_redirect(dev, &xdp, xdp_prog); if (err) goto err_xdp; - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_REDIR; rcu_read_unlock(); goto xdp_xmit; default: @@ -723,7 +727,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, void *buf, void *ctx, unsigned int len, - bool *xdp_xmit) + unsigned int *xdp_xmit) { struct virtio_net_hdr_mrg_rxbuf *hdr = buf; u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); @@ -818,7 +822,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(xdp_page); goto err_xdp; } - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_TX; if (unlikely(xdp_page != page)) put_page(page); rcu_read_unlock(); @@ -830,7 +834,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(xdp_page); goto err_xdp; } - *xdp_xmit = true; + *xdp_xmit |= VIRTIO_XDP_REDIR; if (unlikely(xdp_page != page)) put_page(page); rcu_read_unlock(); @@ -939,7 +943,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, - void *buf, unsigned int len, void **ctx, bool *xdp_xmit) + void *buf, unsigned int len, void **ctx, + unsigned int *xdp_xmit) { struct net_device *dev = vi->dev; struct sk_buff *skb; @@ -1232,7 +1237,8 @@ static void refill_work(struct work_struct *work) } } -static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) +static int virtnet_receive(struct receive_queue *rq, int budget, + unsigned int *xdp_xmit) { struct virtnet_info *vi = rq->vq->vdev->priv; unsigned int len, received = 0, bytes = 0; @@ -1321,7 +1327,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget) struct virtnet_info *vi = rq->vq->vdev->priv; struct send_queue *sq; unsigned int received, qp; - bool xdp_xmit = false; + unsigned int xdp_xmit = 0; virtnet_poll_cleantx(rq); @@ -1331,12 +1337,14 @@ static int virtnet_poll(struct napi_struct *napi, int budget) if (received < budget) virtqueue_napi_complete(napi, rq->vq, received); - if (xdp_xmit) { + if (xdp_xmit & VIRTIO_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_xmit & VIRTIO_XDP_TX) { qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id(); sq = &vi->sq[qp]; virtqueue_kick(sq->vq); - xdp_do_flush_map(); } return received; From e3f329c600033f011a978a8bc4ddb1e2e94c4f4d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 19 Jun 2018 15:20:50 +0200 Subject: [PATCH 086/382] dmaengine: pl330: report BURST residue granularity The reported residue is already calculated in BURST unit granularity, so advertise this capability properly to other devices in the system. Fixes: aee4d1fac887 ("dmaengine: pl330: improve pl330_tx_status() function") Signed-off-by: Marek Szyprowski Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index defcdde4d358..de0957fe9668 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -3033,7 +3033,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) pd->src_addr_widths = PL330_DMA_BUSWIDTHS; pd->dst_addr_widths = PL330_DMA_BUSWIDTHS; pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - pd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT; + pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ? 1 : PL330_MAX_BURST); From c4c2b7644cc9a41f17a8cc8904efe3f66ae4c7ed Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Jun 2018 14:15:47 +0300 Subject: [PATCH 087/382] dmaengine: k3dma: Off by one in k3_of_dma_simple_xlate() The d->chans[] array has d->dma_requests elements so the > should be >= here. Fixes: 8e6152bc660e ("dmaengine: Add hisilicon k3 DMA engine driver") Signed-off-by: Dan Carpenter Signed-off-by: Vinod Koul --- drivers/dma/k3dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index fa31cccbe04f..6bfa217ed6d0 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -794,7 +794,7 @@ static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct k3_dma_dev *d = ofdma->of_dma_data; unsigned int request = dma_spec->args[0]; - if (request > d->dma_requests) + if (request >= d->dma_requests) return NULL; return dma_get_slave_channel(&(d->chans[request].vc.chan)); From 933e671f8cff87552d4d26cf3874633b11ae78ba Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 26 Jun 2018 12:22:28 -0600 Subject: [PATCH 088/382] selftests/net: Fix permissions for fib_tests.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fib_tests.sh became non-executable at some point. This is what happens: selftests: net: fib_tests.sh: Warning: file fib_tests.sh is not executable, correct this. not ok 1..11 selftests: net: fib_tests.sh [FAIL] Fixes: d69faad76584 ("selftests: fib_tests: Add prefix route tests with metric") Signed-off-by: Daniel Díaz Signed-off-by: David S. Miller --- tools/testing/selftests/net/fib_tests.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/net/fib_tests.sh diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh old mode 100644 new mode 100755 From 977c7114ebda2e746a114840d3a875e0cdb826fb Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Tue, 26 Jun 2018 18:33:33 -0700 Subject: [PATCH 089/382] strparser: Remove early eaten to fix full tcp receive buffer stall On receving an incomplete message, the existing code stores the remaining length of the cloned skb in the early_eaten field instead of incrementing the value returned by __strp_recv. This defers invocation of sock_rfree for the current skb until the next invocation of __strp_recv, which returns early_eaten if early_eaten is non-zero. This behavior causes a stall when the current message occupies the very tail end of a massive skb, and strp_peek/need_bytes indicates that the remainder of the current message has yet to arrive on the socket. The TCP receive buffer is totally full, causing the TCP window to go to zero, so the remainder of the message will never arrive. Incrementing the value returned by __strp_recv by the amount otherwise stored in early_eaten prevents stalls of this nature. Signed-off-by: Doron Roberts-Kedes Signed-off-by: David S. Miller --- net/strparser/strparser.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 373836615c57..625acb27efcc 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -35,7 +35,6 @@ struct _strp_msg { */ struct strp_msg strp; int accum_len; - int early_eaten; }; static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) @@ -115,20 +114,6 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, head = strp->skb_head; if (head) { /* Message already in progress */ - - stm = _strp_msg(head); - if (unlikely(stm->early_eaten)) { - /* Already some number of bytes on the receive sock - * data saved in skb_head, just indicate they - * are consumed. - */ - eaten = orig_len <= stm->early_eaten ? - orig_len : stm->early_eaten; - stm->early_eaten -= eaten; - - return eaten; - } - if (unlikely(orig_offset)) { /* Getting data with a non-zero offset when a message is * in progress is not expected. If it does happen, we @@ -297,9 +282,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, } stm->accum_len += cand_len; + eaten += cand_len; strp->need_bytes = stm->strp.full_len - stm->accum_len; - stm->early_eaten = cand_len; STRP_STATS_ADD(strp->stats.bytes, cand_len); desc->count = 0; /* Stop reading socket */ break; From 8e75887d321d102200abf3a9fa621e2c10ff4cc5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Jun 2018 20:13:48 -0700 Subject: [PATCH 090/382] bpfilter: include bpfilter_umh in assembly instead of using objcopy What we want here is to embed a user-space program into the kernel. Instead of the complex ELF magic, let's simply wrap it in the assembly with the '.incbin' directive. Signed-off-by: Masahiro Yamada Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- net/bpfilter/Makefile | 17 ++--------------- net/bpfilter/bpfilter_kern.c | 11 +++++------ net/bpfilter/bpfilter_umh_blob.S | 7 +++++++ 3 files changed, 14 insertions(+), 21 deletions(-) create mode 100644 net/bpfilter/bpfilter_umh_blob.S diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index 051dc18b8ccb..39c6980b5d99 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -15,20 +15,7 @@ ifeq ($(CONFIG_BPFILTER_UMH), y) HOSTLDFLAGS += -static endif -# a bit of elf magic to convert bpfilter_umh binary into a binary blob -# inside bpfilter_umh.o elf file referenced by -# _binary_net_bpfilter_bpfilter_umh_start symbol -# which bpfilter_kern.c passes further into umh blob loader at run-time -quiet_cmd_copy_umh = GEN $@ - cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \ - $(OBJCOPY) -I binary \ - `LC_ALL=C $(OBJDUMP) -f net/bpfilter/bpfilter_umh \ - |awk -F' |,' '/file format/{print "-O",$$NF} \ - /^architecture:/{print "-B",$$2}'` \ - --rename-section .data=.init.rodata $< $@ - -$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh - $(call cmd,copy_umh) +$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o -bpfilter-objs += bpfilter_kern.o bpfilter_umh.o +bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 09522573f611..f0fc182d3db7 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -10,11 +10,8 @@ #include #include "msgfmt.h" -#define UMH_start _binary_net_bpfilter_bpfilter_umh_start -#define UMH_end _binary_net_bpfilter_bpfilter_umh_end - -extern char UMH_start; -extern char UMH_end; +extern char bpfilter_umh_start; +extern char bpfilter_umh_end; static struct umh_info info; /* since ip_getsockopt() can run in parallel, serialize access to umh */ @@ -93,7 +90,9 @@ static int __init load_umh(void) int err; /* fork usermode process */ - err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); + err = fork_usermode_blob(&bpfilter_umh_start, + &bpfilter_umh_end - &bpfilter_umh_start, + &info); if (err) return err; pr_info("Loaded bpfilter_umh pid %d\n", info.pid); diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S new file mode 100644 index 000000000000..40311d10d2f2 --- /dev/null +++ b/net/bpfilter/bpfilter_umh_blob.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .section .init.rodata, "a" + .global bpfilter_umh_start +bpfilter_umh_start: + .incbin "net/bpfilter/bpfilter_umh" + .global bpfilter_umh_end +bpfilter_umh_end: From 15ecbe94a45ef88491ca459b26efdd02f91edb6d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jun 2018 08:47:21 -0700 Subject: [PATCH 091/382] tcp: add one more quick ack after after ECN events Larry Brakmo proposal ( https://patchwork.ozlabs.org/patch/935233/ tcp: force cwnd at least 2 in tcp_cwnd_reduction) made us rethink about our recent patch removing ~16 quick acks after ECN events. tcp_enter_quickack_mode(sk, 1) makes sure one immediate ack is sent, but in the case the sender cwnd was lowered to 1, we do not want to have a delayed ack for the next packet we will receive. Fixes: 522040ea5fdd ("tcp: do not aggressively quick ack after ECN events") Signed-off-by: Eric Dumazet Reported-by: Neal Cardwell Cc: Lawrence Brakmo Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 355d3dffd021..045d930d01a9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -265,7 +265,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) * it is probably a retransmit. */ if (tp->ecn_flags & TCP_ECN_SEEN) - tcp_enter_quickack_mode(sk, 1); + tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: if (tcp_ca_needs_ecn(sk)) @@ -273,7 +273,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { /* Better not delay acks, sender can have a very low cwnd */ - tcp_enter_quickack_mode(sk, 1); + tcp_enter_quickack_mode(sk, 2); tp->ecn_flags |= TCP_ECN_DEMAND_CWR; } tp->ecn_flags |= TCP_ECN_SEEN; From 24ac3a08e65845a098140ff270229dec4a897404 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 27 Jun 2018 17:59:50 +0200 Subject: [PATCH 092/382] net/smc: rebuild nonblocking connect The recent poll change may lead to stalls for non-blocking connecting SMC sockets, since sock_poll_wait is no longer performed on the internal CLC socket, but on the outer SMC socket. kernel_connect() on the internal CLC socket returns with -EINPROGRESS, but the wake up logic does not work in all cases. If the internal CLC socket is still in state TCP_SYN_SENT when polled, sock_poll_wait() from sock_poll() does not sleep. It is supposed to sleep till the state of the internal CLC socket switches to TCP_ESTABLISHED. This problem triggered a redesign of the SMC nonblocking connect logic. This patch introduces a connect worker covering all connect steps followed by a wake up of socket waiters. It allows to get rid of all delays and locks in smc_poll(). Fixes: c0129a061442 ("smc: convert to ->poll_mask") Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 91 +++++++++++++++++++++++++++++++++--------------- net/smc/smc.h | 8 +++++ 2 files changed, 71 insertions(+), 28 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da7f02edcd37..4d1d4ddb2f49 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -45,6 +45,7 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group */ static void smc_tcp_listen_work(struct work_struct *); +static void smc_connect_work(struct work_struct *); static void smc_set_keepalive(struct sock *sk, int val) { @@ -122,6 +123,12 @@ static int smc_release(struct socket *sock) goto out; smc = smc_sk(sk); + + /* cleanup for a dangling non-blocking connect */ + flush_work(&smc->connect_work); + kfree(smc->connect_info); + smc->connect_info = NULL; + if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires * sock lock for child sockets again @@ -186,6 +193,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_protocol = protocol; smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); + INIT_WORK(&smc->connect_work, smc_connect_work); INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); INIT_LIST_HEAD(&smc->accept_q); spin_lock_init(&smc->accept_q_lock); @@ -576,6 +584,35 @@ static int __smc_connect(struct smc_sock *smc) return 0; } +static void smc_connect_work(struct work_struct *work) +{ + struct smc_sock *smc = container_of(work, struct smc_sock, + connect_work); + int rc; + + lock_sock(&smc->sk); + rc = kernel_connect(smc->clcsock, &smc->connect_info->addr, + smc->connect_info->alen, smc->connect_info->flags); + if (smc->clcsock->sk->sk_err) { + smc->sk.sk_err = smc->clcsock->sk->sk_err; + goto out; + } + if (rc < 0) { + smc->sk.sk_err = -rc; + goto out; + } + + rc = __smc_connect(smc); + if (rc < 0) + smc->sk.sk_err = -rc; + +out: + smc->sk.sk_state_change(&smc->sk); + kfree(smc->connect_info); + smc->connect_info = NULL; + release_sock(&smc->sk); +} + static int smc_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { @@ -605,15 +642,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, smc_copy_sock_settings_to_clc(smc); tcp_sk(smc->clcsock->sk)->syn_smc = 1; - rc = kernel_connect(smc->clcsock, addr, alen, flags); - if (rc) - goto out; + if (flags & O_NONBLOCK) { + if (smc->connect_info) { + rc = -EALREADY; + goto out; + } + smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL); + if (!smc->connect_info) { + rc = -ENOMEM; + goto out; + } + smc->connect_info->alen = alen; + smc->connect_info->flags = flags ^ O_NONBLOCK; + memcpy(&smc->connect_info->addr, addr, alen); + schedule_work(&smc->connect_work); + rc = -EINPROGRESS; + } else { + rc = kernel_connect(smc->clcsock, addr, alen, flags); + if (rc) + goto out; - rc = __smc_connect(smc); - if (rc < 0) - goto out; - else - rc = 0; /* success cases including fallback */ + rc = __smc_connect(smc); + if (rc < 0) + goto out; + else + rc = 0; /* success cases including fallback */ + } out: release_sock(sk); @@ -1278,34 +1332,17 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) struct sock *sk = sock->sk; __poll_t mask = 0; struct smc_sock *smc; - int rc; if (!sk) return EPOLLNVAL; smc = smc_sk(sock->sk); - sock_hold(sk); - lock_sock(sk); if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ - release_sock(sk); mask = smc->clcsock->ops->poll_mask(smc->clcsock, events); - lock_sock(sk); sk->sk_err = smc->clcsock->sk->sk_err; - if (sk->sk_err) { + if (sk->sk_err) mask |= EPOLLERR; - } else { - /* if non-blocking connect finished ... */ - if (sk->sk_state == SMC_INIT && - mask & EPOLLOUT && - smc->clcsock->sk->sk_state != TCP_CLOSE) { - rc = __smc_connect(smc); - if (rc < 0) - mask |= EPOLLERR; - /* success cases including fallback */ - mask |= EPOLLOUT | EPOLLWRNORM; - } - } } else { if (sk->sk_err) mask |= EPOLLERR; @@ -1334,8 +1371,6 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) mask |= EPOLLPRI; } - release_sock(sk); - sock_put(sk); return mask; } diff --git a/net/smc/smc.h b/net/smc/smc.h index 51ae1f10d81a..d7ca26570482 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -187,11 +187,19 @@ struct smc_connection { struct work_struct close_work; /* peer sent some closing */ }; +struct smc_connect_info { + int flags; + int alen; + struct sockaddr addr; +}; + struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ + struct smc_connect_info *connect_info; /* connect address & flags */ + struct work_struct connect_work; /* handle non-blocking connect*/ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ struct list_head accept_q; /* sockets to be accepted */ From 4205c88eaf17b5f3ee30032d68df55cd5d9077a1 Mon Sep 17 00:00:00 2001 From: Jose Abreu Date: Wed, 27 Jun 2018 15:03:20 +0100 Subject: [PATCH 093/382] net: stmmac: Set DMA buffer size in HW This is clearly a bug. We need to set the DMA buffer size in the HW otherwise corruption can occur when receiving packets. This is probably not occuring because of small MTU values and because HW has a default value internally (which currently is bigger than default buffer size). Signed-off-by: Jose Abreu Cc: David S. Miller Cc: Joao Pinto Cc: Vitor Soares Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 12 ++++++++++++ drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h | 2 ++ drivers/net/ethernet/stmicro/stmmac/hwif.h | 3 +++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 ++ 4 files changed, 19 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index d37f17ca62fe..65bc3556bd8f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -407,6 +407,16 @@ static void dwmac4_enable_tso(void __iomem *ioaddr, bool en, u32 chan) } } +static void dwmac4_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan) +{ + u32 value = readl(ioaddr + DMA_CHAN_RX_CONTROL(chan)); + + value &= ~DMA_RBSZ_MASK; + value |= (bfsize << DMA_RBSZ_SHIFT) & DMA_RBSZ_MASK; + + writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan)); +} + const struct stmmac_dma_ops dwmac4_dma_ops = { .reset = dwmac4_dma_reset, .init = dwmac4_dma_init, @@ -431,6 +441,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = { .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr, .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr, .enable_tso = dwmac4_enable_tso, + .set_bfsize = dwmac4_set_bfsize, }; const struct stmmac_dma_ops dwmac410_dma_ops = { @@ -457,4 +468,5 @@ const struct stmmac_dma_ops dwmac410_dma_ops = { .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr, .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr, .enable_tso = dwmac4_enable_tso, + .set_bfsize = dwmac4_set_bfsize, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h index c63c1fe3f26b..22a4a6dbb1a4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h @@ -120,6 +120,8 @@ /* DMA Rx Channel X Control register defines */ #define DMA_CONTROL_SR BIT(0) +#define DMA_RBSZ_MASK GENMASK(14, 1) +#define DMA_RBSZ_SHIFT 1 /* Interrupt status per channel */ #define DMA_CHAN_STATUS_REB GENMASK(21, 19) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index e44e7b26ce82..fe8b536b13f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -183,6 +183,7 @@ struct stmmac_dma_ops { void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan); + void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan); }; #define stmmac_reset(__priv, __args...) \ @@ -235,6 +236,8 @@ struct stmmac_dma_ops { stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args) #define stmmac_enable_tso(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, enable_tso, __args) +#define stmmac_set_dma_bfsize(__priv, __args...) \ + stmmac_do_void_callback(__priv, dma, set_bfsize, __args) struct mac_device_info; struct net_device; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index cba46b62a1cd..60f59abab009 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1804,6 +1804,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, rxfifosz, qmode); + stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz, + chan); } for (chan = 0; chan < tx_channels_count; chan++) { From 5a267832c2ec47b2dad0fdb291a96bb5b8869315 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 22 Jun 2018 10:55:45 -0700 Subject: [PATCH 094/382] MIPS: Call dump_stack() from show_regs() The generic nmi_cpu_backtrace() function calls show_regs() when a struct pt_regs is available, and dump_stack() otherwise. If we were to make use of the generic nmi_cpu_backtrace() with MIPS' current implementation of show_regs() this would mean that we see only register data with no accompanying stack information, in contrast with our current implementation which calls dump_stack() regardless of whether register state is available. In preparation for making use of the generic nmi_cpu_backtrace() to implement arch_trigger_cpumask_backtrace(), have our implementation of show_regs() call dump_stack() and drop the explicit dump_stack() call in arch_dump_stack() which is invoked by arch_trigger_cpumask_backtrace(). This will allow the output we produce to remain the same after a later patch switches to using nmi_cpu_backtrace(). It may mean that we produce extra stack output in other uses of show_regs(), but this: 1) Seems harmless. 2) Is good for consistency between arch_trigger_cpumask_backtrace() and other users of show_regs(). 3) Matches the behaviour of the ARM & PowerPC architectures. Marked for stable back to v4.9 as a prerequisite of the following patch "MIPS: Call dump_stack() from show_regs()". Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19596/ Cc: James Hogan Cc: Ralf Baechle Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ --- arch/mips/kernel/process.c | 4 ++-- arch/mips/kernel/traps.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8d85046adcc8..d4cfeb931382 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -663,8 +663,8 @@ static void arch_dump_stack(void *info) if (regs) show_regs(regs); - - dump_stack(); + else + dump_stack(); } void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index d67fa74622ee..8d505a21396e 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -351,6 +351,7 @@ static void __show_regs(const struct pt_regs *regs) void show_regs(struct pt_regs *regs) { __show_regs((struct pt_regs *)regs); + dump_stack(); } void show_registers(struct pt_regs *regs) From 363a3d3fb7d478d7dd49b8c6294436b8ba5984cc Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 11 Jun 2018 16:46:40 +0800 Subject: [PATCH 095/382] drm/amd/powerplay: correct vega12 thermal support as true Thermal support is enabled on vega12. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 782e2098824d..6955c359b706 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -81,6 +81,7 @@ static void vega12_set_default_registry_data(struct pp_hwmgr *hwmgr) data->registry_data.disallowed_features = 0x0; data->registry_data.od_state_in_dc_support = 0; + data->registry_data.thermal_support = 1; data->registry_data.skip_baco_hardware = 0; data->registry_data.log_avfs_param = 0; From 36953aa94a8fcc884b970d63d3e4c650257dd03e Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 28 May 2018 08:59:16 +0800 Subject: [PATCH 096/382] drm/amd/powerplay: correct vega12 bootup values settings The vbios firmware structure changed between v3_1 and v3_2. So, the code to setup bootup values needs different paths based on header version. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/powerplay/hwmgr/ppatomfwctrl.c | 94 ++++++++++++++++--- .../drm/amd/powerplay/hwmgr/ppatomfwctrl.h | 3 + .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 3 + .../drm/amd/powerplay/hwmgr/vega12_hwmgr.h | 3 + 4 files changed, 91 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c index 5325661fedff..aa2faffef034 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c @@ -512,14 +512,82 @@ int pp_atomfwctrl_get_clk_information_by_clkid(struct pp_hwmgr *hwmgr, BIOS_CLKI return 0; } +static void pp_atomfwctrl_copy_vbios_bootup_values_3_2(struct pp_hwmgr *hwmgr, + struct pp_atomfwctrl_bios_boot_up_values *boot_values, + struct atom_firmware_info_v3_2 *fw_info) +{ + uint32_t frequency = 0; + + boot_values->ulRevision = fw_info->firmware_revision; + boot_values->ulGfxClk = fw_info->bootup_sclk_in10khz; + boot_values->ulUClk = fw_info->bootup_mclk_in10khz; + boot_values->usVddc = fw_info->bootup_vddc_mv; + boot_values->usVddci = fw_info->bootup_vddci_mv; + boot_values->usMvddc = fw_info->bootup_mvddc_mv; + boot_values->usVddGfx = fw_info->bootup_vddgfx_mv; + boot_values->ucCoolingID = fw_info->coolingsolution_id; + boot_values->ulSocClk = 0; + boot_values->ulDCEFClk = 0; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_SOCCLK_ID, &frequency)) + boot_values->ulSocClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCEFCLK_ID, &frequency)) + boot_values->ulDCEFClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_ECLK_ID, &frequency)) + boot_values->ulEClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_VCLK_ID, &frequency)) + boot_values->ulVClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU11_SYSPLL0_DCLK_ID, &frequency)) + boot_values->ulDClk = frequency; +} + +static void pp_atomfwctrl_copy_vbios_bootup_values_3_1(struct pp_hwmgr *hwmgr, + struct pp_atomfwctrl_bios_boot_up_values *boot_values, + struct atom_firmware_info_v3_1 *fw_info) +{ + uint32_t frequency = 0; + + boot_values->ulRevision = fw_info->firmware_revision; + boot_values->ulGfxClk = fw_info->bootup_sclk_in10khz; + boot_values->ulUClk = fw_info->bootup_mclk_in10khz; + boot_values->usVddc = fw_info->bootup_vddc_mv; + boot_values->usVddci = fw_info->bootup_vddci_mv; + boot_values->usMvddc = fw_info->bootup_mvddc_mv; + boot_values->usVddGfx = fw_info->bootup_vddgfx_mv; + boot_values->ucCoolingID = fw_info->coolingsolution_id; + boot_values->ulSocClk = 0; + boot_values->ulDCEFClk = 0; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_SOCCLK_ID, &frequency)) + boot_values->ulSocClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCEFCLK_ID, &frequency)) + boot_values->ulDCEFClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_ECLK_ID, &frequency)) + boot_values->ulEClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_VCLK_ID, &frequency)) + boot_values->ulVClk = frequency; + + if (!pp_atomfwctrl_get_clk_information_by_clkid(hwmgr, SMU9_SYSPLL0_DCLK_ID, &frequency)) + boot_values->ulDClk = frequency; +} + int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, struct pp_atomfwctrl_bios_boot_up_values *boot_values) { - struct atom_firmware_info_v3_1 *info = NULL; + struct atom_firmware_info_v3_2 *fwinfo_3_2; + struct atom_firmware_info_v3_1 *fwinfo_3_1; + struct atom_common_table_header *info = NULL; uint16_t ix; ix = GetIndexIntoMasterDataTable(firmwareinfo); - info = (struct atom_firmware_info_v3_1 *) + info = (struct atom_common_table_header *) smu_atom_get_data_table(hwmgr->adev, ix, NULL, NULL, NULL); @@ -528,16 +596,18 @@ int pp_atomfwctrl_get_vbios_bootup_values(struct pp_hwmgr *hwmgr, return -EINVAL; } - boot_values->ulRevision = info->firmware_revision; - boot_values->ulGfxClk = info->bootup_sclk_in10khz; - boot_values->ulUClk = info->bootup_mclk_in10khz; - boot_values->usVddc = info->bootup_vddc_mv; - boot_values->usVddci = info->bootup_vddci_mv; - boot_values->usMvddc = info->bootup_mvddc_mv; - boot_values->usVddGfx = info->bootup_vddgfx_mv; - boot_values->ucCoolingID = info->coolingsolution_id; - boot_values->ulSocClk = 0; - boot_values->ulDCEFClk = 0; + if ((info->format_revision == 3) && (info->content_revision == 2)) { + fwinfo_3_2 = (struct atom_firmware_info_v3_2 *)info; + pp_atomfwctrl_copy_vbios_bootup_values_3_2(hwmgr, + boot_values, fwinfo_3_2); + } else if ((info->format_revision == 3) && (info->content_revision == 1)) { + fwinfo_3_1 = (struct atom_firmware_info_v3_1 *)info; + pp_atomfwctrl_copy_vbios_bootup_values_3_1(hwmgr, + boot_values, fwinfo_3_1); + } else { + pr_info("Fw info table revision does not match!"); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h index fe10aa4db5e6..745bd3809549 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h @@ -136,6 +136,9 @@ struct pp_atomfwctrl_bios_boot_up_values { uint32_t ulUClk; uint32_t ulSocClk; uint32_t ulDCEFClk; + uint32_t ulEClk; + uint32_t ulVClk; + uint32_t ulDClk; uint16_t usVddc; uint16_t usVddci; uint16_t usMvddc; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c index 6955c359b706..c98e5de777cd 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c @@ -804,6 +804,9 @@ static int vega12_init_smc_table(struct pp_hwmgr *hwmgr) data->vbios_boot_state.soc_clock = boot_up_values.ulSocClk; data->vbios_boot_state.dcef_clock = boot_up_values.ulDCEFClk; data->vbios_boot_state.uc_cooling_id = boot_up_values.ucCoolingID; + data->vbios_boot_state.eclock = boot_up_values.ulEClk; + data->vbios_boot_state.dclock = boot_up_values.ulDClk; + data->vbios_boot_state.vclock = boot_up_values.ulVClk; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetMinDeepSleepDcefclk, (uint32_t)(data->vbios_boot_state.dcef_clock / 100)); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h index e81ded1ec198..49b38df8c7f2 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.h @@ -167,6 +167,9 @@ struct vega12_vbios_boot_state { uint32_t mem_clock; uint32_t soc_clock; uint32_t dcef_clock; + uint32_t eclock; + uint32_t dclock; + uint32_t vclock; }; #define DPMTABLE_OD_UPDATE_SCLK 0x00000001 From 1513b1c93f825f74751897258e0bdde64fd3bb6a Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 11 Jun 2018 15:20:39 +0800 Subject: [PATCH 097/382] drm/amd/powerplay: smc_dpm_info structure change A new member Vr2_I2C_address is added. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/atomfirmware.h | 5 ++++- drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c | 2 ++ drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h | 2 ++ drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c | 2 ++ drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h | 5 ++++- 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 092d800b703a..33b4de4ad66e 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1433,7 +1433,10 @@ struct atom_smc_dpm_info_v4_1 uint8_t acggfxclkspreadpercent; uint16_t acggfxclkspreadfreq; - uint32_t boardreserved[10]; + uint8_t Vr2_I2C_address; + uint8_t padding_vr2[3]; + + uint32_t boardreserved[9]; }; /* diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c index aa2faffef034..d27c1c9df286 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.c @@ -699,5 +699,7 @@ int pp_atomfwctrl_get_smc_dpm_information(struct pp_hwmgr *hwmgr, param->acggfxclkspreadpercent = info->acggfxclkspreadpercent; param->acggfxclkspreadfreq = info->acggfxclkspreadfreq; + param->Vr2_I2C_address = info->Vr2_I2C_address; + return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h index 745bd3809549..22e21668c93a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomfwctrl.h @@ -210,6 +210,8 @@ struct pp_atomfwctrl_smc_dpm_parameters uint8_t acggfxclkspreadenabled; uint8_t acggfxclkspreadpercent; uint16_t acggfxclkspreadfreq; + + uint8_t Vr2_I2C_address; }; int pp_atomfwctrl_get_gpu_pll_dividers_vega10(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c index 888ddca902d8..29914700ee82 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_processpptables.c @@ -230,6 +230,8 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable ppsmc_pptable->AcgThresholdFreqLow = 0xFFFF; } + ppsmc_pptable->Vr2_I2C_address = smc_dpm_table.Vr2_I2C_address; + return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h index 2f8a3b983cce..b08526fd1619 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h +++ b/drivers/gpu/drm/amd/powerplay/inc/vega12/smu9_driver_if.h @@ -499,7 +499,10 @@ typedef struct { uint8_t AcgGfxclkSpreadPercent; uint16_t AcgGfxclkSpreadFreq; - uint32_t BoardReserved[10]; + uint8_t Vr2_I2C_address; + uint8_t padding_vr2[3]; + + uint32_t BoardReserved[9]; uint32_t MmHubPadding[7]; From bda3153998f3eb2cafa4a6311971143628eacdbc Mon Sep 17 00:00:00 2001 From: BingJing Chang Date: Thu, 28 Jun 2018 18:40:11 +0800 Subject: [PATCH 098/382] md/raid10: fix that replacement cannot complete recovery after reassemble During assemble, the spare marked for replacement is not checked. conf->fullsync cannot be updated to be 1. As a result, recovery will treat it as a clean array. All recovering sectors are skipped. Original device is replaced with the not-recovered spare. mdadm -C /dev/md0 -l10 -n4 -pn2 /dev/loop[0123] mdadm /dev/md0 -a /dev/loop4 mdadm /dev/md0 --replace /dev/loop0 mdadm -S /dev/md0 # stop array during recovery mdadm -A /dev/md0 /dev/loop[01234] After reassemble, you can see recovery go on, but it completes immediately. In fact, recovery is not actually processed. To solve this problem, we just add the missing logics for replacment spares. (In raid1.c or raid5.c, they have already been checked.) Reported-by: Alex Chen Reviewed-by: Alex Wu Reviewed-by: Chung-Chiang Cheng Signed-off-by: BingJing Chang Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 478cf446827f..35bd3a62451b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3893,6 +3893,13 @@ static int raid10_run(struct mddev *mddev) disk->rdev->saved_raid_disk < 0) conf->fullsync = 1; } + + if (disk->replacement && + !test_bit(In_sync, &disk->replacement->flags) && + disk->replacement->saved_raid_disk < 0) { + conf->fullsync = 1; + } + disk->recovery_disabled = mddev->recovery_disabled - 1; } From 2cd5fe22d9a45cdf11c62bbe8db3ce9101207510 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 25 Jun 2018 21:09:04 -0400 Subject: [PATCH 099/382] drm/amdgpu: Make struct amdgpu_atif private to amdgpu_acpi.c Currently, there is nothing in amdgpu that actually uses these structs other than amdgpu_acpi.c. Additionally, since we're about to start saving the correct ACPI handle to use for calling ATIF in this struct this saves us from having to handle making sure that the acpi_handle (and by proxy, the type definition for acpi_handle and all of the other acpi headers) doesn't need to be included within the amdgpu_drv struct itself. This follows the example set by amdgpu_atpx_handler.c. Signed-off-by: Lyude Paul Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 40 +----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 54 ++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a59c07590cee..7df5d3d11aff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -190,6 +190,7 @@ struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; +struct amdgpu_atif; enum amdgpu_cp_irq { AMDGPU_CP_IRQ_GFX_EOP = 0, @@ -1269,43 +1270,6 @@ struct amdgpu_vram_scratch { /* * ACPI */ -struct amdgpu_atif_notification_cfg { - bool enabled; - int command_code; -}; - -struct amdgpu_atif_notifications { - bool display_switch; - bool expansion_mode_change; - bool thermal_state; - bool forced_power_state; - bool system_power_state; - bool display_conf_change; - bool px_gfx_switch; - bool brightness_change; - bool dgpu_display_event; -}; - -struct amdgpu_atif_functions { - bool system_params; - bool sbios_requests; - bool select_active_disp; - bool lid_state; - bool get_tv_standard; - bool set_tv_standard; - bool get_panel_expansion_mode; - bool set_panel_expansion_mode; - bool temperature_change; - bool graphics_device_types; -}; - -struct amdgpu_atif { - struct amdgpu_atif_notifications notifications; - struct amdgpu_atif_functions functions; - struct amdgpu_atif_notification_cfg notification_cfg; - struct amdgpu_encoder *encoder_for_bl; -}; - struct amdgpu_atcs_functions { bool get_ext_state; bool pcie_perf_req; @@ -1466,7 +1430,7 @@ struct amdgpu_device { #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; #endif - struct amdgpu_atif atif; + struct amdgpu_atif *atif; struct amdgpu_atcs atcs; struct mutex srbm_mutex; /* GRBM index mutex. Protects concurrent access to GRBM index */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 8fa850a070e0..22c7e8ec0b9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -34,6 +34,43 @@ #include "amd_acpi.h" #include "atom.h" +struct amdgpu_atif_notification_cfg { + bool enabled; + int command_code; +}; + +struct amdgpu_atif_notifications { + bool display_switch; + bool expansion_mode_change; + bool thermal_state; + bool forced_power_state; + bool system_power_state; + bool display_conf_change; + bool px_gfx_switch; + bool brightness_change; + bool dgpu_display_event; +}; + +struct amdgpu_atif_functions { + bool system_params; + bool sbios_requests; + bool select_active_disp; + bool lid_state; + bool get_tv_standard; + bool set_tv_standard; + bool get_panel_expansion_mode; + bool set_panel_expansion_mode; + bool temperature_change; + bool graphics_device_types; +}; + +struct amdgpu_atif { + struct amdgpu_atif_notifications notifications; + struct amdgpu_atif_functions functions; + struct amdgpu_atif_notification_cfg notification_cfg; + struct amdgpu_encoder *encoder_for_bl; +}; + /* Call the ATIF method */ /** @@ -292,7 +329,7 @@ static int amdgpu_atif_get_sbios_requests(acpi_handle handle, static int amdgpu_atif_handler(struct amdgpu_device *adev, struct acpi_bus_event *event) { - struct amdgpu_atif *atif = &adev->atif; + struct amdgpu_atif *atif = adev->atif; struct atif_sbios_requests req; acpi_handle handle; int count; @@ -303,7 +340,8 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0) return NOTIFY_DONE; - if (!atif->notification_cfg.enabled || + if (!atif || + !atif->notification_cfg.enabled || event->type != atif->notification_cfg.command_code) /* Not our event */ return NOTIFY_DONE; @@ -642,7 +680,7 @@ static int amdgpu_acpi_event(struct notifier_block *nb, int amdgpu_acpi_init(struct amdgpu_device *adev) { acpi_handle handle; - struct amdgpu_atif *atif = &adev->atif; + struct amdgpu_atif *atif; struct amdgpu_atcs *atcs = &adev->atcs; int ret; @@ -659,11 +697,19 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } /* Call the ATIF method */ + atif = kzalloc(sizeof(*atif), GFP_KERNEL); + if (!atif) { + DRM_WARN("Not enough memory to initialize ATIF\n"); + goto out; + } + ret = amdgpu_atif_verify_interface(handle, atif); if (ret) { DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret); + kfree(atif); goto out; } + adev->atif = atif; if (atif->notifications.brightness_change) { struct drm_encoder *tmp; @@ -720,4 +766,6 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) void amdgpu_acpi_fini(struct amdgpu_device *adev) { unregister_acpi_notifier(&adev->acpi_nb); + if (adev->atif) + kfree(adev->atif); } From 4aa5d5eb82bb237d0bb3a38b2a7555054d018081 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 25 Jun 2018 21:09:06 -0400 Subject: [PATCH 100/382] drm/amdgpu: Add amdgpu_atpx_get_dhandle() Since it seems that some vendors are storing the ATIF ACPI methods under the same handle that ATPX lives under instead of the device's own handle, we're going to need to be able to retrieve this handle later so we can probe for ATIF there. Signed-off-by: Lyude Paul Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7df5d3d11aff..7dcbac8af9a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1858,6 +1858,12 @@ static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; static inline bool amdgpu_has_atpx(void) { return false; } #endif +#if defined(CONFIG_VGA_SWITCHEROO) && defined(CONFIG_ACPI) +void *amdgpu_atpx_get_dhandle(void); +#else +static inline void *amdgpu_atpx_get_dhandle(void) { return NULL; } +#endif + /* * KMS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index daa06e7c5bb7..9ab89371d9e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -90,6 +90,12 @@ bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays; } +#if defined(CONFIG_ACPI) +void *amdgpu_atpx_get_dhandle(void) { + return amdgpu_atpx_priv.dhandle; +} +#endif + /** * amdgpu_atpx_call - call an ATPX method * From f9ff68521a5541e1fdaeb0ef11871c035b30e409 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Mon, 25 Jun 2018 21:09:07 -0400 Subject: [PATCH 101/382] drm/amdgpu: Dynamically probe for ATIF handle (v2) The other day I was testing one of the HP laptops at my office with an i915/amdgpu hybrid setup and noticed that hotplugging was non-functional on almost all of the display outputs. I eventually discovered that all of the external outputs were connected to the amdgpu device instead of i915, and that the hotplugs weren't being detected so long as the GPU was in runtime suspend. After some talking with folks at AMD, I learned that amdgpu is actually supposed to support hotplug detection in runtime suspend so long as the OEM has implemented it properly in the firmware. On this HP ZBook 15 G4 (the machine in question), amdgpu wasn't managing to find the ATIF handle at all despite the fact that I could see acpi events being sent in response to any hotplugging. After going through dumps of the firmware, I discovered that this machine did in fact support ATIF, but that it's ATIF method lived in an entirely different namespace than this device's handle (the device handle was \_SB_.PCI0.PEG0.PEGP, but ATIF lives in ATPX's handle at \_SB_.PCI0.GFX0). So, fix this by probing ATPX's ACPI parent's namespace if we can't find ATIF elsewhere, along with storing a pointer to the proper handle to use for ATIF and using that instead of the device's handle. This fixes HPD detection while in runtime suspend for this ZBook! v2: Update the comment to reflect how the namespaces are arranged based on the system configuration. (Alex) Signed-off-by: Lyude Paul Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 79 +++++++++++++++++------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 22c7e8ec0b9a..0d8c3fc6eace 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -65,6 +65,8 @@ struct amdgpu_atif_functions { }; struct amdgpu_atif { + acpi_handle handle; + struct amdgpu_atif_notifications notifications; struct amdgpu_atif_functions functions; struct amdgpu_atif_notification_cfg notification_cfg; @@ -83,8 +85,9 @@ struct amdgpu_atif { * Executes the requested ATIF function (all asics). * Returns a pointer to the acpi output buffer. */ -static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function, - struct acpi_buffer *params) +static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, + int function, + struct acpi_buffer *params) { acpi_status status; union acpi_object atif_arg_elements[2]; @@ -107,7 +110,8 @@ static union acpi_object *amdgpu_atif_call(acpi_handle handle, int function, atif_arg_elements[1].integer.value = 0; } - status = acpi_evaluate_object(handle, "ATIF", &atif_arg, &buffer); + status = acpi_evaluate_object(atif->handle, NULL, &atif_arg, + &buffer); /* Fail only if calling the method fails and ATIF is supported */ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { @@ -178,15 +182,14 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas * (all asics). * returns 0 on success, error on failure. */ -static int amdgpu_atif_verify_interface(acpi_handle handle, - struct amdgpu_atif *atif) +static int amdgpu_atif_verify_interface(struct amdgpu_atif *atif) { union acpi_object *info; struct atif_verify_interface output; size_t size; int err = 0; - info = amdgpu_atif_call(handle, ATIF_FUNCTION_VERIFY_INTERFACE, NULL); + info = amdgpu_atif_call(atif, ATIF_FUNCTION_VERIFY_INTERFACE, NULL); if (!info) return -EIO; @@ -213,6 +216,35 @@ static int amdgpu_atif_verify_interface(acpi_handle handle, return err; } +static acpi_handle amdgpu_atif_probe_handle(acpi_handle dhandle) +{ + acpi_handle handle = NULL; + char acpi_method_name[255] = { 0 }; + struct acpi_buffer buffer = { sizeof(acpi_method_name), acpi_method_name }; + acpi_status status; + + /* For PX/HG systems, ATIF and ATPX are in the iGPU's namespace, on dGPU only + * systems, ATIF is in the dGPU's namespace. + */ + status = acpi_get_handle(dhandle, "ATIF", &handle); + if (ACPI_SUCCESS(status)) + goto out; + + if (amdgpu_has_atpx()) { + status = acpi_get_handle(amdgpu_atpx_get_dhandle(), "ATIF", + &handle); + if (ACPI_SUCCESS(status)) + goto out; + } + + DRM_DEBUG_DRIVER("No ATIF handle found\n"); + return NULL; +out: + acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer); + DRM_DEBUG_DRIVER("Found ATIF handle %s\n", acpi_method_name); + return handle; +} + /** * amdgpu_atif_get_notification_params - determine notify configuration * @@ -225,15 +257,16 @@ static int amdgpu_atif_verify_interface(acpi_handle handle, * where n is specified in the result if a notifier is used. * Returns 0 on success, error on failure. */ -static int amdgpu_atif_get_notification_params(acpi_handle handle, - struct amdgpu_atif_notification_cfg *n) +static int amdgpu_atif_get_notification_params(struct amdgpu_atif *atif) { union acpi_object *info; + struct amdgpu_atif_notification_cfg *n = &atif->notification_cfg; struct atif_system_params params; size_t size; int err = 0; - info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS, NULL); + info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_PARAMETERS, + NULL); if (!info) { err = -EIO; goto out; @@ -287,14 +320,15 @@ static int amdgpu_atif_get_notification_params(acpi_handle handle, * (all asics). * Returns 0 on success, error on failure. */ -static int amdgpu_atif_get_sbios_requests(acpi_handle handle, - struct atif_sbios_requests *req) +static int amdgpu_atif_get_sbios_requests(struct amdgpu_atif *atif, + struct atif_sbios_requests *req) { union acpi_object *info; size_t size; int count = 0; - info = amdgpu_atif_call(handle, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS, NULL); + info = amdgpu_atif_call(atif, ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS, + NULL); if (!info) return -EIO; @@ -327,11 +361,10 @@ static int amdgpu_atif_get_sbios_requests(acpi_handle handle, * Returns NOTIFY code */ static int amdgpu_atif_handler(struct amdgpu_device *adev, - struct acpi_bus_event *event) + struct acpi_bus_event *event) { struct amdgpu_atif *atif = adev->atif; struct atif_sbios_requests req; - acpi_handle handle; int count; DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n", @@ -347,8 +380,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, return NOTIFY_DONE; /* Check pending SBIOS requests */ - handle = ACPI_HANDLE(&adev->pdev->dev); - count = amdgpu_atif_get_sbios_requests(handle, &req); + count = amdgpu_atif_get_sbios_requests(atif, &req); if (count <= 0) return NOTIFY_DONE; @@ -679,7 +711,7 @@ static int amdgpu_acpi_event(struct notifier_block *nb, */ int amdgpu_acpi_init(struct amdgpu_device *adev) { - acpi_handle handle; + acpi_handle handle, atif_handle; struct amdgpu_atif *atif; struct amdgpu_atcs *atcs = &adev->atcs; int ret; @@ -696,14 +728,20 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) DRM_DEBUG_DRIVER("Call to ATCS verify_interface failed: %d\n", ret); } - /* Call the ATIF method */ + /* Probe for ATIF, and initialize it if found */ + atif_handle = amdgpu_atif_probe_handle(handle); + if (!atif_handle) + goto out; + atif = kzalloc(sizeof(*atif), GFP_KERNEL); if (!atif) { DRM_WARN("Not enough memory to initialize ATIF\n"); goto out; } + atif->handle = atif_handle; - ret = amdgpu_atif_verify_interface(handle, atif); + /* Call the ATIF method */ + ret = amdgpu_atif_verify_interface(atif); if (ret) { DRM_DEBUG_DRIVER("Call to ATIF verify_interface failed: %d\n", ret); kfree(atif); @@ -739,8 +777,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } if (atif->functions.system_params) { - ret = amdgpu_atif_get_notification_params(handle, - &atif->notification_cfg); + ret = amdgpu_atif_get_notification_params(atif); if (ret) { DRM_DEBUG_DRIVER("Call to GET_SYSTEM_PARAMS failed: %d\n", ret); From b63e132b6433a41cf311e8bc382d33fd2b73b505 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 22 Jun 2018 10:55:46 -0700 Subject: [PATCH 102/382] MIPS: Use async IPIs for arch_trigger_cpumask_backtrace() The current MIPS implementation of arch_trigger_cpumask_backtrace() is broken because it attempts to use synchronous IPIs despite the fact that it may be run with interrupts disabled. This means that when arch_trigger_cpumask_backtrace() is invoked, for example by the RCU CPU stall watchdog, we may: - Deadlock due to use of synchronous IPIs with interrupts disabled, causing the CPU that's attempting to generate the backtrace output to hang itself. - Not succeed in generating the desired output from remote CPUs. - Produce warnings about this from smp_call_function_many(), for example: [42760.526910] INFO: rcu_sched detected stalls on CPUs/tasks: [42760.535755] 0-...!: (1 GPs behind) idle=ade/140000000000000/0 softirq=526944/526945 fqs=0 [42760.547874] 1-...!: (0 ticks this GP) idle=e4a/140000000000000/0 softirq=547885/547885 fqs=0 [42760.559869] (detected by 2, t=2162 jiffies, g=266689, c=266688, q=33) [42760.568927] ------------[ cut here ]------------ [42760.576146] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:416 smp_call_function_many+0x88/0x20c [42760.587839] Modules linked in: [42760.593152] CPU: 2 PID: 1216 Comm: sh Not tainted 4.15.4-00373-gee058bb4d0c2 #2 [42760.603767] Stack : 8e09bd20 8e09bd20 8e09bd20 fffffff0 00000007 00000006 00000000 8e09bca8 [42760.616937] 95b2b379 95b2b379 807a0080 00000007 81944518 0000018a 00000032 00000000 [42760.630095] 00000000 00000030 80000000 00000000 806eca74 00000009 8017e2b8 000001a0 [42760.643169] 00000000 00000002 00000000 8e09baa4 00000008 808b8008 86d69080 8e09bca0 [42760.656282] 8e09ad50 805e20aa 00000000 00000000 00000000 8017e2b8 00000009 801070ca [42760.669424] ... [42760.673919] Call Trace: [42760.678672] [<27fde568>] show_stack+0x70/0xf0 [42760.685417] [<84751641>] dump_stack+0xaa/0xd0 [42760.692188] [<699d671c>] __warn+0x80/0x92 [42760.698549] [<68915d41>] warn_slowpath_null+0x28/0x36 [42760.705912] [] smp_call_function_many+0x88/0x20c [42760.713696] [<6bbdfc2a>] arch_trigger_cpumask_backtrace+0x30/0x4a [42760.722216] [] rcu_dump_cpu_stacks+0x6a/0x98 [42760.729580] [<796e7629>] rcu_check_callbacks+0x672/0x6ac [42760.737476] [<059b3b43>] update_process_times+0x18/0x34 [42760.744981] [<6eb94941>] tick_sched_handle.isra.5+0x26/0x38 [42760.752793] [<478d3d70>] tick_sched_timer+0x1c/0x50 [42760.759882] [] __hrtimer_run_queues+0xc6/0x226 [42760.767418] [] hrtimer_interrupt+0x88/0x19a [42760.775031] [<6765a19e>] gic_compare_interrupt+0x2e/0x3a [42760.782761] [<0558bf5f>] handle_percpu_devid_irq+0x78/0x168 [42760.790795] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.798117] [<1b6d462c>] gic_handle_local_int+0x38/0x86 [42760.805545] [] gic_irq_dispatch+0xa/0x14 [42760.812534] [<90c11ba2>] generic_handle_irq+0x1e/0x2c [42760.820086] [] do_IRQ+0x16/0x20 [42760.826274] [<9aef3ce6>] plat_irq_dispatch+0x62/0x94 [42760.833458] [<6a94b53c>] except_vec_vi_end+0x70/0x78 [42760.840655] [<22284043>] smp_call_function_many+0x1ba/0x20c [42760.848501] [<54022b58>] smp_call_function+0x1e/0x2c [42760.855693] [] flush_tlb_mm+0x2a/0x98 [42760.862730] [<0844cdd0>] tlb_flush_mmu+0x1c/0x44 [42760.869628] [] arch_tlb_finish_mmu+0x26/0x3e [42760.877021] [<1aeaaf74>] tlb_finish_mmu+0x18/0x66 [42760.883907] [] exit_mmap+0x76/0xea [42760.890428] [] mmput+0x80/0x11a [42760.896632] [] do_exit+0x1f4/0x80c [42760.903158] [] do_group_exit+0x20/0x7e [42760.909990] [<13fa8d54>] __wake_up_parent+0x0/0x1e [42760.917045] [<46cf89d0>] smp_call_function_many+0x1a2/0x20c [42760.924893] [<8c21a93b>] syscall_common+0x14/0x1c [42760.931765] ---[ end trace 02aa09da9dc52a60 ]--- [42760.938342] ------------[ cut here ]------------ [42760.945311] WARNING: CPU: 2 PID: 1216 at kernel/smp.c:291 smp_call_function_single+0xee/0xf8 ... This patch switches MIPS' arch_trigger_cpumask_backtrace() to use async IPIs & smp_call_function_single_async() in order to resolve this problem. We ensure use of the pre-allocated call_single_data_t structures is serialized by maintaining a cpumask indicating that they're busy, and refusing to attempt to send an IPI when a CPU's bit is set in this mask. This should only happen if a CPU hasn't responded to a previous backtrace IPI - ie. if it's hung - and we print a warning to the console in this case. I've marked this for stable branches as far back as v4.9, to which it applies cleanly. Strictly speaking the faulty MIPS implementation can be traced further back to commit 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") in v3.19, but kernel versions v3.19 through v4.8 will require further work to backport due to the rework performed in commit 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods"). Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19597/ Cc: James Hogan Cc: Ralf Baechle Cc: Huacai Chen Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.9+ Fixes: 856839b76836 ("MIPS: Add arch_trigger_all_cpu_backtrace() function") Fixes: 9a01c3ed5cdb ("nmi_backtrace: add more trigger_*_cpu_backtrace() methods") --- arch/mips/kernel/process.c | 45 +++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index d4cfeb931382..9670e70139fd 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -655,28 +656,42 @@ unsigned long arch_align_stack(unsigned long sp) return sp & ALMASK; } -static void arch_dump_stack(void *info) +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd); +static struct cpumask backtrace_csd_busy; + +static void handle_backtrace(void *info) { - struct pt_regs *regs; + nmi_cpu_backtrace(get_irq_regs()); + cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy); +} - regs = get_irq_regs(); +static void raise_backtrace(cpumask_t *mask) +{ + call_single_data_t *csd; + int cpu; - if (regs) - show_regs(regs); - else - dump_stack(); + for_each_cpu(cpu, mask) { + /* + * If we previously sent an IPI to the target CPU & it hasn't + * cleared its bit in the busy cpumask then it didn't handle + * our previous IPI & it's not safe for us to reuse the + * call_single_data_t. + */ + if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) { + pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n", + cpu); + continue; + } + + csd = &per_cpu(backtrace_csd, cpu); + csd->func = handle_backtrace; + smp_call_function_single_async(cpu, csd); + } } void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) { - long this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask) && !exclude_self) - dump_stack(); - - smp_call_function_many(mask, arch_dump_stack, NULL, 1); - - put_cpu(); + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace); } int mips_get_process_fp_mode(struct task_struct *task) From 3203c9010060806ff88c9989aeab4dc8d9a474dc Mon Sep 17 00:00:00 2001 From: Kleber Sacilotto de Souza Date: Wed, 27 Jun 2018 17:19:21 +0200 Subject: [PATCH 103/382] test_bpf: flag tests that cannot be jited on s390 Flag with FLAG_EXPECTED_FAIL the BPF_MAXINSNS tests that cannot be jited on s390 because they exceed BPF_SIZE_MAX and fail when CONFIG_BPF_JIT_ALWAYS_ON is set. Also set .expected_errcode to -ENOTSUPP so the tests pass in that case. Signed-off-by: Kleber Sacilotto de Souza Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- lib/test_bpf.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 60aedc879361..08d3d59dca17 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5282,21 +5282,31 @@ static struct bpf_test tests[] = { { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Ctx heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { }, { { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } }, .fill_helper = bpf_fill_maxinsns6, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Call heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 1, 0 }, { 10, 0 } }, .fill_helper = bpf_fill_maxinsns7, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Jump heavy test", @@ -5347,18 +5357,28 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: exec all MSH", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { 0xfa, 0xfb, 0xfc, 0xfd, }, { { 4, 0xababab83 } }, .fill_helper = bpf_fill_maxinsns13, + .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { }, { { 1, 0xbee } }, .fill_helper = bpf_fill_ld_abs_get_processor_id, + .expected_errcode = -ENOTSUPP, }, /* * LD_IND / LD_ABS on fragmented SKBs From 4c79579b44b1876444f4d04de31c1a37098a0350 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Jun 2018 16:21:18 -0700 Subject: [PATCH 104/382] bpf: Change bpf_fib_lookup to return lookup status For ACLs implemented using either FIB rules or FIB entries, the BPF program needs the FIB lookup status to be able to drop the packet. Since the bpf_fib_lookup API has not reached a released kernel yet, change the return code to contain an encoding of the FIB lookup result and return the nexthop device index in the params struct. In addition, inform the BPF program of any post FIB lookup reason as to why the packet needs to go up the stack. The fib result for unicast routes must have an egress device, so remove the check that it is non-NULL. Signed-off-by: David Ahern Signed-off-by: Daniel Borkmann --- include/uapi/linux/bpf.h | 28 ++++++++++--- net/core/filter.c | 86 ++++++++++++++++++++++++-------------- samples/bpf/xdp_fwd_kern.c | 8 ++-- 3 files changed, 81 insertions(+), 41 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 59b19b6a40d7..b7db3261c62d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1857,7 +1857,8 @@ union bpf_attr { * is resolved), the nexthop address is returned in ipv4_dst * or ipv6_dst based on family, smac is set to mac address of * egress device, dmac is set to nexthop mac address, rt_metric - * is set to metric from route (IPv4/IPv6 only). + * is set to metric from route (IPv4/IPv6 only), and ifindex + * is set to the device index of the nexthop from the FIB lookup. * * *plen* argument is the size of the passed in struct. * *flags* argument can be a combination of one or more of the @@ -1873,9 +1874,10 @@ union bpf_attr { * *ctx* is either **struct xdp_md** for XDP programs or * **struct sk_buff** tc cls_act programs. * Return - * Egress device index on success, 0 if packet needs to continue - * up the stack for further processing or a negative error in case - * of failure. + * * < 0 if any input argument is invalid + * * 0 on success (packet is forwarded, nexthop neighbor exists) + * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the + * * packet is not forwarded or needs assist from full stack * * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * Description @@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args { #define BPF_FIB_LOOKUP_DIRECT BIT(0) #define BPF_FIB_LOOKUP_OUTPUT BIT(1) +enum { + BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ + BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */ + BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */ + BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */ + BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */ + BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */ + BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ + BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ + BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ +}; + struct bpf_fib_lookup { /* input: network family for lookup (AF_INET, AF_INET6) * output: network family of egress nexthop @@ -2625,7 +2639,11 @@ struct bpf_fib_lookup { /* total length of packet from network header - used for MTU check */ __u16 tot_len; - __u32 ifindex; /* L3 device index for lookup */ + + /* input: L3 device index for lookup + * output: device index from FIB lookup + */ + __u32 ifindex; union { /* inputs to lookup */ diff --git a/net/core/filter.c b/net/core/filter.c index e7f12e9f598c..0ca6907d7efe 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, memcpy(params->smac, dev->dev_addr, ETH_ALEN); params->h_vlan_TCI = 0; params->h_vlan_proto = 0; + params->ifindex = dev->ifindex; - return dev->ifindex; + return 0; } #endif @@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, /* verify forwarding is enabled on this interface */ in_dev = __in_dev_get_rcu(dev); if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) - return 0; + return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { fl4.flowi4_iif = 1; @@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, tb = fib_get_table(net, tbid); if (unlikely(!tb)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); } else { @@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); } - if (err || res.type != RTN_UNICAST) - return 0; + if (err) { + /* map fib lookup errors to RTN_ type */ + if (err == -EINVAL) + return BPF_FIB_LKUP_RET_BLACKHOLE; + if (err == -EHOSTUNREACH) + return BPF_FIB_LKUP_RET_UNREACHABLE; + if (err == -EACCES) + return BPF_FIB_LKUP_RET_PROHIBIT; + + return BPF_FIB_LKUP_RET_NOT_FWDED; + } + + if (res.type != RTN_UNICAST) + return BPF_FIB_LKUP_RET_NOT_FWDED; if (res.fi->fib_nhs > 1) fib_select_path(net, &res, &fl4, NULL); @@ -4144,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); if (params->tot_len > mtu) - return 0; + return BPF_FIB_LKUP_RET_FRAG_NEEDED; } nh = &res.fi->fib_nh[res.nh_sel]; /* do not handle lwt encaps right now */ if (nh->nh_lwtstate) - return 0; + return BPF_FIB_LKUP_RET_UNSUPP_LWT; dev = nh->nh_dev; - if (unlikely(!dev)) - return 0; - if (nh->nh_gw) params->ipv4_dst = nh->nh_gw; @@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, * rcu_read_lock_bh is not needed here */ neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); - if (neigh) - return bpf_fib_set_fwd_params(params, neigh, dev); + if (!neigh) + return BPF_FIB_LKUP_RET_NO_NEIGH; - return 0; + return bpf_fib_set_fwd_params(params, neigh, dev); } #endif @@ -4190,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, /* link local addresses are never forwarded */ if (rt6_need_strict(dst) || rt6_need_strict(src)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; dev = dev_get_by_index_rcu(net, params->ifindex); if (unlikely(!dev)) @@ -4198,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, idev = __in6_dev_get_safely(dev); if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) - return 0; + return BPF_FIB_LKUP_RET_FWD_DISABLED; if (flags & BPF_FIB_LOOKUP_OUTPUT) { fl6.flowi6_iif = 1; @@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); } else { @@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, } if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) - return 0; + return BPF_FIB_LKUP_RET_NOT_FWDED; - if (unlikely(f6i->fib6_flags & RTF_REJECT || - f6i->fib6_type != RTN_UNICAST)) - return 0; + if (unlikely(f6i->fib6_flags & RTF_REJECT)) { + switch (f6i->fib6_type) { + case RTN_BLACKHOLE: + return BPF_FIB_LKUP_RET_BLACKHOLE; + case RTN_UNREACHABLE: + return BPF_FIB_LKUP_RET_UNREACHABLE; + case RTN_PROHIBIT: + return BPF_FIB_LKUP_RET_PROHIBIT; + default: + return BPF_FIB_LKUP_RET_NOT_FWDED; + } + } + + if (f6i->fib6_type != RTN_UNICAST) + return BPF_FIB_LKUP_RET_NOT_FWDED; if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, @@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (check_mtu) { mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); if (params->tot_len > mtu) - return 0; + return BPF_FIB_LKUP_RET_FRAG_NEEDED; } if (f6i->fib6_nh.nh_lwtstate) - return 0; + return BPF_FIB_LKUP_RET_UNSUPP_LWT; if (f6i->fib6_flags & RTF_GATEWAY) *dst = f6i->fib6_nh.nh_gw; @@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, */ neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, ndisc_hashfn, dst, dev); - if (neigh) - return bpf_fib_set_fwd_params(params, neigh, dev); + if (!neigh) + return BPF_FIB_LKUP_RET_NO_NEIGH; - return 0; + return bpf_fib_set_fwd_params(params, neigh, dev); } #endif @@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, struct bpf_fib_lookup *, params, int, plen, u32, flags) { struct net *net = dev_net(skb->dev); - int index = -EAFNOSUPPORT; + int rc = -EAFNOSUPPORT; if (plen < sizeof(*params)) return -EINVAL; @@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, switch (params->family) { #if IS_ENABLED(CONFIG_INET) case AF_INET: - index = bpf_ipv4_fib_lookup(net, params, flags, false); + rc = bpf_ipv4_fib_lookup(net, params, flags, false); break; #endif #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - index = bpf_ipv6_fib_lookup(net, params, flags, false); + rc = bpf_ipv6_fib_lookup(net, params, flags, false); break; #endif } - if (index > 0) { + if (!rc) { struct net_device *dev; - dev = dev_get_by_index_rcu(net, index); + dev = dev_get_by_index_rcu(net, params->ifindex); if (!is_skb_forwardable(dev, skb)) - index = 0; + rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; } - return index; + return rc; } static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c index 6673cdb9f55c..a7e94e7ff87d 100644 --- a/samples/bpf/xdp_fwd_kern.c +++ b/samples/bpf/xdp_fwd_kern.c @@ -48,9 +48,9 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) struct ethhdr *eth = data; struct ipv6hdr *ip6h; struct iphdr *iph; - int out_index; u16 h_proto; u64 nh_off; + int rc; nh_off = sizeof(*eth); if (data + nh_off > data_end) @@ -101,7 +101,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) fib_params.ifindex = ctx->ingress_ifindex; - out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); + rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); /* verify egress index has xdp support * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with @@ -109,7 +109,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) * NOTE: without verification that egress index supports XDP * forwarding packets are dropped. */ - if (out_index > 0) { + if (rc == 0) { if (h_proto == htons(ETH_P_IP)) ip_decrease_ttl(iph); else if (h_proto == htons(ETH_P_IPV6)) @@ -117,7 +117,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); memcpy(eth->h_source, fib_params.smac, ETH_ALEN); - return bpf_redirect_map(&tx_port, out_index, 0); + return bpf_redirect_map(&tx_port, fib_params.ifindex, 0); } return XDP_PASS; From b62cc6fdd793eaac50e4191c8637ffff9e9574d6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 18 Jun 2018 10:07:35 -0700 Subject: [PATCH 105/382] libnvdimm, pmem: Fix memcpy_mcsafe() return code handling in nsio_rw_bytes() Commit 60622d68227d "x86/asm/memcpy_mcsafe: Return bytes remaining" converted callers of memcpy_mcsafe() to expect a positive 'bytes remaining' value rather than a negative error code. The nsio_rw_bytes() conversion failed to return success. The failure is benign in that nsio_rw_bytes() will end up writing back what it just read. Fixes: 60622d68227d ("x86/asm/memcpy_mcsafe: Return bytes remaining") Cc: Dan Williams Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/claim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 2e96b34bc936..fb667bf469c7 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -278,6 +278,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, return -EIO; if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0) return -EIO; + return 0; } if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) { From 5a14e91d559aee5bdb0e002e1153fd9c4338a29e Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 27 Jun 2018 11:43:58 -0400 Subject: [PATCH 106/382] dev-dax: check_vma: ratelimit dev_info-s This is easily triggered from userspace, so let's ratelimit the messages. Signed-off-by: Jeff Moyer Signed-off-by: Dan Williams --- drivers/dax/device.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/dax/device.c b/drivers/dax/device.c index de2f8297a210..108c37fca782 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -189,14 +189,16 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, /* prevent private mappings from being established */ if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { - dev_info(dev, "%s: %s: fail, attempted private mapping\n", + dev_info_ratelimited(dev, + "%s: %s: fail, attempted private mapping\n", current->comm, func); return -EINVAL; } mask = dax_region->align - 1; if (vma->vm_start & mask || vma->vm_end & mask) { - dev_info(dev, "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", + dev_info_ratelimited(dev, + "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", current->comm, func, vma->vm_start, vma->vm_end, mask); return -EINVAL; @@ -204,13 +206,15 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV && (vma->vm_flags & VM_DONTCOPY) == 0) { - dev_info(dev, "%s: %s: fail, dax range requires MADV_DONTFORK\n", + dev_info_ratelimited(dev, + "%s: %s: fail, dax range requires MADV_DONTFORK\n", current->comm, func); return -EINVAL; } if (!vma_is_dax(vma)) { - dev_info(dev, "%s: %s: fail, vma is not DAX capable\n", + dev_info_ratelimited(dev, + "%s: %s: fail, vma is not DAX capable\n", current->comm, func); return -EINVAL; } From e7441c9274a6a5453e06f4c2b8b5f72eca0a3f17 Mon Sep 17 00:00:00 2001 From: Denis Kenzior Date: Tue, 19 Jun 2018 10:39:50 -0500 Subject: [PATCH 107/382] mac80211: disable BHs/preemption in ieee80211_tx_control_port() On pre-emption enabled kernels the following print was being seen due to missing local_bh_disable/local_bh_enable calls. mac80211 assumes that pre-emption is disabled in the data path. BUG: using smp_processor_id() in preemptible [00000000] code: iwd/517 caller is __ieee80211_subif_start_xmit+0x144/0x210 [mac80211] [...] Call Trace: dump_stack+0x5c/0x80 check_preemption_disabled.cold.0+0x46/0x51 __ieee80211_subif_start_xmit+0x144/0x210 [mac80211] Fixes: 911806491425 ("mac80211: Add support for tx_control_port") Signed-off-by: Denis Kenzior [commit message rewrite, fixes tag] Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 44b5dfe8727d..fa1f1e63a264 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4845,7 +4845,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, skb_reset_network_header(skb); skb_reset_mac_header(skb); + local_bh_disable(); __ieee80211_subif_start_xmit(skb, skb->dev, flags); + local_bh_enable(); return 0; } From 188f60ab8e787fcbb5ac9d64ede23a0070231f09 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 24 Jun 2018 21:10:49 -0400 Subject: [PATCH 108/382] nl80211: relax ht operation checks for mesh Commit 9757235f451c, "nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value") relaxed the range for the HT operation field in meshconf, while also adding checks requiring the non-greenfield and non-ht-sta bits to be set in certain circumstances. The latter bit is actually reserved for mesh BSSes according to Table 9-168 in 802.11-2016, so in fact it should not be set. wpa_supplicant sets these bits because the mesh and AP code share the same implementation, but authsae does not. As a result, some meshconf updates from authsae which set only the NONHT_MIXED protection bits were being rejected. In order to avoid breaking userspace by changing the rules again, simply accept the values with or without the bits set, and mask off the reserved bit to match the spec. While in here, update the 802.11-2012 reference to 802.11-2016. Fixes: 9757235f451c ("nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value") Cc: Masashi Honma Signed-off-by: Bob Copeland Reviewed-by: Masashi Honma Reviewed-by: Masashi Honma Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c7bbe5f0aae8..351eeaf16abe 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6231,7 +6231,7 @@ do { \ nl80211_check_s32); /* * Check HT operation mode based on - * IEEE 802.11 2012 8.4.2.59 HT Operation element. + * IEEE 802.11-2016 9.4.2.57 HT Operation element. */ if (tb[NL80211_MESHCONF_HT_OPMODE]) { ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); @@ -6241,22 +6241,9 @@ do { \ IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) return -EINVAL; - if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) && - (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) - return -EINVAL; + /* NON_HT_STA bit is reserved, but some programs set it */ + ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; - switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) { - case IEEE80211_HT_OP_MODE_PROTECTION_NONE: - case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: - if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT) - return -EINVAL; - break; - case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: - case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: - if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) - return -EINVAL; - break; - } cfg->ht_opmode = ht_opmode; mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); } From 95bca62fb723a121954fc7ae5473bb2c1f0d5986 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 29 Jun 2018 09:33:39 +0200 Subject: [PATCH 109/382] nl80211: check nla_parse_nested() return values At the very least we should check the return value if nla_parse_nested() is called with a non-NULL policy. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 351eeaf16abe..4eece06be1e7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10949,9 +10949,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, - info->extack); + err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, + info->extack); + if (err) + goto error; + err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -11200,8 +11203,11 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - nl80211_packet_pattern_policy, NULL); + err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, NULL); + if (err) + return err; + if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; From 69705ad21029b6e92c9facb0eb12cde1a4fd52b7 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:06 +0200 Subject: [PATCH 110/382] drm/exynos: ipp: Rework checking for the correct buffer formats Prepare a common function for size and scale checks and call it for source and destination buffers. Then also move there the state-less checks from exynos_drm_ipp_task_setup_buffer, so the format information is already available in limits processing. Finally perform the IPP_LIMIT_BUFFER check on the real width of the buffer (the width calculated from the provided buffer pitch). Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_ipp.c | 108 +++++++++++++----------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index 26374e58c557..8840e6ec8e43 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -345,27 +345,6 @@ static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf, int ret = 0; int i; - /* basic checks */ - if (buf->buf.width == 0 || buf->buf.height == 0) - return -EINVAL; - buf->format = drm_format_info(buf->buf.fourcc); - for (i = 0; i < buf->format->num_planes; i++) { - unsigned int width = (i == 0) ? buf->buf.width : - DIV_ROUND_UP(buf->buf.width, buf->format->hsub); - - if (buf->buf.pitch[i] == 0) - buf->buf.pitch[i] = width * buf->format->cpp[i]; - if (buf->buf.pitch[i] < width * buf->format->cpp[i]) - return -EINVAL; - if (!buf->buf.gem_id[i]) - return -ENOENT; - } - - /* pitch for additional planes must match */ - if (buf->format->num_planes > 2 && - buf->buf.pitch[1] != buf->buf.pitch[2]) - return -EINVAL; - /* get GEM buffers and check their size */ for (i = 0; i < buf->format->num_planes; i++) { unsigned int height = (i == 0) ? buf->buf.height : @@ -495,12 +474,13 @@ static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf, enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA; struct drm_ipp_limit l; struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v; + int real_width = buf->buf.pitch[0] / buf->format->cpp[0]; if (!limits) return 0; __get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l); - if (!__size_limit_check(buf->buf.width, &l.h) || + if (!__size_limit_check(real_width, &l.h) || !__size_limit_check(buf->buf.height, &l.v)) return -EINVAL; @@ -560,10 +540,62 @@ static int exynos_drm_ipp_check_scale_limits( return 0; } +static int exynos_drm_ipp_check_format(struct exynos_drm_ipp_task *task, + struct exynos_drm_ipp_buffer *buf, + struct exynos_drm_ipp_buffer *src, + struct exynos_drm_ipp_buffer *dst, + bool rotate, bool swap) +{ + const struct exynos_drm_ipp_formats *fmt; + int ret, i; + + fmt = __ipp_format_get(task->ipp, buf->buf.fourcc, buf->buf.modifier, + buf == src ? DRM_EXYNOS_IPP_FORMAT_SOURCE : + DRM_EXYNOS_IPP_FORMAT_DESTINATION); + if (!fmt) { + DRM_DEBUG_DRIVER("Task %pK: %s format not supported\n", task, + buf == src ? "src" : "dst"); + return -EINVAL; + } + + /* basic checks */ + if (buf->buf.width == 0 || buf->buf.height == 0) + return -EINVAL; + + buf->format = drm_format_info(buf->buf.fourcc); + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int width = (i == 0) ? buf->buf.width : + DIV_ROUND_UP(buf->buf.width, buf->format->hsub); + + if (buf->buf.pitch[i] == 0) + buf->buf.pitch[i] = width * buf->format->cpp[i]; + if (buf->buf.pitch[i] < width * buf->format->cpp[i]) + return -EINVAL; + if (!buf->buf.gem_id[i]) + return -ENOENT; + } + + /* pitch for additional planes must match */ + if (buf->format->num_planes > 2 && + buf->buf.pitch[1] != buf->buf.pitch[2]) + return -EINVAL; + + /* check driver limits */ + ret = exynos_drm_ipp_check_size_limits(buf, fmt->limits, + fmt->num_limits, + rotate, + buf == dst ? swap : false); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + fmt->limits, + fmt->num_limits, swap); + return ret; +} + static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) { struct exynos_drm_ipp *ipp = task->ipp; - const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt; struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; unsigned int rotation = task->transform.rotation; int ret = 0; @@ -607,37 +639,11 @@ static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) return -EINVAL; } - src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier, - DRM_EXYNOS_IPP_FORMAT_SOURCE); - if (!src_fmt) { - DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task); - return -EINVAL; - } - ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits, - src_fmt->num_limits, - rotate, false); - if (ret) - return ret; - ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, - src_fmt->limits, - src_fmt->num_limits, swap); + ret = exynos_drm_ipp_check_format(task, src, src, dst, rotate, swap); if (ret) return ret; - dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier, - DRM_EXYNOS_IPP_FORMAT_DESTINATION); - if (!dst_fmt) { - DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task); - return -EINVAL; - } - ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits, - dst_fmt->num_limits, - false, swap); - if (ret) - return ret; - ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, - dst_fmt->limits, - dst_fmt->num_limits, swap); + ret = exynos_drm_ipp_check_format(task, dst, src, dst, false, swap); if (ret) return ret; From 1b0966c3e951da6ae523c4e954b1f43b22927948 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:07 +0200 Subject: [PATCH 111/382] drm/exynos: rotator: Fix DRM_MODE_REFLECT_{X,Y} interpretation Horizontal (DRM_MODE_REFLECT_Y) and vertical (DMR_MODE_REFLECT_Y) flip were swapped in Rotator driver. Fix this by swapping code for interpreting them. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 1a76dd3d52e1..a820a68429b9 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -168,9 +168,9 @@ static void rotator_dst_set_transf(struct rot_context *rot, val &= ~ROT_CONTROL_FLIP_MASK; if (rotation & DRM_MODE_REFLECT_X) - val |= ROT_CONTROL_FLIP_HORIZONTAL; - if (rotation & DRM_MODE_REFLECT_Y) val |= ROT_CONTROL_FLIP_VERTICAL; + if (rotation & DRM_MODE_REFLECT_Y) + val |= ROT_CONTROL_FLIP_HORIZONTAL; val &= ~ROT_CONTROL_ROT_MASK; From 280e54c9f614c88292685383cf2d65057586e9fb Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Thu, 7 Jun 2018 13:06:08 +0200 Subject: [PATCH 112/382] drm/exynos: scaler: Reset hardware before starting the operation Ensure that Scaler hardware is properly reset and interrupts are cleared before processing next image. Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_scaler.c | 32 ++++++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index 91d4382343d0..d25069e88186 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -30,6 +30,7 @@ #define scaler_write(cfg, offset) writel(cfg, scaler->regs + (offset)) #define SCALER_MAX_CLK 4 #define SCALER_AUTOSUSPEND_DELAY 2000 +#define SCALER_RESET_WAIT_RETRIES 100 struct scaler_data { const char *clk_name[SCALER_MAX_CLK]; @@ -100,6 +101,23 @@ static u32 scaler_get_format(u32 drm_fmt) return 0; } +static inline int scaler_reset(struct scaler_context *scaler) +{ + int retry = SCALER_RESET_WAIT_RETRIES; + + scaler_write(SCALER_CFG_SOFT_RESET, SCALER_CFG); + do { + cpu_relax(); + } while (retry > 1 && + scaler_read(SCALER_CFG) & SCALER_CFG_SOFT_RESET); + do { + cpu_relax(); + scaler_write(1, SCALER_INT_EN); + } while (retry > 0 && scaler_read(SCALER_INT_EN) != 1); + + return retry ? 0 : -EIO; +} + static inline void scaler_enable_int(struct scaler_context *scaler) { u32 val; @@ -354,9 +372,13 @@ static int scaler_commit(struct exynos_drm_ipp *ipp, u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc); struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect; - scaler->task = task; - pm_runtime_get_sync(scaler->dev); + if (scaler_reset(scaler)) { + pm_runtime_put(scaler->dev); + return -EIO; + } + + scaler->task = task; scaler_set_src_fmt(scaler, src_fmt); scaler_set_src_base(scaler, &task->src); @@ -394,7 +416,11 @@ static inline void scaler_disable_int(struct scaler_context *scaler) static inline u32 scaler_get_int_status(struct scaler_context *scaler) { - return scaler_read(SCALER_INT_STATUS); + u32 val = scaler_read(SCALER_INT_STATUS); + + scaler_write(val, SCALER_INT_STATUS); + + return val; } static inline int scaler_task_done(u32 val) From 4e1a6230313ad76c08633fa57853349de56f60a2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:09 +0200 Subject: [PATCH 113/382] drm/exynos: scaler: Fix support for YUV420, YUV422 and YUV444 modes Fix Cb/CR components order in two-planar YUV420, YUV422 and YUV444 modes. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_scaler.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c index d25069e88186..0ddb6eec7b11 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_scaler.c +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -52,9 +52,9 @@ struct scaler_context { static u32 scaler_get_format(u32 drm_fmt) { switch (drm_fmt) { - case DRM_FORMAT_NV21: - return SCALER_YUV420_2P_UV; case DRM_FORMAT_NV12: + return SCALER_YUV420_2P_UV; + case DRM_FORMAT_NV21: return SCALER_YUV420_2P_VU; case DRM_FORMAT_YUV420: return SCALER_YUV420_3P; @@ -64,15 +64,15 @@ static u32 scaler_get_format(u32 drm_fmt) return SCALER_YUV422_1P_UYVY; case DRM_FORMAT_YVYU: return SCALER_YUV422_1P_YVYU; - case DRM_FORMAT_NV61: - return SCALER_YUV422_2P_UV; case DRM_FORMAT_NV16: + return SCALER_YUV422_2P_UV; + case DRM_FORMAT_NV61: return SCALER_YUV422_2P_VU; case DRM_FORMAT_YUV422: return SCALER_YUV422_3P; - case DRM_FORMAT_NV42: - return SCALER_YUV444_2P_UV; case DRM_FORMAT_NV24: + return SCALER_YUV444_2P_UV; + case DRM_FORMAT_NV42: return SCALER_YUV444_2P_VU; case DRM_FORMAT_YUV444: return SCALER_YUV444_3P; From 4958a1c0c9c4a48d6ba9e2b184d93cab0dce68e1 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:10 +0200 Subject: [PATCH 114/382] drm/exynos: gsc: Use real buffer width for configuring the hardware DMA hardware should respect buffer pitch, so use the width calculated from the buffer pitch instead of the virtual one. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 35ac66730563..8af7f167015b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -577,7 +577,7 @@ static void gsc_src_set_size(struct gsc_context *ctx, cfg &= ~(GSC_SRCIMG_HEIGHT_MASK | GSC_SRCIMG_WIDTH_MASK); - cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) | + cfg |= (GSC_SRCIMG_WIDTH(buf->buf.pitch[0] / buf->format->cpp[0]) | GSC_SRCIMG_HEIGHT(buf->buf.height)); gsc_write(cfg, GSC_SRCIMG_SIZE); @@ -868,7 +868,7 @@ static void gsc_dst_set_size(struct gsc_context *ctx, /* original size */ cfg = gsc_read(GSC_DSTIMG_SIZE); cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK); - cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) | + cfg |= GSC_DSTIMG_WIDTH(buf->buf.pitch[0] / buf->format->cpp[0]) | GSC_DSTIMG_HEIGHT(buf->buf.height); gsc_write(cfg, GSC_DSTIMG_SIZE); From 28b676329cc1adfa37b5291e13055e0819a80e42 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:11 +0200 Subject: [PATCH 115/382] drm/exynos: gsc: Increase Exynos5433 buffer width alignment to 16 pixels Investigation revealed that GScaler hardware requires the real buffer width (pitch) to be aligned to 16 pixels. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 8af7f167015b..246300771d55 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1341,7 +1341,7 @@ static const struct drm_exynos_ipp_limit gsc_5420_limits[] = { }; static const struct drm_exynos_ipp_limit gsc_5433_limits[] = { - { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) }, + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 16 }, .v = { 16, 8191, 2 }) }, { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) }, { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) }, { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, From 4cc11a5f53049c8a2b02763a53ecd42371355c10 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:12 +0200 Subject: [PATCH 116/382] drm/exynos: gsc: Fix DRM_MODE_REFLECT_{X,Y} interpretation Horizontal (DRM_MODE_REFLECT_Y) and vertical (DMR_MODE_REFLECT_Y) flip were swapped in GScaler driver. Fix this by swapping code for interpreting them. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 246300771d55..177e31c3a51d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -523,30 +523,30 @@ static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation) switch (degree) { case DRM_MODE_ROTATE_0: - if (rotation & DRM_MODE_REFLECT_Y) - cfg |= GSC_IN_ROT_XFLIP; if (rotation & DRM_MODE_REFLECT_X) + cfg |= GSC_IN_ROT_XFLIP; + if (rotation & DRM_MODE_REFLECT_Y) cfg |= GSC_IN_ROT_YFLIP; break; case DRM_MODE_ROTATE_90: cfg |= GSC_IN_ROT_90; - if (rotation & DRM_MODE_REFLECT_Y) - cfg |= GSC_IN_ROT_XFLIP; if (rotation & DRM_MODE_REFLECT_X) + cfg |= GSC_IN_ROT_XFLIP; + if (rotation & DRM_MODE_REFLECT_Y) cfg |= GSC_IN_ROT_YFLIP; break; case DRM_MODE_ROTATE_180: cfg |= GSC_IN_ROT_180; - if (rotation & DRM_MODE_REFLECT_Y) - cfg &= ~GSC_IN_ROT_XFLIP; if (rotation & DRM_MODE_REFLECT_X) + cfg &= ~GSC_IN_ROT_XFLIP; + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_YFLIP; break; case DRM_MODE_ROTATE_270: cfg |= GSC_IN_ROT_270; - if (rotation & DRM_MODE_REFLECT_Y) - cfg &= ~GSC_IN_ROT_XFLIP; if (rotation & DRM_MODE_REFLECT_X) + cfg &= ~GSC_IN_ROT_XFLIP; + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_YFLIP; break; } From dd209ef809080ced903e7747ee3ef640c923a1d2 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:13 +0200 Subject: [PATCH 117/382] drm/exynos: gsc: Fix support for NV16/61, YUV420/YVU420 and YUV422 modes Fix following issues related to planar YUV pixel format configuration: - NV16/61 modes were incorrectly programmed as NV12/21, - YVU420 was programmed as YUV420 on source, - YVU420 and YUV422 were programmed as YUV420 on output. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gsc.c | 29 +++++++++++++++++-------- drivers/gpu/drm/exynos/regs-gsc.h | 1 + 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 177e31c3a51d..7ba414b52faa 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -492,21 +492,25 @@ static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt) GSC_IN_CHROMA_ORDER_CRCB); break; case DRM_FORMAT_NV21: + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_2P); + break; case DRM_FORMAT_NV61: - cfg |= (GSC_IN_CHROMA_ORDER_CRCB | - GSC_IN_YUV420_2P); + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV422_2P); break; case DRM_FORMAT_YUV422: cfg |= GSC_IN_YUV422_3P; break; case DRM_FORMAT_YUV420: + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_3P); + break; case DRM_FORMAT_YVU420: - cfg |= GSC_IN_YUV420_3P; + cfg |= (GSC_IN_CHROMA_ORDER_CRCB | GSC_IN_YUV420_3P); break; case DRM_FORMAT_NV12: + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P); + break; case DRM_FORMAT_NV16: - cfg |= (GSC_IN_CHROMA_ORDER_CBCR | - GSC_IN_YUV420_2P); + cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV422_2P); break; } @@ -672,18 +676,25 @@ static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt) GSC_OUT_CHROMA_ORDER_CRCB); break; case DRM_FORMAT_NV21: - case DRM_FORMAT_NV61: cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_2P); break; + case DRM_FORMAT_NV61: + cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV422_2P); + break; case DRM_FORMAT_YUV422: + cfg |= GSC_OUT_YUV422_3P; + break; case DRM_FORMAT_YUV420: + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_3P); + break; case DRM_FORMAT_YVU420: - cfg |= GSC_OUT_YUV420_3P; + cfg |= (GSC_OUT_CHROMA_ORDER_CRCB | GSC_OUT_YUV420_3P); break; case DRM_FORMAT_NV12: + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P); + break; case DRM_FORMAT_NV16: - cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | - GSC_OUT_YUV420_2P); + cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV422_2P); break; } diff --git a/drivers/gpu/drm/exynos/regs-gsc.h b/drivers/gpu/drm/exynos/regs-gsc.h index 4704a993cbb7..16b39734115c 100644 --- a/drivers/gpu/drm/exynos/regs-gsc.h +++ b/drivers/gpu/drm/exynos/regs-gsc.h @@ -138,6 +138,7 @@ #define GSC_OUT_YUV420_3P (3 << 4) #define GSC_OUT_YUV422_1P (4 << 4) #define GSC_OUT_YUV422_2P (5 << 4) +#define GSC_OUT_YUV422_3P (6 << 4) #define GSC_OUT_YUV444 (7 << 4) #define GSC_OUT_TILE_TYPE_MASK (1 << 2) #define GSC_OUT_TILE_C_16x8 (0 << 2) From 5d5657aac0b40867fe57c50d395e9bb08274ceb8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:06:14 +0200 Subject: [PATCH 118/382] drm/exynos: fimc: Use real buffer width for configuring the hardware DMA hardware should respect buffer pitch, so use the width calculated from the buffer pitch instead of the virtual one. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 6127ef25acd6..e8d0670bb5f8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -470,17 +470,18 @@ static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation) static void fimc_set_window(struct fimc_context *ctx, struct exynos_drm_ipp_buffer *buf) { + unsigned int real_width = buf->buf.pitch[0] / buf->format->cpp[0]; u32 cfg, h1, h2, v1, v2; /* cropped image */ h1 = buf->rect.x; - h2 = buf->buf.width - buf->rect.w - buf->rect.x; + h2 = real_width - buf->rect.w - buf->rect.x; v1 = buf->rect.y; v2 = buf->buf.height - buf->rect.h - buf->rect.y; DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n", buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h, - buf->buf.width, buf->buf.height); + real_width, buf->buf.height); DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2); /* @@ -503,12 +504,13 @@ static void fimc_set_window(struct fimc_context *ctx, static void fimc_src_set_size(struct fimc_context *ctx, struct exynos_drm_ipp_buffer *buf) { + unsigned int real_width = buf->buf.pitch[0] / buf->format->cpp[0]; u32 cfg; - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", real_width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) | + cfg = (EXYNOS_ORGISIZE_HORIZONTAL(real_width) | EXYNOS_ORGISIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGISIZE); @@ -529,7 +531,7 @@ static void fimc_src_set_size(struct fimc_context *ctx, * for now, we support only ITU601 8 bit mode */ cfg = (EXYNOS_CISRCFMT_ITU601_8BIT | - EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) | + EXYNOS_CISRCFMT_SOURCEHSIZE(real_width) | EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_CISRCFMT); @@ -842,12 +844,13 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc) static void fimc_dst_set_size(struct fimc_context *ctx, struct exynos_drm_ipp_buffer *buf) { + unsigned int real_width = buf->buf.pitch[0] / buf->format->cpp[0]; u32 cfg, cfg_ext; - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", real_width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) | + cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(real_width) | EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGOSIZE); From ab337fc274a1957ff0771f19e826c736253f7c39 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:07:40 +0200 Subject: [PATCH 119/382] drm/exynos: decon5433: Fix per-plane global alpha for XRGB modes Set per-plane global alpha to maximum value to get proper blending of XRGB and ARGB planes. This fixes the strange order of overlapping planes. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 82c95c34447f..92d28b5b1077 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -356,8 +356,8 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc, writel(val, ctx->addr + DECON_VIDOSDxB(win)); } - val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) | - VIDOSD_Wx_ALPHA_B_F(0x0); + val = VIDOSD_Wx_ALPHA_R_F(0xff) | VIDOSD_Wx_ALPHA_G_F(0xff) | + VIDOSD_Wx_ALPHA_B_F(0xff); writel(val, ctx->addr + DECON_VIDOSDxC(win)); val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) | From 7b7aa62c05eac9789c208b946f515983a9255d8d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 7 Jun 2018 13:07:49 +0200 Subject: [PATCH 120/382] drm/exynos: decon5433: Fix WINCONx reset value The only bits that should be preserved in decon_win_set_fmt() is WINCONx_ENWIN_F. All other bits depends on the selected pixel formats and are set by the mentioned function. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 92d28b5b1077..e868773ea509 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -265,7 +265,7 @@ static void decon_win_set_pixfmt(struct decon_context *ctx, unsigned int win, unsigned long val; val = readl(ctx->addr + DECON_WINCONx(win)); - val &= ~WINCONx_BPPMODE_MASK; + val &= WINCONx_ENWIN_F; switch (fb->format->format) { case DRM_FORMAT_XRGB1555: From e94595b0f59c26581f6ae8a3972b99f56a415b76 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 17 Jun 2018 10:55:29 +0200 Subject: [PATCH 121/382] drm/exynos: ipp: use correct enum type The limit_id_fallback array uses enum drm_ipp_size_id to index its content. The content itself is of type enum drm_exynos_ipp_limit_type. Cc: Marek Szyprowski Signed-off-by: Stefan Agner Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_ipp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c index 8840e6ec8e43..b435db8fc916 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -407,7 +407,7 @@ enum drm_ipp_size_id { IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX }; -static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = { +static const enum drm_exynos_ipp_limit_type limit_id_fallback[IPP_LIMIT_MAX][4] = { [IPP_LIMIT_BUFFER] = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, [IPP_LIMIT_AREA] = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, From e699e2c6a654ff8d7303f5297ab5dd83da7b23e0 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Wed, 27 Jun 2018 15:16:42 -0700 Subject: [PATCH 122/382] net, mm: account sock objects to kmemcg Currently the kernel accounts the memory for network traffic through mem_cgroup_[un]charge_skmem() interface. However the memory accounted only includes the truesize of sk_buff which does not include the size of sock objects. In our production environment, with opt-out kmem accounting, the sock kmem caches (TCP[v6], UDP[v6], RAW[v6], UNIX) are among the top most charged kmem caches and consume a significant amount of memory which can not be left as system overhead. So, this patch converts the kmem caches of all sock objects to SLAB_ACCOUNT. Signed-off-by: Shakeel Butt Suggested-by: Eric Dumazet Reviewed-by: Kirill Tkhai Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index bcc41829a16d..9e8f65585b81 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3243,7 +3243,8 @@ static int req_prot_init(const struct proto *prot) rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->obj_size, 0, - prot->slab_flags, NULL); + SLAB_ACCOUNT | prot->slab_flags, + NULL); if (!rsk_prot->slab) { pr_crit("%s: Can't create request sock SLAB cache!\n", @@ -3258,7 +3259,8 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN | prot->slab_flags, + SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | + prot->slab_flags, prot->useroffset, prot->usersize, NULL); @@ -3281,6 +3283,7 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, 0, + SLAB_ACCOUNT | prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) From 18d405af30bf6506bf2fc49056de7691c949812e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Jun 2018 23:34:57 +0200 Subject: [PATCH 123/382] bpf, arm32: fix to use bpf_jit_binary_lock_ro api Any eBPF JIT that where its underlying arch supports ARCH_HAS_SET_MEMORY would need to use bpf_jit_binary_{un,}lock_ro() pair instead of the set_memory_{ro,rw}() pair directly as otherwise changes to the former might break. arm32's eBPF conversion missed to change it, so fix this up here. Fixes: 39c13c204bb1 ("arm: eBPF JIT compiler") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- arch/arm/net/bpf_jit_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6e8b71613039..f6a62ae44a65 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1844,7 +1844,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* there are 2 passes here */ bpf_jit_dump(prog->len, image_size, 2, ctx.target); - set_memory_ro((unsigned long)header, header->pages); + bpf_jit_binary_lock_ro(header); prog->bpf_func = (void *)ctx.target; prog->jited = 1; prog->jited_len = image_size; From f605ce5eb26ac934fb8106d75d46a2c875a2bf23 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Jun 2018 23:34:58 +0200 Subject: [PATCH 124/382] bpf, s390: fix potential memleak when later bpf_jit_prog fails If we would ever fail in the bpf_jit_prog() pass that writes the actual insns to the image after we got header via bpf_jit_binary_alloc() then we also need to make sure to free it through bpf_jit_binary_free() again when bailing out. Given we had prior bpf_jit_prog() passes to initially probe for clobbered registers, program size and to fill in addrs arrray for jump targets, this is more of a theoretical one, but at least make sure this doesn't break with future changes. Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend") Signed-off-by: Daniel Borkmann Cc: Martin Schwidefsky Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- arch/s390/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index d2db8acb1a55..5f0234ec8038 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1286,6 +1286,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) goto free_addrs; } if (bpf_jit_prog(&jit, fp)) { + bpf_jit_binary_free(header); fp = orig_fp; goto free_addrs; } From 85782e037f8aba8922dadb24a1523ca0b82ab8bc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Jun 2018 23:34:59 +0200 Subject: [PATCH 125/382] bpf: undo prog rejection on read-only lock failure Partially undo commit 9facc336876f ("bpf: reject any prog that failed read-only lock") since it caused a regression, that is, syzkaller was able to manage to cause a panic via fault injection deep in set_memory_ro() path by letting an allocation fail: In x86's __change_page_attr_set_clr() it was able to change the attributes of the primary mapping but not in the alias mapping via cpa_process_alias(), so the second, inner call to the __change_page_attr() via __change_page_attr_set_clr() had to split a larger page and failed in the alloc_pages() with the artifically triggered allocation error which is then propagated down to the call site. Thus, for set_memory_ro() this means that it returned with an error, but from debugging a probe_kernel_write() revealed EFAULT on that memory since the primary mapping succeeded to get changed. Therefore the subsequent hdr->locked = 0 reset triggered the panic as it was performed on read-only memory, so call-site assumptions were infact wrong to assume that it would either succeed /or/ not succeed at all since there's no such rollback in set_memory_*() calls from partial change of mappings, in other words, we're left in a state that is "half done". A later undo via set_memory_rw() is succeeding though due to matching permissions on that part (aka due to the try_preserve_large_page() succeeding). While reproducing locally with explicitly triggering this error, the initial splitting only happens on rare occasions and in real world it would additionally need oom conditions, but that said, it could partially fail. Therefore, it is definitely wrong to bail out on set_memory_ro() error and reject the program with the set_memory_*() semantics we have today. Shouldn't have gone the extra mile since no other user in tree today infact checks for any set_memory_*() errors, e.g. neither module_enable_ro() / module_disable_ro() for module RO/NX handling which is mostly default these days nor kprobes core with alloc_insn_page() / free_insn_page() as examples that could be invoked long after bootup and original 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") did neither when it got first introduced to BPF so "improving" with bailing out was clearly not right when set_memory_*() cannot handle it today. Kees suggested that if set_memory_*() can fail, we should annotate it with __must_check, and all callers need to deal with it gracefully given those set_memory_*() markings aren't "advisory", but they're expected to actually do what they say. This might be an option worth to move forward in future but would at the same time require that set_memory_*() calls from supporting archs are guaranteed to be "atomic" in that they provide rollback if part of the range fails, once that happened, the transition from RW -> RO could be made more robust that way, while subsequent RO -> RW transition /must/ continue guaranteeing to always succeed the undo part. Reported-by: syzbot+a4eb8c7766952a1ca872@syzkaller.appspotmail.com Reported-by: syzbot+d866d1925855328eac3b@syzkaller.appspotmail.com Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock") Cc: Laura Abbott Cc: Kees Cook Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 56 ++++++------------------------------------ kernel/bpf/core.c | 28 --------------------- 2 files changed, 8 insertions(+), 76 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 20f2659dd829..300baad62c88 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -470,9 +470,7 @@ struct sock_fprog_kern { }; struct bpf_binary_header { - u16 pages; - u16 locked:1; - + u32 pages; /* Some arches need word alignment for their instructions */ u8 image[] __aligned(4); }; @@ -481,7 +479,7 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ jit_requested:1,/* archs need to JIT the prog */ - locked:1, /* Program image locked? */ + undo_set_mem:1, /* Passed set_memory_ro() checkpoint */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ @@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - fp->locked = 1; - if (set_memory_ro((unsigned long)fp, fp->pages)) - fp->locked = 0; -#endif + fp->undo_set_mem = 1; + set_memory_ro((unsigned long)fp, fp->pages); } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - if (fp->locked) { - WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); - /* In case set_memory_rw() fails, we want to be the first - * to crash here instead of some random place later on. - */ - fp->locked = 0; - } -#endif + if (fp->undo_set_mem) + set_memory_rw((unsigned long)fp, fp->pages); } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - hdr->locked = 1; - if (set_memory_ro((unsigned long)hdr, hdr->pages)) - hdr->locked = 0; -#endif + set_memory_ro((unsigned long)hdr, hdr->pages); } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - if (hdr->locked) { - WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); - /* In case set_memory_rw() fails, we want to be the first - * to crash here instead of some random place later on. - */ - hdr->locked = 0; - } -#endif + set_memory_rw((unsigned long)hdr, hdr->pages); } static inline struct bpf_binary_header * @@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp) return (void *)addr; } -#ifdef CONFIG_ARCH_HAS_SET_MEMORY -static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp) -{ - if (!fp->locked) - return -ENOLCK; - if (fp->jited) { - const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - - if (!hdr->locked) - return -ENOLCK; - } - - return 0; -} -#endif - int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index a9e6c04d0f4a..1e5625d46414 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_fill_ill_insns(hdr, size); hdr->pages = size / PAGE_SIZE; - hdr->locked = 0; - hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) return 0; } -static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp) -{ -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - int i, err; - - for (i = 0; i < fp->aux->func_cnt; i++) { - err = bpf_prog_check_pages_ro_single(fp->aux->func[i]); - if (err) - return err; - } - - return bpf_prog_check_pages_ro_single(fp); -#endif - return 0; -} - static void bpf_prog_select_func(struct bpf_prog *fp) { #ifndef CONFIG_BPF_JIT_ALWAYS_ON @@ -1524,17 +1506,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * all eBPF JITs might immediately support all features. */ *err = bpf_check_tail_call(fp); - if (*err) - return fp; - /* Checkpoint: at this point onwards any cBPF -> eBPF or - * native eBPF program is read-only. If we failed to change - * the page attributes (e.g. allocation failure from - * splitting large pages), then reject the whole program - * in order to guarantee not ending up with any W+X pages - * from BPF side in kernel. - */ - *err = bpf_prog_check_pages_ro_locked(fp); return fp; } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); From 55c5e0c602c20cb6f350e5ae357cfd7e04ebb189 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 2 Jun 2018 11:02:02 -0300 Subject: [PATCH 126/382] dt-bindings: clock: imx6ul: Do not change the clock definition order Commit f5a4670de966 ("clk: imx: Add new clo01 and clo2 controlled by CCOSR") introduced the CLK_CLKO definitions, but didn't put them at the end of the list, which may cause dtb breakage when running an old dtb with a newer kernel. In order to avoid that, simply add the new CLK_CKO clock definitions at the end of the list. Fixes: f5a4670de966 ("clk: imx: Add new clo01 and clo2 controlled by CCOSR") Reported-by: Stefan Wahren Signed-off-by: Fabio Estevam Acked-by: Rob Herring Reviewed-by: Stefan Agner Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/imx6ul-clock.h | 40 +++++++++++------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/include/dt-bindings/clock/imx6ul-clock.h b/include/dt-bindings/clock/imx6ul-clock.h index 9564597cbfac..0aa1d9c3e0b9 100644 --- a/include/dt-bindings/clock/imx6ul-clock.h +++ b/include/dt-bindings/clock/imx6ul-clock.h @@ -235,27 +235,25 @@ #define IMX6UL_CLK_CSI_PODF 222 #define IMX6UL_CLK_PLL3_120M 223 #define IMX6UL_CLK_KPP 224 -#define IMX6UL_CLK_CKO1_SEL 225 -#define IMX6UL_CLK_CKO1_PODF 226 -#define IMX6UL_CLK_CKO1 227 -#define IMX6UL_CLK_CKO2_SEL 228 -#define IMX6UL_CLK_CKO2_PODF 229 -#define IMX6UL_CLK_CKO2 230 -#define IMX6UL_CLK_CKO 231 - -/* For i.MX6ULL */ -#define IMX6ULL_CLK_ESAI_PRED 232 -#define IMX6ULL_CLK_ESAI_PODF 233 -#define IMX6ULL_CLK_ESAI_EXTAL 234 -#define IMX6ULL_CLK_ESAI_MEM 235 -#define IMX6ULL_CLK_ESAI_IPG 236 -#define IMX6ULL_CLK_DCP_CLK 237 -#define IMX6ULL_CLK_EPDC_PRE_SEL 238 -#define IMX6ULL_CLK_EPDC_SEL 239 -#define IMX6ULL_CLK_EPDC_PODF 240 -#define IMX6ULL_CLK_EPDC_ACLK 241 -#define IMX6ULL_CLK_EPDC_PIX 242 -#define IMX6ULL_CLK_ESAI_SEL 243 +#define IMX6ULL_CLK_ESAI_PRED 225 +#define IMX6ULL_CLK_ESAI_PODF 226 +#define IMX6ULL_CLK_ESAI_EXTAL 227 +#define IMX6ULL_CLK_ESAI_MEM 228 +#define IMX6ULL_CLK_ESAI_IPG 229 +#define IMX6ULL_CLK_DCP_CLK 230 +#define IMX6ULL_CLK_EPDC_PRE_SEL 231 +#define IMX6ULL_CLK_EPDC_SEL 232 +#define IMX6ULL_CLK_EPDC_PODF 233 +#define IMX6ULL_CLK_EPDC_ACLK 234 +#define IMX6ULL_CLK_EPDC_PIX 235 +#define IMX6ULL_CLK_ESAI_SEL 236 +#define IMX6UL_CLK_CKO1_SEL 237 +#define IMX6UL_CLK_CKO1_PODF 238 +#define IMX6UL_CLK_CKO1 239 +#define IMX6UL_CLK_CKO2_SEL 240 +#define IMX6UL_CLK_CKO2_PODF 241 +#define IMX6UL_CLK_CKO2 242 +#define IMX6UL_CLK_CKO 243 #define IMX6UL_CLK_END 244 #endif /* __DT_BINDINGS_CLOCK_IMX6UL_H */ From 4050360f964694a3ac0c83badd1a441207c86889 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 29 Jun 2018 13:47:38 -0500 Subject: [PATCH 127/382] PCI: rcar: Shut the PHY down in failpath If anything fails past phy_init_fn() and the system is a Gen3 with a PHY, the PHY will be left on and inited. This is caused by the phy_init_fn, which is in fact a pointer to rcar_pcie_phy_init_gen3() function, which starts the PHY, yet has no counterpart in the failpath. Add that counterpart. Fixes: 517ca93a7159 ("PCI: rcar: Add R-Car gen3 PHY support") Signed-off-by: Marek Vasut Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Geert Uytterhoeven Acked-by: Simon Horman Cc: Geert Uytterhoeven Cc: Phil Edworthy Cc: Wolfram Sang --- drivers/pci/controller/pcie-rcar.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-rcar.c b/drivers/pci/controller/pcie-rcar.c index 874d75c9ee4a..8b0d3206cbc4 100644 --- a/drivers/pci/controller/pcie-rcar.c +++ b/drivers/pci/controller/pcie-rcar.c @@ -1165,7 +1165,7 @@ static int rcar_pcie_probe(struct platform_device *pdev) if (rcar_pcie_hw_init(pcie)) { dev_info(dev, "PCIe link down\n"); err = -ENODEV; - goto err_clk_disable; + goto err_phy_shutdown; } data = rcar_pci_read_reg(pcie, MACSR); @@ -1177,7 +1177,7 @@ static int rcar_pcie_probe(struct platform_device *pdev) dev_err(dev, "failed to enable MSI support: %d\n", err); - goto err_clk_disable; + goto err_phy_shutdown; } } @@ -1191,6 +1191,12 @@ static int rcar_pcie_probe(struct platform_device *pdev) if (IS_ENABLED(CONFIG_PCI_MSI)) rcar_pcie_teardown_msi(pcie); +err_phy_shutdown: + if (pcie->phy) { + phy_power_off(pcie->phy); + phy_exit(pcie->phy); + } + err_clk_disable: clk_disable_unprepare(pcie->bus_clk); From 3c5777c372b6eb2e17802b3dc4bd5ebea45d9bcc Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 29 Jun 2018 13:48:15 -0500 Subject: [PATCH 128/382] PCI: rcar: Clean up PHY init on failure If the Gen3 PHY fails to power up, the code does not undo the initialization caused by phy_init(). Add the missing failure handling to the rcar_pcie_phy_init_gen3() function. Fixes: 517ca93a7159 ("PCI: rcar: Add R-Car gen3 PHY support") Reported-by: Geert Uytterhoeven Signed-off-by: Marek Vasut Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Geert Uytterhoeven Acked-by: Simon Horman Cc: Geert Uytterhoeven Cc: Phil Edworthy Cc: Wolfram Sang --- drivers/pci/controller/pcie-rcar.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-rcar.c b/drivers/pci/controller/pcie-rcar.c index 8b0d3206cbc4..c8febb009454 100644 --- a/drivers/pci/controller/pcie-rcar.c +++ b/drivers/pci/controller/pcie-rcar.c @@ -680,7 +680,11 @@ static int rcar_pcie_phy_init_gen3(struct rcar_pcie *pcie) if (err) return err; - return phy_power_on(pcie->phy); + err = phy_power_on(pcie->phy); + if (err) + phy_exit(pcie->phy); + + return err; } static int rcar_msi_alloc(struct rcar_msi *chip) From 9bc5f0833a55c7bf768d517b5fdcf89ead43b794 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 29 Jun 2018 13:49:04 -0500 Subject: [PATCH 129/382] PCI: controller: dwc: Do not let PCIE_DW_PLAT_HOST default to yes PCIE_DW_PLAT_HOST does not have any platform dependency, so it should not default to yes. Fixes: 1d906b22076e12cf ("PCI: dwc: Add support for EP mode") Signed-off-by: Geert Uytterhoeven Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Gustavo Pimentel --- drivers/pci/controller/dwc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index 16f52c626b4b..91b0194240a5 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -58,7 +58,6 @@ config PCIE_DW_PLAT_HOST depends on PCI && PCI_MSI_IRQ_DOMAIN select PCIE_DW_HOST select PCIE_DW_PLAT - default y help Enables support for the PCIe controller in the Designware IP to work in host mode. There are two instances of PCIe controller in From fb0de5b8dcc68da4a0d39b0f5fcd5368085ad7c1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 29 Jun 2018 13:49:24 -0500 Subject: [PATCH 130/382] PCI: endpoint: Use after free in pci_epf_unregister_driver() We need to use list_for_each_entry_safe() because the pci_ep_cfs_remove_epf_group() function frees "group". Fixes: ef1433f717a2 ("PCI: endpoint: Create configfs entry for each pci_epf_device_id table entry") Signed-off-by: Dan Carpenter [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Kishon Vijay Abraham I --- drivers/pci/endpoint/pci-epf-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/endpoint/pci-epf-core.c b/drivers/pci/endpoint/pci-epf-core.c index 523a8cab3bfb..bf53fad636a5 100644 --- a/drivers/pci/endpoint/pci-epf-core.c +++ b/drivers/pci/endpoint/pci-epf-core.c @@ -145,10 +145,10 @@ EXPORT_SYMBOL_GPL(pci_epf_alloc_space); */ void pci_epf_unregister_driver(struct pci_epf_driver *driver) { - struct config_group *group; + struct config_group *group, *tmp; mutex_lock(&pci_epf_mutex); - list_for_each_entry(group, &driver->epf_group, group_entry) + list_for_each_entry_safe(group, tmp, &driver->epf_group, group_entry) pci_ep_cfs_remove_epf_group(group); list_del(&driver->epf_group); mutex_unlock(&pci_epf_mutex); From 8c3f9bd851a4d3acf0a0f222d4e9e41c0cd1ea8e Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 13:49:54 -0500 Subject: [PATCH 131/382] PCI: xilinx: Add missing of_node_put() The call to of_get_next_child() returns a node pointer with refcount incremented thus it must be explicitly decremented here after the last usage. Fixes: 8961def56845 ("PCI: xilinx: Add Xilinx AXI PCIe Host Bridge IP driver") Signed-off-by: Nicholas Mc Guire [lorenzo.pieralisi@arm.com: reworked commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-xilinx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pcie-xilinx.c b/drivers/pci/controller/pcie-xilinx.c index b110a3a814e3..7b1389d8e2a5 100644 --- a/drivers/pci/controller/pcie-xilinx.c +++ b/drivers/pci/controller/pcie-xilinx.c @@ -509,6 +509,7 @@ static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port) port->leg_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX, &intx_domain_ops, port); + of_node_put(pcie_intc_node); if (!port->leg_domain) { dev_err(dev, "Failed to get a INTx IRQ domain\n"); return -ENODEV; From 342639d996f18bc0a4db2f42a84230c0a966dc94 Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 13:50:10 -0500 Subject: [PATCH 132/382] PCI: xilinx-nwl: Add missing of_node_put() The call to of_get_next_child() returns a node pointer with refcount incremented thus it must be explicitly decremented here after the last usage. Fixes: ab597d35ef11 ("PCI: xilinx-nwl: Add support for Xilinx NWL PCIe Host Controller") Signed-off-by: Nicholas Mc Guire [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pcie-xilinx-nwl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 6a4bbb5b3de0..fb32840ce8e6 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -559,7 +559,7 @@ static int nwl_pcie_init_irq_domain(struct nwl_pcie *pcie) PCI_NUM_INTX, &legacy_domain_ops, pcie); - + of_node_put(legacy_intc_node); if (!pcie->legacy_irq_domain) { dev_err(dev, "failed to create IRQ domain\n"); return -ENOMEM; From 3dc6ddfedc2818eaaa36842fbb049191e0c5e50f Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire Date: Fri, 29 Jun 2018 13:50:27 -0500 Subject: [PATCH 133/382] PCI: faraday: Add missing of_node_put() The call to of_get_next_child() returns a node pointer with refcount incremented thus it must be explicitly decremented here in the error path and after the last usage. Fixes: d3c68e0a7e34 ("PCI: faraday: Add Faraday Technology FTPCI100 PCI Host Bridge driver") Signed-off-by: Nicholas Mc Guire [lorenzo.pieralisi@arm.com: updated commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Linus Walleij --- drivers/pci/controller/pci-ftpci100.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c index a1ebe9ed441f..20bb2564a6b3 100644 --- a/drivers/pci/controller/pci-ftpci100.c +++ b/drivers/pci/controller/pci-ftpci100.c @@ -355,11 +355,13 @@ static int faraday_pci_setup_cascaded_irq(struct faraday_pci *p) irq = of_irq_get(intc, 0); if (irq <= 0) { dev_err(p->dev, "failed to get parent IRQ\n"); + of_node_put(intc); return irq ?: -EINVAL; } p->irqdomain = irq_domain_add_linear(intc, PCI_NUM_INTX, &faraday_pci_irqdomain_ops, p); + of_node_put(intc); if (!p->irqdomain) { dev_err(p->dev, "failed to create Gemini PCI IRQ domain\n"); return -EINVAL; From 38972375ef7bdc7dd989bcb48d5448662a95bca2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 29 Jun 2018 15:08:52 -0500 Subject: [PATCH 134/382] PCI/IOV: Reset total_VFs limit after detaching PF driver The TotalVFs register in the SR-IOV capability is the hardware limit on the number of VFs. A PF driver can limit the number of VFs further with pci_sriov_set_totalvfs(). When the PF driver is removed, reset any VF limit that was imposed by the driver because that limit may not apply to other drivers. Before 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0"), pci_sriov_set_totalvfs(pdev, 0) meant "we can enable TotalVFs virtual functions", and the nfp driver used that to remove the VF limit when the driver unloads. 8d85a7a4f2c9 broke that because instead of removing the VF limit, pci_sriov_set_totalvfs(pdev, 0) actually sets the limit to zero, and that limit persists even if another driver is loaded. We could fix that by making the nfp driver reset the limit when it unloads, but it seems more robust to do it in the PCI core instead of relying on the driver. The regression scenario is: nfp_pci_probe (driver 1) ... nfp_pci_remove pci_sriov_set_totalvfs(pf->pdev, 0) # limits VFs to 0 ... nfp_pci_probe (driver 2) nfp_rtsym_read_le("nfd_vf_cfg_max_vfs") # no VF limit from firmware Now driver 2 is broken because the VF limit is still 0 from driver 1. Fixes: 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0") Signed-off-by: Jakub Kicinski [bhelgaas: changelog, rename functions] Signed-off-by: Bjorn Helgaas --- drivers/pci/iov.c | 16 ++++++++++++++++ drivers/pci/pci-driver.c | 1 + drivers/pci/pci.h | 4 ++++ 3 files changed, 21 insertions(+) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index d0d73dbbd5ca..0f04ae648cf1 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -574,6 +574,22 @@ void pci_iov_release(struct pci_dev *dev) sriov_release(dev); } +/** + * pci_iov_remove - clean up SR-IOV state after PF driver is detached + * @dev: the PCI device + */ +void pci_iov_remove(struct pci_dev *dev) +{ + struct pci_sriov *iov = dev->sriov; + + if (!dev->is_physfn) + return; + + iov->driver_max_VFs = iov->total_VFs; + if (iov->num_VFs) + pci_warn(dev, "driver left SR-IOV enabled after remove\n"); +} + /** * pci_iov_update_resource - update a VF BAR * @dev: the PCI device diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index c125d53033c6..6792292b5fc7 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -445,6 +445,7 @@ static int pci_device_remove(struct device *dev) } pcibios_free_irq(pci_dev); pci_dev->driver = NULL; + pci_iov_remove(pci_dev); } /* Undo the runtime PM settings in local_pci_probe() */ diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index c358e7a07f3f..882f1f9596df 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -311,6 +311,7 @@ static inline void pci_restore_ats_state(struct pci_dev *dev) #ifdef CONFIG_PCI_IOV int pci_iov_init(struct pci_dev *dev); void pci_iov_release(struct pci_dev *dev); +void pci_iov_remove(struct pci_dev *dev); void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); @@ -323,6 +324,9 @@ static inline int pci_iov_init(struct pci_dev *dev) } static inline void pci_iov_release(struct pci_dev *dev) +{ +} +static inline void pci_iov_remove(struct pci_dev *dev) { } static inline void pci_restore_iov_state(struct pci_dev *dev) From 83235822b8b4fe47ecbd6b6bcbcc902860ac00fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 29 Jun 2018 15:09:00 -0500 Subject: [PATCH 135/382] nfp: stop limiting VFs to 0 Before 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0"), pci_sriov_set_totalvfs(pdev, 0) meant "we can enable TotalVFs virtual functions". After 8d85a7a4f2c9, it means "we can't enable *any* VFs". That broke this scenario where nfp intends to remove any limit on the number of VFs that can be enabled: nfp_pci_probe nfp_pcie_sriov_read_nfd_limit nfp_rtsym_read_le("nfd_vf_cfg_max_vfs", &err) pci_sriov_set_totalvfs(pf->pdev, 0) # if FW didn't expose a limit ... # userspace writes N to sysfs "sriov_numvfs": sriov_numvfs_store pci_sriov_get_totalvfs # now returns 0 return -ERANGE Prior to 8d85a7a4f2c9, pci_sriov_get_totalvfs() returned TotalVFs, but it now returns 0. Remove the pci_sriov_set_totalvfs(pdev, 0) calls so we don't limit the number of VFs that can be enabled. Fixes: 8d85a7a4f2c9 ("PCI/IOV: Allow PF drivers to limit total_VFs to 0") Signed-off-by: Jakub Kicinski [bhelgaas: changelog] Signed-off-by: Bjorn Helgaas --- drivers/net/ethernet/netronome/nfp/nfp_main.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 46b76d5a726c..152283d7e59c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -240,7 +240,6 @@ static int nfp_pcie_sriov_read_nfd_limit(struct nfp_pf *pf) return pci_sriov_set_totalvfs(pf->pdev, pf->limit_vfs); pf->limit_vfs = ~0; - pci_sriov_set_totalvfs(pf->pdev, 0); /* 0 is unset */ /* Allow any setting for backwards compatibility if symbol not found */ if (err == -ENOENT) return 0; @@ -668,7 +667,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, err = nfp_net_pci_probe(pf); if (err) - goto err_sriov_unlimit; + goto err_fw_unload; err = nfp_hwmon_register(pf); if (err) { @@ -680,8 +679,6 @@ static int nfp_pci_probe(struct pci_dev *pdev, err_net_remove: nfp_net_pci_remove(pf); -err_sriov_unlimit: - pci_sriov_set_totalvfs(pf->pdev, 0); err_fw_unload: kfree(pf->rtbl); nfp_mip_close(pf->mip); @@ -715,7 +712,6 @@ static void nfp_pci_remove(struct pci_dev *pdev) nfp_hwmon_unregister(pf); nfp_pcie_sriov_disable(pdev); - pci_sriov_set_totalvfs(pf->pdev, 0); nfp_net_pci_remove(pf); From c860e997e9170a6d68f9d1e6e2cf61f572191aaf Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 27 Jun 2018 16:04:48 -0700 Subject: [PATCH 136/382] tcp: fix Fast Open key endianness Fast Open key could be stored in different endian based on the CPU. Previously hosts in different endianness in a server farm using the same key config (sysctl value) would produce different cookies. This patch fixes it by always storing it as little endian to keep same API for LE hosts. Reported-by: Daniele Iamartino Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d06247ba08b2..af0a857d8352 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -265,8 +265,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, ipv4.sysctl_tcp_fastopen); struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; - int ret; u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ + __le32 key[4]; + int ret, i; tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); if (!tbl.data) @@ -275,11 +276,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, rcu_read_lock(); ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); if (ctxt) - memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); else - memset(user_key, 0, sizeof(user_key)); + memset(key, 0, sizeof(key)); rcu_read_unlock(); + for (i = 0; i < ARRAY_SIZE(key); i++) + user_key[i] = le32_to_cpu(key[i]); + snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", user_key[0], user_key[1], user_key[2], user_key[3]); ret = proc_dostring(&tbl, write, buffer, lenp, ppos); @@ -290,13 +294,17 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write, ret = -EINVAL; goto bad_key; } - tcp_fastopen_reset_cipher(net, NULL, user_key, + + for (i = 0; i < ARRAY_SIZE(user_key); i++) + key[i] = cpu_to_le32(user_key[i]); + + tcp_fastopen_reset_cipher(net, NULL, key, TCP_FASTOPEN_KEY_LENGTH); } bad_key: pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", - user_key[0], user_key[1], user_key[2], user_key[3], + user_key[0], user_key[1], user_key[2], user_key[3], (char *)tbl.data, ret); kfree(tbl.data); return ret; From 92291c95e71a7898109c1f95adfb48aa69e5ba7b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Jun 2018 12:24:42 +0300 Subject: [PATCH 137/382] atm: iphase: fix a 64 bit bug The code assumes that there is 4 bytes in a pointer and it doesn't allocate enough memory. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/atm/iphase.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index ff81a576347e..82532c299bb5 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1618,7 +1618,7 @@ static int rx_init(struct atm_dev *dev) skb_queue_head_init(&iadev->rx_dma_q); iadev->rx_free_desc_qhead = NULL; - iadev->rx_open = kcalloc(4, iadev->num_vc, GFP_KERNEL); + iadev->rx_open = kcalloc(iadev->num_vc, sizeof(void *), GFP_KERNEL); if (!iadev->rx_open) { printk(KERN_ERR DEV_LABEL "itf %d couldn't get free page\n", dev->number); From 5037c6280606396ab4d3065b066d4b574df020dc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Jun 2018 12:31:25 +0300 Subject: [PATCH 138/382] cnic: tidy up a size calculation Static checkers complain that id_tbl->table points to longs and 4 bytes is smaller than sizeof(long). But the since other side is dividing by 32 instead of sizeof(long), that means the current code works fine. Anyway, it's more conventional to use the BITS_TO_LONGS() macro when we're allocating a bitmap. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/cnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 30273a7717e2..4fd829b5e65d 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -660,7 +660,7 @@ static int cnic_init_id_tbl(struct cnic_id_tbl *id_tbl, u32 size, u32 start_id, id_tbl->max = size; id_tbl->next = next; spin_lock_init(&id_tbl->lock); - id_tbl->table = kcalloc(DIV_ROUND_UP(size, 32), 4, GFP_KERNEL); + id_tbl->table = kcalloc(BITS_TO_LONGS(size), sizeof(long), GFP_KERNEL); if (!id_tbl->table) return -ENOMEM; From 484c016d9392786ce5c74017c206c706f29f823d Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Thu, 28 Jun 2018 04:52:15 -0700 Subject: [PATCH 139/382] bnx2x: Fix receiving tx-timeout in error or recovery state. Driver performs the internal reload when it receives tx-timeout event from the OS. Internal reload might fail in some scenarios e.g., fatal HW issues. In such cases OS still see the link, which would result in undesirable functionalities such as re-generation of tx-timeouts. The patch addresses this issue by indicating the link-down to OS when tx-timeout is detected, and keeping the link in down state till the internal reload is successful. Please consider applying it to 'net' branch. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 1 + drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 6 ++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 6 ++++++ 3 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index d847e1b9c37b..be1506169076 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1533,6 +1533,7 @@ struct bnx2x { struct link_vars link_vars; u32 link_cnt; struct bnx2x_link_report_data last_reported_link; + bool force_link_down; struct mdio_if_info mdio; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 8cd73ff5debc..af7b5a4d8ba0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -1261,6 +1261,11 @@ void __bnx2x_link_report(struct bnx2x *bp) { struct bnx2x_link_report_data cur_data; + if (bp->force_link_down) { + bp->link_vars.link_up = 0; + return; + } + /* reread mf_cfg */ if (IS_PF(bp) && !CHIP_IS_E1(bp)) bnx2x_read_mf_cfg(bp); @@ -2817,6 +2822,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) bp->pending_max = 0; } + bp->force_link_down = false; if (bp->port.pmf) { rc = bnx2x_initial_phy_init(bp, load_mode); if (rc) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 5b1ed240bf18..57348f2b49a3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -10279,6 +10279,12 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work) bp->sp_rtnl_state = 0; smp_mb(); + /* Immediately indicate link as down */ + bp->link_vars.link_up = 0; + bp->force_link_down = true; + netif_carrier_off(bp->dev); + BNX2X_ERR("Indicating link is down due to Tx-timeout\n"); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); /* When ret value shows failure of allocation failure, * the nic is rebooted again. If open still fails, a error From b95f6fbc8e15803a596ca5e5e21008fba29694c6 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 28 Jun 2018 15:26:50 +0300 Subject: [PATCH 140/382] fsl/fman: fix parser reporting bad checksum on short frames The FMan hardware parser needs to be configured to remove the short frame padding from the checksum calculation, otherwise short UDP and TCP frames are likely to be marked as having a bad checksum. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_port.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index ce6e24c74978..ecbf6187e13a 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -324,6 +324,10 @@ struct fman_port_qmi_regs { #define HWP_HXS_PHE_REPORT 0x00000800 #define HWP_HXS_PCAC_PSTAT 0x00000100 #define HWP_HXS_PCAC_PSTOP 0x00000001 +#define HWP_HXS_TCP_OFFSET 0xA +#define HWP_HXS_UDP_OFFSET 0xB +#define HWP_HXS_SH_PAD_REM 0x80000000 + struct fman_port_hwp_regs { struct { u32 ssa; /* Soft Sequence Attachment */ @@ -728,6 +732,10 @@ static void init_hwp(struct fman_port *port) iowrite32be(0xffffffff, ®s->pmda[i].lcv); } + /* Short packet padding removal from checksum calculation */ + iowrite32be(HWP_HXS_SH_PAD_REM, ®s->pmda[HWP_HXS_TCP_OFFSET].ssa); + iowrite32be(HWP_HXS_SH_PAD_REM, ®s->pmda[HWP_HXS_UDP_OFFSET].ssa); + start_port_hwp(port); } From 595e802e53f24642a145cf7f3e4ac9afab4c21ec Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Thu, 28 Jun 2018 15:26:51 +0300 Subject: [PATCH 141/382] dpaa_eth: DPAA SGT needs to be 256B The DPAA HW requires that at least 256 bytes from the start of the first scatter-gather table entry are allocated and accessible. The hardware reads the maximum size the table can have in one access, thus requiring that the allocation and mapping to be done for the maximum size of 256B even if there is a smaller number of entries in the table. Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 5f4e1ffa7b95..ab02057ac730 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -125,6 +125,9 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms"); /* Default alignment for start of data in an Rx FD */ #define DPAA_FD_DATA_ALIGNMENT 16 +/* The DPAA requires 256 bytes reserved and mapped for the SGT */ +#define DPAA_SGT_SIZE 256 + /* Values for the L3R field of the FM Parse Results */ /* L3 Type field: First IP Present IPv4 */ @@ -1617,8 +1620,8 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv, if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) { nr_frags = skb_shinfo(skb)->nr_frags; - dma_unmap_single(dev, addr, qm_fd_get_offset(fd) + - sizeof(struct qm_sg_entry) * (1 + nr_frags), + dma_unmap_single(dev, addr, + qm_fd_get_offset(fd) + DPAA_SGT_SIZE, dma_dir); /* The sgt buffer has been allocated with netdev_alloc_frag(), @@ -1903,8 +1906,7 @@ static int skb_to_sg_fd(struct dpaa_priv *priv, void *sgt_buf; /* get a page frag to store the SGTable */ - sz = SKB_DATA_ALIGN(priv->tx_headroom + - sizeof(struct qm_sg_entry) * (1 + nr_frags)); + sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE); sgt_buf = netdev_alloc_frag(sz); if (unlikely(!sgt_buf)) { netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n", @@ -1972,9 +1974,8 @@ static int skb_to_sg_fd(struct dpaa_priv *priv, skbh = (struct sk_buff **)buffer_start; *skbh = skb; - addr = dma_map_single(dev, buffer_start, priv->tx_headroom + - sizeof(struct qm_sg_entry) * (1 + nr_frags), - dma_dir); + addr = dma_map_single(dev, buffer_start, + priv->tx_headroom + DPAA_SGT_SIZE, dma_dir); if (unlikely(dma_mapping_error(dev, addr))) { dev_err(dev, "DMA mapping failed"); err = -EINVAL; From d14b56f508ad70eca3e659545aab3c45200f258c Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 28 Jun 2018 17:53:06 +0200 Subject: [PATCH 142/382] net: cleanup gfp mask in alloc_skb_with_frags alloc_skb_with_frags uses __GFP_NORETRY for non-sleeping allocations which is just a noop and a little bit confusing. __GFP_NORETRY was added by ed98df3361f0 ("net: use __GFP_NORETRY for high order allocations") to prevent from the OOM killer. Yet this was not enough because fb05e7a89f50 ("net: don't wait for order-3 page allocation") didn't want an excessive reclaim for non-costly orders so it made it completely NOWAIT while it preserved __GFP_NORETRY in place which is now redundant. Drop the pointless __GFP_NORETRY because this function is used as copy&paste source for other places. Reviewed-by: Eric Dumazet Signed-off-by: Michal Hocko Signed-off-by: David S. Miller --- net/core/skbuff.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c642304f178c..eba8dae22c25 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5276,8 +5276,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, if (npages >= 1 << order) { page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | - __GFP_NOWARN | - __GFP_NORETRY, + __GFP_NOWARN, order); if (page) goto fill_page; From e7c7faa936680a2f0e78fc6d9683a5728421a150 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 28 Jun 2018 13:36:55 -0700 Subject: [PATCH 143/382] net/ipv6: Fix updates to prefix route Sowmini reported that a recent commit broke prefix routes for linklocal addresses. The newly added modify_prefix_route is attempting to add a new prefix route when the ifp priority does not match the route metric however the check needs to account for the default priority. In addition, the route add fails because the route already exists, and then the delete removes the one that exists. Flip the order to do the delete first. Fixes: 8308f3ff1753 ("net/ipv6: Add support for specifying metric of connected routes") Reported-by: Sowmini Varadhan Tested-by: Sowmini Varadhan Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c134286d6a41..91580c62bb86 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4528,6 +4528,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, u32 flags) { struct fib6_info *f6i; + u32 prio; f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, @@ -4536,13 +4537,15 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, if (!f6i) return -ENOENT; - if (f6i->fib6_metric != ifp->rt_priority) { + prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF; + if (f6i->fib6_metric != prio) { + /* delete old one */ + ip6_del_rt(dev_net(ifp->idev->dev), f6i); + /* add new one */ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->rt_priority, ifp->idev->dev, expires, flags, GFP_KERNEL); - /* delete old one */ - ip6_del_rt(dev_net(ifp->idev->dev), f6i); } else { if (!expires) fib6_clean_expires(f6i); From 713b4a33524721767703956654f4d6713c556f2b Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Fri, 29 Jun 2018 10:28:31 -0500 Subject: [PATCH 144/382] net: phy: DP83TC811: Fix disabling interrupts Fix a bug where INT_STAT1 was written twice and INT_STAT2 was ignored when disabling interrupts. Fixes: b753a9faaf9a ("net: phy: DP83TC811: Introduce support for the DP83TC811 phy") Reviewed-by: Andrew Lunn Signed-off-by: Dan Murphy Signed-off-by: David S. Miller --- drivers/net/phy/dp83tc811.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index 081d99aa3985..49ac678eb2dc 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -222,7 +222,7 @@ static int dp83811_config_intr(struct phy_device *phydev) if (err < 0) return err; - err = phy_write(phydev, MII_DP83811_INT_STAT1, 0); + err = phy_write(phydev, MII_DP83811_INT_STAT2, 0); } return err; From cafb39600e7a73263122a0e2db052d691686378f Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Fri, 29 Jun 2018 16:29:28 +0100 Subject: [PATCH 145/382] sfc: correctly initialise filter rwsem for farch Fixes: fc7a6c287ff3 ("sfc: use a semaphore to lock farch filters too") Suggested-by: Joseph Korty Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/farch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 8edf20967c82..e045a5d6b938 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -2794,6 +2794,7 @@ int efx_farch_filter_table_probe(struct efx_nic *efx) if (!state) return -ENOMEM; efx->filter_state = state; + init_rwsem(&state->lock); table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP]; table->id = EFX_FARCH_FILTER_TABLE_RX_IP; From bc800e8b39bad60ccdb83be828da63af71ab87b3 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 29 Jun 2018 17:51:26 +0200 Subject: [PATCH 146/382] alx: take rtnl before calling __alx_open from resume The __alx_open function can be called from ndo_open, which is called under RTNL, or from alx_resume, which isn't. Since commit d768319cd427, we're calling the netif_set_real_num_{tx,rx}_queues functions, which need to be called under RTNL. This is similar to commit 0c2cc02e571a ("igb: Move the calls to set the Tx and Rx queues into igb_open"). Fixes: d768319cd427 ("alx: enable multiple tx queues") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 567ee54504bc..5e5022fa1d04 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1897,13 +1897,19 @@ static int alx_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct alx_priv *alx = pci_get_drvdata(pdev); struct alx_hw *hw = &alx->hw; + int err; alx_reset_phy(hw); if (!netif_running(alx->dev)) return 0; netif_device_attach(alx->dev); - return __alx_open(alx, true); + + rtnl_lock(); + err = __alx_open(alx, true); + rtnl_unlock(); + + return err; } static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); From 4664610537d398d55be19432f9cd9c29c831e159 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 29 Jun 2018 19:45:50 +0200 Subject: [PATCH 147/382] Revert "s390/qeth: use Read device to query hypervisor for MAC" This reverts commit b7493e91c11a757cf0f8ab26989642ee4bb2c642. On its own, querying RDEV for a MAC address works fine. But when upgrading from a qeth that previously queried DDEV on a z/VM NIC (ie. any kernel with commit ec61bd2fd2a2), the RDEV query now returns a _different_ MAC address than the DDEV query. If the NIC is configured with MACPROTECT, z/VM apparently requires us to use the MAC that was initially returned (on DDEV) and registered. So after upgrading to a kernel that uses RDEV, the SETVMAC registration cmd for the new MAC address fails and we end up with a non-operabel interface. To avoid regressions on upgrade, switch back to using DDEV for the MAC address query. The downgrade path (first RDEV, later DDEV) is fine, in this case both queries return the same MAC address. Fixes: b7493e91c11a ("s390/qeth: use Read device to query hypervisor for MAC") Reported-by: Michal Kubecek Tested-by: Karsten Graul Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 8e1474f1ffac..9d9182ed8ac4 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -4834,7 +4834,7 @@ int qeth_vm_request_mac(struct qeth_card *card) goto out; } - ccw_device_get_id(CARD_RDEV(card), &id); + ccw_device_get_id(CARD_DDEV(card), &id); request->resp_buf_len = sizeof(*response); request->resp_version = DIAG26C_VERSION2; request->op_code = DIAG26C_GET_MAC; From 4789a21880488048105590049fc41a99f53d565d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 29 Jun 2018 19:45:51 +0200 Subject: [PATCH 148/382] s390/qeth: fix race when setting MAC address When qeth_l2_set_mac_address() finds the card in a non-reachable state, it merely copies the new MAC address into dev->dev_addr so that __qeth_l2_set_online() can later register it with the HW. But __qeth_l2_set_online() may very well be running concurrently, so we can't trust the card state without appropriate locking: If the online sequence is past the point where it registers dev->dev_addr (but not yet in SOFTSETUP state), any address change needs to be properly programmed into the HW. Otherwise the netdevice ends up with a different MAC address than what's set in the HW, and inbound traffic is not forwarded as expected. This is most likely to occur for OSD in LPAR, where commit 21b1702af12e ("s390/qeth: improve fallback to random MAC address") now triggers eg. systemd to immediately change the MAC when the netdevice is registered with a NET_ADDR_RANDOM address. Fixes: bcacfcbc82b4 ("s390/qeth: fix MAC address update sequence") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index a7cb37da6a21..7daf125dae76 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -501,27 +501,34 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p) return -ERESTARTSYS; } + /* avoid racing against concurrent state change: */ + if (!mutex_trylock(&card->conf_mutex)) + return -EAGAIN; + if (!qeth_card_hw_is_reachable(card)) { ether_addr_copy(dev->dev_addr, addr->sa_data); - return 0; + goto out_unlock; } /* don't register the same address twice */ if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) - return 0; + goto out_unlock; /* add the new address, switch over, drop the old */ rc = qeth_l2_send_setmac(card, addr->sa_data); if (rc) - return rc; + goto out_unlock; ether_addr_copy(old_addr, dev->dev_addr); ether_addr_copy(dev->dev_addr, addr->sa_data); if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) qeth_l2_remove_mac(card, old_addr); card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; - return 0; + +out_unlock: + mutex_unlock(&card->conf_mutex); + return rc; } static void qeth_promisc_to_bridge(struct qeth_card *card) From 9d0a58fb9747afd27d490c02a97889a1b59f6be4 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 29 Jun 2018 19:45:52 +0200 Subject: [PATCH 149/382] s390/qeth: avoid using is_multicast_ether_addr_64bits on (u8 *)[6] *ether_addr*_64bits functions have been introduced to optimize performance critical paths, which access 6-byte ethernet address as u64 value to get "nice" assembly. A harmless hack works nicely on ethernet addresses shoved into a structure or a larger buffer, until busted by Kasan on smth like plain (u8 *)[6]. qeth_l2_set_mac_address calls qeth_l2_remove_mac passing u8 old_addr[ETH_ALEN] as an argument. Adding/removing macs for an ethernet adapter is not that performance critical. Moreover is_multicast_ether_addr_64bits itself on s390 is not faster than is_multicast_ether_addr: is_multicast_ether_addr(%r2) -> %r2 llc %r2,0(%r2) risbg %r2,%r2,63,191,0 is_multicast_ether_addr_64bits(%r2) -> %r2 llgc %r2,0(%r2) risbg %r2,%r2,63,191,0 So, let's just use is_multicast_ether_addr instead of is_multicast_ether_addr_64bits. Fixes: bcacfcbc82b4 ("s390/qeth: fix MAC address update sequence") Reviewed-by: Julian Wiedmann Signed-off-by: Vasily Gorbik Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7daf125dae76..5464515b71f1 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -140,7 +140,7 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac) static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac) { - enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? + enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ? IPA_CMD_SETGMAC : IPA_CMD_SETVMAC; int rc; @@ -157,7 +157,7 @@ static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac) static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac) { - enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? + enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ? IPA_CMD_DELGMAC : IPA_CMD_DELVMAC; int rc; From ce28867fd20c23cd769e78b4d619c4755bf71a1c Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 29 Jun 2018 19:45:53 +0200 Subject: [PATCH 150/382] s390/qeth: don't clobber buffer on async TX completion If qeth_qdio_output_handler() detects that a transmit requires async completion, it replaces the pending buffer's metadata object (qeth_qdio_out_buffer) so that this queue buffer can be re-used while the data is pending completion. Later when the CQ indicates async completion of such a metadata object, qeth_qdio_cq_handler() tries to free any data associated with this object (since HW has now completed the transfer). By calling qeth_clear_output_buffer(), it erronously operates on the queue buffer that _previously_ belonged to this transfer ... but which has been potentially re-used several times by now. This results in double-free's of the buffer's data, and failing transmits as the buffer descriptor is scrubbed in mid-air. The correct way of handling this situation is to 1. scrub the queue buffer when it is prepared for re-use, and 2. later obtain the data addresses from the async-completion notifier (ie. the AOB), instead of the queue buffer. All this only affects qeth devices used for af_iucv HiperTransport. Fixes: 0da9581ddb0f ("qeth: exploit asynchronous delivery of storage blocks") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 11 +++++++++++ drivers/s390/net/qeth_core_main.c | 22 ++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 2a5fec55bf60..940fd7b558d3 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -829,6 +829,17 @@ struct qeth_trap_id { /*some helper functions*/ #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "") +static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf, + unsigned int elements) +{ + unsigned int i; + + for (i = 0; i < elements; i++) + memset(&buf->element[i], 0, sizeof(struct qdio_buffer_element)); + buf->element[14].sflags = 0; + buf->element[15].sflags = 0; +} + /** * qeth_get_elements_for_range() - find number of SBALEs to cover range. * @start: Start of the address range. diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 9d9182ed8ac4..d20a69a3bc40 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -73,9 +73,6 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *queue, struct qeth_qdio_out_buffer *buf, enum iucv_tx_notify notification); static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); -static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, - struct qeth_qdio_out_buffer *buf, - enum qeth_qdio_buffer_states newbufstate); static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); struct workqueue_struct *qeth_wq; @@ -489,6 +486,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, struct qaob *aob; struct qeth_qdio_out_buffer *buffer; enum iucv_tx_notify notification; + unsigned int i; aob = (struct qaob *) phys_to_virt(phys_aob_addr); QETH_CARD_TEXT(card, 5, "haob"); @@ -513,10 +511,18 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, qeth_notify_skbs(buffer->q, buffer, notification); buffer->aob = NULL; - qeth_clear_output_buffer(buffer->q, buffer, - QETH_QDIO_BUF_HANDLED_DELAYED); + /* Free dangling allocations. The attached skbs are handled by + * qeth_cleanup_handled_pending(). + */ + for (i = 0; + i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); + i++) { + if (aob->sba[i] && buffer->is_header[i]) + kmem_cache_free(qeth_core_header_cache, + (void *) aob->sba[i]); + } + atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED); - /* from here on: do not touch buffer anymore */ qdio_release_aob(aob); } @@ -3759,6 +3765,10 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev, QETH_CARD_TEXT(queue->card, 5, "aob"); QETH_CARD_TEXT_(queue->card, 5, "%lx", virt_to_phys(buffer->aob)); + + /* prepare the queue slot for re-use: */ + qeth_scrub_qdio_buffer(buffer->buffer, + QETH_MAX_BUFFER_ELEMENTS(card)); if (qeth_init_qdio_out_buf(queue, bidx)) { QETH_CARD_TEXT(card, 2, "outofbuf"); qeth_schedule_recovery(card); From d025da9eb1e48d3e5f2a2ff13ac5ac536ba4be43 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 29 Jun 2018 19:45:54 +0200 Subject: [PATCH 151/382] s390/qeth: consistently re-enable device features commit e830baa9c3f0 ("qeth: restore device features after recovery") and commit ce3443564145 ("s390/qeth: rely on kernel for feature recovery") made sure that the HW functions for device features get re-programmed after recovery. But we missed that the same handling is also required when a card is first set offline (destroying all HW context), and then online again. Fix this by moving the re-enable action out of the recovery-only path. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core.h | 2 +- drivers/s390/net/qeth_core_main.c | 23 +++++++++++------------ drivers/s390/net/qeth_l2_main.c | 5 ++--- drivers/s390/net/qeth_l3_main.c | 3 ++- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 940fd7b558d3..a246a618f9a4 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -1040,7 +1040,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, __u16, __u16, enum qeth_prot_versions); int qeth_set_features(struct net_device *, netdev_features_t); -void qeth_recover_features(struct net_device *dev); +void qeth_enable_hw_features(struct net_device *dev); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index d20a69a3bc40..d01ac29fd986 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6469,28 +6469,27 @@ static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on) #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \ NETIF_F_IPV6_CSUM) /** - * qeth_recover_features() - Restore device features after recovery - * @dev: the recovering net_device - * - * Caller must hold rtnl lock. + * qeth_enable_hw_features() - (Re-)Enable HW functions for device features + * @dev: a net_device */ -void qeth_recover_features(struct net_device *dev) +void qeth_enable_hw_features(struct net_device *dev) { - netdev_features_t features = dev->features; struct qeth_card *card = dev->ml_priv; + netdev_features_t features; + rtnl_lock(); + features = dev->features; /* force-off any feature that needs an IPA sequence. * netdev_update_features() will restart them. */ dev->features &= ~QETH_HW_FEATURES; netdev_update_features(dev); - - if (features == dev->features) - return; - dev_warn(&card->gdev->dev, - "Device recovery failed to restore all offload features\n"); + if (features != dev->features) + dev_warn(&card->gdev->dev, + "Device recovery failed to restore all offload features\n"); + rtnl_unlock(); } -EXPORT_SYMBOL_GPL(qeth_recover_features); +EXPORT_SYMBOL_GPL(qeth_enable_hw_features); int qeth_set_features(struct net_device *dev, netdev_features_t features) { diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 5464515b71f1..2487f0aeb165 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1119,6 +1119,8 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) netif_carrier_off(card->dev); qeth_set_allowed_threads(card, 0xffffffff, 0); + + qeth_enable_hw_features(card->dev); if (recover_flag == CARD_STATE_RECOVER) { if (recovery_mode && card->info.type != QETH_CARD_TYPE_OSN) { @@ -1130,9 +1132,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) } /* this also sets saved unicast addresses */ qeth_l2_set_rx_mode(card->dev); - rtnl_lock(); - qeth_recover_features(card->dev); - rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index e7fa479adf47..5905dc63e256 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2662,6 +2662,8 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) netif_carrier_on(card->dev); else netif_carrier_off(card->dev); + + qeth_enable_hw_features(card->dev); if (recover_flag == CARD_STATE_RECOVER) { rtnl_lock(); if (recovery_mode) @@ -2669,7 +2671,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) else dev_open(card->dev); qeth_l3_set_rx_mode(card->dev); - qeth_recover_features(card->dev); rtnl_unlock(); } qeth_trace_features(card); From ced9e191501e52b95e1b57b8e0db00943869eed0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 29 Jun 2018 13:28:07 -0500 Subject: [PATCH 152/382] atm: zatm: Fix potential Spectre v1 pool can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/atm/zatm.c:1491 zatm_ioctl() warn: potential spectre issue 'zatm_dev->pool_info' (local cap) Fix this by sanitizing pool before using it to index zatm_dev->pool_info Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index a8d2eb0ceb8d..2c288d1f42bb 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -1483,6 +1483,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) return -EFAULT; if (pool < 0 || pool > ZATM_LAST_POOL) return -EINVAL; + pool = array_index_nospec(pool, + ZATM_LAST_POOL + 1); if (copy_from_user(&info, &((struct zatm_pool_req __user *) arg)->info, sizeof(info))) return -EFAULT; From 3f76df198288ceec92fc9eddecad1e73c52769b0 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 29 Jun 2018 13:42:48 -0700 Subject: [PATCH 153/382] net: use dev_change_tx_queue_len() for SIOCSIFTXQLEN As noticed by Eric, we need to switch to the helper dev_change_tx_queue_len() for SIOCSIFTXQLEN call path too, otheriwse still miss dev_qdisc_change_tx_queue_len(). Fixes: 6a643ddb5624 ("net: introduce helper dev_change_tx_queue_len()") Reported-by: Eric Dumazet Signed-off-by: Cong Wang Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev_ioctl.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index a04e1e88bf3a..50537ff961a7 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) if (ifr->ifr_qlen < 0) return -EINVAL; if (dev->tx_queue_len ^ ifr->ifr_qlen) { - unsigned int orig_len = dev->tx_queue_len; - - dev->tx_queue_len = ifr->ifr_qlen; - err = call_netdevice_notifiers( - NETDEV_CHANGE_TX_QUEUE_LEN, dev); - err = notifier_to_errno(err); - if (err) { - dev->tx_queue_len = orig_len; + err = dev_change_tx_queue_len(dev, ifr->ifr_qlen); + if (err) return err; - } } return 0; From 3ffe64f1a641b80a82d9ef4efa7a05ce69049871 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 29 Jun 2018 14:07:16 -0700 Subject: [PATCH 154/382] hv_netvsc: split sub-channel setup into async and sync When doing device hotplug the sub channel must be async to avoid deadlock issues because device is discovered in softirq context. When doing changes to MTU and number of channels, the setup must be synchronous to avoid races such as when MTU and device settings are done in a single ip command. Reported-by: Thomas Walker Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug") Fixes: 732e49850c5e ("netvsc: fix race on sub channel creation") Signed-off-by: Stephen Hemminger Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 +- drivers/net/hyperv/netvsc.c | 37 ++++++++++++++++++- drivers/net/hyperv/netvsc_drv.c | 17 ++++++++- drivers/net/hyperv/rndis_filter.c | 61 ++++++------------------------- 4 files changed, 65 insertions(+), 52 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 1a924b867b07..4b6e308199d2 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -210,7 +210,7 @@ int netvsc_recv_callback(struct net_device *net, void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); -void rndis_set_subchannel(struct work_struct *w); +int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5d5bd513847f..8e9d0ee1572b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -65,6 +65,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) VM_PKT_DATA_INBAND, 0); } +/* Worker to setup sub channels on initial setup + * Initial hotplug event occurs in softirq context + * and can't wait for channels. + */ +static void netvsc_subchan_work(struct work_struct *w) +{ + struct netvsc_device *nvdev = + container_of(w, struct netvsc_device, subchan_work); + struct rndis_device *rdev; + int i, ret; + + /* Avoid deadlock with device removal already under RTNL */ + if (!rtnl_trylock()) { + schedule_work(w); + return; + } + + rdev = nvdev->extension; + if (rdev) { + ret = rndis_set_subchannel(rdev->ndev, nvdev); + if (ret == 0) { + netif_device_attach(rdev->ndev); + } else { + /* fallback to only primary channel */ + for (i = 1; i < nvdev->num_chn; i++) + netif_napi_del(&nvdev->chan_table[i].napi); + + nvdev->max_chn = 1; + nvdev->num_chn = 1; + } + } + + rtnl_unlock(); +} + static struct netvsc_device *alloc_net_device(void) { struct netvsc_device *net_device; @@ -81,7 +116,7 @@ static struct netvsc_device *alloc_net_device(void) init_completion(&net_device->channel_init_wait); init_waitqueue_head(&net_device->subchan_open); - INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); + INIT_WORK(&net_device->subchan_work, netvsc_subchan_work); return net_device; } diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index fe2256bf1d13..dd1d6e115145 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -905,8 +905,20 @@ static int netvsc_attach(struct net_device *ndev, if (IS_ERR(nvdev)) return PTR_ERR(nvdev); - /* Note: enable and attach happen when sub-channels setup */ + if (nvdev->num_chn > 1) { + ret = rndis_set_subchannel(ndev, nvdev); + /* if unavailable, just proceed with one queue */ + if (ret) { + nvdev->max_chn = 1; + nvdev->num_chn = 1; + } + } + + /* In any case device is now ready */ + netif_device_attach(ndev); + + /* Note: enable and attach happen when sub-channels setup */ netif_carrier_off(ndev); if (netif_running(ndev)) { @@ -2089,6 +2101,9 @@ static int netvsc_probe(struct hv_device *dev, memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + if (nvdev->num_chn > 1) + schedule_work(&nvdev->subchan_work); + /* hw_features computed in rndis_netdev_set_hwcaps() */ net->features = net->hw_features | NETIF_F_HIGHDMA | NETIF_F_SG | diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 5428bb261102..9b4e3c3787e5 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1062,29 +1062,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) * This breaks overlap of processing the host message for the * new primary channel with the initialization of sub-channels. */ -void rndis_set_subchannel(struct work_struct *w) +int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev) { - struct netvsc_device *nvdev - = container_of(w, struct netvsc_device, subchan_work); struct nvsp_message *init_packet = &nvdev->channel_init_pkt; - struct net_device_context *ndev_ctx; - struct rndis_device *rdev; - struct net_device *ndev; - struct hv_device *hv_dev; + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct hv_device *hv_dev = ndev_ctx->device_ctx; + struct rndis_device *rdev = nvdev->extension; int i, ret; - if (!rtnl_trylock()) { - schedule_work(w); - return; - } - - rdev = nvdev->extension; - if (!rdev) - goto unlock; /* device was removed */ - - ndev = rdev->ndev; - ndev_ctx = netdev_priv(ndev); - hv_dev = ndev_ctx->device_ctx; + ASSERT_RTNL(); memset(init_packet, 0, sizeof(struct nvsp_message)); init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; @@ -1100,13 +1086,13 @@ void rndis_set_subchannel(struct work_struct *w) VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); if (ret) { netdev_err(ndev, "sub channel allocate send failed: %d\n", ret); - goto failed; + return ret; } wait_for_completion(&nvdev->channel_init_wait); if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { netdev_err(ndev, "sub channel request failed\n"); - goto failed; + return -EIO; } nvdev->num_chn = 1 + @@ -1125,21 +1111,7 @@ void rndis_set_subchannel(struct work_struct *w) for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) ndev_ctx->tx_table[i] = i % nvdev->num_chn; - netif_device_attach(ndev); - rtnl_unlock(); - return; - -failed: - /* fallback to only primary channel */ - for (i = 1; i < nvdev->num_chn; i++) - netif_napi_del(&nvdev->chan_table[i].napi); - - nvdev->max_chn = 1; - nvdev->num_chn = 1; - - netif_device_attach(ndev); -unlock: - rtnl_unlock(); + return 0; } static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, @@ -1360,21 +1332,12 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, netif_napi_add(net, &net_device->chan_table[i].napi, netvsc_poll, NAPI_POLL_WEIGHT); - if (net_device->num_chn > 1) - schedule_work(&net_device->subchan_work); + return net_device; out: - /* if unavailable, just proceed with one queue */ - if (ret) { - net_device->max_chn = 1; - net_device->num_chn = 1; - } - - /* No sub channels, device is ready */ - if (net_device->num_chn == 1) - netif_device_attach(net); - - return net_device; + /* setting up multiple channels failed */ + net_device->max_chn = 1; + net_device->num_chn = 1; err_dev_remv: rndis_filter_device_remove(dev, net_device); From 35e8c7ba0863acadd4b9badd09740af7a8331125 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 29 Jun 2018 14:32:15 -0700 Subject: [PATCH 155/382] net: fib_rules: bring back rule_exists to match rule during add After commit f9d4b0c1e969 ("fib_rules: move common handling of newrule delrule msgs into fib_nl2rule"), rule_exists got replaced by rule_find for existing rule lookup in both the add and del paths. While this is good for the delete path, it solves a few problems but opens up a few invalid key matches in the add path. $ip -4 rule add table main tos 10 fwmark 1 $ip -4 rule add table main tos 10 RTNETLINK answers: File exists The problem here is rule_find does not check if the key masks in the new and old rule are the same and hence ends up matching a more secific rule. Rule key masks cannot be easily compared today without an elaborate if-else block. Its best to introduce key masks for easier and accurate rule comparison in the future. Until then, due to fear of regressions this patch re-introduces older loose rule_exists during add. Also fixes both rule_exists and rule_find to cover missing attributes. Fixes: f9d4b0c1e969 ("fib_rules: move common handling of newrule delrule msgs into fib_nl2rule") Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/core/fib_rules.c | 72 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index bc8425d81022..f64aa13811ea 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -444,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops, if (rule->ip_proto && r->ip_proto != rule->ip_proto) continue; + if (rule->proto && r->proto != rule->proto) + continue; + if (fib_rule_port_range_set(&rule->sport_range) && !fib_rule_port_range_compare(&r->sport_range, &rule->sport_range)) @@ -653,6 +656,73 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } +static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, + struct nlattr **tb, struct fib_rule *rule) +{ + struct fib_rule *r; + + list_for_each_entry(r, &ops->rules_list, list) { + if (r->action != rule->action) + continue; + + if (r->table != rule->table) + continue; + + if (r->pref != rule->pref) + continue; + + if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) + continue; + + if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) + continue; + + if (r->mark != rule->mark) + continue; + + if (r->suppress_ifgroup != rule->suppress_ifgroup) + continue; + + if (r->suppress_prefixlen != rule->suppress_prefixlen) + continue; + + if (r->mark_mask != rule->mark_mask) + continue; + + if (r->tun_id != rule->tun_id) + continue; + + if (r->fr_net != rule->fr_net) + continue; + + if (r->l3mdev != rule->l3mdev) + continue; + + if (!uid_eq(r->uid_range.start, rule->uid_range.start) || + !uid_eq(r->uid_range.end, rule->uid_range.end)) + continue; + + if (r->ip_proto != rule->ip_proto) + continue; + + if (r->proto != rule->proto) + continue; + + if (!fib_rule_port_range_compare(&r->sport_range, + &rule->sport_range)) + continue; + + if (!fib_rule_port_range_compare(&r->dport_range, + &rule->dport_range)) + continue; + + if (!ops->compare(r, frh, tb)) + continue; + return 1; + } + return 0; +} + int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -687,7 +757,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; if ((nlh->nlmsg_flags & NLM_F_EXCL) && - rule_find(ops, frh, tb, rule, user_priority)) { + rule_exists(ops, frh, tb, rule)) { err = -EEXIST; goto errout_free; } From c1985cefd844e26bd19673a6df8d8f0b1918c2db Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 28 Jun 2018 09:56:55 -0700 Subject: [PATCH 156/382] acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always return a value cmd_rc is passed in by reference to the acpi_nfit_ctl() function and the caller expects a value returned. However, when the package is pass through via the ND_CMD_CALL command, cmd_rc is not touched. Make sure cmd_rc is always set. Fixes: aef253382266 ("libnvdimm, nfit: centralize command status translation") Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index d15814e1727f..471402cee1f1 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -408,6 +408,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, const guid_t *guid; int rc, i; + *cmd_rc = -EINVAL; func = cmd; if (cmd == ND_CMD_CALL) { call_pkg = buf; @@ -518,6 +519,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, * If we return an error (like elsewhere) then caller wouldn't * be able to rely upon data returned to make calculation. */ + *cmd_rc = 0; return 0; } From 1273c253c32b9a073a4d8921ed079177ccc7c8af Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Mon, 18 Jun 2018 17:17:02 -0600 Subject: [PATCH 157/382] tools/testing/nvdimm: advertise a write cache for nfit_test Commit 546eb0317cfa "libnvdimm, pmem: Do not flush power-fail protected CPU caches" fixed the write_cache detection to correctly show the lack of a write cache based on the platform capabilities described in the ACPI NFIT. The nfit_test unit tests expected a write cache to be present, so change the nfit test namespaces to only advertise a persistence domain limited to the memory controller. This allows the kernel to show a write_cache attribute, and the test behaviour remains unchanged. Signed-off-by: Vishal Verma Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a8fb63edcf89..e2926f72a821 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1991,8 +1991,7 @@ static void nfit_test0_setup(struct nfit_test *t) pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; pcap->header.length = sizeof(*pcap); pcap->highest_capability = 1; - pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | - ACPI_NFIT_CAPABILITY_MEM_FLUSH; + pcap->capabilities = ACPI_NFIT_CAPABILITY_MEM_FLUSH; offset += pcap->header.length; if (t->setup_hotplug) { From bb94b55af3461e26b32f0e23d455abeae0cfca5d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 29 Jun 2018 11:31:50 -0600 Subject: [PATCH 158/382] vfio: Use get_user_pages_longterm correctly The patch noted in the fixes below converted get_user_pages_fast() to get_user_pages_longterm(), however the two calls differ in a few ways. First _fast() is documented to not require the mmap_sem, while _longterm() is documented to need it. Hold the mmap sem as required. Second, _fast accepts an 'int write' while _longterm uses 'unsigned int gup_flags', so the expression '!!(prot & IOMMU_WRITE)' is only working by luck as FOLL_WRITE is currently == 0x1. Use the expected FOLL_WRITE constant instead. Fixes: 94db151dc892 ("vfio: disable filesystem-dax page pinning") Cc: Signed-off-by: Jason Gunthorpe Acked-by: Dan Williams Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 2c75b33db4ac..3e5b17710a4f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -343,18 +343,16 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, struct page *page[1]; struct vm_area_struct *vma; struct vm_area_struct *vmas[1]; + unsigned int flags = 0; int ret; + if (prot & IOMMU_WRITE) + flags |= FOLL_WRITE; + + down_read(&mm->mmap_sem); if (mm == current->mm) { - ret = get_user_pages_longterm(vaddr, 1, !!(prot & IOMMU_WRITE), - page, vmas); + ret = get_user_pages_longterm(vaddr, 1, flags, page, vmas); } else { - unsigned int flags = 0; - - if (prot & IOMMU_WRITE) - flags |= FOLL_WRITE; - - down_read(&mm->mmap_sem); ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page, vmas, NULL); /* @@ -368,8 +366,8 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, ret = -EOPNOTSUPP; put_page(page[0]); } - up_read(&mm->mmap_sem); } + up_read(&mm->mmap_sem); if (ret == 1) { *pfn = page_to_pfn(page[0]); From 26112ddc254c98681b224aa9ededefc01b6e02d2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 30 Jun 2018 23:19:33 +0200 Subject: [PATCH 159/382] PCI / ACPI / PM: Resume bridges w/o drivers on suspend-to-RAM It is reported that commit c62ec4610c40 (PM / core: Fix direct_complete handling for devices with no callbacks) introduced a system suspend regression on Samsung 305V4A by allowing a PCI bridge (not a PCIe port) to stay in D3 over suspend-to-RAM, which is a side effect of setting power.direct_complete for the children of that bridge that have no PM callbacks. On the majority of systems PCI bridges are not allowed to be runtime-suspended (the power/control sysfs attribute is set to "on" for them by default), but user space can change that setting and if it does so and a given bridge has no children with PM callbacks, the direct_complete optimization will be applied to it and it will stay in suspend over system suspend. Apparently, that confuses the platform firmware on the affected machine and that may very well happen elsewhere, so avoid the direct_complete optimization for PCI bridges with no drivers (if there is a driver, it should take care of the PM handling) on suspend-to-RAM altogether (that should not matter for suspend-to-idle as platform firmware is not involved in it). Fixes: c62ec4610c40 (PM / core: Fix direct_complete handling for devices with no callbacks) Link: https://bugzilla.kernel.org/show_bug.cgi?id=199941 Reported-by: n0000b.n000b@gmail.com Tested-by: n0000b.n000b@gmail.com Reviewed-by: Mika Westerberg Acked-by: Bjorn Helgaas Cc: 4.15+ # 4.15+ Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 65113b6eed14..89ee6a2b6eb8 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -629,6 +629,18 @@ static bool acpi_pci_need_resume(struct pci_dev *dev) { struct acpi_device *adev = ACPI_COMPANION(&dev->dev); + /* + * In some cases (eg. Samsung 305V4A) leaving a bridge in suspend over + * system-wide suspend/resume confuses the platform firmware, so avoid + * doing that, unless the bridge has a driver that should take care of + * the PM handling. According to Section 16.1.6 of ACPI 6.2, endpoint + * devices are expected to be in D3 before invoking the S3 entry path + * from the firmware, so they should not be affected by this issue. + */ + if (pci_is_bridge(dev) && !dev->driver && + acpi_target_system_state() != ACPI_STATE_S0) + return true; + if (!adev || !acpi_device_power_manageable(adev)) return false; From a0d5f3b69af7733f3173a8e19d51f68a08017c76 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 30 Jun 2018 23:24:04 +0200 Subject: [PATCH 160/382] ACPICA: Drop leading newlines from error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 5088814a6e93 (ACPICA: AML parser: attempt to continue loading table after error) unintentionally added leading newlines to error messages emitted by ACPICA which caused unexpected things to be printed to the kernel log. Drop these newlines (which effectively reverts the part of commit 5088814a6e93 adding them). Fixes: 5088814a6e93 (ACPICA: AML parser: attempt to continue loading table after error) Reported-by: Toralf Förster Reported-by: Guenter Roeck Cc: 4.17+ # 4.17+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/uterror.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/acpica/uterror.c b/drivers/acpi/acpica/uterror.c index 5a64ddaed8a3..e47430272692 100644 --- a/drivers/acpi/acpica/uterror.c +++ b/drivers/acpi/acpica/uterror.c @@ -182,19 +182,19 @@ acpi_ut_prefixed_namespace_error(const char *module_name, switch (lookup_status) { case AE_ALREADY_EXISTS: - acpi_os_printf("\n" ACPI_MSG_BIOS_ERROR); + acpi_os_printf(ACPI_MSG_BIOS_ERROR); message = "Failure creating"; break; case AE_NOT_FOUND: - acpi_os_printf("\n" ACPI_MSG_BIOS_ERROR); + acpi_os_printf(ACPI_MSG_BIOS_ERROR); message = "Could not resolve"; break; default: - acpi_os_printf("\n" ACPI_MSG_ERROR); + acpi_os_printf(ACPI_MSG_ERROR); message = "Failure resolving"; break; } From 9901c5d77e969d8215a8e8d087ef02e6feddc84c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 30 Jun 2018 06:17:36 -0700 Subject: [PATCH 161/382] bpf: sockmap, fix crash when ipv6 sock is added This fixes a crash where we assign tcp_prot to IPv6 sockets instead of tcpv6_prot. Previously we overwrote the sk->prot field with tcp_prot even in the AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot are used. Tested with 'netserver -6' and 'netperf -H [IPv6]' as well as 'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously crashing case here. Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support") Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com Acked-by: Martin KaFai Lau Signed-off-by: John Fastabend Signed-off-by: Wei Wang Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 58 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 81d0c55a77aa..bfdfbd199c3b 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); static int bpf_tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); +static void bpf_tcp_close(struct sock *sk, long timeout); static inline struct smap_psock *smap_psock_sk(const struct sock *sk) { @@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk) return !empty; } -static struct proto tcp_bpf_proto; +enum { + SOCKMAP_IPV4, + SOCKMAP_IPV6, + SOCKMAP_NUM_PROTS, +}; + +enum { + SOCKMAP_BASE, + SOCKMAP_TX, + SOCKMAP_NUM_CONFIGS, +}; + +static struct proto *saved_tcpv6_prot __read_mostly; +static DEFINE_SPINLOCK(tcpv6_prot_lock); +static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS]; +static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS], + struct proto *base) +{ + prot[SOCKMAP_BASE] = *base; + prot[SOCKMAP_BASE].close = bpf_tcp_close; + prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg; + prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read; + + prot[SOCKMAP_TX] = prot[SOCKMAP_BASE]; + prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg; + prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage; +} + +static void update_sk_prot(struct sock *sk, struct smap_psock *psock) +{ + int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4; + int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE; + + sk->sk_prot = &bpf_tcp_prots[family][conf]; +} + static int bpf_tcp_init(struct sock *sk) { struct smap_psock *psock; @@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk) psock->save_close = sk->sk_prot->close; psock->sk_proto = sk->sk_prot; - if (psock->bpf_tx_msg) { - tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; - tcp_bpf_proto.sendpage = bpf_tcp_sendpage; - tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg; - tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read; + /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */ + if (sk->sk_family == AF_INET6 && + unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) { + spin_lock_bh(&tcpv6_prot_lock); + if (likely(sk->sk_prot != saved_tcpv6_prot)) { + build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot); + smp_store_release(&saved_tcpv6_prot, sk->sk_prot); + } + spin_unlock_bh(&tcpv6_prot_lock); } - - sk->sk_prot = &tcp_bpf_proto; + update_sk_prot(sk, psock); rcu_read_unlock(); return 0; } @@ -1111,8 +1150,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock, static int bpf_tcp_ulp_register(void) { - tcp_bpf_proto = tcp_prot; - tcp_bpf_proto.close = bpf_tcp_close; + build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot); /* Once BPF TX ULP is registered it is never unregistered. It * will be in the ULP list for the lifetime of the system. Doing * duplicate registers is not a problem. From 54fedb42c6537dcb0102e4a58a88456a6286999d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 30 Jun 2018 06:17:41 -0700 Subject: [PATCH 162/382] bpf: sockmap, fix smap_list_map_remove when psock is in many maps If a hashmap is free'd with open socks it removes the reference to the hash entry from the psock. If that is the last reference to the psock then it will also be free'd by the reference counting logic. However the current logic that removes the hash reference from the list of references is broken. In smap_list_remove() we first check if the sockmap entry matches and then check if the hashmap entry matches. But, the sockmap entry sill always match because its NULL in this case which causes the first entry to be removed from the list. If this is always the "right" entry (because the user adds/removes entries in order) then everything is OK but otherwise a subsequent bpf_tcp_close() may reference a free'd object. To fix this create two list handlers one for sockmap and one for sockhash. Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com Fixes: 81110384441a ("bpf: sockmap, add hash map support") Acked-by: Martin KaFai Lau Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index bfdfbd199c3b..65a937ed5762 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1602,17 +1602,27 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) return ERR_PTR(err); } -static void smap_list_remove(struct smap_psock *psock, - struct sock **entry, - struct htab_elem *hash_link) +static void smap_list_map_remove(struct smap_psock *psock, + struct sock **entry) { struct smap_psock_map_entry *e, *tmp; list_for_each_entry_safe(e, tmp, &psock->maps, list) { - if (e->entry == entry || e->hash_link == hash_link) { + if (e->entry == entry) + list_del(&e->list); + } +} + +static void smap_list_hash_remove(struct smap_psock *psock, + struct htab_elem *hash_link) +{ + struct smap_psock_map_entry *e, *tmp; + + list_for_each_entry_safe(e, tmp, &psock->maps, list) { + struct htab_elem *c = e->hash_link; + + if (c == hash_link) list_del(&e->list); - break; - } } } @@ -1647,7 +1657,7 @@ static void sock_map_free(struct bpf_map *map) * to be null and queued for garbage collection. */ if (likely(psock)) { - smap_list_remove(psock, &stab->sock_map[i], NULL); + smap_list_map_remove(psock, &stab->sock_map[i]); smap_release_sock(psock, sock); } write_unlock_bh(&sock->sk_callback_lock); @@ -1706,7 +1716,7 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key) if (psock->bpf_parse) smap_stop_sock(psock, sock); - smap_list_remove(psock, &stab->sock_map[k], NULL); + smap_list_map_remove(psock, &stab->sock_map[k]); smap_release_sock(psock, sock); out: write_unlock_bh(&sock->sk_callback_lock); @@ -1908,7 +1918,7 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops, struct smap_psock *opsock = smap_psock_sk(osock); write_lock_bh(&osock->sk_callback_lock); - smap_list_remove(opsock, &stab->sock_map[i], NULL); + smap_list_map_remove(opsock, &stab->sock_map[i]); smap_release_sock(opsock, osock); write_unlock_bh(&osock->sk_callback_lock); } @@ -2142,7 +2152,7 @@ static void sock_hash_free(struct bpf_map *map) * (psock) to be null and queued for garbage collection. */ if (likely(psock)) { - smap_list_remove(psock, NULL, l); + smap_list_hash_remove(psock, l); smap_release_sock(psock, sock); } write_unlock_bh(&sock->sk_callback_lock); @@ -2322,7 +2332,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, psock = smap_psock_sk(l_old->sk); hlist_del_rcu(&l_old->hash_node); - smap_list_remove(psock, NULL, l_old); + smap_list_hash_remove(psock, l_old); smap_release_sock(psock, l_old->sk); free_htab_elem(htab, l_old); } @@ -2390,7 +2400,7 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) * to be null and queued for garbage collection. */ if (likely(psock)) { - smap_list_remove(psock, NULL, l); + smap_list_hash_remove(psock, l); smap_release_sock(psock, sock); } write_unlock_bh(&sock->sk_callback_lock); From e9db4ef6bf4ca9894bb324c76e01b8f1a16b2650 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 30 Jun 2018 06:17:47 -0700 Subject: [PATCH 163/382] bpf: sockhash fix omitted bucket lock in sock_close First the sk_callback_lock() was being used to protect both the sock callback hooks and the psock->maps list. This got overly convoluted after the addition of sockhash (in sockmap it made some sense because masp and callbacks were tightly coupled) so lets split out a specific lock for maps and only use the callback lock for its intended purpose. This fixes a couple cases where we missed using maps lock when it was in fact needed. Also this makes it easier to follow the code because now we can put the locking closer to the actual code its serializing. Next, in sock_hash_delete_elem() the pattern was as follows, sock_hash_delete_elem() [...] spin_lock(bucket_lock) l = lookup_elem_raw() if (l) hlist_del_rcu() write_lock(sk_callback_lock) .... destroy psock ... write_unlock(sk_callback_lock) spin_unlock(bucket_lock) The ordering is necessary because we only know the {p}sock after dereferencing the hash table which we can't do unless we have the bucket lock held. Once we have the bucket lock and the psock element it is deleted from the hashmap to ensure any other path doing a lookup will fail. Finally, the refcnt is decremented and if zero the psock is destroyed. In parallel with the above (or free'ing the map) a tcp close event may trigger tcp_close(). Which at the moment omits the bucket lock altogether (oops!) where the flow looks like this, bpf_tcp_close() [...] write_lock(sk_callback_lock) for each psock->maps // list of maps this sock is part of hlist_del_rcu(ref_hash_node); .... destroy psock ... write_unlock(sk_callback_lock) Obviously, and demonstrated by syzbot, this is broken because we can have multiple threads deleting entries via hlist_del_rcu(). To fix this we might be tempted to wrap the hlist operation in a bucket lock but that would create a lock inversion problem. In summary to follow locking rules the psocks maps list needs the sk_callback_lock (after this patch maps_lock) but we need the bucket lock to do the hlist_del_rcu. To resolve the lock inversion problem pop the head of the maps list repeatedly and remove the reference until no more are left. If a delete happens in parallel from the BPF API that is OK as well because it will do a similar action, lookup the lock in the map/hash, delete it from the map/hash, and dec the refcnt. We check for this case before doing a destroy on the psock to ensure we don't have two threads tearing down a psock. The new logic is as follows, bpf_tcp_close() e = psock_map_pop(psock->maps) // done with map lock bucket_lock() // lock hash list bucket l = lookup_elem_raw(head, hash, key, key_size); if (l) { //only get here if elmnt was not already removed hlist_del_rcu() ... destroy psock... } bucket_unlock() And finally for all the above to work add missing locking around map operations per above. Then add RCU annotations and use rcu_dereference/rcu_assign_pointer to manage values relying on RCU so that the object is not free'd from sock_hash_free() while it is being referenced in bpf_tcp_close(). Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com Fixes: 81110384441a ("bpf: sockmap, add hash map support") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 145 ++++++++++++++++++++++++++++--------------- 1 file changed, 96 insertions(+), 49 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 65a937ed5762..ac09b35a9567 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -72,6 +72,7 @@ struct bpf_htab { u32 n_buckets; u32 elem_size; struct bpf_sock_progs progs; + struct rcu_head rcu; }; struct htab_elem { @@ -89,8 +90,8 @@ enum smap_psock_state { struct smap_psock_map_entry { struct list_head list; struct sock **entry; - struct htab_elem *hash_link; - struct bpf_htab *htab; + struct htab_elem __rcu *hash_link; + struct bpf_htab __rcu *htab; }; struct smap_psock { @@ -120,6 +121,7 @@ struct smap_psock { struct bpf_prog *bpf_parse; struct bpf_prog *bpf_verdict; struct list_head maps; + spinlock_t maps_lock; /* Back reference used when sock callback trigger sockmap operations */ struct sock *sock; @@ -258,16 +260,54 @@ static void bpf_tcp_release(struct sock *sk) rcu_read_unlock(); } +static struct htab_elem *lookup_elem_raw(struct hlist_head *head, + u32 hash, void *key, u32 key_size) +{ + struct htab_elem *l; + + hlist_for_each_entry_rcu(l, head, hash_node) { + if (l->hash == hash && !memcmp(&l->key, key, key_size)) + return l; + } + + return NULL; +} + +static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) +{ + return &htab->buckets[hash & (htab->n_buckets - 1)]; +} + +static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) +{ + return &__select_bucket(htab, hash)->head; +} + static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { atomic_dec(&htab->count); kfree_rcu(l, rcu); } +static struct smap_psock_map_entry *psock_map_pop(struct sock *sk, + struct smap_psock *psock) +{ + struct smap_psock_map_entry *e; + + spin_lock_bh(&psock->maps_lock); + e = list_first_entry_or_null(&psock->maps, + struct smap_psock_map_entry, + list); + if (e) + list_del(&e->list); + spin_unlock_bh(&psock->maps_lock); + return e; +} + static void bpf_tcp_close(struct sock *sk, long timeout) { void (*close_fun)(struct sock *sk, long timeout); - struct smap_psock_map_entry *e, *tmp; + struct smap_psock_map_entry *e; struct sk_msg_buff *md, *mtmp; struct smap_psock *psock; struct sock *osk; @@ -286,7 +326,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout) */ close_fun = psock->save_close; - write_lock_bh(&sk->sk_callback_lock); if (psock->cork) { free_start_sg(psock->sock, psock->cork); kfree(psock->cork); @@ -299,20 +338,38 @@ static void bpf_tcp_close(struct sock *sk, long timeout) kfree(md); } - list_for_each_entry_safe(e, tmp, &psock->maps, list) { + e = psock_map_pop(sk, psock); + while (e) { if (e->entry) { osk = cmpxchg(e->entry, sk, NULL); if (osk == sk) { - list_del(&e->list); smap_release_sock(psock, sk); } } else { - hlist_del_rcu(&e->hash_link->hash_node); - smap_release_sock(psock, e->hash_link->sk); - free_htab_elem(e->htab, e->hash_link); + struct htab_elem *link = rcu_dereference(e->hash_link); + struct bpf_htab *htab = rcu_dereference(e->htab); + struct hlist_head *head; + struct htab_elem *l; + struct bucket *b; + + b = __select_bucket(htab, link->hash); + head = &b->head; + raw_spin_lock_bh(&b->lock); + l = lookup_elem_raw(head, + link->hash, link->key, + htab->map.key_size); + /* If another thread deleted this object skip deletion. + * The refcnt on psock may or may not be zero. + */ + if (l) { + hlist_del_rcu(&link->hash_node); + smap_release_sock(psock, link->sk); + free_htab_elem(htab, link); + } + raw_spin_unlock_bh(&b->lock); } + e = psock_map_pop(sk, psock); } - write_unlock_bh(&sk->sk_callback_lock); rcu_read_unlock(); close_fun(sk, timeout); } @@ -1395,7 +1452,9 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock) { if (refcount_dec_and_test(&psock->refcnt)) { tcp_cleanup_ulp(sock); + write_lock_bh(&sock->sk_callback_lock); smap_stop_sock(psock, sock); + write_unlock_bh(&sock->sk_callback_lock); clear_bit(SMAP_TX_RUNNING, &psock->state); rcu_assign_sk_user_data(sock, NULL); call_rcu_sched(&psock->rcu, smap_destroy_psock); @@ -1546,6 +1605,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock, int node) INIT_LIST_HEAD(&psock->maps); INIT_LIST_HEAD(&psock->ingress); refcount_set(&psock->refcnt, 1); + spin_lock_init(&psock->maps_lock); rcu_assign_sk_user_data(sock, psock); sock_hold(sock); @@ -1607,10 +1667,12 @@ static void smap_list_map_remove(struct smap_psock *psock, { struct smap_psock_map_entry *e, *tmp; + spin_lock_bh(&psock->maps_lock); list_for_each_entry_safe(e, tmp, &psock->maps, list) { if (e->entry == entry) list_del(&e->list); } + spin_unlock_bh(&psock->maps_lock); } static void smap_list_hash_remove(struct smap_psock *psock, @@ -1618,12 +1680,14 @@ static void smap_list_hash_remove(struct smap_psock *psock, { struct smap_psock_map_entry *e, *tmp; + spin_lock_bh(&psock->maps_lock); list_for_each_entry_safe(e, tmp, &psock->maps, list) { - struct htab_elem *c = e->hash_link; + struct htab_elem *c = rcu_dereference(e->hash_link); if (c == hash_link) list_del(&e->list); } + spin_unlock_bh(&psock->maps_lock); } static void sock_map_free(struct bpf_map *map) @@ -1649,7 +1713,6 @@ static void sock_map_free(struct bpf_map *map) if (!sock) continue; - write_lock_bh(&sock->sk_callback_lock); psock = smap_psock_sk(sock); /* This check handles a racing sock event that can get the * sk_callback_lock before this case but after xchg happens @@ -1660,7 +1723,6 @@ static void sock_map_free(struct bpf_map *map) smap_list_map_remove(psock, &stab->sock_map[i]); smap_release_sock(psock, sock); } - write_unlock_bh(&sock->sk_callback_lock); } rcu_read_unlock(); @@ -1709,7 +1771,6 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key) if (!sock) return -EINVAL; - write_lock_bh(&sock->sk_callback_lock); psock = smap_psock_sk(sock); if (!psock) goto out; @@ -1719,7 +1780,6 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key) smap_list_map_remove(psock, &stab->sock_map[k]); smap_release_sock(psock, sock); out: - write_unlock_bh(&sock->sk_callback_lock); return 0; } @@ -1800,7 +1860,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, } } - write_lock_bh(&sock->sk_callback_lock); psock = smap_psock_sk(sock); /* 2. Do not allow inheriting programs if psock exists and has @@ -1857,7 +1916,9 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, if (err) goto out_free; smap_init_progs(psock, verdict, parse); + write_lock_bh(&sock->sk_callback_lock); smap_start_sock(psock, sock); + write_unlock_bh(&sock->sk_callback_lock); } /* 4. Place psock in sockmap for use and stop any programs on @@ -1867,9 +1928,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, */ if (map_link) { e->entry = map_link; + spin_lock_bh(&psock->maps_lock); list_add_tail(&e->list, &psock->maps); + spin_unlock_bh(&psock->maps_lock); } - write_unlock_bh(&sock->sk_callback_lock); return err; out_free: smap_release_sock(psock, sock); @@ -1880,7 +1942,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, } if (tx_msg) bpf_prog_put(tx_msg); - write_unlock_bh(&sock->sk_callback_lock); kfree(e); return err; } @@ -1917,10 +1978,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops, if (osock) { struct smap_psock *opsock = smap_psock_sk(osock); - write_lock_bh(&osock->sk_callback_lock); smap_list_map_remove(opsock, &stab->sock_map[i]); smap_release_sock(opsock, osock); - write_unlock_bh(&osock->sk_callback_lock); } out: return err; @@ -2109,14 +2168,13 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) return ERR_PTR(err); } -static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) +static void __bpf_htab_free(struct rcu_head *rcu) { - return &htab->buckets[hash & (htab->n_buckets - 1)]; -} + struct bpf_htab *htab; -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) -{ - return &__select_bucket(htab, hash)->head; + htab = container_of(rcu, struct bpf_htab, rcu); + bpf_map_area_free(htab->buckets); + kfree(htab); } static void sock_hash_free(struct bpf_map *map) @@ -2135,16 +2193,18 @@ static void sock_hash_free(struct bpf_map *map) */ rcu_read_lock(); for (i = 0; i < htab->n_buckets; i++) { - struct hlist_head *head = select_bucket(htab, i); + struct bucket *b = __select_bucket(htab, i); + struct hlist_head *head; struct hlist_node *n; struct htab_elem *l; + raw_spin_lock_bh(&b->lock); + head = &b->head; hlist_for_each_entry_safe(l, n, head, hash_node) { struct sock *sock = l->sk; struct smap_psock *psock; hlist_del_rcu(&l->hash_node); - write_lock_bh(&sock->sk_callback_lock); psock = smap_psock_sk(sock); /* This check handles a racing sock event that can get * the sk_callback_lock before this case but after xchg @@ -2155,13 +2215,12 @@ static void sock_hash_free(struct bpf_map *map) smap_list_hash_remove(psock, l); smap_release_sock(psock, sock); } - write_unlock_bh(&sock->sk_callback_lock); - kfree(l); + free_htab_elem(htab, l); } + raw_spin_unlock_bh(&b->lock); } rcu_read_unlock(); - bpf_map_area_free(htab->buckets); - kfree(htab); + call_rcu(&htab->rcu, __bpf_htab_free); } static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, @@ -2188,19 +2247,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, return l_new; } -static struct htab_elem *lookup_elem_raw(struct hlist_head *head, - u32 hash, void *key, u32 key_size) -{ - struct htab_elem *l; - - hlist_for_each_entry_rcu(l, head, hash_node) { - if (l->hash == hash && !memcmp(&l->key, key, key_size)) - return l; - } - - return NULL; -} - static inline u32 htab_map_hash(const void *key, u32 key_len) { return jhash(key, key_len, 0); @@ -2320,9 +2366,12 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops, goto bucket_err; } - e->hash_link = l_new; - e->htab = container_of(map, struct bpf_htab, map); + rcu_assign_pointer(e->hash_link, l_new); + rcu_assign_pointer(e->htab, + container_of(map, struct bpf_htab, map)); + spin_lock_bh(&psock->maps_lock); list_add_tail(&e->list, &psock->maps); + spin_unlock_bh(&psock->maps_lock); /* add new element to the head of the list, so that * concurrent search will find it before old elem @@ -2392,7 +2441,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) struct smap_psock *psock; hlist_del_rcu(&l->hash_node); - write_lock_bh(&sock->sk_callback_lock); psock = smap_psock_sk(sock); /* This check handles a racing sock event that can get the * sk_callback_lock before this case but after xchg happens @@ -2403,7 +2451,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key) smap_list_hash_remove(psock, l); smap_release_sock(psock, sock); } - write_unlock_bh(&sock->sk_callback_lock); free_htab_elem(htab, l); ret = 0; } From caac76a5170eb508529bbff9d9300e9c57126444 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 30 Jun 2018 06:17:52 -0700 Subject: [PATCH 164/382] bpf: sockhash, add release routine Add map_release_uref pointer to hashmap ops. This was dropped when original sockhash code was ported into bpf-next before initial commit. Fixes: 81110384441a ("bpf: sockmap, add hash map support") Acked-by: Martin KaFai Lau Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index ac09b35a9567..cf7b6a6dbd1f 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -2496,6 +2496,7 @@ const struct bpf_map_ops sock_hash_ops = { .map_get_next_key = sock_hash_get_next_key, .map_update_elem = sock_hash_update_elem, .map_delete_elem = sock_hash_delete_elem, + .map_release_uref = sock_map_release, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, From 06d793b114e9d922c03aa077ac6c5c51fdda2722 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Thu, 21 Jun 2018 19:10:00 +0100 Subject: [PATCH 165/382] ARM: dts: imx51-zii-rdu1: fix touchscreen pinctrl The pinctrl settings were incorrect for the touchscreen interrupt line, causing an interrupt storm. This change has been tested with both the atmel_mxt_ts and RMI4 drivers on the RDU1 units. The value 0x4 comes from the value of register IOMUXC_SW_PAD_CTL_PAD_CSI1_D8 from the old vendor kernel. Signed-off-by: Nick Dyer Fixes: ceef0396f367 ("ARM: dts: imx: add ZII RDU1 board") Cc: # 4.15+ Reviewed-by: Fabio Estevam Tested-by: Chris Healy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx51-zii-rdu1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts index df9eca94d812..8a878687197b 100644 --- a/arch/arm/boot/dts/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts @@ -770,7 +770,7 @@ MX51_PAD_NANDF_D15__GPIO3_25 0x21e6 pinctrl_ts: tsgrp { fsl,pins = < - MX51_PAD_CSI1_D8__GPIO3_12 0x85 + MX51_PAD_CSI1_D8__GPIO3_12 0x04 MX51_PAD_CSI1_D9__GPIO3_13 0x85 >; }; From ae15a41a641449f536578b0d9ec0e4ade130deb5 Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Sun, 1 Jul 2018 14:17:36 +0900 Subject: [PATCH 166/382] openrisc: entry: Fix delay slot exception detection Originally in patch e6d20c55a4 ("openrisc: entry: Fix delay slot detection") I fixed delay slot detection, but only for QEMU. We missed that hardware delay slot detection using delay slot exception flag (DSX) was still broken. This was because QEMU set the DSX flag in both pre-exception supervision register (ESR) and supervision register (SR) register, but on real hardware the DSX flag is only set on the SR register during exceptions. Fix this by carrying the DSX flag into the SR register during exception. We also update the DSX flag read locations to read the value from the SR register not the pt_regs SR register which represents ESR. The ESR should never have the DSX flag set. In the process I updated/removed a few comments to match the current state. Including removing a comment saying that the DSX detection logic was inefficient and needed to be rewritten. I have tested this on QEMU with a patch ensuring it matches the hardware specification. Link: https://lists.gnu.org/archive/html/qemu-devel/2018-07/msg00000.html Fixes: e6d20c55a4 ("openrisc: entry: Fix delay slot detection") Signed-off-by: Stafford Horne --- arch/openrisc/kernel/entry.S | 8 +------- arch/openrisc/kernel/head.S | 9 ++++++--- arch/openrisc/kernel/traps.c | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 690d55272ba6..0c826ad6e994 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -277,12 +277,6 @@ EXCEPTION_ENTRY(_data_page_fault_handler) l.addi r3,r1,0 // pt_regs /* r4 set be EXCEPTION_HANDLE */ // effective address of fault - /* - * __PHX__: TODO - * - * all this can be written much simpler. look at - * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part - */ #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX l.lwz r6,PT_PC(r3) // address of an offending insn l.lwz r6,0(r6) // instruction that caused pf @@ -314,7 +308,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) #else - l.lwz r6,PT_SR(r3) // SR + l.mfspr r6,r0,SPR_SR // SR l.andi r6,r6,SPR_SR_DSX // check for delay slot exception l.sfne r6,r0 // exception happened in delay slot l.bnf 7f diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index fb02b2a1d6f2..9fc6b60140f0 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -210,8 +210,7 @@ * r4 - EEAR exception EA * r10 - current pointing to current_thread_info struct * r12 - syscall 0, since we didn't come from syscall - * r13 - temp it actually contains new SR, not needed anymore - * r31 - handler address of the handler we'll jump to + * r30 - handler address of the handler we'll jump to * * handler has to save remaining registers to the exception * ksp frame *before* tainting them! @@ -244,6 +243,7 @@ /* r1 is KSP, r30 is __pa(KSP) */ ;\ tophys (r30,r1) ;\ l.sw PT_GPR12(r30),r12 ;\ + /* r4 use for tmp before EA */ ;\ l.mfspr r12,r0,SPR_EPCR_BASE ;\ l.sw PT_PC(r30),r12 ;\ l.mfspr r12,r0,SPR_ESR_BASE ;\ @@ -263,7 +263,10 @@ /* r12 == 1 if we come from syscall */ ;\ CLEAR_GPR(r12) ;\ /* ----- turn on MMU ----- */ ;\ - l.ori r30,r0,(EXCEPTION_SR) ;\ + /* Carry DSX into exception SR */ ;\ + l.mfspr r30,r0,SPR_SR ;\ + l.andi r30,r30,SPR_SR_DSX ;\ + l.ori r30,r30,(EXCEPTION_SR) ;\ l.mtspr r0,r30,SPR_ESR_BASE ;\ /* r30: EA address of handler */ ;\ LOAD_SYMBOL_2_GPR(r30,handler) ;\ diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index fac246e6f37a..d8981cbb852a 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -300,7 +300,7 @@ static inline int in_delay_slot(struct pt_regs *regs) return 0; } #else - return regs->sr & SPR_SR_DSX; + return mfspr(SPR_SR) & SPR_SR_DSX; #endif } From 1236f22fbae15df3736ab4a984c64c0c6ee6254c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 29 Jun 2018 13:07:53 +0300 Subject: [PATCH 167/382] tcp: prevent bogus FRTO undos with non-SACK flows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If SACK is not enabled and the first cumulative ACK after the RTO retransmission covers more than the retransmitted skb, a spurious FRTO undo will trigger (assuming FRTO is enabled for that RTO). The reason is that any non-retransmitted segment acknowledged will set FLAG_ORIG_SACK_ACKED in tcp_clean_rtx_queue even if there is no indication that it would have been delivered for real (the scoreboard is not kept with TCPCB_SACKED_ACKED bits in the non-SACK case so the check for that bit won't help like it does with SACK). Having FLAG_ORIG_SACK_ACKED set results in the spurious FRTO undo in tcp_process_loss. We need to use more strict condition for non-SACK case and check that none of the cumulatively ACKed segments were retransmitted to prove that progress is due to original transmissions. Only then keep FLAG_ORIG_SACK_ACKED set, allowing FRTO undo to proceed in non-SACK case. (FLAG_ORIG_SACK_ACKED is planned to be renamed to FLAG_ORIG_PROGRESS to better indicate its purpose but to keep this change minimal, it will be done in another patch). Besides burstiness and congestion control violations, this problem can result in RTO loop: When the loss recovery is prematurely undoed, only new data will be transmitted (if available) and the next retransmission can occur only after a new RTO which in case of multiple losses (that are not for consecutive packets) requires one RTO per loss to recover. Signed-off-by: Ilpo Järvinen Tested-by: Neal Cardwell Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 045d930d01a9..8e5522c6833a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3181,6 +3181,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (tcp_is_reno(tp)) { tcp_remove_reno_sacks(sk, pkts_acked); + + /* If any of the cumulatively ACKed segments was + * retransmitted, non-SACK case cannot confirm that + * progress was due to original transmission due to + * lack of TCPCB_SACKED_ACKED bits even if some of + * the packets may have been never retransmitted. + */ + if (flag & FLAG_RETRANS_DATA_ACKED) + flag &= ~FLAG_ORIG_SACK_ACKED; } else { int delta; From a068b94d74ddb7776ca707b6d39d1ac0d2d057e6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Jun 2018 15:33:23 -0700 Subject: [PATCH 168/382] crypto: arm/speck - fix building in Thumb2 mode Building the kernel with CONFIG_THUMB2_KERNEL=y and CONFIG_CRYPTO_SPECK_NEON set fails with the following errors: arch/arm/crypto/speck-neon-core.S: Assembler messages: arch/arm/crypto/speck-neon-core.S:419: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:423: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:427: Error: r13 not allowed here -- `bic sp,#0xf' arch/arm/crypto/speck-neon-core.S:431: Error: r13 not allowed here -- `bic sp,#0xf' The problem is that the 'bic' instruction can't operate on the 'sp' register in Thumb2 mode. Fix it by using a temporary register. This isn't in the main loop, so the performance difference is negligible. This also matches what aes-neonbs-core.S does. Reported-by: Stefan Agner Fixes: ede9622162fa ("crypto: arm/speck - add NEON-accelerated implementation of Speck-XTS") Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel Reviewed-by: Stefan Agner Signed-off-by: Herbert Xu --- arch/arm/crypto/speck-neon-core.S | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/crypto/speck-neon-core.S b/arch/arm/crypto/speck-neon-core.S index 3c1e203e53b9..57caa742016e 100644 --- a/arch/arm/crypto/speck-neon-core.S +++ b/arch/arm/crypto/speck-neon-core.S @@ -272,9 +272,11 @@ * Allocate stack space to store 128 bytes worth of tweaks. For * performance, this space is aligned to a 16-byte boundary so that we * can use the load/store instructions that declare 16-byte alignment. + * For Thumb2 compatibility, don't do the 'bic' directly on 'sp'. */ - sub sp, #128 - bic sp, #0xf + sub r12, sp, #128 + bic r12, #0xf + mov sp, r12 .if \n == 64 // Load first tweak From 221e00d1fce976d8a04ff591a0150caf84e176f8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 23 Jun 2018 12:36:22 +0200 Subject: [PATCH 169/382] crypto: x86 - Add missing RETs Add explicit RETs to the tail calls of AEGIS and MORUS crypto algorithms otherwise they run into INT3 padding due to ("x86/asm: Pad assembly functions with INT3 instructions") leading to spurious debug exceptions. Mike Galbraith took care of all the remaining callsites. Signed-off-by: Borislav Petkov Acked-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128-aesni-asm.S | 1 + arch/x86/crypto/aegis128l-aesni-asm.S | 1 + arch/x86/crypto/aegis256-aesni-asm.S | 1 + arch/x86/crypto/morus1280-avx2-asm.S | 1 + arch/x86/crypto/morus1280-sse2-asm.S | 1 + arch/x86/crypto/morus640-sse2-asm.S | 1 + 6 files changed, 6 insertions(+) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index 9254e0b6cc06..717bf0776421 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -535,6 +535,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail) movdqu STATE3, 0x40(STATEP) FRAME_END + ret ENDPROC(crypto_aegis128_aesni_enc_tail) .macro decrypt_block a s0 s1 s2 s3 s4 i diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S index 9263c344f2c7..4eda2b8db9e1 100644 --- a/arch/x86/crypto/aegis128l-aesni-asm.S +++ b/arch/x86/crypto/aegis128l-aesni-asm.S @@ -645,6 +645,7 @@ ENTRY(crypto_aegis128l_aesni_enc_tail) state_store0 FRAME_END + ret ENDPROC(crypto_aegis128l_aesni_enc_tail) /* diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S index 1d977d515bf9..32aae8397268 100644 --- a/arch/x86/crypto/aegis256-aesni-asm.S +++ b/arch/x86/crypto/aegis256-aesni-asm.S @@ -543,6 +543,7 @@ ENTRY(crypto_aegis256_aesni_enc_tail) state_store0 FRAME_END + ret ENDPROC(crypto_aegis256_aesni_enc_tail) /* diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S index 37d422e77931..07653d4582a6 100644 --- a/arch/x86/crypto/morus1280-avx2-asm.S +++ b/arch/x86/crypto/morus1280-avx2-asm.S @@ -453,6 +453,7 @@ ENTRY(crypto_morus1280_avx2_enc_tail) vmovdqu STATE4, (4 * 32)(%rdi) FRAME_END + ret ENDPROC(crypto_morus1280_avx2_enc_tail) /* diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S index 1fe637c7be9d..bd1aa1b60869 100644 --- a/arch/x86/crypto/morus1280-sse2-asm.S +++ b/arch/x86/crypto/morus1280-sse2-asm.S @@ -652,6 +652,7 @@ ENTRY(crypto_morus1280_sse2_enc_tail) movdqu STATE4_HI, (9 * 16)(%rdi) FRAME_END + ret ENDPROC(crypto_morus1280_sse2_enc_tail) /* diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S index 71c72a0a0862..efa02816d921 100644 --- a/arch/x86/crypto/morus640-sse2-asm.S +++ b/arch/x86/crypto/morus640-sse2-asm.S @@ -437,6 +437,7 @@ ENTRY(crypto_morus640_sse2_enc_tail) movdqu STATE4, (4 * 16)(%rdi) FRAME_END + ret ENDPROC(crypto_morus640_sse2_enc_tail) /* From ecd60532e060e45c63c57ecf1c8549b1d656d34d Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 18 Jun 2018 15:34:14 +1000 Subject: [PATCH 170/382] m68k: fix "bad page state" oops on ColdFire boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Booting a ColdFire m68k core with MMU enabled causes a "bad page state" oops since commit 1d40a5ea01d5 ("mm: mark pages in use for page tables"): BUG: Bad page state in process sh pfn:01ce2 page:004fefc8 count:0 mapcount:-1024 mapping:00000000 index:0x0 flags: 0x0() raw: 00000000 00000000 00000000 fffffbff 00000000 00000100 00000200 00000000 raw: 039c4000 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 22 Comm: sh Not tainted 4.17.0-07461-g1d40a5ea01d5 #13 Fix by calling pgtable_page_dtor() in our __pte_free_tlb() code path, so that the PG_table flag is cleared before we free the pte page. Note that I had to change the type of pte_free() to be static from extern. Otherwise you get a lot of warnings like this: ./arch/m68k/include/asm/mcf_pgalloc.h:80:2: warning: ‘pgtable_page_dtor’ is static but used in inline function ‘pte_free’ which is not static pgtable_page_dtor(page); ^ And making it static is consistent with our use of this in the other m68k pgalloc definitions of pte_free(). Signed-off-by: Greg Ungerer CC: Matthew Wilcox Reviewed-by: Geert Uytterhoeven --- arch/m68k/include/asm/mcf_pgalloc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index 8b707c249026..12fe700632f4 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -44,6 +44,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, unsigned long address) { + pgtable_page_dtor(page); __free_page(page); } @@ -74,8 +75,9 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, return page; } -extern inline void pte_free(struct mm_struct *mm, struct page *page) +static inline void pte_free(struct mm_struct *mm, struct page *page) { + pgtable_page_dtor(page); __free_page(page); } From fa7743b141678116c19b89c5492b330671a51e02 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 15:17:37 +0200 Subject: [PATCH 171/382] drm/exynos: Replace drm_framebuffer_{un/reference} with put,get functions This patch unifies the naming of DRM functions for reference counting of struct drm_framebuffer. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_plane.c b/drivers/gpu/drm/exynos/exynos_drm_plane.c index 38a2a7f1204b..7098c6d35266 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_plane.c +++ b/drivers/gpu/drm/exynos/exynos_drm_plane.c @@ -132,7 +132,7 @@ static void exynos_drm_plane_reset(struct drm_plane *plane) if (plane->state) { exynos_state = to_exynos_plane_state(plane->state); if (exynos_state->base.fb) - drm_framebuffer_unreference(exynos_state->base.fb); + drm_framebuffer_put(exynos_state->base.fb); kfree(exynos_state); plane->state = NULL; } From af7d9101a08d752bb6da9a80df6edbc618fab4cc Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 15:17:38 +0200 Subject: [PATCH 172/382] drm/exynos: Replace drm_gem_object_unreference_unlocked with put function This patch unifies the naming of DRM functions for reference counting of struct drm_gem_object. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_gem.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 7fcc1a7ab1a0..27b7d34d776c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -138,7 +138,7 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, err: while (i--) - drm_gem_object_unreference_unlocked(&exynos_gem[i]->base); + drm_gem_object_put_unlocked(&exynos_gem[i]->base); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 6e1494fa71b4..bdf5a7655228 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -143,7 +143,7 @@ static int exynos_drm_gem_handle_create(struct drm_gem_object *obj, DRM_DEBUG_KMS("gem handle = 0x%x\n", *handle); /* drop reference from allocate - handle holds it now. */ - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return 0; } @@ -186,7 +186,7 @@ unsigned long exynos_drm_gem_get_size(struct drm_device *dev, exynos_gem = to_exynos_gem(obj); - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return exynos_gem->size; } @@ -329,13 +329,13 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev, return; } - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); /* * decrease obj->refcount one more time because we has already * increased it at exynos_drm_gem_get_dma_addr(). */ - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); } static int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem *exynos_gem, @@ -383,7 +383,7 @@ int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data, args->flags = exynos_gem->flags; args->size = exynos_gem->size; - drm_gem_object_unreference_unlocked(obj); + drm_gem_object_put_unlocked(obj); return 0; } From aab109b340eaf3968337e1d19d71ff0551c57365 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Jun 2018 15:17:39 +0200 Subject: [PATCH 173/382] drm/exynos: Replace drm_dev_unref with drm_dev_put This patch unifies the naming of DRM functions for reference counting of struct drm_device. The resulting code is more aligned with the rest of the Linux kernel interfaces. Signed-off-by: Thomas Zimmermann Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index a81b4a5e24a7..ed3cc2989f93 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -420,7 +420,7 @@ static int exynos_drm_bind(struct device *dev) err_free_private: kfree(private); err_free_drm: - drm_dev_unref(drm); + drm_dev_put(drm); return ret; } @@ -444,7 +444,7 @@ static void exynos_drm_unbind(struct device *dev) drm->dev_private = NULL; dev_set_drvdata(dev, NULL); - drm_dev_unref(drm); + drm_dev_put(drm); } static const struct component_master_ops exynos_drm_ops = { From 510fe10b6180137773dce9032b51bb82ff946c2d Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 19 Jun 2018 15:44:11 +0800 Subject: [PATCH 174/382] drm/i915/gvt: fix a bug of partially write ggtt enties when guest writes ggtt entries, it could write 8 bytes a time if gtt_entry_size is 8. But, qemu could split the 8 bytes into 2 consecutive 4-byte writes. If each 4-byte partial write could trigger a host ggtt write, it is very possible that a wrong combination is written to the host ggtt. E.g. the higher 4 bytes is the old value, but the lower 4 bytes is the new value, and this 8-byte combination is wrong but written to the ggtt, thus causing bugs. To handle this condition, we just record the first 4-byte write, then wait until the second 4-byte write comes and write the combined 64-bit data to host ggtt table. To save memory space and to spot partial write as early as possible, we don't keep this information for every ggtt index. Instread, we just record the last ggtt write position, and assume the two 4-byte writes come in consecutively for each vgpu. This assumption is right based on the characteristic of ggtt entry which stores memory address. When gtt_entry_size is 8, the guest memory physical address should be 64 bits, so any sane guest driver should write 8-byte long data at a time, so 2 consecutive 4-byte writes at the same ggtt index should be trapped in gvt. v2: when incomplete ggtt entry write is located, e.g. 1. guest only writes 4 bytes at a ggtt offset and no long writes the rest 4 bytes. 2. guest writes 4 bytes of a ggtt offset, then write at other ggtt offsets, then return back to write the left 4 bytes of the first ggtt offset. add error handling logic to remap host entry to scratch page, and mark guest virtual ggtt entry as not present. (zhenyu wang) Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 58 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/gtt.h | 2 ++ 2 files changed, 60 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 23296547da95..4efec8fa6c1d 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1592,6 +1592,7 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu) vgpu_free_mm(mm); return ERR_PTR(-ENOMEM); } + mm->ggtt_mm.last_partial_off = -1UL; return mm; } @@ -1616,6 +1617,7 @@ void _intel_vgpu_mm_release(struct kref *mm_ref) invalidate_ppgtt_mm(mm); } else { vfree(mm->ggtt_mm.virtual_ggtt); + mm->ggtt_mm.last_partial_off = -1UL; } vgpu_free_mm(mm); @@ -1868,6 +1870,62 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, bytes); + /* If ggtt entry size is 8 bytes, and it's split into two 4 bytes + * write, we assume the two 4 bytes writes are consecutive. + * Otherwise, we abort and report error + */ + if (bytes < info->gtt_entry_size) { + if (ggtt_mm->ggtt_mm.last_partial_off == -1UL) { + /* the first partial part*/ + ggtt_mm->ggtt_mm.last_partial_off = off; + ggtt_mm->ggtt_mm.last_partial_data = e.val64; + return 0; + } else if ((g_gtt_index == + (ggtt_mm->ggtt_mm.last_partial_off >> + info->gtt_entry_size_shift)) && + (off != ggtt_mm->ggtt_mm.last_partial_off)) { + /* the second partial part */ + + int last_off = ggtt_mm->ggtt_mm.last_partial_off & + (info->gtt_entry_size - 1); + + memcpy((void *)&e.val64 + last_off, + (void *)&ggtt_mm->ggtt_mm.last_partial_data + + last_off, bytes); + + ggtt_mm->ggtt_mm.last_partial_off = -1UL; + } else { + int last_offset; + + gvt_vgpu_err("failed to populate guest ggtt entry: abnormal ggtt entry write sequence, last_partial_off=%lx, offset=%x, bytes=%d, ggtt entry size=%d\n", + ggtt_mm->ggtt_mm.last_partial_off, off, + bytes, info->gtt_entry_size); + + /* set host ggtt entry to scratch page and clear + * virtual ggtt entry as not present for last + * partially write offset + */ + last_offset = ggtt_mm->ggtt_mm.last_partial_off & + (~(info->gtt_entry_size - 1)); + + ggtt_get_host_entry(ggtt_mm, &m, last_offset); + ggtt_invalidate_pte(vgpu, &m); + ops->set_pfn(&m, gvt->gtt.scratch_mfn); + ops->clear_present(&m); + ggtt_set_host_entry(ggtt_mm, &m, last_offset); + ggtt_invalidate(gvt->dev_priv); + + ggtt_get_guest_entry(ggtt_mm, &e, last_offset); + ops->clear_present(&e); + ggtt_set_guest_entry(ggtt_mm, &e, last_offset); + + ggtt_mm->ggtt_mm.last_partial_off = off; + ggtt_mm->ggtt_mm.last_partial_data = e.val64; + + return 0; + } + } + if (ops->test_present(&e)) { gfn = ops->get_pfn(&e); m = e; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 3792f2b7f4ff..97e62647418a 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -150,6 +150,8 @@ struct intel_vgpu_mm { } ppgtt_mm; struct { void *virtual_ggtt; + unsigned long last_partial_off; + u64 last_partial_data; } ggtt_mm; }; }; From a4cae23cc05ccec749c2fc70fa9d8cda7c582319 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhang Date: Thu, 21 Jun 2018 14:33:43 +0800 Subject: [PATCH 175/382] drm/i915/gvt: changed DDI mode emulation type changed gvt display transcode DDI mode from DP_SST to DVI to address below calltrace issue during guest booting up which is caused by zero dotclock initial value with DP_SST mode. transcode DVI mode emulation also align with native with DP connection. [drm:drm_calc_timestamping_constants] ERROR crtc 41: Can't calculate constants, dotclock = 0! WARNING: at drivers/gpu/drm/drm_vblank.c:620 drm_calc_vbltimestamp_from_scanoutpos Call Trace: ? drm_calc_timestamping_constants+0x144/0x150 [drm] drm_get_last_vbltimestamp+0x54/0x90 [drm] drm_reset_vblank_timestamp+0x59/0xd0 [drm] drm_crtc_vblank_on+0x7b/0xd0 [drm] intel_modeset_setup_hw_state+0xb67/0xfd0 [i915] ? gen2_read32+0x110/0x110 [i915] ? drm_modeset_lock+0x30/0xa0 [drm] intel_modeset_init+0x794/0x19d0 [i915] ? intel_setup_gmbus+0x232/0x2e0 [i915] i915_driver_load+0xb4a/0xf40 [i915] Signed-off-by: Xiaolin Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6d8180e8d1e2..4b072ade8c38 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -196,7 +196,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= - (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { @@ -216,7 +216,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= - (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI | (PORT_C << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { @@ -236,7 +236,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= - (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | + (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DVI | (PORT_D << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { From 08b393d01c88aff27347ed2b1b354eb4db2f1532 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 27 Jun 2018 17:03:45 -0500 Subject: [PATCH 176/382] objtool: Support GCC 8 '-fnoreorder-functions' Since the following commit: cd77849a69cf ("objtool: Fix GCC 8 cold subfunction detection for aliased functions") ... if the kernel is built with EXTRA_CFLAGS='-fno-reorder-functions', objtool can get stuck in an infinite loop. That flag causes the new GCC 8 cold subfunctions to be placed in .text instead of .text.unlikely. But it also has an unfortunate quirk: in the symbol table, the subfunction (e.g., nmi_panic.cold.7) is nested inside the parent (nmi_panic). That function overlap confuses objtool, and causes it to get into an infinite loop in next_insn_same_func(). Here's Allan's description of the loop: "Objtool iterates through the instructions in nmi_panic using next_insn_same_func. Once it reaches the end of nmi_panic at 0x534 it jumps to 0x528 as that's the start of nmi_panic.cold.7. However, since the instructions starting at 0x528 are still associated with nmi_panic objtool will get stuck in a loop, continually jumping back to 0x528 after reaching 0x534." Fix it by shortening the length of the parent function so that the functions no longer overlap. Reported-and-analyzed-by: Allan Xavier Signed-off-by: Josh Poimboeuf Cc: Allan Xavier Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/9e704c52bee651129b036be14feda317ae5606ae.1530136978.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/elf.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4e60e105583e..0d1acb704f64 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -302,19 +302,34 @@ static int read_symbols(struct elf *elf) continue; sym->pfunc = sym->cfunc = sym; coldstr = strstr(sym->name, ".cold."); - if (coldstr) { - coldstr[0] = '\0'; - pfunc = find_symbol_by_name(elf, sym->name); - coldstr[0] = '.'; + if (!coldstr) + continue; - if (!pfunc) { - WARN("%s(): can't find parent function", - sym->name); - goto err; - } + coldstr[0] = '\0'; + pfunc = find_symbol_by_name(elf, sym->name); + coldstr[0] = '.'; - sym->pfunc = pfunc; - pfunc->cfunc = sym; + if (!pfunc) { + WARN("%s(): can't find parent function", + sym->name); + goto err; + } + + sym->pfunc = pfunc; + pfunc->cfunc = sym; + + /* + * Unfortunately, -fnoreorder-functions puts the child + * inside the parent. Remove the overlap so we can + * have sane assumptions. + * + * Note that pfunc->len now no longer matches + * pfunc->sym.st_size. + */ + if (sym->sec == pfunc->sec && + sym->offset >= pfunc->offset && + sym->offset + sym->len == pfunc->offset + pfunc->len) { + pfunc->len -= sym->len; } } } From 920c92448839bd4f8eb87a92b08cad56d449caff Mon Sep 17 00:00:00 2001 From: Murray McAllister Date: Mon, 2 Jul 2018 13:07:28 +1200 Subject: [PATCH 177/382] staging: rtl8723bs: Prevent an underflow in rtw_check_beacon_data(). Dan Carpenter reported an integer underflow issue in the rtl8188eu driver. This is also needed for the length (signed integer) in rtl8723bs, as it is later converted to an unsigned integer and used in a memcpy operation. Original issue is at https://patchwork.kernel.org/patch/9796371/ Reported-by: Dan Carpenter Signed-off-by: Murray McAllister Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8723bs/core/rtw_ap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/rtl8723bs/core/rtw_ap.c b/drivers/staging/rtl8723bs/core/rtw_ap.c index 45c05527a57a..faf4b4158cfa 100644 --- a/drivers/staging/rtl8723bs/core/rtw_ap.c +++ b/drivers/staging/rtl8723bs/core/rtw_ap.c @@ -1051,7 +1051,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) return _FAIL; - if (len > MAX_IE_SZ) + if (len < 0 || len > MAX_IE_SZ) return _FAIL; pbss_network->IELength = len; From 3284da34a87ab7a527a593f89bbdaf6debe9e713 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 12 Jun 2018 16:47:10 +0200 Subject: [PATCH 178/382] s390/dasd: reduce the default queue depth and nr of hardware queues Reduce the default values for the number of hardware queues and queue depth to significantly reduce the memory footprint of a DASD device. The memory consumption per DASD device reduces from approximately 40MB to approximately 1.5MB. This is necessary to build systems with a large number of DASD devices and a reasonable amount of memory. Performance measurements showed that good performance results are possible with the new default values even on systems with lots of CPUs and lots of alias devices. Fixes: e443343e509a ("s390/dasd: blk-mq conversion") Reviewed-by: Jan Hoeppner Reviewed-by: Peter Oberparleiter Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 13 +++++++++++-- drivers/s390/block/dasd_int.h | 8 -------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d3a38c421503..a9f60d0ee02e 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -41,6 +41,15 @@ #define DASD_DIAG_MOD "dasd_diag_mod" +static unsigned int queue_depth = 32; +static unsigned int nr_hw_queues = 4; + +module_param(queue_depth, uint, 0444); +MODULE_PARM_DESC(queue_depth, "Default queue depth for new DASD devices"); + +module_param(nr_hw_queues, uint, 0444); +MODULE_PARM_DESC(nr_hw_queues, "Default number of hardware queues for new DASD devices"); + /* * SECTION: exported variables of dasd.c */ @@ -3115,8 +3124,8 @@ static int dasd_alloc_queue(struct dasd_block *block) block->tag_set.ops = &dasd_mq_ops; block->tag_set.cmd_size = sizeof(struct dasd_ccw_req); - block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; - block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; + block->tag_set.nr_hw_queues = nr_hw_queues; + block->tag_set.queue_depth = queue_depth; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; rc = blk_mq_alloc_tag_set(&block->tag_set); diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 976b6bd4fb05..de6b96036aa4 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -228,14 +228,6 @@ struct dasd_ccw_req { #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ -/* - * There is no reliable way to determine the number of available CPUs on - * LPAR but there is no big performance difference between 1 and the - * maximum CPU number. - * 64 is a good trade off performance wise. - */ -#define DASD_NR_HW_QUEUES 64 -#define DASD_MAX_LCU_DEV 256 #define DASD_REQ_PER_DEV 4 /* Signature for error recovery functions. */ From dfa758638fd2d1184760deb2693abf76e982c53a Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 29 Jun 2018 19:54:01 +0200 Subject: [PATCH 179/382] s390/mm: fix refcount usage for 4K pgste s390 no longer uses the _mapcount field in struct page to identify the page table format being used. While the code was diligent in handling the different mappings, it neglected to turn "off" the map bits when alloc_pgste was being used. This resulted in bits remaining "on" in the _refcount field, and thus an artifically huge "in use" count that prevents the pages from actually being released by __free_page. There's opportunity for improvement in the "1 vs 3" vs "1U vs 3U" vs "0x1 vs 0x11" etc. variations for all these calls, I am just keeping things simple compared to neighboring code. Fixes: 620b4e903179 ("s390: use _refcount for pgtables") Reported-by: Halil Pasic Bisected-by: Vasily Gorbik Signed-off-by: Eric Farman Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 84bd6329a88d..e3bd5627afef 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -252,6 +252,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) spin_unlock_bh(&mm->context.lock); if (mask != 0) return; + } else { + atomic_xor_bits(&page->_refcount, 3U << 24); } pgtable_page_dtor(page); @@ -304,6 +306,8 @@ static void __tlb_remove_table(void *_table) break; /* fallthrough */ case 3: /* 4K page table with pgstes */ + if (mask & 3) + atomic_xor_bits(&page->_refcount, 3 << 24); pgtable_page_dtor(page); __free_page(page); break; From 07eaa43e66f505980d00e0f5fe697f3da7c6a730 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Tue, 29 May 2018 12:00:54 +0300 Subject: [PATCH 180/382] ARM: dts: dra7: Disable metastability workaround for USB2 Disable the metastability workaround for USB2. The original patch disabled the workaround on the wrong USB port. Fixes: b8c9c6fa2002 ("ARM: dts: dra7: Disable USB metastability workaround for USB2") Cc: [4.16+] Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/dra7.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 9dcd14edc202..e03495a799ce 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1580,7 +1580,6 @@ usb1: usb@48890000 { dr_mode = "otg"; snps,dis_u3_susphy_quirk; snps,dis_u2_susphy_quirk; - snps,dis_metastability_quirk; }; }; @@ -1608,6 +1607,7 @@ usb2: usb@488d0000 { dr_mode = "otg"; snps,dis_u3_susphy_quirk; snps,dis_u2_susphy_quirk; + snps,dis_metastability_quirk; }; }; From 207490517cf80d99f471d1b54e747eb95a4b8bea Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Jun 2018 10:45:49 -0700 Subject: [PATCH 181/382] bus: ti-sysc: Use 2-factor allocator arguments This adjusts the allocator calls to use 2-factor argument call style, as done treewide already for improved defense against allocation overflows. Signed-off-by: Kees Cook Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 1cc29629d238..80d60f43db56 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -169,9 +169,9 @@ static int sysc_get_clocks(struct sysc *ddata) const char *name; int nr_fck = 0, nr_ick = 0, i, error = 0; - ddata->clock_roles = devm_kzalloc(ddata->dev, - sizeof(*ddata->clock_roles) * + ddata->clock_roles = devm_kcalloc(ddata->dev, SYSC_MAX_CLOCKS, + sizeof(*ddata->clock_roles), GFP_KERNEL); if (!ddata->clock_roles) return -ENOMEM; @@ -200,8 +200,8 @@ static int sysc_get_clocks(struct sysc *ddata) return -EINVAL; } - ddata->clocks = devm_kzalloc(ddata->dev, - sizeof(*ddata->clocks) * ddata->nr_clocks, + ddata->clocks = devm_kcalloc(ddata->dev, + ddata->nr_clocks, sizeof(*ddata->clocks), GFP_KERNEL); if (!ddata->clocks) return -ENOMEM; From 603d4cf8fe095b1ee78f423d514427be507fb513 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sat, 30 Jun 2018 17:38:55 +0200 Subject: [PATCH 182/382] net: fix use-after-free in GRO with ESP Since the addition of GRO for ESP, gro_receive can consume the skb and return -EINPROGRESS. In that case, the lower layer GRO handler cannot touch the skb anymore. Commit 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") converted some of the gro_receive handlers that can lead to ESP's gro_receive so that they wouldn't access the skb when -EINPROGRESS is returned, but missed other spots, mainly in tunneling protocols. This patch finishes the conversion to using skb_gro_flush_final(), and adds a new helper, skb_gro_flush_final_remcsum(), used in VXLAN and GUE. Fixes: 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 4 +--- include/linux/netdevice.h | 20 ++++++++++++++++++++ net/8021q/vlan.c | 2 +- net/ipv4/fou.c | 4 +--- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- 7 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 750eaa53bf0c..ada33c2d9ac2 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -476,7 +476,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index aee0e60471f1..f6bb1d54d4bd 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -623,9 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk, flush = 0; out: - skb_gro_remcsum_cleanup(skb, &grc); - skb->remcsum_offload = 0; - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final_remcsum(skb, pp, flush, &grc); return pp; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ec9850c7936..3d0cc0b5cec2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2789,11 +2789,31 @@ static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, if (PTR_ERR(pp) != -EINPROGRESS) NAPI_GRO_CB(skb)->flush |= flush; } +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff **pp, + int flush, + struct gro_remcsum *grc) +{ + if (PTR_ERR(pp) != -EINPROGRESS) { + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; + } +} #else static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) { NAPI_GRO_CB(skb)->flush |= flush; } +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff **pp, + int flush, + struct gro_remcsum *grc) +{ + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; +} #endif static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 73a65789271b..8ccee3d01822 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 1540db65241a..c9ec1603666b 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -448,9 +448,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; - skb_gro_remcsum_cleanup(skb, &grc); - skb->remcsum_offload = 0; + skb_gro_flush_final_remcsum(skb, pp, flush, &grc); return pp; } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 1859c473b21a..6a7d980105f6 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 92dc9e5a7ff3..69c54540d5b4 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -394,7 +394,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, out_unlock: rcu_read_unlock(); out: - NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_flush_final(skb, pp, flush); return pp; } EXPORT_SYMBOL(udp_gro_receive); From fc9c2029e37c3ae9efc28bf47045e0b87e09660c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 30 Jun 2018 15:26:56 -0700 Subject: [PATCH 183/382] ipv6: sr: fix passing wrong flags to crypto_alloc_shash() The 'mask' argument to crypto_alloc_shash() uses the CRYPTO_ALG_* flags, not 'gfp_t'. So don't pass GFP_KERNEL to it. Fixes: bf355b8d2c30 ("ipv6: sr: add core files for SR HMAC support") Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- net/ipv6/seg6_hmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 33fb35cbfac1..558fe8cc6d43 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void) return -ENOMEM; for_each_possible_cpu(cpu) { - tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); + tfm = crypto_alloc_shash(algo->name, 0, 0); if (IS_ERR(tfm)) return PTR_ERR(tfm); p_tfm = per_cpu_ptr(algo->tfms, cpu); From 5dc2d3996a8b221c20dd0900bdad45031a572530 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 1 Jul 2018 16:21:21 +0800 Subject: [PATCH 184/382] ipvlan: call dev_change_flags when ipvlan mode is reset After we change the ipvlan mode from l3 to l2, or vice versa, we only reset IFF_NOARP flag, but don't flush the ARP table cache, which will cause eth->h_dest to be equal to eth->h_source in ipvlan_xmit_mode_l2(). Then the message will not come out of host. Here is the reproducer on local host: ip link set eth1 up ip addr add 192.168.1.1/24 dev eth1 ip link add link eth1 ipvlan1 type ipvlan mode l3 ip netns add net1 ip link set ipvlan1 netns net1 ip netns exec net1 ip link set ipvlan1 up ip netns exec net1 ip addr add 192.168.2.1/24 dev ipvlan1 ip route add 192.168.2.0/24 via 192.168.1.2 ping 192.168.2.2 -c 2 ip netns exec net1 ip link set ipvlan1 type ipvlan mode l2 ping 192.168.2.2 -c 2 Add the same configuration on remote host. After we set the mode to l2, we could find that the src/dst MAC addresses are the same on eth1: 21:26:06.648565 00:b7:13:ad:d3:05 > 00:b7:13:ad:d3:05, ethertype IPv4 (0x0800), length 98: (tos 0x0, ttl 64, id 58356, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.2.1 > 192.168.2.2: ICMP echo request, id 22686, seq 1, length 64 Fix this by calling dev_change_flags(), which will call netdevice notifier with flag change info. v2: a) As pointed out by Wang Cong, check return value for dev_change_flags() when change dev flags. b) As suggested by Stefano and Sabrina, move flags setting before l3mdev_ops. So we don't need to redo ipvlan_{, un}register_nf_hook() again in err path. Reported-by: Jianlin Shi Reviewed-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Fixes: 2ad7bf3638411 ("ipvlan: Initial check-in of the IPVLAN driver.") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 36 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 23c1d6600241..4a949569ec4c 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -75,10 +75,23 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; struct net_device *mdev = port->dev; - int err = 0; + unsigned int flags; + int err; ASSERT_RTNL(); if (port->mode != nval) { + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { + flags = ipvlan->dev->flags; + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) { + err = dev_change_flags(ipvlan->dev, + flags | IFF_NOARP); + } else { + err = dev_change_flags(ipvlan->dev, + flags & ~IFF_NOARP); + } + if (unlikely(err)) + goto fail; + } if (nval == IPVLAN_MODE_L3S) { /* New mode is L3S */ err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); @@ -86,21 +99,28 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) mdev->l3mdev_ops = &ipvl_l3mdev_ops; mdev->priv_flags |= IFF_L3MDEV_MASTER; } else - return err; + goto fail; } else if (port->mode == IPVLAN_MODE_L3S) { /* Old mode was L3S */ mdev->priv_flags &= ~IFF_L3MDEV_MASTER; ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); mdev->l3mdev_ops = NULL; } - list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) - ipvlan->dev->flags |= IFF_NOARP; - else - ipvlan->dev->flags &= ~IFF_NOARP; - } port->mode = nval; } + return 0; + +fail: + /* Undo the flags changes that have been done so far. */ + list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) { + flags = ipvlan->dev->flags; + if (port->mode == IPVLAN_MODE_L3 || + port->mode == IPVLAN_MODE_L3S) + dev_change_flags(ipvlan->dev, flags | IFF_NOARP); + else + dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP); + } + return err; } From bb7858ba1102f82470a917e041fd23e6385c31be Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:05 -0700 Subject: [PATCH 185/382] qed: Limit msix vectors in kdump kernel to the minimum required count. Memory size is limited in the kdump kernel environment. Allocation of more msix-vectors (or queues) consumes few tens of MBs of memory, which might lead to the kdump kernel failure. This patch adds changes to limit the number of MSI-X vectors in kdump kernel to minimum required value (i.e., 2 per engine). Fixes: fe56b9e6a ("qed: Add module with basic common support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 5c10fd7210c3..0cbc74d6ca8b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -789,6 +789,14 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, /* We want a minimum of one slowpath and one fastpath vector per hwfn */ cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2; + if (is_kdump_kernel()) { + DP_INFO(cdev, + "Kdump kernel: Limit the max number of requested MSI-X vectors to %hd\n", + cdev->int_params.in.min_msix_cnt); + cdev->int_params.in.num_vectors = + cdev->int_params.in.min_msix_cnt; + } + rc = qed_set_int_mode(cdev, false); if (rc) { DP_ERR(cdev, "qed_slowpath_setup_int ERR\n"); From 538f8d00ba8bb417c4d9e76c61dee59d812d8287 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:06 -0700 Subject: [PATCH 186/382] qed: Fix setting of incorrect eswitch mode. By default, driver sets the eswitch mode incorrectly as VEB (virtual Ethernet bridging). Need to set VEB eswitch mode only when sriov is enabled, and it should be to set NONE by default. The patch incorporates this change. Fixes: 0fefbfbaa ("qed*: Management firmware - notifications and defaults") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_sriov.c | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 329781cda77f..e5249b4741d0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1804,7 +1804,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params) DP_INFO(p_hwfn, "Failed to update driver state\n"); rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt, - QED_OV_ESWITCH_VEB); + QED_OV_ESWITCH_NONE); if (rc) DP_INFO(p_hwfn, "Failed to update eswitch mode\n"); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index f01bf52bc381..fd59cf45f4be 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -4513,6 +4513,8 @@ static void qed_sriov_enable_qid_config(struct qed_hwfn *hwfn, static int qed_sriov_enable(struct qed_dev *cdev, int num) { struct qed_iov_vf_init_params params; + struct qed_hwfn *hwfn; + struct qed_ptt *ptt; int i, j, rc; if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) { @@ -4525,8 +4527,8 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num) /* Initialize HW for VF access */ for_each_hwfn(cdev, j) { - struct qed_hwfn *hwfn = &cdev->hwfns[j]; - struct qed_ptt *ptt = qed_ptt_acquire(hwfn); + hwfn = &cdev->hwfns[j]; + ptt = qed_ptt_acquire(hwfn); /* Make sure not to use more than 16 queues per VF */ params.num_queues = min_t(int, @@ -4562,6 +4564,19 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num) goto err; } + hwfn = QED_LEADING_HWFN(cdev); + ptt = qed_ptt_acquire(hwfn); + if (!ptt) { + DP_ERR(hwfn, "Failed to acquire ptt\n"); + rc = -EBUSY; + goto err; + } + + rc = qed_mcp_ov_update_eswitch(hwfn, ptt, QED_OV_ESWITCH_VEB); + if (rc) + DP_INFO(cdev, "Failed to update eswitch mode\n"); + qed_ptt_release(hwfn, ptt); + return num; err: From cc9b27cdf7bd3c86df73439758ac1564bc8f5bbe Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:07 -0700 Subject: [PATCH 187/382] qed: Fix use of incorrect size in memcpy call. Use the correct size value while copying chassis/port id values. Fixes: 6ad8c632e ("qed: Add support for query/config dcbx.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index f0b01385d5cb..e0680ce91328 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -709,9 +709,9 @@ qed_dcbx_get_local_lldp_params(struct qed_hwfn *p_hwfn, p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE]; memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id, - ARRAY_SIZE(p_local->local_chassis_id)); + sizeof(p_local->local_chassis_id)); memcpy(params->lldp_local.local_port_id, p_local->local_port_id, - ARRAY_SIZE(p_local->local_port_id)); + sizeof(p_local->local_port_id)); } static void @@ -723,9 +723,9 @@ qed_dcbx_get_remote_lldp_params(struct qed_hwfn *p_hwfn, p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE]; memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id, - ARRAY_SIZE(p_remote->peer_chassis_id)); + sizeof(p_remote->peer_chassis_id)); memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id, - ARRAY_SIZE(p_remote->peer_port_id)); + sizeof(p_remote->peer_port_id)); } static int From 82a4e71b1565dea8387f54503e806cf374e779ec Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 1 Jul 2018 20:03:08 -0700 Subject: [PATCH 188/382] qede: Adverstise software timestamp caps when PHC is not available. When ptp clock is not available for a PF (e.g., higher PFs in NPAR mode), get-tsinfo() callback should return the software timestamp capabilities instead of returning the error. Fixes: 4c55215c ("qede: Add driver support for PTP") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ptp.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index 02adb513f475..013ff567283c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -337,8 +337,14 @@ int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info) { struct qede_ptp *ptp = edev->ptp; - if (!ptp) - return -EIO; + if (!ptp) { + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + + return 0; + } info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE | From fe6e04941aa12479a1a58656362bec74100bf7d7 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 29 Jun 2018 19:01:44 +0900 Subject: [PATCH 189/382] mmc: renesas_sdhi_internal_dmac: Fix missing unmap in error patch This patch fixes an issue that lacks the dma_unmap_sg() calling in the error patch of renesas_sdhi_internal_dmac_start_dma(). Fixes: 0cbc94daa554 ("mmc: renesas_sdhi_internal_dmac: limit DMA RX for old SoCs") Cc: # v4.17+ Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index f7f9773d161f..d676a20d959f 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -164,17 +164,14 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host, goto force_pio; /* This DMAC cannot handle if buffer is not 8-bytes alignment */ - if (!IS_ALIGNED(sg_dma_address(sg), 8)) { - dma_unmap_sg(&host->pdev->dev, sg, host->sg_len, - mmc_get_dma_dir(data)); - goto force_pio; - } + if (!IS_ALIGNED(sg_dma_address(sg), 8)) + goto force_pio_with_unmap; if (data->flags & MMC_DATA_READ) { dtran_mode |= DTRAN_MODE_CH_NUM_CH1; if (test_bit(SDHI_INTERNAL_DMAC_ONE_RX_ONLY, &global_flags) && test_and_set_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags)) - goto force_pio; + goto force_pio_with_unmap; } else { dtran_mode |= DTRAN_MODE_CH_NUM_CH0; } @@ -189,6 +186,9 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host, return; +force_pio_with_unmap: + dma_unmap_sg(&host->pdev->dev, sg, host->sg_len, mmc_get_dma_dir(data)); + force_pio: host->force_pio = true; renesas_sdhi_internal_dmac_enable_dma(host, false); From 25a98edd5795719c5187e16ea271e8de86e02809 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Fri, 29 Jun 2018 19:01:45 +0900 Subject: [PATCH 190/382] mmc: renesas_sdhi_internal_dmac: Cannot clear the RX_IN_USE in abort This patch is fixes an issue that the SDHI_INTERNAL_DMAC_RX_IN_USE flag cannot be cleared because tmio_mmc_core sets the host->data to NULL before the tmio_mmc_core calls tmio_mmc_abort_dma(). So, this patch clears the SDHI_INTERNAL_DMAC_RX_IN_USE in the renesas_sdhi_internal_dmac_abort_dma() anyway. This doesn't cause any side effects. Fixes: 0cbc94daa554 ("mmc: renesas_sdhi_internal_dmac: limit DMA RX for old SoCs") Cc: # v4.17+ Signed-off-by: Yoshihiro Shimoda Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_internal_dmac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index d676a20d959f..d032bd63444d 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -139,8 +139,7 @@ renesas_sdhi_internal_dmac_abort_dma(struct tmio_mmc_host *host) { renesas_sdhi_internal_dmac_dm_write(host, DM_CM_RST, RST_RESERVED_BITS | val); - if (host->data && host->data->flags & MMC_DATA_READ) - clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags); + clear_bit(SDHI_INTERNAL_DMAC_RX_IN_USE, &global_flags); renesas_sdhi_internal_dmac_enable_dma(host, true); } From aaa23f86001bdb82d2f937c5c7bce0a1e11a6c5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 27 Jun 2018 07:25:32 +0100 Subject: [PATCH 191/382] ALSA: hda - Handle pm failure during hotplug Obtaining the runtime pm wakeref can fail, especially in a hotplug scenario where i915.ko has been unloaded. If we do not catch the failure, we end up with an unbalanced pm. v2 additions by tiwai: hdmi_present_sense() checks the return value and handle only a negative error case and bails out only if it's really still suspended. Also, snd_hda_power_down() is called at the error path so that the refcount is balanced. Along with it, the spec->pcm_lock is taken outside hdmi_present_sense() in the caller side, so that it won't cause deadlock at reentrace via runtime resume. v3 fix by tiwai: Missing linux/pm_runtime.h is included. References: 222bde03881c ("ALSA: hda - Fix mutex deadlock at HDMI/DP hotplug") Signed-off-by: Chris Wilson Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_hdmi.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 98e1c411c56a..8a49415aebac 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -764,8 +765,10 @@ static void check_presence_and_report(struct hda_codec *codec, hda_nid_t nid, if (pin_idx < 0) return; + mutex_lock(&spec->pcm_lock); if (hdmi_present_sense(get_pin(spec, pin_idx), 1)) snd_hda_jack_report_sync(codec); + mutex_unlock(&spec->pcm_lock); } static void jack_callback(struct hda_codec *codec, @@ -1628,21 +1631,23 @@ static void sync_eld_via_acomp(struct hda_codec *codec, static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll) { struct hda_codec *codec = per_pin->codec; - struct hdmi_spec *spec = codec->spec; int ret; /* no temporary power up/down needed for component notifier */ - if (!codec_has_acomp(codec)) - snd_hda_power_up_pm(codec); + if (!codec_has_acomp(codec)) { + ret = snd_hda_power_up_pm(codec); + if (ret < 0 && pm_runtime_suspended(hda_codec_dev(codec))) { + snd_hda_power_down_pm(codec); + return false; + } + } - mutex_lock(&spec->pcm_lock); if (codec_has_acomp(codec)) { sync_eld_via_acomp(codec, per_pin); ret = false; /* don't call snd_hda_jack_report_sync() */ } else { ret = hdmi_present_sense_via_verbs(per_pin, repoll); } - mutex_unlock(&spec->pcm_lock); if (!codec_has_acomp(codec)) snd_hda_power_down_pm(codec); @@ -1654,12 +1659,16 @@ static void hdmi_repoll_eld(struct work_struct *work) { struct hdmi_spec_per_pin *per_pin = container_of(to_delayed_work(work), struct hdmi_spec_per_pin, work); + struct hda_codec *codec = per_pin->codec; + struct hdmi_spec *spec = codec->spec; if (per_pin->repoll_count++ > 6) per_pin->repoll_count = 0; + mutex_lock(&spec->pcm_lock); if (hdmi_present_sense(per_pin, per_pin->repoll_count)) snd_hda_jack_report_sync(per_pin->codec); + mutex_unlock(&spec->pcm_lock); } static void intel_haswell_fixup_connect_list(struct hda_codec *codec, From 64dafbc9530c10300acffc57fae3269d95fa8f93 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 25 Jun 2018 11:39:52 +0200 Subject: [PATCH 192/382] drbd: fix access after free We have struct drbd_requests { ... struct bio *private_bio; ... } to hold a bio clone for local submission. On local IO completion, we put that bio, and in case we want to use the result later, we overload that member to hold the ERR_PTR() of the completion result, Which, before v4.3, used to be the passed in "int error", so we could first bio_put(), then assign. v4.3-rc1~100^2~21 4246a0b63bd8 block: add a bi_error field to struct bio changed that: bio_put(req->private_bio); - req->private_bio = ERR_PTR(error); + req->private_bio = ERR_PTR(bio->bi_error); Which introduces an access after free, because it was non obvious that req->private_bio == bio. Impact of that was mostly unnoticable, because we only use that value in a multiple-failure case, and even then map any "unexpected" error code to EIO, so worst case we could potentially mask a more specific error with EIO in a multiple failure case. Unless the pointed to memory region was unmapped, as is the case with CONFIG_DEBUG_PAGEALLOC, in which case this results in BUG: unable to handle kernel paging request v4.13-rc1~70^2~75 4e4cbee93d56 block: switch bios to blk_status_t changes it further to bio_put(req->private_bio); req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); And blk_status_to_errno() now contains a WARN_ON_ONCE() for unexpected values, which catches this "sometimes", if the memory has been reused quickly enough for other things. Should also go into stable since 4.3, with the trivial change around 4.13. Cc: stable@vger.kernel.org Fixes: 4246a0b63bd8 block: add a bi_error field to struct bio Reported-by: Sarah Newman Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_worker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 1476cb3439f4..5e793dd7adfb 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -282,8 +282,8 @@ void drbd_request_endio(struct bio *bio) what = COMPLETED_OK; } - bio_put(req->private_bio); req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status)); + bio_put(bio); /* not req_mod(), we need irqsave here! */ spin_lock_irqsave(&device->resource->req_lock, flags); From 8740fa6f5c834a881b9b0d560e62c6fe3c1e60ef Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Jul 2018 08:30:22 +0100 Subject: [PATCH 193/382] sata_nv: remove redundant pointers sdev0 and sdev1 Pointers sdev0 and sdev1 are being assigned but are never used hence they are redundant and can be removed. Cleans up clang warnings: warning: variable 'sdev0' set but not used [-Wunused-but-set-variable] warning: variable 'sdev1' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: Tejun Heo --- drivers/ata/sata_nv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 10ae11aa1926..72c9b922a77b 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -675,7 +675,6 @@ static int nv_adma_slave_config(struct scsi_device *sdev) struct ata_port *ap = ata_shost_to_port(sdev->host); struct nv_adma_port_priv *pp = ap->private_data; struct nv_adma_port_priv *port0, *port1; - struct scsi_device *sdev0, *sdev1; struct pci_dev *pdev = to_pci_dev(ap->host->dev); unsigned long segment_boundary, flags; unsigned short sg_tablesize; @@ -736,8 +735,6 @@ static int nv_adma_slave_config(struct scsi_device *sdev) port0 = ap->host->ports[0]->private_data; port1 = ap->host->ports[1]->private_data; - sdev0 = ap->host->ports[0]->link.device[0].sdev; - sdev1 = ap->host->ports[1]->link.device[0].sdev; if ((port0->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) || (port1->flags & NV_ADMA_ATAPI_SETUP_COMPLETE)) { /* From 240630e61870e62e39a97225048f9945848fa5f5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Jul 2018 12:15:46 +0200 Subject: [PATCH 194/382] ahci: Disable LPM on Lenovo 50 series laptops with a too old BIOS There have been several reports of LPM related hard freezes about once a day on multiple Lenovo 50 series models. Strange enough these reports where not disk model specific as LPM issues usually are and some users with the exact same disk + laptop where seeing them while other users where not seeing these issues. It turns out that enabling LPM triggers a firmware bug somewhere, which has been fixed in later BIOS versions. This commit adds a new ahci_broken_lpm() function and a new ATA_FLAG_NO_LPM for dealing with this. The ahci_broken_lpm() function contains DMI match info for the 4 models which are known to be affected by this and the DMI BIOS date field for known good BIOS versions. If the BIOS date is older then the one in the table LPM will be disabled and a warning will be printed. Note the BIOS dates are for known good versions, some older versions may work too, but we don't know for sure, the table is using dates from BIOS versions for which users have confirmed that upgrading to that version makes the problem go away. Unfortunately I've been unable to get hold of the reporter who reported that BIOS version 2.35 fixed the problems on the W541 for him. I've been able to verify the DMI_SYS_VENDOR and DMI_PRODUCT_VERSION from an older dmidecode, but I don't know the exact BIOS date as reported in the DMI. Lenovo keeps a changelog with dates in their release notes, but the dates there are the release dates not the build dates which are in DMI. So I've chosen to set the date to which we compare to one day past the release date of the 2.34 BIOS. I plan to fix this with a follow up commit once I've the necessary info. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/ahci.c | 59 +++++++++++++++++++++++++++++++++++++++ drivers/ata/libata-core.c | 3 ++ include/linux/libata.h | 1 + 3 files changed, 63 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 738fb22978dd..fdeb3b4d0f4a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1280,6 +1280,59 @@ static bool ahci_broken_suspend(struct pci_dev *pdev) return strcmp(buf, dmi->driver_data) < 0; } +static bool ahci_broken_lpm(struct pci_dev *pdev) +{ + static const struct dmi_system_id sysids[] = { + /* Various Lenovo 50 series have LPM issues with older BIOSen */ + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X250"), + }, + .driver_data = "20180406", /* 1.31 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L450"), + }, + .driver_data = "20180420", /* 1.28 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T450s"), + }, + .driver_data = "20180315", /* 1.33 */ + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad W541"), + }, + /* + * Note date based on release notes, 2.35 has been + * reported to be good, but I've been unable to get + * a hold of the reporter to get the DMI BIOS date. + * TODO: fix this. + */ + .driver_data = "20180310", /* 2.35 */ + }, + { } /* terminate list */ + }; + const struct dmi_system_id *dmi = dmi_first_match(sysids); + int year, month, date; + char buf[9]; + + if (!dmi) + return false; + + dmi_get_date(DMI_BIOS_DATE, &year, &month, &date); + snprintf(buf, sizeof(buf), "%04d%02d%02d", year, month, date); + + return strcmp(buf, dmi->driver_data) < 0; +} + static bool ahci_broken_online(struct pci_dev *pdev) { #define ENCODE_BUSDEVFN(bus, slot, func) \ @@ -1694,6 +1747,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) "quirky BIOS, skipping spindown on poweroff\n"); } + if (ahci_broken_lpm(pdev)) { + pi.flags |= ATA_FLAG_NO_LPM; + dev_warn(&pdev->dev, + "BIOS update required for Link Power Management support\n"); + } + if (ahci_broken_suspend(pdev)) { hpriv->flags |= AHCI_HFLAG_NO_SUSPEND; dev_warn(&pdev->dev, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 27d15ed7fa3d..cc71c63df381 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2493,6 +2493,9 @@ int ata_dev_configure(struct ata_device *dev) (id[ATA_ID_SATA_CAPABILITY] & 0xe) == 0x2) dev->horkage |= ATA_HORKAGE_NOLPM; + if (ap->flags & ATA_FLAG_NO_LPM) + dev->horkage |= ATA_HORKAGE_NOLPM; + if (dev->horkage & ATA_HORKAGE_NOLPM) { ata_dev_warn(dev, "LPM support broken, forcing max_power\n"); dev->link->ap->target_lpm_policy = ATA_LPM_MAX_POWER; diff --git a/include/linux/libata.h b/include/linux/libata.h index a2257e380789..32f247cb5e9e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -210,6 +210,7 @@ enum { ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ /* (doesn't imply presence) */ ATA_FLAG_SATA = (1 << 1), + ATA_FLAG_NO_LPM = (1 << 2), /* host not happy with LPM */ ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */ ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ From 74cb319bd97dd62881f97ea5a3228f7c2546bf56 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 2 Jul 2018 17:13:31 +0300 Subject: [PATCH 195/382] usb: xhci: dbc: Don't decrement runtime PM counter if DBC is not started pm_runtime_put_sync() gets called everytime in xhci_dbc_stop(). If dbc is not started, this makes the runtime PM counter incorrectly becomes 0, and calls autosuspend function. Then we'll keep seeing this: [54664.762220] xhci_hcd 0000:00:14.0: Root hub is not suspended So only calls pm_runtime_put_sync() when dbc was started. Signed-off-by: Kai-Heng Feng Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 1fbfd89d0a0f..387f124a8334 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -508,16 +508,18 @@ static int xhci_do_dbc_start(struct xhci_hcd *xhci) return 0; } -static void xhci_do_dbc_stop(struct xhci_hcd *xhci) +static int xhci_do_dbc_stop(struct xhci_hcd *xhci) { struct xhci_dbc *dbc = xhci->dbc; if (dbc->state == DS_DISABLED) - return; + return -1; writel(0, &dbc->regs->control); xhci_dbc_mem_cleanup(xhci); dbc->state = DS_DISABLED; + + return 0; } static int xhci_dbc_start(struct xhci_hcd *xhci) @@ -544,6 +546,7 @@ static int xhci_dbc_start(struct xhci_hcd *xhci) static void xhci_dbc_stop(struct xhci_hcd *xhci) { + int ret; unsigned long flags; struct xhci_dbc *dbc = xhci->dbc; struct dbc_port *port = &dbc->port; @@ -556,10 +559,11 @@ static void xhci_dbc_stop(struct xhci_hcd *xhci) xhci_dbc_tty_unregister_device(xhci); spin_lock_irqsave(&dbc->lock, flags); - xhci_do_dbc_stop(xhci); + ret = xhci_do_dbc_stop(xhci); spin_unlock_irqrestore(&dbc->lock, flags); - pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller); + if (!ret) + pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller); } static void From 146971e6244b18eca60f614a1b94048844066d23 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Jul 2018 11:48:34 +0200 Subject: [PATCH 196/382] typec: tcpm: Correctly report power_supply current and voltage for non pd supply Commit f2a8aa053c17 ("typec: tcpm: Represent source supply through power_supply") moved the code to register a power_supply representing the device supplying power to the type-C connector, from the fusb302 code to the generic tcpm code so that we have a psy reporting the supply voltage and current for all tcpm devices. This broke the reporting of current and voltage through the psy interface when supplied by a a non pd supply (5V, current as reported by get_current_limit). The cause of this breakage is port->supply_voltage and port->current_limit not being set in that case. This commit fixes this by setting port->supply_voltage and port->current_limit from tcpm_set_current_limit(). This commit also removes setting supply_voltage and current_limit from tcpm_reset_port() as that calls tcpm_set_current_limit(0, 0) which now already sets these to 0. Fixes: f2a8aa053c17 ("typec: tcpm: Represent source supply through...") Signed-off-by: Hans de Goede Reviewed-by: Guenter Roeck Reviewed-by: Adam Thomson Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c index d961f1ec0e08..150f43668bec 100644 --- a/drivers/usb/typec/tcpm.c +++ b/drivers/usb/typec/tcpm.c @@ -725,6 +725,9 @@ static int tcpm_set_current_limit(struct tcpm_port *port, u32 max_ma, u32 mv) tcpm_log(port, "Setting voltage/current limit %u mV %u mA", mv, max_ma); + port->supply_voltage = mv; + port->current_limit = max_ma; + if (port->tcpc->set_current_limit) ret = port->tcpc->set_current_limit(port->tcpc, max_ma, mv); @@ -2595,8 +2598,6 @@ static void tcpm_reset_port(struct tcpm_port *port) tcpm_set_attached_state(port, false); port->try_src_count = 0; port->try_snk_count = 0; - port->supply_voltage = 0; - port->current_limit = 0; port->usb_type = POWER_SUPPLY_USB_TYPE_C; power_supply_changed(port->psy); From b3a653288e1aeebcc7367dc6d5a42bccbff8bde1 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Jul 2018 11:48:35 +0200 Subject: [PATCH 197/382] i2c-cht-wc: Fix bq24190 supplier Commit f2a8aa053c17 ("typec: tcpm: Represent source supply through power_supply") moved the code to register a power_supply representing the device supplying power to the type-C connector, from the fusb302 code to the generic tcpm code. This has caused the power-supply registered by the fusb302 driver, which determines how much current the bq24190 can draw, to change name from "fusb302-typec-source" to "tcpm-source-psy-i2c-fusb302". Fixes: f2a8aa053c17 ("typec: tcpm: Represent source supply through...") Signed-off-by: Hans de Goede Acked-by: Wolfram Sang Reviewed-by: Guenter Roeck Acked-by: Adam Thomson Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-cht-wc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cht-wc.c b/drivers/i2c/busses/i2c-cht-wc.c index 44cffad43701..c4d176f5ed79 100644 --- a/drivers/i2c/busses/i2c-cht-wc.c +++ b/drivers/i2c/busses/i2c-cht-wc.c @@ -234,7 +234,8 @@ static const struct irq_chip cht_wc_i2c_irq_chip = { .name = "cht_wc_ext_chrg_irq_chip", }; -static const char * const bq24190_suppliers[] = { "fusb302-typec-source" }; +static const char * const bq24190_suppliers[] = { + "tcpm-source-psy-i2c-fusb302" }; static const struct property_entry bq24190_props[] = { PROPERTY_ENTRY_STRING_ARRAY("supplied-from", bq24190_suppliers), From ba44579141f9e2c0229e6e7eeb00b5fa68f0f74a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 27 Jun 2018 15:15:40 +0300 Subject: [PATCH 198/382] ahci: Add Intel Ice Lake LP PCI ID This should also be using the default LPM policy for mobile chipsets so add the PCI ID to the driver list of supported devices. Signed-off-by: Mika Westerberg Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/ahci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index fdeb3b4d0f4a..b2b9eba1d214 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -400,6 +400,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x0f23), board_ahci_mobile }, /* Bay Trail AHCI */ { PCI_VDEVICE(INTEL, 0x22a3), board_ahci_mobile }, /* Cherry Tr. AHCI */ { PCI_VDEVICE(INTEL, 0x5ae3), board_ahci_mobile }, /* ApolloLake AHCI */ + { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_mobile }, /* Ice Lake LP AHCI */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From b320a0a9f23c98f21631eb27bcbbca91c79b1c6e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 26 Jun 2018 20:56:54 +0900 Subject: [PATCH 199/382] ata: Fix ZBC_OUT command block check The block (LBA) specified must not exceed the last addressable LBA, which is dev->nr_sectors - 1. So fix the correct check is "if (block >= dev->n_sectors)" and not "if (block > dev->n_sectords)". Additionally, the asc/ascq to return for an LBA that is not a zone start LBA should be ILLEGAL REQUEST, regardless if the bad LBA is out of range. Reported-by: David Butterfield Signed-off-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo --- drivers/ata/libata-scsi.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 6a91d04351d9..a5543751f446 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3805,8 +3805,13 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) */ goto invalid_param_len; } - if (block > dev->n_sectors) - goto out_of_range; + if (block >= dev->n_sectors) { + /* + * Block must be a valid zone ID (a zone start LBA). + */ + fp = 2; + goto invalid_fld; + } all = cdb[14] & 0x1; @@ -3837,10 +3842,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) invalid_fld: ata_scsi_set_invalid_field(qc->dev, scmd, fp, 0xff); return 1; - out_of_range: - /* "Logical Block Address out of range" */ - ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x21, 0x00); - return 1; invalid_param_len: /* "Parameter list length error" */ ata_scsi_set_sense(qc->dev, scmd, ILLEGAL_REQUEST, 0x1a, 0x0); From 6edf1d4cb0acde3a0a5dac849f33031bd7abb7b1 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 26 Jun 2018 20:56:55 +0900 Subject: [PATCH 200/382] ata: Fix ZBC_OUT all bit handling If the ALL bit is set in the ZBC_OUT command, the command zone ID field (block) should be ignored. Reported-by: David Butterfield Signed-off-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo --- drivers/ata/libata-scsi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a5543751f446..aad1b01447de 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3805,7 +3805,14 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) */ goto invalid_param_len; } - if (block >= dev->n_sectors) { + + all = cdb[14] & 0x1; + if (all) { + /* + * Ignore the block address (zone ID) as defined by ZBC. + */ + block = 0; + } else if (block >= dev->n_sectors) { /* * Block must be a valid zone ID (a zone start LBA). */ @@ -3813,8 +3820,6 @@ static unsigned int ata_scsi_zbc_out_xlat(struct ata_queued_cmd *qc) goto invalid_fld; } - all = cdb[14] & 0x1; - if (ata_ncq_enabled(qc->dev) && ata_fpdma_zac_mgmt_out_supported(qc->dev)) { tf->protocol = ATA_PROT_NCQ_NODATA; From 718b5406cd76f1aa6434311241b7febf0e8571ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 29 Jun 2018 16:27:10 +0200 Subject: [PATCH 201/382] drm: Use kvzalloc for allocating blob property memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The property size may be controlled by userspace, can be large (I've seen failure with order 4, i.e. 16 pages / 64 KB) and doesn't need to be physically contiguous. Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20180629142710.2069-1-michel@daenzer.net Cc: stable@vger.kernel.org --- drivers/gpu/drm/drm_property.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 1f8031e30f53..cdb10f885a4f 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -532,7 +532,7 @@ static void drm_property_free_blob(struct kref *kref) drm_mode_object_unregister(blob->dev, &blob->base); - kfree(blob); + kvfree(blob); } /** @@ -559,7 +559,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob)) return ERR_PTR(-EINVAL); - blob = kzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); + blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); if (!blob) return ERR_PTR(-ENOMEM); @@ -576,7 +576,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, ret = __drm_mode_object_add(dev, &blob->base, DRM_MODE_OBJECT_BLOB, true, drm_property_free_blob); if (ret) { - kfree(blob); + kvfree(blob); return ERR_PTR(-EINVAL); } From 0859df22ab7cfb3ad2df2caed76cadce6ac33a80 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Jun 2018 12:38:53 -0500 Subject: [PATCH 202/382] drm/amdgpu: fix swapped emit_ib_size in vce3 The phys and vm versions had the values swapped. Reviewed-by: Junwei Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 0999c843f623..a71b97519cc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -900,7 +900,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { .emit_frame_size = 4 + /* vce_v3_0_emit_pipeline_sync */ 6, /* amdgpu_vce_ring_emit_fence x1 no user fence */ - .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */ + .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */ .emit_ib = amdgpu_vce_ring_emit_ib, .emit_fence = amdgpu_vce_ring_emit_fence, .test_ring = amdgpu_vce_ring_test_ring, @@ -924,7 +924,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_vm_funcs = { 6 + /* vce_v3_0_emit_vm_flush */ 4 + /* vce_v3_0_emit_pipeline_sync */ 6 + 6, /* amdgpu_vce_ring_emit_fence x2 vm fence */ - .emit_ib_size = 4, /* amdgpu_vce_ring_emit_ib */ + .emit_ib_size = 5, /* vce_v3_0_ring_emit_ib */ .emit_ib = vce_v3_0_ring_emit_ib, .emit_vm_flush = vce_v3_0_emit_vm_flush, .emit_pipeline_sync = vce_v3_0_emit_pipeline_sync, From c3d0280be8d5c6de9e3848e280493f561799bb67 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Jun 2018 12:44:25 -0500 Subject: [PATCH 203/382] drm/amdgpu/pm: fix display count in non-DC path new_active_crtcs is a bitmask, new_active_crtc_count is the actual count. Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index b455da487782..fc818b4d849c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -1882,7 +1882,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { mutex_lock(&adev->pm.mutex); amdgpu_dpm_get_active_displays(adev); - adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtcs; + adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count; adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev); adev->pm.pm_display_cfg.min_vblank_time = amdgpu_dpm_get_vblank_time(adev); /* we have issues with mclk switching with refresh rates over 120 hz on the non-DC code. */ From d55bac2754476624f23bdf3b908d117f3cdf469b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 13:03:07 +0200 Subject: [PATCH 204/382] ata: Remove depends on HAS_DMA in case of platform dependency Remove dependencies on HAS_DMA where a Kconfig symbol depends on another symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST". In most cases this other symbol is an architecture or platform specific symbol, or PCI. Generic symbols and drivers without platform dependencies keep their dependencies on HAS_DMA, to prevent compiling subsystems or drivers that cannot work anyway. This simplifies the dependencies, and allows to improve compile-testing. Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Signed-off-by: Tejun Heo --- drivers/ata/Kconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 2b16e7c8fff3..39b181d6bd0d 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -398,7 +398,6 @@ config SATA_DWC_VDEBUG config SATA_HIGHBANK tristate "Calxeda Highbank SATA support" - depends on HAS_DMA depends on ARCH_HIGHBANK || COMPILE_TEST help This option enables support for the Calxeda Highbank SoC's @@ -408,7 +407,6 @@ config SATA_HIGHBANK config SATA_MV tristate "Marvell SATA support" - depends on HAS_DMA depends on PCI || ARCH_DOVE || ARCH_MV78XX0 || \ ARCH_MVEBU || ARCH_ORION5X || COMPILE_TEST select GENERIC_PHY From 30998033f62a9915e0f884d680569a39bc9ce133 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 29 Jun 2018 17:17:57 +0100 Subject: [PATCH 205/382] ACPI / PPTT: use ACPI ID whenever ACPI_PPTT_ACPI_PROCESSOR_ID_VALID is set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we use the ACPI processor ID only for the leaf/processor nodes as the specification states it must match the value of the ACPI processor ID field in the processor’s entry in the MADT. However, if a PPTT structure represents a processors group, it matches a processor container UID in the namespace and the ACPI_PPTT_ACPI_PROCESSOR_ID_VALID flag indicates whether the ACPI processor ID is valid. Let's use UID whenever ACPI_PPTT_ACPI_PROCESSOR_ID_VALID is set to be consistent instead of using table offset as it's currently done for non-leaf nodes. Fixes: 2bd00bcd73e5 (ACPI/PPTT: Add Processor Properties Topology Table parsing) Signed-off-by: Sudeep Holla Acked-by: Jeremy Linton [ rjw: Changelog (minor) ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pptt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c index e5ea1974d1e3..d1e26cb599bf 100644 --- a/drivers/acpi/pptt.c +++ b/drivers/acpi/pptt.c @@ -481,8 +481,14 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table, if (cpu_node) { cpu_node = acpi_find_processor_package_id(table, cpu_node, level, flag); - /* Only the first level has a guaranteed id */ - if (level == 0) + /* + * As per specification if the processor structure represents + * an actual processor, then ACPI processor ID must be valid. + * For processor containers ACPI_PPTT_ACPI_PROCESSOR_ID_VALID + * should be set if the UID is valid + */ + if (level == 0 || + cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID) return cpu_node->acpi_processor_id; return ACPI_PTR_DIFF(cpu_node, table); } From a17712c8e4be4fa5404d20e9cd3b2b21eae7bc56 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 2 Jul 2018 18:45:18 -0400 Subject: [PATCH 206/382] ext4: check superblock mapped prior to committing This patch attempts to close a hole leading to a BUG seen with hot removals during writes [1]. A block device (NVME namespace in this test case) is formatted to EXT4 without partitions. It's mounted and write I/O is run to a file, then the device is hot removed from the slot. The superblock attempts to be written to the drive which is no longer present. The typical chain of events leading to the BUG: ext4_commit_super() __sync_dirty_buffer() submit_bh() submit_bh_wbc() BUG_ON(!buffer_mapped(bh)); This fix checks for the superblock's buffer head being mapped prior to syncing. [1] https://www.spinics.net/lists/linux-ext4/msg56527.html Signed-off-by: Jon Derrick Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b37b00befd65..f36c9f774e83 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4808,6 +4808,14 @@ static int ext4_commit_super(struct super_block *sb, int sync) if (!sbh || block_device_ejected(sb)) return error; + + /* + * The superblock bh should be mapped, but it might not be if the + * device was hot-removed. Not much we can do but fail the I/O. + */ + if (!buffer_mapped(sbh)) + return error; + /* * If the file system is mounted read-only, don't update the * superblock write time. This avoids updating the superblock From 60feca8f59a0bbdee9545ab52775cad0171151ff Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 2 Jul 2018 09:27:47 +0800 Subject: [PATCH 207/382] mmc: core: cd_label must be last entry of mmc_gpio struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bfd694d5e21c ("mmc: core: Add tunable delay before detecting card after card is inserted") adds "u32 cd_debounce_delay_ms" to the last of mmc_gpio struct and cause "char cd_label[0]" NOT work as string pointer of card detect label, when "cat /proc/interrupts", the devname for card detect gpio is incorrect as below: 144: 0 gpio-mxc 22 Edge ▒ 161: 0 gpio-mxc 7 Edge ▒ Move the cd_label field down to fix this, and drop the zero from the array size to prevent future similar bugs, the result is correct as below: 144: 0 gpio-mxc 22 Edge 2198000.mmc cd 161: 0 gpio-mxc 7 Edge 2190000.mmc cd Fixes: bfd694d5e21c ("mmc: core: Add tunable delay before detecting card after card is inserted") Signed-off-by: Anson Huang Tested-by: Fabio Estevam Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index ef05e0039378..2a833686784b 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -27,8 +27,8 @@ struct mmc_gpio { bool override_cd_active_level; irqreturn_t (*cd_gpio_isr)(int irq, void *dev_id); char *ro_label; - char cd_label[0]; u32 cd_debounce_delay_ms; + char cd_label[]; }; static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id) From 5d1c115241e4aa0034ddf44693c6bd2e89e10831 Mon Sep 17 00:00:00 2001 From: Robert Nelson Date: Mon, 2 Jul 2018 15:21:43 -0500 Subject: [PATCH 208/382] ARM: dts: am335x-bone-common: Fix mmc0 Write Protect Mainline Commit b74c2b21e1551018af53ee6c1efc051dfce2d788 added the pinmux settings for mmc1, however this pin (0x9a0) is routed to P9_42 on the cape header. Thus any BeagleBone cape that utilizes P9_42 triggers mmc0's Write Protect. Fixes: b74c2b21e155 ("ARM: dts: am33xx: Add pinmux data for mmc1 in am335x-evm, evmsk and beaglebone") Signed-off-by: Robert Nelson CC: Faiz Abbas CC: Tony Lindgren CC: Jason Kridner CC: Drew Fustini Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-bone-common.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi index f9e8667f5886..73b514dddf65 100644 --- a/arch/arm/boot/dts/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/am335x-bone-common.dtsi @@ -168,7 +168,6 @@ AM33XX_IOPAD(0x8f4, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_dat2.mmc0_dat2 */ AM33XX_IOPAD(0x8f0, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_dat3.mmc0_dat3 */ AM33XX_IOPAD(0x904, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_cmd.mmc0_cmd */ AM33XX_IOPAD(0x900, PIN_INPUT_PULLUP | MUX_MODE0) /* mmc0_clk.mmc0_clk */ - AM33XX_IOPAD(0x9a0, PIN_INPUT | MUX_MODE4) /* mcasp0_aclkr.mmc0_sdwp */ >; }; From d9c0ffcabd6aae7ff1e34e8078354c13bb9f1183 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 28 Jun 2018 18:29:41 +0200 Subject: [PATCH 209/382] sched/nohz: Skip remote tick on idle task entirely Some people have reported that the warning in sched_tick_remote() occasionally triggers, especially in favour of some RCU-Torture pressure: WARNING: CPU: 11 PID: 906 at kernel/sched/core.c:3138 sched_tick_remote+0xb6/0xc0 Modules linked in: CPU: 11 PID: 906 Comm: kworker/u32:3 Not tainted 4.18.0-rc2+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Workqueue: events_unbound sched_tick_remote RIP: 0010:sched_tick_remote+0xb6/0xc0 Code: e8 0f 06 b8 00 c6 03 00 fb eb 9d 8b 43 04 85 c0 75 8d 48 8b 83 e0 0a 00 00 48 85 c0 75 81 eb 88 48 89 df e8 bc fe ff ff eb aa <0f> 0b eb +c5 66 0f 1f 44 00 00 bf 17 00 00 00 e8 b6 2e fe ff 0f b6 Call Trace: process_one_work+0x1df/0x3b0 worker_thread+0x44/0x3d0 kthread+0xf3/0x130 ? set_worker_desc+0xb0/0xb0 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x35/0x40 This happens when the remote tick applies on an idle task. Usually the idle_cpu() check avoids that, but it is performed before we lock the runqueue and it is therefore racy. It was intended to be that way in order to prevent from useless runqueue locks since idle task tick callback is a no-op. Now if the racy check slips out of our hands and we end up remotely ticking an idle task, the empty task_tick_idle() is harmless. Still it won't pass the WARN_ON_ONCE() test that ensures rq_clock_task() is not too far from curr->se.exec_start because update_curr_idle() doesn't update the exec_start value like other scheduler policies. Hence the reported false positive. So let's have another check, while the rq is locked, to make sure we don't remote tick on an idle task. The lockless idle_cpu() still applies to avoid unecessary rq lock contention. Reported-by: Jacek Tomaka Reported-by: Paul E. McKenney Reported-by: Anna-Maria Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1530203381-31234-1-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78d8facba456..22fce36426c0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3113,7 +3113,9 @@ static void sched_tick_remote(struct work_struct *work) struct tick_work *twork = container_of(dwork, struct tick_work, work); int cpu = twork->cpu; struct rq *rq = cpu_rq(cpu); + struct task_struct *curr; struct rq_flags rf; + u64 delta; /* * Handle the tick only if it appears the remote CPU is running in full @@ -3122,24 +3124,28 @@ static void sched_tick_remote(struct work_struct *work) * statistics and checks timeslices in a time-independent way, regardless * of when exactly it is running. */ - if (!idle_cpu(cpu) && tick_nohz_tick_stopped_cpu(cpu)) { - struct task_struct *curr; - u64 delta; + if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) + goto out_requeue; - rq_lock_irq(rq, &rf); - update_rq_clock(rq); - curr = rq->curr; - delta = rq_clock_task(rq) - curr->se.exec_start; + rq_lock_irq(rq, &rf); + curr = rq->curr; + if (is_idle_task(curr)) + goto out_unlock; - /* - * Make sure the next tick runs within a reasonable - * amount of time. - */ - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); - curr->sched_class->task_tick(rq, curr, 0); - rq_unlock_irq(rq, &rf); - } + update_rq_clock(rq); + delta = rq_clock_task(rq) - curr->se.exec_start; + /* + * Make sure the next tick runs within a reasonable + * amount of time. + */ + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + curr->sched_class->task_tick(rq, curr, 0); + +out_unlock: + rq_unlock_irq(rq, &rf); + +out_requeue: /* * Run the remote tick once per second (1Hz). This arbitrary * frequency is large enough to avoid overload but short enough From 296b2ffe7fa9ed756c41415c6b1512bc4ad687b1 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 26 Jun 2018 15:53:22 +0200 Subject: [PATCH 210/382] sched/rt: Fix call to cpufreq_update_util() With commit: 8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support") the schedutil governor uses rq->rt.rt_nr_running to detect whether an RT task is currently running on the CPU and to set frequency to max if necessary. cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq() but rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is called so schedutil still considers that an RT task is running when the last task is dequeued. The update of rq->rt.rt_nr_running happens later in dequeue_rt_stack(). In fact, we can take advantage of the sequence that the dequeue then re-enqueue rt entities when a rt task is enqueued or dequeued; As a result enqueue_top_rt_rq() is always called when a task is enqueued or dequeued and also when groups are throttled or unthrottled. The only place that not use enqueue_top_rt_rq() is when root rt_rq is throttled. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: efault@gmx.de Cc: juri.lelli@redhat.com Cc: patrick.bellasi@arm.com Cc: viresh.kumar@linaro.org Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support') Link: http://lkml.kernel.org/r/1530021202-21695-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 2 +- kernel/sched/rt.c | 16 ++++++++++------ kernel/sched/sched.h | 5 +++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3cde46483f0a..c907fde01eaa 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -192,7 +192,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); - if (rq->rt.rt_nr_running) + if (rt_rq_is_runnable(&rq->rt)) return sg_cpu->max; /* diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 47556b0c9a95..572567078b60 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) rt_se = rt_rq->tg->rt_se[cpu]; - if (!rt_se) + if (!rt_se) { dequeue_top_rt_rq(rt_rq); + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); + } else if (on_rt_rq(rt_se)) dequeue_rt_entity(rt_se, 0); } @@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0; - /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); } static void @@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq) if (rt_rq->rt_queued) return; - if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) + + if (rt_rq_throttled(rt_rq)) return; - add_nr_running(rq, rt_rq->rt_nr_running); - rt_rq->rt_queued = 1; + if (rt_rq->rt_nr_running) { + add_nr_running(rq, rt_rq->rt_nr_running); + rt_rq->rt_queued = 1; + } /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ cpufreq_update_util(rq, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6601baf2361c..27ddec334601 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -609,6 +609,11 @@ struct rt_rq { #endif }; +static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) +{ + return rt_rq->rt_queued && rt_rq->rt_nr_running; +} + /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */ From 512ac999d2755d2b7109e996a76b6fb8b888631d Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jun 2018 18:18:33 +0800 Subject: [PATCH 211/382] sched/fair: Fix bandwidth timer clock drift condition I noticed that cgroup task groups constantly get throttled even if they have low CPU usage, this causes some jitters on the response time to some of our business containers when enabling CPU quotas. It's very simple to reproduce: mkdir /sys/fs/cgroup/cpu/test cd /sys/fs/cgroup/cpu/test echo 100000 > cpu.cfs_quota_us echo $$ > tasks then repeat: cat cpu.stat | grep nr_throttled # nr_throttled will increase steadily After some analysis, we found that cfs_rq::runtime_remaining will be cleared by expire_cfs_rq_runtime() due to two equal but stale "cfs_{b|q}->runtime_expires" after period timer is re-armed. The current condition to judge clock drift in expire_cfs_rq_runtime() is wrong, the two runtime_expires are actually the same when clock drift happens, so this condtion can never hit. The orginal design was correctly done by this commit: a9cf55b28610 ("sched: Expire invalid runtime") ... but was changed to be the current implementation due to its locking bug. This patch introduces another way, it adds a new field in both structures cfs_rq and cfs_bandwidth to record the expiration update sequence, and uses them to figure out if clock drift happens (true if they are equal). Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 51f2176d74ac ("sched/fair: Fix unlocked reads of some cfs_b->quota/period") Link: http://lkml.kernel.org/r/20180620101834.24455-1-xlpang@linux.alibaba.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 14 ++++++++------ kernel/sched/sched.h | 6 ++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1866e64792a7..791707c56886 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4590,6 +4590,7 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) now = sched_clock_cpu(smp_processor_id()); cfs_b->runtime = cfs_b->quota; cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); + cfs_b->expires_seq++; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -4612,6 +4613,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); u64 amount = 0, min_amount, expires; + int expires_seq; /* note: this is a positive sum as runtime_remaining <= 0 */ min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; @@ -4628,6 +4630,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_b->idle = 0; } } + expires_seq = cfs_b->expires_seq; expires = cfs_b->runtime_expires; raw_spin_unlock(&cfs_b->lock); @@ -4637,8 +4640,10 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) * spread between our sched_clock and the one on which runtime was * issued. */ - if ((s64)(expires - cfs_rq->runtime_expires) > 0) + if (cfs_rq->expires_seq != expires_seq) { + cfs_rq->expires_seq = expires_seq; cfs_rq->runtime_expires = expires; + } return cfs_rq->runtime_remaining > 0; } @@ -4664,12 +4669,9 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) * has not truly expired. * * Fortunately we can check determine whether this the case by checking - * whether the global deadline has advanced. It is valid to compare - * cfs_b->runtime_expires without any locks since we only care about - * exact equality, so a partial write will still work. + * whether the global deadline(cfs_b->expires_seq) has advanced. */ - - if (cfs_rq->runtime_expires != cfs_b->runtime_expires) { + if (cfs_rq->expires_seq == cfs_b->expires_seq) { /* extend local deadline, drift is bounded above by 2 ticks */ cfs_rq->runtime_expires += TICK_NSEC; } else { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 27ddec334601..c7742dcc136c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -334,9 +334,10 @@ struct cfs_bandwidth { u64 runtime; s64 hierarchical_quota; u64 runtime_expires; + int expires_seq; - int idle; - int period_active; + short idle; + short period_active; struct hrtimer period_timer; struct hrtimer slack_timer; struct list_head throttled_cfs_rq; @@ -551,6 +552,7 @@ struct cfs_rq { #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; + int expires_seq; u64 runtime_expires; s64 runtime_remaining; From f1d1be8aee6c461652aea8f58bedebaa73d7f4d3 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jun 2018 18:18:34 +0800 Subject: [PATCH 212/382] sched/fair: Advance global expiration when period timer is restarted When period gets restarted after some idle time, start_cfs_bandwidth() doesn't update the expiration information, expire_cfs_rq_runtime() will see cfs_rq->runtime_expires smaller than rq clock and go to the clock drift logic, wasting needless CPU cycles on the scheduler hot path. Update the global expiration in start_cfs_bandwidth() to avoid frequent expire_cfs_rq_runtime() calls once a new period begins. Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180620101834.24455-2-xlpang@linux.alibaba.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 791707c56886..840b92ee6f89 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5204,13 +5204,18 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { + u64 overrun; + lockdep_assert_held(&cfs_b->lock); - if (!cfs_b->period_active) { - cfs_b->period_active = 1; - hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); - hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); - } + if (cfs_b->period_active) + return; + + cfs_b->period_active = 1; + overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); + cfs_b->runtime_expires += (overrun + 1) * ktime_to_ns(cfs_b->period); + cfs_b->expires_seq++; + hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); } static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) From 3482d98bbc730758b63a5d1cf41d05ea17481412 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 14 Jun 2018 12:33:00 +0200 Subject: [PATCH 213/382] sched/util_est: Fix util_est_dequeue() for throttled cfs_rq When a cfs_rq is throttled, parent cfs_rq->nr_running is decreased and everything happens at cfs_rq level. Currently util_est stays unchanged in such case and it keeps accounting the utilization of throttled tasks. This can somewhat make sense as we don't dequeue tasks but only throttled cfs_rq. If a task of another group is enqueued/dequeued and root cfs_rq becomes idle during the dequeue, util_est will be cleared whereas it was accounting util_est of throttled tasks before. So the behavior of util_est is not always the same regarding throttled tasks and depends of side activity. Furthermore, util_est will not be updated when the cfs_rq is unthrottled as everything happens at cfs_rq level. Main results is that util_est will stay null whereas we now have running tasks. We have to wait for the next dequeue/enqueue of the previously throttled tasks to get an up to date util_est. Remove the assumption that cfs_rq's estimated utilization of a CPU is 0 if there is no running task so the util_est of a task remains until the latter is dequeued even if its cfs_rq has been throttled. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Patrick Bellasi Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 7f65ea42eb00 ("sched/fair: Add util_est on top of PELT") Link: http://lkml.kernel.org/r/1528972380-16268-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 840b92ee6f89..2f0a0be4d344 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3982,18 +3982,10 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) if (!sched_feat(UTIL_EST)) return; - /* - * Update root cfs_rq's estimated utilization - * - * If *p is the last task then the root cfs_rq's estimated utilization - * of a CPU is 0 by definition. - */ - ue.enqueued = 0; - if (cfs_rq->nr_running) { - ue.enqueued = cfs_rq->avg.util_est.enqueued; - ue.enqueued -= min_t(unsigned int, ue.enqueued, - (_task_util_est(p) | UTIL_AVG_UNCHANGED)); - } + /* Update root cfs_rq's estimated utilization */ + ue.enqueued = cfs_rq->avg.util_est.enqueued; + ue.enqueued -= min_t(unsigned int, ue.enqueued, + (_task_util_est(p) | UTIL_AVG_UNCHANGED)); WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued); /* From 1cef1150ef40ec52f507436a14230cbc2623299c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jun 2018 11:45:49 +0200 Subject: [PATCH 214/382] kthread, sched/core: Fix kthread_parkme() (again...) Gaurav reports that commit: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") isn't working for him. Because of the following race: > controller Thread CPUHP Thread > takedown_cpu > kthread_park > kthread_parkme > Set KTHREAD_SHOULD_PARK > smpboot_thread_fn > set Task interruptible > > > wake_up_process > if (!(p->state & state)) > goto out; > > Kthread_parkme > SET TASK_PARKED > schedule > raw_spin_lock(&rq->lock) > ttwu_remote > waiting for __task_rq_lock > context_switch > > finish_lock_switch > > > > Case TASK_PARKED > kthread_park_complete > > > SET Running Furthermore, Oleg noticed that the whole scheduler TASK_PARKED handling is buggered because the TASK_DEAD thing is done with preemption disabled, the current code can still complete early on preemption :/ So basically revert that earlier fix and go with a variant of the alternative mentioned in the commit. Promote TASK_PARKED to special state to avoid the store-store issue on task->state leading to the WARN in kthread_unpark() -> __kthread_bind(). But in addition, add wait_task_inactive() to kthread_park() to ensure the task really is PARKED when we return from kthread_park(). This avoids the whole kthread still gets migrated nonsense -- although it would be really good to get this done differently. Reported-by: Gaurav Kohli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") Signed-off-by: Ingo Molnar --- include/linux/kthread.h | 1 - include/linux/sched.h | 2 +- kernel/kthread.c | 30 ++++++++++++++++++++++++------ kernel/sched/core.c | 31 +++++++++++-------------------- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2803264c512f..c1961761311d 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_park_complete(struct task_struct *k); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/sched.h b/include/linux/sched.h index 9256118bd40c..43731fe51c97 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -118,7 +118,7 @@ struct task_group; * the comment with set_special_state(). */ #define is_special_task_state(state) \ - ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD)) + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) #define __set_current_state(state_value) \ do { \ diff --git a/kernel/kthread.c b/kernel/kthread.c index 481951bf091d..750cb8082694 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { for (;;) { - set_current_state(TASK_PARKED); + /* + * TASK_PARKED is a special state; we must serialize against + * possible pending wakeups to avoid store-store collisions on + * task->state. + * + * Such a collision might possibly result in the task state + * changin from TASK_PARKED and us failing the + * wait_task_inactive() in kthread_park(). + */ + set_special_state(TASK_PARKED); if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) break; + + complete_all(&self->parked); schedule(); } __set_current_state(TASK_RUNNING); @@ -191,11 +202,6 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -void kthread_park_complete(struct task_struct *k) -{ - complete_all(&to_kthread(k)->parked); -} - static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k) reinit_completion(&kthread->parked); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup. + */ wake_up_state(k, TASK_PARKED); } EXPORT_SYMBOL_GPL(kthread_unpark); @@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k) set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); if (k != current) { wake_up_process(k); + /* + * Wait for __kthread_parkme() to complete(), this means we + * _will_ have TASK_PARKED and are about to call schedule(). + */ wait_for_completion(&kthread->parked); + /* + * Now wait for that schedule() to complete and the task to + * get scheduled out. + */ + WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED)); } return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 22fce36426c0..fe365c9a08e9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7,7 +7,6 @@ */ #include "sched.h" -#include #include #include @@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev) membarrier_mm_sync_core_before_usermode(mm); mmdrop(mm); } - if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) { - switch (prev_state) { - case TASK_DEAD: - if (prev->sched_class->task_dead) - prev->sched_class->task_dead(prev); + if (unlikely(prev_state == TASK_DEAD)) { + if (prev->sched_class->task_dead) + prev->sched_class->task_dead(prev); - /* - * Remove function-return probe instances associated with this - * task and put them back on the free list. - */ - kprobe_flush_task(prev); + /* + * Remove function-return probe instances associated with this + * task and put them back on the free list. + */ + kprobe_flush_task(prev); - /* Task is done with its stack. */ - put_task_stack(prev); + /* Task is done with its stack. */ + put_task_stack(prev); - put_task_struct(prev); - break; - - case TASK_PARKED: - kthread_park_complete(prev); - break; - } + put_task_struct(prev); } tick_nohz_task_switch(); From 4fb5f58e8d191f7c81637ad81284e4848afb4244 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Mon, 2 Jul 2018 23:49:54 -0700 Subject: [PATCH 215/382] x86/mm/32: Initialize the CR4 shadow before __flush_tlb_all() On 32-bit kernels, __flush_tlb_all() may have read the CR4 shadow before the initialization of CR4 shadow in cpu_init(). Fix it by adding an explicit cr4_init_shadow() call into start_secondary() which is the first function called on non-boot SMP CPUs - ahead of the __flush_tlb_all() call. ( This is somewhat of a layering violation, but start_secondary() does CR4 bootstrap in the PCID case anyway. ) Signed-off-by: Zhenzhong Duan Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/b07b6ae9-4b57-4b40-b9bc-50c2c67f1d91@default Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c2f7d1d2a5c3..db9656e13ea0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -221,6 +221,11 @@ static void notrace start_secondary(void *unused) #ifdef CONFIG_X86_32 /* switch away from the initial page table */ load_cr3(swapper_pg_dir); + /* + * Initialize the CR4 shadow before doing anything that could + * try to read it. + */ + cr4_init_shadow(); __flush_tlb_all(); #endif load_current_idt(); From 0144eb204cdcdf09a76794b4a294291388e739bc Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Mon, 25 Jun 2018 07:41:33 -0500 Subject: [PATCH 216/382] ARM: dts: omap3: Fix am3517 mdio and emac clock references A previous patch removed OMAP clock aliases that were perceived to be unnecessary. Unfortunately, it broke the ethernet on the am3517-evm. This patch enables the MDIO clock and EMAC clock. Fixes: 0ed266d7ae5e ("clk: ti: omap3: cleanup unnecessary clock aliases") Cc: stable@vger.kernel.org #4.16+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi index ca294914bbb1..4b6062b631b1 100644 --- a/arch/arm/boot/dts/am3517.dtsi +++ b/arch/arm/boot/dts/am3517.dtsi @@ -39,6 +39,8 @@ davinci_emac: ethernet@5c000000 { ti,davinci-ctrl-ram-size = <0x2000>; ti,davinci-rmii-en = /bits/ 8 <1>; local-mac-address = [ 00 00 00 00 00 00 ]; + clocks = <&emac_ick>; + clock-names = "ick"; }; davinci_mdio: ethernet@5c030000 { @@ -49,6 +51,8 @@ davinci_mdio: ethernet@5c030000 { bus_freq = <1000000>; #address-cells = <1>; #size-cells = <0>; + clocks = <&emac_fck>; + clock-names = "fck"; }; uart4: serial@4809e000 { From 845d382bb15c6e7dc5026c0ff919c5b13fc7e11b Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:35:53 -0500 Subject: [PATCH 217/382] x86/bugs: Update when to check for the LS_CFG SSBD mitigation If either the X86_FEATURE_AMD_SSBD or X86_FEATURE_VIRT_SSBD features are present, then there is no need to perform the check for the LS_CFG SSBD mitigation support. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180702213553.29202.21089.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 082d7875cef8..38915fbfae73 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -543,7 +543,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) nodes_per_socket = ((value >> 3) & 7) + 1; } - if (c->x86 >= 0x15 && c->x86 <= 0x17) { + if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && + !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && + c->x86 >= 0x15 && c->x86 <= 0x17) { unsigned int bit; switch (c->x86) { From 612bc3b3d4be749f73a513a17d9b3ee1330d3487 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 2 Jul 2018 16:36:02 -0500 Subject: [PATCH 218/382] x86/bugs: Fix the AMD SSBD usage of the SPEC_CTRL MSR On AMD, the presence of the MSR_SPEC_CTRL feature does not imply that the SSBD mitigation support should use the SPEC_CTRL MSR. Other features could have caused the MSR_SPEC_CTRL feature to be set, while a different SSBD mitigation option is in place. Update the SSBD support to check for the actual SSBD features that will use the SPEC_CTRL MSR. Signed-off-by: Tom Lendacky Cc: Borislav Petkov Cc: David Woodhouse Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6ac2f49edb1e ("x86/bugs: Add AMD's SPEC_CTRL MSR usage") Link: http://lkml.kernel.org/r/20180702213602.29202.33151.stgit@tlendack-t1.amdoffice.net Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 404df26b7de8..5c0ea39311fe 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -155,7 +155,8 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; /* SSBD controlled in MSR_SPEC_CTRL */ - if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); if (hostval != guestval) { @@ -533,9 +534,10 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may * use a completely different MSR and bit dependent on family. */ - if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && + !static_cpu_has(X86_FEATURE_AMD_SSBD)) { x86_amd_ssb_disable(); - else { + } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); From aa7eee8a143a7e8b530eb1e75fb86cae793d1e21 Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Sat, 30 Jun 2018 16:24:21 +0530 Subject: [PATCH 219/382] mtd: spi-nor: cadence-quadspi: Fix direct mode write timeouts Sometimes when writing large size files to flash in direct/memory mapped mode, it is seen that flash write enable command times out with error: [ 503.146293] cadence-qspi 47040000.ospi: Flash command execution timed out. This is because, we need to make sure previous direct write operation is complete by polling for IDLE bit in CONFIG_REG before starting the next operation. Fix this by polling for IDLE bit after memory mapped write. Fixes: a27f2eaf2b27 ("mtd: spi-nor: cadence-quadspi: Add support for direct access mode") Cc: stable@vger.kernel.org Signed-off-by: Vignesh R Reviewed-by: Marek Vasut Signed-off-by: Boris Brezillon --- drivers/mtd/spi-nor/cadence-quadspi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index c3f7aaa5d18f..d7e10b36a0b9 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -926,10 +926,12 @@ static ssize_t cqspi_write(struct spi_nor *nor, loff_t to, if (ret) return ret; - if (f_pdata->use_direct_mode) + if (f_pdata->use_direct_mode) { memcpy_toio(cqspi->ahb_base + to, buf, len); - else + ret = cqspi_wait_idle(cqspi); + } else { ret = cqspi_indirect_write_execute(nor, to, buf, len); + } if (ret) return ret; From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:22 -0700 Subject: [PATCH 220/382] compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations Functions marked extern inline do not emit an externally visible function when the gnu89 C standard is used. Some KBUILD Makefiles overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without an explicit C standard specified, the default is gnu11. Since c99, the semantics of extern inline have changed such that an externally visible function is always emitted. This can lead to multiple definition errors of extern inline functions at link time of compilation units whose build files have removed an explicit C standard compiler flag for users of GCC 5.1+ or Clang. Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: sedat.dilek@gmail.com Cc: thomas.lendacky@amd.com Cc: tstellar@redhat.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index fd282c7d3e5e..573f5a7d42d4 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -65,6 +65,18 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #endif +/* + * Feature detection for gnu_inline (gnu89 extern inline semantics). Either + * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, + * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not + * defined so the gnu89 semantics are the default. + */ +#ifdef __GNUC_STDC_INLINE__ +# define __gnu_inline __attribute__((gnu_inline)) +#else +# define __gnu_inline +#endif + /* * Force always-inline if the user requests it so via the .config, * or if gcc is too old. @@ -72,19 +84,22 @@ * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well by using "unused" * function attribute, which is redundant but not harmful for gcc. + * Prefer gnu_inline, so that extern inline functions do not emit an + * externally visible function. This makes extern inline behave as per gnu89 + * semantics rather than c99. This prevents multiple symbol definition errors + * of extern inline functions at link time. + * A lot of inline functions can cause havoc with function tracing. */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline,unused)) notrace -#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace -#define __inline __inline __attribute__((always_inline,unused)) notrace +#define inline \ + inline __attribute__((always_inline, unused)) notrace __gnu_inline #else -/* A lot of inline functions can cause havoc with function tracing */ -#define inline inline __attribute__((unused)) notrace -#define __inline__ __inline__ __attribute__((unused)) notrace -#define __inline __inline __attribute__((unused)) notrace +#define inline inline __attribute__((unused)) notrace __gnu_inline #endif +#define __inline__ inline +#define __inline inline #define __always_inline inline __attribute__((always_inline)) #define noinline __attribute__((noinline)) From 0e2e160033283e20f688d8bad5b89460cc5bfcc4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Jun 2018 09:23:23 -0700 Subject: [PATCH 221/382] x86/asm: Add _ASM_ARG* constants for argument registers to i386 and x86-64 uses different registers for arguments; make them available so we don't have to #ifdef in the actual code. Native size and specified size (q, l, w, b) versions are provided. Signed-off-by: H. Peter Anvin Signed-off-by: Nick Desaulniers Reviewed-by: Sedat Dilek Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tstellar@redhat.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-3-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/asm.h | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 219faaec51df..990770f9e76b 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -46,6 +46,65 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 _ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + /* * Macros to generate condition code outputs from inline assembly, * The output operand must be type "bool". From d0a8d9378d16eb3c69bd8e6d23779fbdbee3a8c7 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:24 -0700 Subject: [PATCH 222/382] x86/paravirt: Make native_save_fl() extern inline native_save_fl() is marked static inline, but by using it as a function pointer in arch/x86/kernel/paravirt.c, it MUST be outlined. paravirt's use of native_save_fl() also requires that no GPRs other than %rax are clobbered. Compilers have different heuristics which they use to emit stack guard code, the emittance of which can break paravirt's callee saved assumption by clobbering %rcx. Marking a function definition extern inline means that if this version cannot be inlined, then the out-of-line version will be preferred. By having the out-of-line version be implemented in assembly, it cannot be instrumented with a stack protector, which might violate custom calling conventions that code like paravirt rely on. The semantics of extern inline has changed since gnu89. This means that folks using GCC versions >= 5.1 may see symbol redefinition errors at link time for subdirs that override KBUILD_CFLAGS (making the C standard used implicit) regardless of this patch. This has been cleaned up earlier in the patch set, but is left as a note in the commit message for future travelers. Reports: https://lkml.org/lkml/2018/5/7/534 https://github.com/ClangBuiltLinux/linux/issues/16 Discussion: https://bugs.llvm.org/show_bug.cgi?id=37512 https://lkml.org/lkml/2018/5/24/1371 Thanks to the many folks that participated in the discussion. Debugged-by: Alistair Strachan Debugged-by: Matthias Kaehlcke Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Tom Stellar Reported-by: Sedat Dilek Tested-by: Sedat Dilek Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: joe@perches.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: thomas.lendacky@amd.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-4-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irqflags.h | 2 +- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/irqflags.S | 26 ++++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/irqflags.S diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 89f08955fff7..c4fc17220df9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -13,7 +13,7 @@ * Interrupt control: */ -static inline unsigned long native_save_fl(void) +extern inline unsigned long native_save_fl(void) { unsigned long flags; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 02d6f5cf4e70..8824d01c0c35 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,6 +61,7 @@ obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o +obj-y += irqflags.o obj-y += process.o obj-y += fpu/ diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S new file mode 100644 index 000000000000..ddeeaac8adda --- /dev/null +++ b/arch/x86/kernel/irqflags.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +/* + * unsigned long native_save_fl(void) + */ +ENTRY(native_save_fl) + pushf + pop %_ASM_AX + ret +ENDPROC(native_save_fl) +EXPORT_SYMBOL(native_save_fl) + +/* + * void native_restore_fl(unsigned long flags) + * %eax/%rdi: flags + */ +ENTRY(native_restore_fl) + push %_ASM_ARG1 + popf + ret +ENDPROC(native_restore_fl) +EXPORT_SYMBOL(native_restore_fl) From 7a6b9f4d601dfce8cb68f0dcfd834270280e31e6 Mon Sep 17 00:00:00 2001 From: x00270170 Date: Tue, 3 Jul 2018 15:06:27 +0800 Subject: [PATCH 223/382] mmc: dw_mmc: fix card threshold control configuration Card write threshold control is supposed to be set since controller version 2.80a for data write in HS400 mode and data read in HS200/HS400/SDR104 mode. However the current code returns without configuring it in the case of data writing in HS400 mode. Meanwhile the patch fixes that the current code goes to 'disable' when doing data reading in HS400 mode. Fixes: 7e4bf1bc9543 ("mmc: dw_mmc: add the card write threshold for HS400 mode") Signed-off-by: Qing Xia Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 623f4d27fa01..80dc2fd6576c 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -1065,8 +1065,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data) * It's used when HS400 mode is enabled. */ if (data->flags & MMC_DATA_WRITE && - !(host->timing != MMC_TIMING_MMC_HS400)) - return; + host->timing != MMC_TIMING_MMC_HS400) + goto disable; if (data->flags & MMC_DATA_WRITE) enable = SDMMC_CARD_WR_THR_EN; @@ -1074,7 +1074,8 @@ static void dw_mci_ctrl_thld(struct dw_mci *host, struct mmc_data *data) enable = SDMMC_CARD_RD_THR_EN; if (host->timing != MMC_TIMING_MMC_HS200 && - host->timing != MMC_TIMING_UHS_SDR104) + host->timing != MMC_TIMING_UHS_SDR104 && + host->timing != MMC_TIMING_MMC_HS400) goto disable; blksz_depth = blksz / (1 << host->data_shift); From 717adfdaf14704fd3ec7fa2c04520c0723247eac Mon Sep 17 00:00:00 2001 From: Daniel Rosenberg Date: Mon, 2 Jul 2018 16:59:37 -0700 Subject: [PATCH 224/382] HID: debug: check length before copy_to_user() If our length is greater than the size of the buffer, we overflow the buffer Cc: stable@vger.kernel.org Signed-off-by: Daniel Rosenberg Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 4f4e7a08a07b..4db8e140f709 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1154,6 +1154,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, goto out; if (list->tail > list->head) { len = list->tail - list->head; + if (len > count) + len = count; if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { ret = -EFAULT; @@ -1163,6 +1165,8 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, list->head += len; } else { len = HID_DEBUG_BUFSIZE - list->head; + if (len > count) + len = count; if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { ret = -EFAULT; @@ -1170,7 +1174,9 @@ static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, } list->head = 0; ret += len; - goto copy_rest; + count -= len; + if (count > 0) + goto copy_rest; } } From 3b8d573586d1b9dee33edf6cb6f2ca05f4bca568 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 26 Jun 2018 09:58:02 -0700 Subject: [PATCH 225/382] HID: wacom: Correct touch maximum XY of 2nd-gen Intuos The touch sensors on the 2nd-gen Intuos tablets don't use a 4096x4096 sensor like other similar tablets (3rd-gen Bamboo, Intuos5, etc.). The incorrect maximum XY values don't normally affect userspace since touch input from these devices is typically relative rather than absolute. It does, however, cause problems when absolute distances need to be measured, e.g. for gesture recognition. Since the resolution of the touch sensor on these devices is 10 units / mm (versus 100 for the pen sensor), the proper maximum values can be calculated by simply dividing by 10. Fixes: b5fd2a3e92 ("Input: wacom - add support for three new Intuos devices") Signed-off-by: Jason Gerecke Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 0bb44d0088ed..ad7afa74d365 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -3365,8 +3365,14 @@ void wacom_setup_device_quirks(struct wacom *wacom) if (features->type >= INTUOSHT && features->type <= BAMBOO_PT) features->device_type |= WACOM_DEVICETYPE_PAD; - features->x_max = 4096; - features->y_max = 4096; + if (features->type == INTUOSHT2) { + features->x_max = features->x_max / 10; + features->y_max = features->y_max / 10; + } + else { + features->x_max = 4096; + features->y_max = 4096; + } } else if (features->pktlen == WACOM_PKGLEN_BBTOUCH) { features->device_type |= WACOM_DEVICETYPE_PAD; From 90d72ce079791399ac255c75728f3c9e747b093d Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jul 2018 19:27:13 -0700 Subject: [PATCH 226/382] vmw_balloon: fix inflation with batching Embarrassingly, the recent fix introduced worse problem than it solved, causing the balloon not to inflate. The VM informed the hypervisor that the pages for lock/unlock are sitting in the wrong address, as it used the page that is used the uninitialized page variable. Fixes: b23220fe054e9 ("vmw_balloon: fixing double free when batching mode is off") Cc: stable@vger.kernel.org Reviewed-by: Xavier Deguillard Signed-off-by: Nadav Amit Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_balloon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index efd733472a35..56c6f79a5c5a 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -467,7 +467,7 @@ static int vmballoon_send_batched_lock(struct vmballoon *b, unsigned int num_pages, bool is_2m_pages, unsigned int *target) { unsigned long status; - unsigned long pfn = page_to_pfn(b->page); + unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); STATS_INC(b->stats.lock[is_2m_pages]); @@ -515,7 +515,7 @@ static bool vmballoon_send_batched_unlock(struct vmballoon *b, unsigned int num_pages, bool is_2m_pages, unsigned int *target) { unsigned long status; - unsigned long pfn = page_to_pfn(b->page); + unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page)); STATS_INC(b->stats.unlock[is_2m_pages]); From b7a020bff31318fc8785e6f96b1d38c1625cf1fb Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 7 Jun 2018 00:31:48 +0300 Subject: [PATCH 227/382] mei: discard messages from not connected client during power down. This fixes regression introduced by commit 8d52af6795c0 ("mei: speed up the power down flow") In power down or suspend flow a message can still be received from the FW because the clients fake disconnection. In normal case we interpret messages w/o destination as corrupted and link reset is performed in order to clean the channel, but during power down link reset is already in progress resulting in endless loop. To resolve the issue under power down flow we discard messages silently. Cc: 4.16+ Fixes: 8d52af6795c0 ("mei: speed up the power down flow") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=199541 Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/interrupt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index b0b8f18a85e3..6649f0d56d2f 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -310,8 +310,11 @@ int mei_irq_read_handler(struct mei_device *dev, if (&cl->link == &dev->file_list) { /* A message for not connected fixed address clients * should be silently discarded + * On power down client may be force cleaned, + * silently discard such messages */ - if (hdr_is_fixed(mei_hdr)) { + if (hdr_is_fixed(mei_hdr) || + dev->dev_state == MEI_DEV_POWER_DOWN) { mei_irq_discard_msg(dev, mei_hdr); ret = 0; goto reset_slots; From d403397c7c0821704a2f4da2694b46e423791261 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 30 Jun 2018 10:05:09 +0100 Subject: [PATCH 228/382] drm/i915: Try GGTT mmapping whole object as partial If the whole object is already pinned by HW for use as scanout, we will fail to move it to the mappable region and so must resort to using a partial VMA covering the whole object. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104513 Fixes: aa136d9d72c2 ("drm/i915: Convert partial ggtt vma to full ggtt if it spans the entire object") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180630090509.469-1-chris@chris-wilson.co.uk (cherry picked from commit 7e7367d3bc6cf27dd7e007e7897fcebfeff1ee8b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 28 +++++++++++++++++----------- drivers/gpu/drm/i915/i915_vma.c | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d44ad7bc1e94..17c5097721e8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2002,7 +2002,6 @@ int i915_gem_fault(struct vm_fault *vmf) bool write = !!(vmf->flags & FAULT_FLAG_WRITE); struct i915_vma *vma; pgoff_t page_offset; - unsigned int flags; int ret; /* We don't use vmf->pgoff since that has the fake offset */ @@ -2038,27 +2037,34 @@ int i915_gem_fault(struct vm_fault *vmf) goto err_unlock; } - /* If the object is smaller than a couple of partial vma, it is - * not worth only creating a single partial vma - we may as well - * clear enough space for the full object. - */ - flags = PIN_MAPPABLE; - if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT) - flags |= PIN_NONBLOCK | PIN_NONFAULT; /* Now pin it into the GTT as needed */ - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | + PIN_NONBLOCK | + PIN_NONFAULT); if (IS_ERR(vma)) { /* Use a partial view if it is bigger than available space */ struct i915_ggtt_view view = compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES); + unsigned int flags; - /* Userspace is now writing through an untracked VMA, abandon + flags = PIN_MAPPABLE; + if (view.type == I915_GGTT_VIEW_NORMAL) + flags |= PIN_NONBLOCK; /* avoid warnings for pinned */ + + /* + * Userspace is now writing through an untracked VMA, abandon * all hope that the hardware is able to track future writes. */ obj->frontbuffer_ggtt_origin = ORIGIN_CPU; - vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + if (IS_ERR(vma) && !view.type) { + flags = PIN_MAPPABLE; + view.type = I915_GGTT_VIEW_PARTIAL; + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, flags); + } } if (IS_ERR(vma)) { ret = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9324d476e0a7..0531c01c3604 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -109,7 +109,7 @@ vma_create(struct drm_i915_gem_object *obj, obj->base.size >> PAGE_SHIFT)); vma->size = view->partial.size; vma->size <<= PAGE_SHIFT; - GEM_BUG_ON(vma->size >= obj->base.size); + GEM_BUG_ON(vma->size > obj->base.size); } else if (view->type == I915_GGTT_VIEW_ROTATED) { vma->size = intel_rotation_info_size(&view->rotated); vma->size <<= PAGE_SHIFT; From 157bcc06094c3c5800d3f4676527047b79b618e7 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 25 Jun 2018 09:34:03 -0300 Subject: [PATCH 229/382] ARM: imx_v6_v7_defconfig: Select ULPI support Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like imx51-babbge. This fixes a kernel hang in 4.18-rc1 on i.mx51-babbage, caused by commit 03e6275ae381 ("usb: chipidea: Fix ULPI on imx51"). Suggested-by: Andrey Smirnov Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v6_v7_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index f70507ab91ee..200ebda47e0c 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -302,6 +302,7 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_USB_SERIAL=m CONFIG_USB_SERIAL_GENERIC=y CONFIG_USB_SERIAL_FTDI_SIO=m @@ -338,6 +339,7 @@ CONFIG_USB_GADGETFS=m CONFIG_USB_FUNCTIONFS=m CONFIG_USB_MASS_STORAGE=m CONFIG_USB_G_SERIAL=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y From 2ceb2780b790b74bc408a949f6aedbad8afa693e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 26 Jun 2018 08:37:09 -0300 Subject: [PATCH 230/382] ARM: imx_v4_v5_defconfig: Select ULPI support Select CONFIG_USB_CHIPIDEA_ULPI and CONFIG_USB_ULPI_BUS so that USB ULPI can be functional on some boards like that use ULPI interface. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v4_v5_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index 054591dc9a00..4cd2f4a2bff4 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -141,9 +141,11 @@ CONFIG_USB_STORAGE=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_CHIPIDEA_ULPI=y CONFIG_NOP_USB_XCEIV=y CONFIG_USB_GADGET=y CONFIG_USB_ETH=m +CONFIG_USB_ULPI_BUS=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y From 9fea4b395260175de4016b42982f45a3e6e03d0b Mon Sep 17 00:00:00 2001 From: Evan Green Date: Mon, 2 Jul 2018 16:03:46 -0700 Subject: [PATCH 231/382] loop: Add LOOP_SET_BLOCK_SIZE in compat ioctl This change adds LOOP_SET_BLOCK_SIZE as one of the supported ioctls in lo_compat_ioctl. It only takes an unsigned long argument, and in practice a 32-bit value works fine. Reviewed-by: Omar Sandoval Signed-off-by: Evan Green Signed-off-by: Jens Axboe --- drivers/block/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 21e6d1b3b393..258355c5a93a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1612,6 +1612,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, arg = (unsigned long) compat_ptr(arg); case LOOP_SET_FD: case LOOP_CHANGE_FD: + case LOOP_SET_BLOCK_SIZE: err = lo_ioctl(bdev, mode, cmd, arg); break; default: From 895b66129ad8c562865b64306032bdb378f4484f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 29 Jun 2018 11:15:37 +0200 Subject: [PATCH 232/382] PM / Domains: Don't power on at attach for the multi PM domain case There are no legacy behavior in drivers to consider while attaching a device to genpd - for the multiple PM domain case. For that reason, let's instead require the driver to runtime resume the device, via calling pm_runtime_get_sync() for example, when it needs to power on the corresponding PM domain. This allows us to improve the situation during attach. Instead of always power on the PM domain, which may be unnecessary, let's leave it in its current state. Additionally, to avoid the PM domain to stay powered on, let's schedule a power off work. Fixes: 3c095f32a92b (PM / Domains: Add support for multi PM domains ...) Signed-off-by: Ulf Hansson Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c298de8a8308..9e8484189034 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2235,7 +2235,7 @@ static void genpd_dev_pm_sync(struct device *dev) } static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np, - unsigned int index) + unsigned int index, bool power_on) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; @@ -2271,9 +2271,11 @@ static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np, dev->pm_domain->detach = genpd_dev_pm_detach; dev->pm_domain->sync = genpd_dev_pm_sync; - genpd_lock(pd); - ret = genpd_power_on(pd, 0); - genpd_unlock(pd); + if (power_on) { + genpd_lock(pd); + ret = genpd_power_on(pd, 0); + genpd_unlock(pd); + } if (ret) genpd_remove_device(pd, dev); @@ -2307,7 +2309,7 @@ int genpd_dev_pm_attach(struct device *dev) "#power-domain-cells") != 1) return 0; - return __genpd_dev_pm_attach(dev, dev->of_node, 0); + return __genpd_dev_pm_attach(dev, dev->of_node, 0, true); } EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); @@ -2359,14 +2361,14 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, } /* Try to attach the device to the PM domain at the specified index. */ - ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index); + ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index, false); if (ret < 1) { device_unregister(genpd_dev); return ret ? ERR_PTR(ret) : NULL; } - pm_runtime_set_active(genpd_dev); pm_runtime_enable(genpd_dev); + genpd_queue_power_off_work(dev_to_genpd(genpd_dev)); return genpd_dev; } From 410da1e12ffed61129d61df5b7adce4d08c7f17c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 3 Jul 2018 09:53:43 -0700 Subject: [PATCH 233/382] net/smc: fix up merge error with poll changes My networking merge (commit 4e33d7d47943: "Pull networking fixes from David Miller") got the poll() handling conflict wrong for af_smc. The conflict between my a11e1d432b51 ("Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL") and Ursula Braun's 24ac3a08e658 ("net/smc: rebuild nonblocking connect") should have left the call to sock_poll_wait() in place, just without the socket lock release/retake. And I really should have realized that. But happily, I at least asked Ursula to double-check the merge, and she set me right. This also fixes an incidental whitespace issue nearby that annoyed me while looking at this. Pointed-out-by: Ursula Braun Cc: David Miller Signed-off-by: Linus Torvalds --- net/smc/af_smc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index e017b6a4452b..3c1405df936c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1345,6 +1345,8 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, if (sk->sk_err) mask |= EPOLLERR; } else { + if (sk->sk_state != SMC_CLOSED) + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_err) mask |= EPOLLERR; if ((sk->sk_shutdown == SHUTDOWN_MASK) || @@ -1370,7 +1372,6 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, } if (smc->conn.urg_state == SMC_URG_VALID) mask |= EPOLLPRI; - } return mask; From fe48aecb4df837540f13b5216f27ddb306aaf4b9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 1 Jul 2018 15:31:54 +0300 Subject: [PATCH 234/382] RDMA/uverbs: Don't fail in creation of multiple flows The conversion from offsetof() calculations to sizeof() wrongly behaved for missed exact size and in scenario with more than one flow. In such scenario we got "create flow failed, flow 10: 8 bytes left from uverb cmd" error, which is wrong because the size of kern_spec is exactly 8 bytes, and we were not supposed to fail. Cc: # 3.12 Fixes: 4fae7f170416 ("RDMA/uverbs: Fix slab-out-of-bounds in ib_uverbs_ex_create_flow") Reported-by: Ran Rozenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 87ffeebc0b28..cc06e8404e9b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3586,7 +3586,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, kern_spec = kern_flow_attr->flow_specs; ib_spec = flow_attr + 1; for (i = 0; i < flow_attr->num_of_specs && - cmd.flow_attr.size > sizeof(*kern_spec) && + cmd.flow_attr.size >= sizeof(*kern_spec) && cmd.flow_attr.size >= kern_spec->size; i++) { err = kern_spec_to_ib_spec( From dbd39cf4249316c4089b8987f20850763ebbf43e Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 31 May 2018 09:24:02 -0300 Subject: [PATCH 235/382] i2c: stu300: use non-archaic spelling of failes Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Linus Walleij Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-stu300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-stu300.c b/drivers/i2c/busses/i2c-stu300.c index e866c481bfc3..fce52bdab2b7 100644 --- a/drivers/i2c/busses/i2c-stu300.c +++ b/drivers/i2c/busses/i2c-stu300.c @@ -127,7 +127,7 @@ enum stu300_error { /* * The number of address send athemps tried before giving up. - * If the first one failes it seems like 5 to 8 attempts are required. + * If the first one fails it seems like 5 to 8 attempts are required. */ #define NUM_ADDR_RESEND_ATTEMPTS 12 From cf4d418e653afc84c9c873236033e06be5d58f1c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Mar 2018 16:09:10 +0200 Subject: [PATCH 236/382] tracing: Avoid string overflow 'err' is used as a NUL-terminated string, but using strncpy() with the length equal to the buffer size may result in lack of the termination: kernel/trace/trace_events_hist.c: In function 'hist_err_event': kernel/trace/trace_events_hist.c:396:3: error: 'strncpy' specified bound 256 equals destination size [-Werror=stringop-truncation] strncpy(err, var, MAX_FILTER_STR_VAL); This changes it to use the safer strscpy() instead. Link: http://lkml.kernel.org/r/20180328140920.2842153-1-arnd@arndb.de Cc: stable@vger.kernel.org Fixes: f404da6e1d46 ("tracing: Add 'last error' error facility for hist triggers") Acked-by: Tom Zanussi Signed-off-by: Arnd Bergmann Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 046c716a6536..aae18af94c94 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -393,7 +393,7 @@ static void hist_err_event(char *str, char *system, char *event, char *var) else if (system) snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); else - strncpy(err, var, MAX_FILTER_STR_VAL); + strscpy(err, var, MAX_FILTER_STR_VAL); hist_err(str, err); } From f90658725ba7ebb031054866aff4cda0d099a3b1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 2 Jul 2018 11:41:38 -0400 Subject: [PATCH 237/382] tracing: Make create_filter() code match the comments The comment in create_filter() states that the passed in filter pointer (filterp) will either be NULL or contain an error message stating why the filter failed. But it also expects the filter pointer to point to NULL when passed in. If it is not, the function create_filter_start() will warn and return an error message without updating the filter pointer. This is not what the comment states. As we always expect the pointer to point to NULL, if it is not, trigger a WARN_ON(), set it to NULL, and then continue the path as the rest will work as the comment states. Also update the comment to state it must point to NULL. Reported-by: Dan Carpenter Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 0dceb77d1d42..893a206bcba4 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1701,6 +1701,7 @@ static void create_filter_finish(struct filter_parse_error *pe) * @filter_str: filter string * @set_str: remember @filter_str and enable detailed error in filter * @filterp: out param for created filter (always updated on return) + * Must be a pointer that references a NULL pointer. * * Creates a filter for @call with @filter_str. If @set_str is %true, * @filter_str is copied and recorded in the new filter. @@ -1718,6 +1719,10 @@ static int create_filter(struct trace_event_call *call, struct filter_parse_error *pe = NULL; int err; + /* filterp must point to NULL */ + if (WARN_ON(*filterp)) + *filterp = NULL; + err = create_filter_start(filter_string, set_str, &pe, filterp); if (err) return err; From f26808ba7227a921e0e8549c7d3c52332b920085 Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Sun, 8 Apr 2018 19:36:31 +0800 Subject: [PATCH 238/382] tracing: Optimize trace_buffer_iter() logic Simplify and optimize the logic in trace_buffer_iter() to use a conditional operation instead of an if conditional. Link: http://lkml.kernel.org/r/20180408113631.3947-1-cugyly@163.com Signed-off-by: yuan linyu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 630c5a24b2b2..f8f86231ad90 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -583,9 +583,7 @@ static __always_inline void trace_clear_recursion(int bit) static inline struct ring_buffer_iter * trace_buffer_iter(struct trace_iterator *iter, int cpu) { - if (iter->buffer_iter && iter->buffer_iter[cpu]) - return iter->buffer_iter[cpu]; - return NULL; + return iter->buffer_iter ? iter->buffer_iter[cpu] : NULL; } int tracer_init(struct tracer *t, struct trace_array *tr); From 26b68dd2f48fe7699a89f0cfbb9f4a650dc1c837 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 8 Mar 2018 21:58:43 +0100 Subject: [PATCH 239/382] tracing: Use __printf markup to silence compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Silence warnings (triggered at W=1) by adding relevant __printf attributes. CC kernel/trace/trace.o kernel/trace/trace.c: In function ‘__trace_array_vprintk’: kernel/trace/trace.c:2979:2: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); ^~~ AR kernel/trace/built-in.o Link: http://lkml.kernel.org/r/20180308205843.27447-1-malat@debian.org Signed-off-by: Mathieu Malaterre Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0079b4c7a49..f054bd6a1c66 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2953,6 +2953,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) } EXPORT_SYMBOL_GPL(trace_vbprintk); +__printf(3, 0) static int __trace_array_vprintk(struct ring_buffer *buffer, unsigned long ip, const char *fmt, va_list args) @@ -3007,12 +3008,14 @@ __trace_array_vprintk(struct ring_buffer *buffer, return len; } +__printf(3, 0) int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); } +__printf(3, 0) int trace_array_printk(struct trace_array *tr, unsigned long ip, const char *fmt, ...) { @@ -3028,6 +3031,7 @@ int trace_array_printk(struct trace_array *tr, return ret; } +__printf(3, 4) int trace_array_printk_buf(struct ring_buffer *buffer, unsigned long ip, const char *fmt, ...) { @@ -3043,6 +3047,7 @@ int trace_array_printk_buf(struct ring_buffer *buffer, return ret; } +__printf(2, 0) int trace_vprintk(unsigned long ip, const char *fmt, va_list args) { return trace_array_vprintk(&global_trace, ip, fmt, args); From 5ccba64a560fa6ca06008d4001f5d46ebeb34b41 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 2 Feb 2018 10:14:49 +0800 Subject: [PATCH 240/382] ftrace: Nuke clear_ftrace_function clear_ftrace_function is not used outside of ftrace.c and is not help to use a function, so nuke it per Steve's suggestion. Link: http://lkml.kernel.org/r/1517537689-34947-1-git-send-email-xieyisheng1@huawei.com Suggested-by: Steven Rostedt Signed-off-by: Yisheng Xie Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 -- kernel/trace/ftrace.c | 13 +------------ 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8154f4920fcb..ebb77674be90 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -223,7 +223,6 @@ extern enum ftrace_tracing_type_t ftrace_tracing_type; */ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); -void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); @@ -239,7 +238,6 @@ static inline int ftrace_nr_registered_ops(void) { return 0; } -static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index efed9c1cfb7e..caf9cbf35816 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -192,17 +192,6 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, op->saved_func(ip, parent_ip, op, regs); } -/** - * clear_ftrace_function - reset the ftrace function - * - * This NULLs the ftrace function and in essence stops - * tracing. There may be lag - */ -void clear_ftrace_function(void) -{ - ftrace_trace_function = ftrace_stub; -} - static void ftrace_sync(struct work_struct *work) { /* @@ -6689,7 +6678,7 @@ void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; - clear_ftrace_function(); + ftrace_trace_function = ftrace_stub; } /** From 1fe4293f4b8de75824935f8d8e9a99c7fc6873da Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 31 Jan 2018 23:48:49 +0800 Subject: [PATCH 241/382] tracing: Fix missing return symbol in function_graph output The function_graph tracer does not show the interrupt return marker for the leaf entry. On leaf entries, we see an unbalanced interrupt marker (the interrupt was entered, but nevern left). Before: 1) | SyS_write() { 1) | __fdget_pos() { 1) 0.061 us | __fget_light(); 1) 0.289 us | } 1) | vfs_write() { 1) 0.049 us | rw_verify_area(); 1) + 15.424 us | __vfs_write(); 1) ==========> | 1) 6.003 us | smp_apic_timer_interrupt(); 1) 0.055 us | __fsnotify_parent(); 1) 0.073 us | fsnotify(); 1) + 23.665 us | } 1) + 24.501 us | } After: 0) | SyS_write() { 0) | __fdget_pos() { 0) 0.052 us | __fget_light(); 0) 0.328 us | } 0) | vfs_write() { 0) 0.057 us | rw_verify_area(); 0) | __vfs_write() { 0) ==========> | 0) 8.548 us | smp_apic_timer_interrupt(); 0) <========== | 0) + 36.507 us | } /* __vfs_write */ 0) 0.049 us | __fsnotify_parent(); 0) 0.066 us | fsnotify(); 0) + 50.064 us | } 0) + 50.952 us | } Link: http://lkml.kernel.org/r/1517413729-20411-1-git-send-email-changbin.du@intel.com Cc: stable@vger.kernel.org Fixes: f8b755ac8e0cc ("tracing/function-graph-tracer: Output arrows signal on hardirq call/return") Signed-off-by: Changbin Du Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_functions_graph.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 23c0b0cb5fb9..169b3c44ee97 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -831,6 +831,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, struct ftrace_graph_ret *graph_ret; struct ftrace_graph_ent *call; unsigned long long duration; + int cpu = iter->cpu; int i; graph_ret = &ret_entry->ret; @@ -839,7 +840,6 @@ print_graph_entry_leaf(struct trace_iterator *iter, if (data) { struct fgraph_cpu_data *cpu_data; - int cpu = iter->cpu; cpu_data = per_cpu_ptr(data->cpu_data, cpu); @@ -869,6 +869,9 @@ print_graph_entry_leaf(struct trace_iterator *iter, trace_seq_printf(s, "%ps();\n", (void *)call->func); + print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, + cpu, iter->ent->pid, flags); + return trace_handle_return(s); } From 1e2c043628c7736dd56536d16c0ce009bc834ae7 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 3 Jul 2018 17:02:39 -0700 Subject: [PATCH 242/382] userfaultfd: hugetlbfs: fix userfaultfd_huge_must_wait() pte access Use huge_ptep_get() to translate huge ptes to normal ptes so we can check them with the huge_pte_* functions. Otherwise some architectures will check the wrong values and will not wait for userspace to bring in the memory. Link: http://lkml.kernel.org/r/20180626132421.78084-1-frankja@linux.ibm.com Fixes: 369cd2121be4 ("userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges") Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 123bf7d516fc..594d192b2331 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -222,24 +222,26 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, unsigned long reason) { struct mm_struct *mm = ctx->mm; - pte_t *pte; + pte_t *ptep, pte; bool ret = true; VM_BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); - pte = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); - if (!pte) + ptep = huge_pte_offset(mm, address, vma_mmu_pagesize(vma)); + + if (!ptep) goto out; ret = false; + pte = huge_ptep_get(ptep); /* * Lockless access: we're in a wait_event so it's ok if it * changes under us. */ - if (huge_pte_none(*pte)) + if (huge_pte_none(pte)) ret = true; - if (!huge_pte_write(*pte) && (reason & VM_UFFD_WP)) + if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) ret = true; out: return ret; From 520495fe96d74e05db585fc748351e0504d8f40d Mon Sep 17 00:00:00 2001 From: Cannon Matthews Date: Tue, 3 Jul 2018 17:02:43 -0700 Subject: [PATCH 243/382] mm: hugetlb: yield when prepping struct pages When booting with very large numbers of gigantic (i.e. 1G) pages, the operations in the loop of gather_bootmem_prealloc, and specifically prep_compound_gigantic_page, takes a very long time, and can cause a softlockup if enough pages are requested at boot. For example booting with 3844 1G pages requires prepping (set_compound_head, init the count) over 1 billion 4K tail pages, which takes considerable time. Add a cond_resched() to the outer loop in gather_bootmem_prealloc() to prevent this lockup. Tested: Booted with softlockup_panic=1 hugepagesz=1G hugepages=3844 and no softlockup is reported, and the hugepages are reported as successfully setup. Link: http://lkml.kernel.org/r/20180627214447.260804-1-cannonmatthews@google.com Signed-off-by: Cannon Matthews Reviewed-by: Andrew Morton Reviewed-by: Mike Kravetz Acked-by: Michal Hocko Cc: Andres Lagar-Cavilla Cc: Peter Feiner Cc: Greg Thelen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3612fbb32e9d..039ddbc574e9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2163,6 +2163,7 @@ static void __init gather_bootmem_prealloc(void) */ if (hstate_is_gigantic(h)) adjust_managed_page_count(page, 1 << h->order); + cond_resched(); } } From 1e8e18f694a52d703665012ca486826f64bac29d Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 3 Jul 2018 17:02:46 -0700 Subject: [PATCH 244/382] kasan: fix shadow_size calculation error in kasan_module_alloc There is a special case that the size is "(N << KASAN_SHADOW_SCALE_SHIFT) Pages plus X", the value of X is [1, KASAN_SHADOW_SCALE_SIZE-1]. The operation "size >> KASAN_SHADOW_SCALE_SHIFT" will drop X, and the roundup operation can not retrieve the missed one page. For example: size=0x28006, PAGE_SIZE=0x1000, KASAN_SHADOW_SCALE_SHIFT=3, we will get shadow_size=0x5000, but actually we need 6 pages. shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, PAGE_SIZE); This can lead to a kernel crash when kasan is enabled and the value of mod->core_layout.size or mod->init_layout.size is like above. Because the shadow memory of X has not been allocated and mapped. move_module: ptr = module_alloc(mod->core_layout.size); ... memset(ptr, 0, mod->core_layout.size); //crashed Unable to handle kernel paging request at virtual address ffff0fffff97b000 ...... Call trace: __asan_storeN+0x174/0x1a8 memset+0x24/0x48 layout_and_allocate+0xcd8/0x1800 load_module+0x190/0x23e8 SyS_finit_module+0x148/0x180 Link: http://lkml.kernel.org/r/1529659626-12660-1-git-send-email-thunder.leizhen@huawei.com Signed-off-by: Zhen Lei Reviewed-by: Dmitriy Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Hanjun Guo Cc: Libin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index f185455b3406..c3bd5209da38 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -619,12 +619,13 @@ void kasan_kfree_large(void *ptr, unsigned long ip) int kasan_module_alloc(void *addr, size_t size) { void *ret; + size_t scaled_size; size_t shadow_size; unsigned long shadow_start; shadow_start = (unsigned long)kasan_mem_to_shadow(addr); - shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, - PAGE_SIZE); + scaled_size = (size + KASAN_SHADOW_MASK) >> KASAN_SHADOW_SCALE_SHIFT; + shadow_size = round_up(scaled_size, PAGE_SIZE); if (WARN_ON(!PAGE_ALIGNED(shadow_start))) return -EINVAL; From 5e4e290d3751607726a62f0b49e11261a0a9345e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Jul 2018 17:02:49 -0700 Subject: [PATCH 245/382] ARM: disable KCOV for trusted foundations code The ARM trusted foundations code is currently broken in linux-next when CONFIG_KCOV_INSTRUMENT_ALL is set: /tmp/ccHdQsCI.s: Assembler messages: /tmp/ccHdQsCI.s:37: Error: .err encountered /tmp/ccHdQsCI.s:38: Error: .err encountered /tmp/ccHdQsCI.s:39: Error: .err encountered scripts/Makefile.build:311: recipe for target 'arch/arm/firmware/trusted_foundations.o' failed I could not find a function attribute that lets me disable -fsanitize-coverage=trace-pc for just one function, so this turns it off for the entire file instead. Link: http://lkml.kernel.org/r/20180529103636.1535457-1-arnd@arndb.de Fixes: 758517202bd2e4 ("arm: port KCOV to arm") Signed-off-by: Arnd Bergmann Acked-by: Olof Johansson Tested-by: Olof Johansson Cc: Dmitry Vyukov Cc: Mark Rutland Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/firmware/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/firmware/Makefile b/arch/arm/firmware/Makefile index a71f16536b6c..6e41336b0bc4 100644 --- a/arch/arm/firmware/Makefile +++ b/arch/arm/firmware/Makefile @@ -1 +1,4 @@ obj-$(CONFIG_TRUSTED_FOUNDATIONS) += trusted_foundations.o + +# tf_generic_smc() fails to build with -fsanitize-coverage=trace-pc +KCOV_INSTRUMENT := n From fc36def997cfd6cbff3eda4f82853a5c311c5466 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 3 Jul 2018 17:02:53 -0700 Subject: [PATCH 246/382] mm: teach dump_page() to correctly output poisoned struct pages If struct page is poisoned, and uninitialized access is detected via PF_POISONED_CHECK(page) dump_page() is called to output the page. But, the dump_page() itself accesses struct page to determine how to print it, and therefore gets into a recursive loop. For example: dump_page() __dump_page() PageSlab(page) PF_POISONED_CHECK(page) VM_BUG_ON_PGFLAGS(PagePoisoned(page), page) dump_page() recursion loop. Link: http://lkml.kernel.org/r/20180702180536.2552-1-pasha.tatashin@oracle.com Fixes: f165b378bbdf ("mm: uninitialized struct page poisoning sanity checking") Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/mm/debug.c b/mm/debug.c index 56e2d9125ea5..38c926520c97 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -43,12 +43,25 @@ const struct trace_print_flags vmaflag_names[] = { void __dump_page(struct page *page, const char *reason) { + bool page_poisoned = PagePoisoned(page); + int mapcount; + + /* + * If struct page is poisoned don't access Page*() functions as that + * leads to recursive loop. Page*() check for poisoned pages, and calls + * dump_page() when detected. + */ + if (page_poisoned) { + pr_emerg("page:%px is uninitialized and poisoned", page); + goto hex_only; + } + /* * Avoid VM_BUG_ON() in page_mapcount(). * page->_mapcount space in struct page is used by sl[aou]b pages to * encode own info. */ - int mapcount = PageSlab(page) ? 0 : page_mapcount(page); + mapcount = PageSlab(page) ? 0 : page_mapcount(page); pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx", page, page_ref_count(page), mapcount, @@ -60,6 +73,7 @@ void __dump_page(struct page *page, const char *reason) pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags); +hex_only: print_hex_dump(KERN_ALERT, "raw: ", DUMP_PREFIX_NONE, 32, sizeof(unsigned long), page, sizeof(struct page), false); @@ -68,7 +82,7 @@ void __dump_page(struct page *page, const char *reason) pr_alert("page dumped because: %s\n", reason); #ifdef CONFIG_MEMCG - if (page->mem_cgroup) + if (!page_poisoned && page->mem_cgroup) pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup); #endif } From 8bf935501a7ef1b2ec3b51c804d657d5895f221a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 30 Jun 2018 10:53:57 +0200 Subject: [PATCH 247/382] s390: wire up io_pgetevents system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/syscalls/syscall.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 8b210ead7956..4baefed5fefb 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -389,3 +389,4 @@ 379 common statx sys_statx compat_sys_statx 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi 381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents From 9d6d99e3ac8ccfd0945edb3c83cd912838775056 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 30 Jun 2018 10:54:15 +0200 Subject: [PATCH 248/382] s390: wire up rseq system call Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 + arch/s390/kernel/compat_wrapper.c | 1 + arch/s390/kernel/entry.S | 4 ++++ arch/s390/kernel/signal.c | 3 ++- arch/s390/kernel/syscalls/syscall.tbl | 1 + 5 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index baed39772c84..e44bb2b2873e 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -160,6 +160,7 @@ config S390 select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select MODULES_USE_ELF_RELA diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index 607c5e9fba3d..2ce28bf0c5ec 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -183,3 +183,4 @@ COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb); COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer); COMPAT_SYSCALL_WRAP4(s390_sthyi, unsigned long, code, void __user *, info, u64 __user *, rc, unsigned long, flags); COMPAT_SYSCALL_WRAP5(kexec_file_load, int, kernel_fd, int, initrd_fd, unsigned long, cmdline_len, const char __user *, cmdline_ptr, unsigned long, flags) +COMPAT_SYSCALL_WRAP4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3891805bfcdd..150130c897c3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -357,6 +357,10 @@ ENTRY(system_call) stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: +#ifdef CONFIG_DEBUG_RSEQ + lgr %r2,%r11 + brasl %r14,rseq_syscall +#endif LOCKDEP_SYS_EXIT .Lsysc_tif: TSTMSK __PT_FLAGS(%r11),_PIF_WORK diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 2d2960ab3e10..22f08245aa5d 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -498,7 +498,7 @@ void do_signal(struct pt_regs *regs) } /* No longer in a system call */ clear_pt_regs_flag(regs, PIF_SYSCALL); - + rseq_signal_deliver(&ksig, regs); if (is_compat_task()) handle_signal32(&ksig, oldset, regs); else @@ -537,4 +537,5 @@ void do_notify_resume(struct pt_regs *regs) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 4baefed5fefb..022fc099b628 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -390,3 +390,4 @@ 380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi 381 common kexec_file_load sys_kexec_file_load compat_sys_kexec_file_load 382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +383 common rseq sys_rseq compat_sys_rseq From c660f40b711980b42d8beac4b395a10645b20d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 29 Jun 2018 13:23:25 +0200 Subject: [PATCH 249/382] drm/amdgpu: fix user fence write race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer object backing the user fence is reserved using the non-user fence, i.e., as soon as the non-user fence is signaled, the user fence buffer object can be moved or even destroyed. Therefore, emit the user fence first. Both fences have the same cache invalidation behavior, so this should have no user-visible effect. Signed-off-by: Nicolai Hähnle Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index f70eeed9ed76..7aaa263ad8c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -231,6 +231,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE) fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; + /* wrap the last IB with fence */ + if (job && job->uf_addr) { + amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, + fence_flags | AMDGPU_FENCE_FLAG_64BIT); + } + r = amdgpu_fence_emit(ring, f, fence_flags); if (r) { dev_err(adev->dev, "failed to emit fence (%d)\n", r); @@ -243,12 +249,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (ring->funcs->insert_end) ring->funcs->insert_end(ring); - /* wrap the last IB with fence */ - if (job && job->uf_addr) { - amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence, - fence_flags | AMDGPU_FENCE_FLAG_64BIT); - } - if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); From 673b4271665a12fa839a12abb50e6f6e9953c081 Mon Sep 17 00:00:00 2001 From: Jouke Witteveen Date: Wed, 4 Jul 2018 12:27:15 +0200 Subject: [PATCH 250/382] ACPI / battery: Safe unregistering of hooks A hooking API was implemented for 4.17 in fa93854f7a7ed63d followed by hooks for Thinkpad laptops in 2801b9683f740012. The Thinkpad drivers did not support the Thinkpad 13 and the hooking API crashes on unsupported batteries by altering a list of hooks during unsafe iteration. Thus, Thinkpad 13 laptops could no longer boot. Additionally, a lock was kept in place and debugging information was printed out of order. Fixes: fa93854f7a7e (battery: Add the battery hooking API) Cc: 4.17+ # 4.17+ Signed-off-by: Jouke Witteveen Signed-off-by: Rafael J. Wysocki --- drivers/acpi/battery.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index b0113a5802a3..d79ad844c78f 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -717,10 +717,11 @@ void battery_hook_register(struct acpi_battery_hook *hook) */ pr_err("extension failed to load: %s", hook->name); __battery_hook_unregister(hook, 0); - return; + goto end; } } pr_info("new extension: %s\n", hook->name); +end: mutex_unlock(&hook_mutex); } EXPORT_SYMBOL_GPL(battery_hook_register); @@ -732,7 +733,7 @@ EXPORT_SYMBOL_GPL(battery_hook_register); */ static void battery_hook_add_battery(struct acpi_battery *battery) { - struct acpi_battery_hook *hook_node; + struct acpi_battery_hook *hook_node, *tmp; mutex_lock(&hook_mutex); INIT_LIST_HEAD(&battery->list); @@ -744,15 +745,15 @@ static void battery_hook_add_battery(struct acpi_battery *battery) * when a battery gets hotplugged or initialized * during the battery module initialization. */ - list_for_each_entry(hook_node, &battery_hook_list, list) { + list_for_each_entry_safe(hook_node, tmp, &battery_hook_list, list) { if (hook_node->add_battery(battery->bat)) { /* * The notification of the extensions has failed, to * prevent further errors we will unload the extension. */ - __battery_hook_unregister(hook_node, 0); pr_err("error in extension, unloading: %s", hook_node->name); + __battery_hook_unregister(hook_node, 0); } } mutex_unlock(&hook_mutex); From e33eab9ded328ccc14308afa51b5be7cbe78d30b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:29:38 +0300 Subject: [PATCH 251/382] USB: serial: ch341: fix type promotion bug in ch341_control_in() The "r" variable is an int and "bufsize" is an unsigned int so the comparison is type promoted to unsigned. If usb_control_msg() returns a negative that is treated as a high positive value and the error handling doesn't work. Fixes: 2d5a9c72d0c4 ("USB: serial: ch341: fix control-message error handling") Signed-off-by: Dan Carpenter Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ch341.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index bdd7a5ad3bf1..3bb1fff02bed 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -128,7 +128,7 @@ static int ch341_control_in(struct usb_device *dev, r = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, index, buf, bufsize, DEFAULT_TIMEOUT); - if (r < bufsize) { + if (r < (int)bufsize) { if (r >= 0) { dev_err(&dev->dev, "short control message received (%d < %u)\n", From 367b160fe4717c14a2a978b6f9ffb75a7762d3ed Mon Sep 17 00:00:00 2001 From: Olli Salonen Date: Wed, 4 Jul 2018 14:07:42 +0300 Subject: [PATCH 252/382] USB: serial: cp210x: add another USB ID for Qivicon ZigBee stick There are two versions of the Qivicon Zigbee stick in circulation. This adds the second USB ID to the cp210x driver. Signed-off-by: Olli Salonen Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index ee0cc1d90b51..626a29d9aa58 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -149,6 +149,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ + { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ From f0139752f71b05c918d86101643eb00b312ab883 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Thu, 28 Jun 2018 18:44:01 +0200 Subject: [PATCH 253/382] drm/bridge/sii8620: Send AVI infoframe in all MHL versions Currently AVI infoframe is sent only in MHL3. However, some MHL2 dongles need AVI infoframe to work correctly in either packed pixel mode or non-packed pixel mode. Send AVI infoframe in set_infoframes() in every case. Create an infoframe using drm_hdmi_infoframe_from_display_mode() instead of manually filling each infoframe structure's field. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1530204243-6370-2-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 39 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 250effa0e6b8..bd30ccf3d195 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -72,9 +73,7 @@ struct sii8620 { struct regulator_bulk_data supplies[2]; struct mutex lock; /* context lock, protects fields below */ int error; - int pixel_clock; unsigned int use_packed_pixel:1; - int video_code; enum sii8620_mode mode; enum sii8620_sink_type sink_type; u8 cbus_status; @@ -82,7 +81,6 @@ struct sii8620 { u8 xstat[MHL_XDS_SIZE]; u8 devcap[MHL_DCAP_SIZE]; u8 xdevcap[MHL_XDC_SIZE]; - u8 avif[HDMI_INFOFRAME_SIZE(AVI)]; bool feature_complete; bool devcap_read; bool sink_detected; @@ -1082,18 +1080,28 @@ static ssize_t mhl3_infoframe_pack(struct mhl3_infoframe *frame, return frm_len; } -static void sii8620_set_infoframes(struct sii8620 *ctx) +static void sii8620_set_infoframes(struct sii8620 *ctx, + struct drm_display_mode *mode) { struct mhl3_infoframe mhl_frm; union hdmi_infoframe frm; u8 buf[31]; int ret; + ret = drm_hdmi_avi_infoframe_from_display_mode(&frm.avi, + mode, + true); + if (ctx->use_packed_pixel) + frm.avi.colorspace = HDMI_COLORSPACE_YUV422; + + if (!ret) + ret = hdmi_avi_infoframe_pack(&frm.avi, buf, ARRAY_SIZE(buf)); + if (ret > 0) + sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, buf + 3, ret - 3); + if (!sii8620_is_mhl3(ctx) || !ctx->use_packed_pixel) { sii8620_write(ctx, REG_TPI_SC, BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI); - sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, ctx->avif + 3, - ARRAY_SIZE(ctx->avif) - 3); sii8620_write(ctx, REG_PKT_FILTER_0, BIT_PKT_FILTER_0_DROP_CEA_GAMUT_PKT | BIT_PKT_FILTER_0_DROP_MPEG_PKT | @@ -1102,16 +1110,6 @@ static void sii8620_set_infoframes(struct sii8620 *ctx) return; } - ret = hdmi_avi_infoframe_init(&frm.avi); - frm.avi.colorspace = HDMI_COLORSPACE_YUV422; - frm.avi.active_aspect = HDMI_ACTIVE_ASPECT_PICTURE; - frm.avi.picture_aspect = HDMI_PICTURE_ASPECT_16_9; - frm.avi.colorimetry = HDMI_COLORIMETRY_ITU_709; - frm.avi.video_code = ctx->video_code; - if (!ret) - ret = hdmi_avi_infoframe_pack(&frm.avi, buf, ARRAY_SIZE(buf)); - if (ret > 0) - sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, buf + 3, ret - 3); sii8620_write(ctx, REG_PKT_FILTER_0, BIT_PKT_FILTER_0_DROP_CEA_GAMUT_PKT | BIT_PKT_FILTER_0_DROP_MPEG_PKT | @@ -1131,6 +1129,9 @@ static void sii8620_set_infoframes(struct sii8620 *ctx) static void sii8620_start_video(struct sii8620 *ctx) { + struct drm_display_mode *mode = + &ctx->bridge.encoder->crtc->state->adjusted_mode; + if (!sii8620_is_mhl3(ctx)) sii8620_stop_video(ctx); @@ -1167,7 +1168,7 @@ static void sii8620_start_video(struct sii8620 *ctx) MHL_XDS_LINK_RATE_6_0_GBPS, 0x40 }, }; u8 p0_ctrl = BIT_M3_P0CTRL_MHL3_P0_PORT_EN; - int clk = ctx->pixel_clock * (ctx->use_packed_pixel ? 2 : 3); + int clk = mode->clock * (ctx->use_packed_pixel ? 2 : 3); int i; for (i = 0; i < ARRAY_SIZE(clk_spec) - 1; ++i) @@ -1196,7 +1197,7 @@ static void sii8620_start_video(struct sii8620 *ctx) clk_spec[i].link_rate); } - sii8620_set_infoframes(ctx); + sii8620_set_infoframes(ctx, mode); } static void sii8620_disable_hpd(struct sii8620 *ctx) @@ -2242,8 +2243,6 @@ static bool sii8620_mode_fixup(struct drm_bridge *bridge, mutex_lock(&ctx->lock); ctx->use_packed_pixel = sii8620_is_packing_required(ctx, adjusted_mode); - ctx->video_code = drm_match_cea_mode(adjusted_mode); - ctx->pixel_clock = adjusted_mode->clock; mutex_unlock(&ctx->lock); From fdddc65ab35d575b42aab411b2dc687601eab680 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Thu, 28 Jun 2018 18:44:02 +0200 Subject: [PATCH 254/382] drm/bridge/sii8620: Fix display of packed pixel modes Current implementation does not guarantee packed pixel modes working with every dongle. There are some dongles, which require selecting the output mode explicitly. Write proper values to registers in packed_pixel mode, based on how it is done in vendor's code. Select output color space: RGB (no packed pixel) or YCBCR422 (packed pixel). This reverts commit e8b92efa629dac0e70ea4145c5e70616de5f89c8 ("drm/bridge/sii8620: fix display of packed pixel modes in MHL2"). Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1530204243-6370-3-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index bd30ccf3d195..16fe7ea9d90c 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1015,21 +1015,36 @@ static void sii8620_stop_video(struct sii8620 *ctx) static void sii8620_set_format(struct sii8620 *ctx) { + u8 out_fmt; + if (sii8620_is_mhl3(ctx)) { sii8620_setbits(ctx, REG_M3_P0CTRL, BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED, ctx->use_packed_pixel ? ~0 : 0); } else { + if (ctx->use_packed_pixel) { + sii8620_write_seq_static(ctx, + REG_VID_MODE, BIT_VID_MODE_M1080P, + REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1, + REG_MHLTX_CTL6, 0x60 + ); + } else { sii8620_write_seq_static(ctx, REG_VID_MODE, 0, REG_MHL_TOP_CTL, 1, REG_MHLTX_CTL6, 0xa0 ); + } } + if (ctx->use_packed_pixel) + out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL); + else + out_fmt = VAL_TPI_FORMAT(RGB, FULL); + sii8620_write_seq(ctx, REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL), - REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL), + REG_TPI_OUTPUT, out_fmt, ); } From 44f9a4b0dc7563669772198e5b226cab58a00167 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Thu, 28 Jun 2018 18:44:03 +0200 Subject: [PATCH 255/382] drm/bridge/sii8620: Fix link mode selection Current link mode values do not allow to enable packed pixel modes. Select packed pixel clock mode, if needed, every time the link mode register gets updated. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1530204243-6370-4-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 30 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 16fe7ea9d90c..a6e8f4591e63 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1165,8 +1165,14 @@ static void sii8620_start_video(struct sii8620 *ctx) sii8620_set_format(ctx); if (!sii8620_is_mhl3(ctx)) { - sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), - MHL_DST_LM_CLK_MODE_NORMAL | MHL_DST_LM_PATH_ENABLED); + u8 link_mode = MHL_DST_LM_PATH_ENABLED; + + if (ctx->use_packed_pixel) + link_mode |= MHL_DST_LM_CLK_MODE_PACKED_PIXEL; + else + link_mode |= MHL_DST_LM_CLK_MODE_NORMAL; + + sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), link_mode); sii8620_set_auto_zone(ctx); } else { static const struct { @@ -1677,14 +1683,18 @@ static void sii8620_status_dcap_ready(struct sii8620 *ctx) static void sii8620_status_changed_path(struct sii8620 *ctx) { - if (ctx->stat[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED) { - sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), - MHL_DST_LM_CLK_MODE_NORMAL - | MHL_DST_LM_PATH_ENABLED); - } else { - sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), - MHL_DST_LM_CLK_MODE_NORMAL); - } + u8 link_mode; + + if (ctx->use_packed_pixel) + link_mode = MHL_DST_LM_CLK_MODE_PACKED_PIXEL; + else + link_mode = MHL_DST_LM_CLK_MODE_NORMAL; + + if (ctx->stat[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED) + link_mode |= MHL_DST_LM_PATH_ENABLED; + + sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), + link_mode); } static void sii8620_msc_mr_write_stat(struct sii8620 *ctx) From 38fc4248677552ce35efc09902fdcb06b61d7ef9 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Mon, 2 Jul 2018 11:16:59 +0200 Subject: [PATCH 256/382] arm64: Use aarch64elf and aarch64elfb emulation mode variants The aarch64linux and aarch64linuxb emulation modes are not supported by bare-metal toolchains and Linux using them forbids building the kernel with these toolchains. Since there is apparently no reason to target these emulation modes, the more generic elf modes are used instead, allowing to build on bare-metal toolchains as well as the already-supported ones. Fixes: 3d6a7b99e3fa ("arm64: ensure the kernel is compiled for LP64") Cc: stable@vger.kernel.org Acked-by: Will Deacon Signed-off-by: Paul Kocialkowski Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 45272266dafb..27389adf511a 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -61,14 +61,14 @@ KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB -LDFLAGS += -maarch64linuxb +LDFLAGS += -maarch64elfb UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL -LDFLAGS += -maarch64linux +LDFLAGS += -maarch64elf UTS_MACHINE := aarch64 endif From d7ef4899d7182f9d4267b4e4a5cc3689c1a04f25 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Wed, 4 Jul 2018 21:00:14 +0530 Subject: [PATCH 257/382] sample/vfio-mdev: Change return type to vm_fault_t convert mbochs_region_vm_fault and mbochs_dmabuf_vm_fault to return vm_fault_t type. Signed-off-by: Souptick Joarder Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 85ac6037696f..d5d5a499160c 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -657,7 +657,7 @@ static void mbochs_put_pages(struct mdev_state *mdev_state) dev_dbg(dev, "%s: %d pages released\n", __func__, count); } -static int mbochs_region_vm_fault(struct vm_fault *vmf) +static vm_fault_t mbochs_region_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mdev_state *mdev_state = vma->vm_private_data; @@ -695,7 +695,7 @@ static int mbochs_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) return 0; } -static int mbochs_dmabuf_vm_fault(struct vm_fault *vmf) +static vm_fault_t mbochs_dmabuf_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct mbochs_dmabuf *dmabuf = vma->vm_private_data; From d5fad48cfb4b183d87960904c177eeeb9412b929 Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:37 +0800 Subject: [PATCH 258/382] RISC-V: Add conditional macro for zone of DMA32 The DMA32 is for 64-bit usage. Signed-off-by: Zong Li Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index c77df8142be2..58a522f9bcc3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -28,7 +28,9 @@ static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; +#ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn)); +#endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); From 8f79125d285d2d71ed110e875754942256efa51d Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:38 +0800 Subject: [PATCH 259/382] RISC-V: Select GENERIC_UCMPDI2 on RV32I On 32-bit, it need to use __ucmpdi2, otherwise, it can't find the __ucmpdi2 symbol. Signed-off-by: Zong Li Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f12680c9b947..4764fdeb4f1f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -107,6 +107,7 @@ config ARCH_RV32I select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 + select GENERIC_LIB_UCMPDI2 config ARCH_RV64I bool "RV64I" From c480d8911fda96a0f37634bd4dc4e2c8a87c38da Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:39 +0800 Subject: [PATCH 260/382] RISC-V: Add definiion of extract symbol's index and type for 32-bit Use generic marco to get the index and type of symbol. Signed-off-by: Zong Li Signed-off-by: Palmer Dabbelt --- arch/riscv/include/uapi/asm/elf.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h index 5cae4c30cd8e..1e0dfc36aab9 100644 --- a/arch/riscv/include/uapi/asm/elf.h +++ b/arch/riscv/include/uapi/asm/elf.h @@ -21,8 +21,13 @@ typedef struct user_regs_struct elf_gregset_t; typedef union __riscv_fp_state elf_fpregset_t; -#define ELF_RISCV_R_SYM(r_info) ((r_info) >> 32) -#define ELF_RISCV_R_TYPE(r_info) ((r_info) & 0xffffffff) +#if __riscv_xlen == 64 +#define ELF_RISCV_R_SYM(r_info) ELF64_R_SYM(r_info) +#define ELF_RISCV_R_TYPE(r_info) ELF64_R_TYPE(r_info) +#else +#define ELF_RISCV_R_SYM(r_info) ELF32_R_SYM(r_info) +#define ELF_RISCV_R_TYPE(r_info) ELF32_R_TYPE(r_info) +#endif /* * RISC-V relocation types From 7df85002178e708aa749c63020fd333d9f085ced Mon Sep 17 00:00:00 2001 From: Zong Li Date: Mon, 25 Jun 2018 16:49:40 +0800 Subject: [PATCH 261/382] RISC-V: Change variable type for 32-bit compatible Signed-off-by: Zong Li Reviewed-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 1d5e9b934b8c..e8c6aaa2aab4 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -37,7 +37,7 @@ static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) static int apply_r_riscv_branch_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u32 imm12 = (offset & 0x1000) << (31 - 12); u32 imm11 = (offset & 0x800) >> (11 - 7); u32 imm10_5 = (offset & 0x7e0) << (30 - 10); @@ -50,7 +50,7 @@ static int apply_r_riscv_branch_rela(struct module *me, u32 *location, static int apply_r_riscv_jal_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u32 imm20 = (offset & 0x100000) << (31 - 20); u32 imm19_12 = (offset & 0xff000); u32 imm11 = (offset & 0x800) << (20 - 11); @@ -63,7 +63,7 @@ static int apply_r_riscv_jal_rela(struct module *me, u32 *location, static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u16 imm8 = (offset & 0x100) << (12 - 8); u16 imm7_6 = (offset & 0xc0) >> (6 - 5); u16 imm5 = (offset & 0x20) >> (5 - 2); @@ -78,7 +78,7 @@ static int apply_r_riscv_rcv_branch_rela(struct module *me, u32 *location, static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; u16 imm11 = (offset & 0x800) << (12 - 11); u16 imm10 = (offset & 0x400) >> (10 - 8); u16 imm9_8 = (offset & 0x300) << (12 - 11); @@ -96,7 +96,7 @@ static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; if (offset != (s32)offset) { @@ -178,7 +178,7 @@ static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; /* Always emit the got entry */ @@ -200,7 +200,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 fill_v = offset; u32 hi20, lo12; @@ -227,7 +227,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { - s64 offset = (void *)v - (void *)location; + ptrdiff_t offset = (void *)v - (void *)location; s32 fill_v = offset; u32 hi20, lo12; @@ -347,7 +347,7 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, unsigned int j; for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) { - u64 hi20_loc = + unsigned long hi20_loc = sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[j].r_offset; u32 hi20_type = ELF_RISCV_R_TYPE(rel[j].r_info); @@ -360,12 +360,12 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Sym *hi20_sym = (Elf_Sym *)sechdrs[symindex].sh_addr + ELF_RISCV_R_SYM(rel[j].r_info); - u64 hi20_sym_val = + unsigned long hi20_sym_val = hi20_sym->st_value + rel[j].r_addend; /* Calculate lo12 */ - u64 offset = hi20_sym_val - hi20_loc; + size_t offset = hi20_sym_val - hi20_loc; if (IS_ENABLED(CONFIG_MODULE_SECTIONS) && hi20_type == R_RISCV_GOT_HI20) { offset = module_emit_got_entry( From 781c8fe2da3d2c7c95cd7ffddbab63b80a79da4d Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Tue, 12 Jun 2018 19:26:36 +0200 Subject: [PATCH 262/382] RISC-V: fix R_RISCV_ADD32/R_RISCV_SUB32 relocations The R_RISCV_ADD32/R_RISCV_SUB32 relocations should add/subtract the address of the symbol (without overflow check), not its contents. Signed-off-by: Andreas Schwab Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 1d5e9b934b8c..6bb48315c973 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -263,14 +263,14 @@ static int apply_r_riscv_align_rela(struct module *me, u32 *location, static int apply_r_riscv_add32_rela(struct module *me, u32 *location, Elf_Addr v) { - *(u32 *)location += (*(u32 *)v); + *(u32 *)location += (u32)v; return 0; } static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, Elf_Addr v) { - *(u32 *)location -= (*(u32 *)v); + *(u32 *)location -= (u32)v; return 0; } From f67f10b8a6c96ab6c1d6946d269e2ca5f9998cc2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 19 Jun 2018 15:41:34 -0600 Subject: [PATCH 263/382] riscv: remove unnecessary of_platform_populate call The DT core will call of_platform_default_populate, so it is not necessary for arch specific code to call it unless there are custom match entries, auxdata or parent device. Neither of those apply here, so remove the call. Cc: Palmer Dabbelt Cc: Albert Ou Cc: linux-riscv@lists.infradead.org Signed-off-by: Rob Herring Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index ee44a48faf79..f0d2070866d4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -220,8 +220,3 @@ void __init setup_arch(char **cmdline_p) riscv_fill_hwcap(); } -static int __init riscv_device_init(void) -{ - return of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); -} -subsys_initcall_sync(riscv_device_init); From 8606544890d7dc4f7a740963f70dc1e1d54f8e30 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Fri, 22 Jun 2018 15:46:28 -0700 Subject: [PATCH 264/382] RISC-V: Don't include irq-riscv-intc.h This file has never existed in the upstream kernel, but it's guarded by an #ifdef that's also never existed in the upstream kernel. As a part of our interrupt controller refactoring this header is no longer necessary, but this reference managed to sneak in anyway. Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/irq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index b74cbfbce2d0..7bcdaed15703 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -16,10 +16,6 @@ #include #include -#ifdef CONFIG_RISCV_INTC -#include -#endif - void __init init_IRQ(void) { irqchip_init(); From 1db9b80980d26fe95c942e0bb8bde2ca715029ad Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Mon, 11 Jun 2018 14:48:22 -0700 Subject: [PATCH 265/382] RISC-V: Fix PTRACE_SETREGSET bug. In riscv_gpr_set, pass regs instead of ®s to user_regset_copyin to fix gdb segfault. Signed-off-by: Jim Wilson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index ba3e80712797..9f82a7e34c64 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -50,7 +50,7 @@ static int riscv_gpr_set(struct task_struct *target, struct pt_regs *regs; regs = task_pt_regs(target); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®s, 0, -1); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); return ret; } From 2893af07e507e993ad71ca6d66a7b02be741571c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Jul 2018 10:22:00 +0900 Subject: [PATCH 266/382] arm64: add endianness option to LDFLAGS instead of LD With the recent syntax extension, Kconfig is now able to evaluate the compiler / toolchain capability. However, accumulating flags to 'LD' is not compatible with the way it works; 'LD' must be passed to Kconfig to call $(ld-option,...) from Kconfig files. If you tweak 'LD' in arch Makefile depending on CONFIG_CPU_BIG_ENDIAN, this would end up with circular dependency between Makefile and Kconfig. Acked-by: Will Deacon Signed-off-by: Masahiro Yamada Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 27389adf511a..48158c550110 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -60,15 +60,13 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB -LD += -EB -LDFLAGS += -maarch64elfb +LDFLAGS += -EB -maarch64elfb UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL -LD += -EL -LDFLAGS += -maarch64elf +LDFLAGS += -EL -maarch64elf UTS_MACHINE := aarch64 endif From 99ec9e77511dea55d81729fc80b6c63a61bfa8e0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 3 Jun 2018 16:40:54 +0200 Subject: [PATCH 267/382] drm/udl: fix display corruption of the last line The displaylink hardware has such a peculiarity that it doesn't render a command until next command is received. This produces occasional corruption, such as when setting 22x11 font on the console, only the first line of the cursor will be blinking if the cursor is located at some specific columns. When we end up with a repeating pixel, the driver has a bug that it leaves one uninitialized byte after the command (and this byte is enough to flush the command and render it - thus it fixes the screen corruption), however whe we end up with a non-repeating pixel, there is no byte appended and this results in temporary screen corruption. This patch fixes the screen corruption by always appending a byte 0xAF at the end of URB. It also removes the uninitialized byte. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/udl/udl_fb.c | 5 ++++- drivers/gpu/drm/udl/udl_transfer.c | 11 +++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index 2ebdc6d5a76e..d5583190f3e4 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -137,7 +137,10 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y, if (cmd > (char *) urb->transfer_buffer) { /* Send partial buffer remaining before exiting */ - int len = cmd - (char *) urb->transfer_buffer; + int len; + if (cmd < (char *) urb->transfer_buffer + urb->transfer_buffer_length) + *cmd++ = 0xAF; + len = cmd - (char *) urb->transfer_buffer; ret = udl_submit_urb(dev, urb, len); bytes_sent += len; } else diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index 0c87b1ac6b68..b992644c17e6 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -153,11 +153,11 @@ static void udl_compress_hline16( raw_pixels_count_byte = cmd++; /* we'll know this later */ raw_pixel_start = pixel; - cmd_pixel_end = pixel + (min(MAX_CMD_PIXELS + 1, - min((int)(pixel_end - pixel) / bpp, - (int)(cmd_buffer_end - cmd) / 2))) * bpp; + cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL, + (unsigned long)(pixel_end - pixel) / bpp, + (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp; - prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); + prefetch_range((void *) pixel, cmd_pixel_end - pixel); pixel_val16 = get_pixel_val16(pixel, bpp); while (pixel < cmd_pixel_end) { @@ -193,6 +193,9 @@ static void udl_compress_hline16( if (pixel > raw_pixel_start) { /* finalize last RAW span */ *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; + } else { + /* undo unused byte */ + cmd--; } *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; From b8ba3578752b7aae5b6f8bd235873ea112969ff9 Mon Sep 17 00:00:00 2001 From: Stefan Mavrodiev Date: Wed, 4 Jul 2018 09:28:59 +0300 Subject: [PATCH 268/382] mmc: sunxi: Disable irq during pm_suspend When mmc host controller enters suspend state, the clocks are disabled, but irqs are not. For some reason the irqchip emits false interrupts, which causes system lock loop. Debug log is: ... sunxi-mmc 1c11000.mmc: setting clk to 52000000, rounded 51200000 sunxi-mmc 1c11000.mmc: enabling the clock sunxi-mmc 1c11000.mmc: cmd 13(8000014d) arg 10000 ie 0x0000bbc6 len 0 sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00000004 idi 00000000 sunxi-mmc 1c11000.mmc: cmd 6(80000146) arg 3210101 ie 0x0000bbc6 len 0 sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00000004 idi 00000000 sunxi-mmc 1c11000.mmc: cmd 13(8000014d) arg 10000 ie 0x0000bbc6 len 0 sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00000004 idi 00000000 mmc1: new DDR MMC card at address 0001 mmcblk1: mmc1:0001 AGND3R 14.6 GiB mmcblk1boot0: mmc1:0001 AGND3R partition 1 4.00 MiB mmcblk1boot1: mmc1:0001 AGND3R partition 2 4.00 MiB sunxi-mmc 1c11000.mmc: cmd 18(80003352) arg 0 ie 0x0000fbc2 len 409 sunxi-mmc 1c11000.mmc: irq: rq (ptrval) mi 00004000 idi 00000002 mmcblk1: p1 sunxi-mmc 1c11000.mmc: irq: rq (null) mi 00000000 idi 00000000 sunxi-mmc 1c11000.mmc: irq: rq (null) mi 00000000 idi 00000000 sunxi-mmc 1c11000.mmc: irq: rq (null) mi 00000000 idi 00000000 sunxi-mmc 1c11000.mmc: irq: rq (null) mi 00000000 idi 00000000 and so on... This issue apears on eMMC cards, routed on MMC2 slot. The patch is tested with A20-OLinuXino-MICRO/LIME/LIME2 boards. Fixes: 9a8e1e8cc2c0 ("mmc: sunxi: Add runtime_pm support") Signed-off-by: Stefan Mavrodiev Acked-by: Maxime Ripard Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index e7472590f2ed..8e7f3e35ee3d 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1446,6 +1446,7 @@ static int sunxi_mmc_runtime_resume(struct device *dev) sunxi_mmc_init_host(host); sunxi_mmc_set_bus_width(host, mmc->ios.bus_width); sunxi_mmc_set_clk(host, &mmc->ios); + enable_irq(host->irq); return 0; } @@ -1455,6 +1456,12 @@ static int sunxi_mmc_runtime_suspend(struct device *dev) struct mmc_host *mmc = dev_get_drvdata(dev); struct sunxi_mmc_host *host = mmc_priv(mmc); + /* + * When clocks are off, it's possible receiving + * fake interrupts, which will stall the system. + * Disabling the irq will prevent this. + */ + disable_irq(host->irq); sunxi_mmc_reset_host(host); sunxi_mmc_disable(host); From 2c83a726d6fbb5d130d8f2edd82a258adb675ac3 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 27 Jun 2018 15:58:13 +0200 Subject: [PATCH 269/382] drm/etnaviv: bring back progress check in job timeout handler When the hangcheck handler was replaced by the DRM scheduler timeout handling we dropped the forward progress check, as this might allow clients to hog the GPU for a long time with a big job. It turns out that even reasonably well behaved clients like the Armada Xorg driver occasionally trip over the 500ms timeout. Bring back the forward progress check to get rid of the userspace regression. We would still like to fix userspace to submit smaller batches if possible, but that is for another day. Cc: Fixes: 6d7a20c07760 (drm/etnaviv: replace hangcheck with scheduler timeout) Reported-by: Russell King Signed-off-by: Lucas Stach Reviewed-by: Eric Anholt --- drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 3 +++ drivers/gpu/drm/etnaviv/etnaviv_sched.c | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index dd430f0f8ff5..90f17ff7888e 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -131,6 +131,9 @@ struct etnaviv_gpu { struct work_struct sync_point_work; int sync_point_event; + /* hang detection */ + u32 hangcheck_dma_addr; + void __iomem *mmio; int irq; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index a74eb57af15b..50d6b88cb7aa 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -10,6 +10,7 @@ #include "etnaviv_gem.h" #include "etnaviv_gpu.h" #include "etnaviv_sched.h" +#include "state.xml.h" static int etnaviv_job_hang_limit = 0; module_param_named(job_hang_limit, etnaviv_job_hang_limit, int , 0444); @@ -85,6 +86,29 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) { struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job); struct etnaviv_gpu *gpu = submit->gpu; + u32 dma_addr; + int change; + + /* + * If the GPU managed to complete this jobs fence, the timout is + * spurious. Bail out. + */ + if (fence_completed(gpu, submit->out_fence->seqno)) + return; + + /* + * If the GPU is still making forward progress on the front-end (which + * should never loop) we shift out the timeout to give it a chance to + * finish the job. + */ + dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); + change = dma_addr - gpu->hangcheck_dma_addr; + if (change < 0 || change > 16) { + gpu->hangcheck_dma_addr = dma_addr; + schedule_delayed_work(&sched_job->work_tdr, + sched_job->sched->timeout); + return; + } /* block scheduler */ kthread_park(gpu->sched.thread); From a6311be8f062db5fe93a08a3722fae53a58f2499 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Wed, 4 Jul 2018 17:26:26 -0400 Subject: [PATCH 270/382] drm/amd/display: adding ycbcr420 pixel encoding for hdmi [why] HDMI EDID's VSDB contains spectial timings for specifically YCbCr 4:2:0 colour space. In those cases we need to verify if the mode provided is one of the special ones has to use YCbCr 4:2:0 pixel encoding for display info. [how] Verify if the mode is using specific ycbcr420 colour space with the help of DRM helper function and assign the mode to use ycbcr420 pixel encoding. Tested-by: Mike Lothian Reviewed-by: Harry Wentland Signed-off-by: Mikita Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3a8d6356afc2..f97dfc13a8f0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2183,6 +2183,7 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, const struct drm_connector *connector) { struct dc_crtc_timing *timing_out = &stream->timing; + const struct drm_display_info *info = &connector->display_info; memset(timing_out, 0, sizeof(struct dc_crtc_timing)); @@ -2191,8 +2192,10 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, timing_out->v_border_top = 0; timing_out->v_border_bottom = 0; /* TODO: un-hardcode */ - - if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444) + if (drm_mode_is_420_only(info, mode_in) + && stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) + timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR420; + else if ((connector->display_info.color_formats & DRM_COLOR_FORMAT_YCRCB444) && stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) timing_out->pixel_encoding = PIXEL_ENCODING_YCBCR444; else From e2a46a48b94d143b7fabd9da7d45eef1a0799986 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Jul 2018 12:27:53 +0200 Subject: [PATCH 271/382] MAINTAINERS: Add myself as driver core changes reviewer I really need to look at driver core changes before they are applied due to PM dependencies and they sometimes get lost in the LKML traffic, so add myself as an official driver core reviewer. Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 6cfd16790add..68aa943a672c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4456,6 +4456,7 @@ F: Documentation/blockdev/drbd/ DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS M: Greg Kroah-Hartman +R: "Rafael J. Wysocki" T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git S: Supported F: Documentation/kobject.txt From 413ff0b942481c7ac2e800abbbac5af318a65e61 Mon Sep 17 00:00:00 2001 From: Mikita Lipski Date: Wed, 4 Jul 2018 17:27:56 -0400 Subject: [PATCH 272/382] drm/amd/display: add a check for display depth validity [why] HDMI 2.0 fails to validate 4K@60 timing with 10 bpc [how] Adding a helper function that would verify if the display depth assigned would pass a bandwidth validation. Drop the display depth by one level till calculated pixel clk is lower than maximum TMDS clk. Bugzilla: https://bugs.freedesktop.org/106959 Tested-by: Mike Lothian Reviewed-by: Harry Wentland Signed-off-by: Mikita Lipski Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f97dfc13a8f0..770c6b24be0b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2175,6 +2175,46 @@ get_output_color_space(const struct dc_crtc_timing *dc_crtc_timing) return color_space; } +static void reduce_mode_colour_depth(struct dc_crtc_timing *timing_out) +{ + if (timing_out->display_color_depth <= COLOR_DEPTH_888) + return; + + timing_out->display_color_depth--; +} + +static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_out, + const struct drm_display_info *info) +{ + int normalized_clk; + if (timing_out->display_color_depth <= COLOR_DEPTH_888) + return; + do { + normalized_clk = timing_out->pix_clk_khz; + /* YCbCr 4:2:0 requires additional adjustment of 1/2 */ + if (timing_out->pixel_encoding == PIXEL_ENCODING_YCBCR420) + normalized_clk /= 2; + /* Adjusting pix clock following on HDMI spec based on colour depth */ + switch (timing_out->display_color_depth) { + case COLOR_DEPTH_101010: + normalized_clk = (normalized_clk * 30) / 24; + break; + case COLOR_DEPTH_121212: + normalized_clk = (normalized_clk * 36) / 24; + break; + case COLOR_DEPTH_161616: + normalized_clk = (normalized_clk * 48) / 24; + break; + default: + return; + } + if (normalized_clk <= info->max_tmds_clock) + return; + reduce_mode_colour_depth(timing_out); + + } while (timing_out->display_color_depth > COLOR_DEPTH_888); + +} /*****************************************************************************/ static void @@ -2231,6 +2271,8 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, stream->out_transfer_func->type = TF_TYPE_PREDEFINED; stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; + if (stream->sink->sink_signal == SIGNAL_TYPE_HDMI_TYPE_A) + adjust_colour_depth_from_display_info(timing_out, info); } static void fill_audio_info(struct audio_info *audio_info, From 1a381d4a0a9a0f999a13faaba22bf6b3fc80dcb9 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 27 Jun 2018 12:46:14 -0700 Subject: [PATCH 273/382] arm64: remove no-op -p linker flag Linking the ARM64 defconfig kernel with LLVM lld fails with the error: ld.lld: error: unknown argument: -p Makefile:1015: recipe for target 'vmlinux' failed Without this flag, the ARM64 defconfig kernel successfully links with lld and boots on Dragonboard 410c. After digging through binutils source and changelogs, it turns out that -p is only relevant to ancient binutils installations targeting 32-bit ARM. binutils accepts -p for AArch64 too, but it's always been undocumented and silently ignored. A comment in ld/emultempl/aarch64elf.em explains that it's "Only here for backwards compatibility". Since this flag is a no-op on ARM64, we can safely drop it. Acked-by: Will Deacon Reviewed-by: Nick Desaulniers Signed-off-by: Greg Hackmann Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 48158c550110..7976d2d242fa 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=-p --no-undefined -X +LDFLAGS_vmlinux :=--no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 From d02d21ea007b6b33cdaf15c2f84fb1fea996ecc2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 4 Jul 2018 18:17:51 -0700 Subject: [PATCH 274/382] autofs: rename 'autofs' module back to 'autofs4' It turns out that systemd has a bug: it wants to load the autofs module early because of some initialization ordering with udev, and it doesn't do that correctly. Everywhere else it does the proper "look up module name" that does the proper alias resolution, but in that early code, it just uses a hardcoded "autofs4" for the module name. The result of that is that as of commit a2225d931f75 ("autofs: remove left-over autofs4 stubs"), you get systemd[1]: Failed to insert module 'autofs4': No such file or directory in the system logs, and a lack of module loading. All this despite the fact that we had very clearly marked 'autofs4' as an alias for this module. What's so ridiculous about this is that literally everything else does the module alias handling correctly, including really old versions of systemd (that just used 'modprobe' to do this), and even all the other systemd module loading code. Only that special systemd early module load code is broken, hardcoding the module names for not just 'autofs4', but also "ipv6", "unix", "ip_tables" and "virtio_rng". Very annoying. Instead of creating an _additional_ separate compatibility 'autofs4' module, just rely on the fact that everybody else gets this right, and just call the module 'autofs4' for compatibility reasons, with 'autofs' as the alias name. That will allow the systemd people to fix their bugs, adding the proper alias handling, and maybe even fix the name of the module to be just "autofs" (so that they can _test_ the alias handling). And eventually, we can revert this silly compatibility hack. See also https://github.com/systemd/systemd/issues/9501 https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=902946 for the systemd bug reports upstream and in the Debian bug tracker respectively. Fixes: a2225d931f75 ("autofs: remove left-over autofs4 stubs") Reported-by: Ben Hutchings Reported-by: Michael Biebl Cc: Ian Kent Signed-off-by: Linus Torvalds --- fs/autofs/Makefile | 4 ++-- fs/autofs/init.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile index 43fedde15c26..1f85d35ec8b7 100644 --- a/fs/autofs/Makefile +++ b/fs/autofs/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux autofs-filesystem routines. # -obj-$(CONFIG_AUTOFS_FS) += autofs.o +obj-$(CONFIG_AUTOFS_FS) += autofs4.o -autofs-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o +autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o diff --git a/fs/autofs/init.c b/fs/autofs/init.c index cc9447e1903f..79ae07d9592f 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -23,7 +23,7 @@ static struct file_system_type autofs_fs_type = { .kill_sb = autofs_kill_sb, }; MODULE_ALIAS_FS("autofs"); -MODULE_ALIAS("autofs4"); +MODULE_ALIAS("autofs"); static int __init init_autofs_fs(void) { From 696e420bb2a6624478105651d5368d45b502b324 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Mon, 25 Jun 2018 14:05:25 +0200 Subject: [PATCH 275/382] cifs: Fix use after free of a mid_q_entry With protocol version 2.0 mounts we have seen crashes with corrupt mid entries. Either the server->pending_mid_q list becomes corrupt with a cyclic reference in one element or a mid object fetched by the demultiplexer thread becomes overwritten during use. Code review identified a race between the demultiplexer thread and the request issuing thread. The demultiplexer thread seems to be written with the assumption that it is the sole user of the mid object until it calls the mid callback which either wakes the issuer task or deletes the mid. This assumption is not true because the issuer task can be woken up earlier by a signal. If the demultiplexer thread has proceeded as far as setting the mid_state to MID_RESPONSE_RECEIVED then the issuer thread will happily end up calling cifs_delete_mid while the demultiplexer thread still is using the mid object. Inserting a delay in the cifs demultiplexer thread widens the race window and makes reproduction of the race very easy: if (server->large_buf) buf = server->bigbuf; + usleep_range(500, 4000); server->lstrp = jiffies; To resolve this I think the proper solution involves putting a reference count on the mid object. This patch makes sure that the demultiplexer thread holds a reference until it has finished processing the transaction. Cc: stable@vger.kernel.org Signed-off-by: Lars Persson Acked-by: Paulo Alcantara Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 1 + fs/cifs/cifsproto.h | 1 + fs/cifs/connect.c | 8 +++++++- fs/cifs/smb1ops.c | 1 + fs/cifs/smb2ops.c | 1 + fs/cifs/smb2transport.c | 1 + fs/cifs/transport.c | 18 +++++++++++++++++- 7 files changed, 29 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index bd78da59a4fd..a2962fd41c6f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1416,6 +1416,7 @@ typedef int (mid_handle_t)(struct TCP_Server_Info *server, /* one of these for every pending CIFS request to the server */ struct mid_q_entry { struct list_head qhead; /* mids waiting on reply from this server */ + struct kref refcount; struct TCP_Server_Info *server; /* server corresponding to this mid */ __u64 mid; /* multiplex id */ __u32 pid; /* process id */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 03018be17283..1890f534c88b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -82,6 +82,7 @@ extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server); extern void DeleteMidQEntry(struct mid_q_entry *midEntry); extern void cifs_delete_mid(struct mid_q_entry *mid); +extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a57da1b88bdf..5df2c0698cda 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -924,6 +924,7 @@ cifs_demultiplex_thread(void *p) server->pdu_size = next_offset; } + mid_entry = NULL; if (server->ops->is_transform_hdr && server->ops->receive_transform && server->ops->is_transform_hdr(buf)) { @@ -938,8 +939,11 @@ cifs_demultiplex_thread(void *p) length = mid_entry->receive(server, mid_entry); } - if (length < 0) + if (length < 0) { + if (mid_entry) + cifs_mid_q_entry_release(mid_entry); continue; + } if (server->large_buf) buf = server->bigbuf; @@ -956,6 +960,8 @@ cifs_demultiplex_thread(void *p) if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); + + cifs_mid_q_entry_release(mid_entry); } else if (server->ops->is_oplock_break && server->ops->is_oplock_break(buf, server)) { cifs_dbg(FYI, "Received oplock break\n"); diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index aff8ce8ba34d..646dcd149de1 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -107,6 +107,7 @@ cifs_find_mid(struct TCP_Server_Info *server, char *buffer) if (compare_mid(mid->mid, buf) && mid->mid_state == MID_REQUEST_SUBMITTED && le16_to_cpu(mid->command) == buf->Command) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 0356b5559c71..e9216ce88796 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -203,6 +203,7 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf) if ((mid->mid == wire_mid) && (mid->mid_state == MID_REQUEST_SUBMITTED) && (mid->command == shdr->Command)) { + kref_get(&mid->refcount); spin_unlock(&GlobalMid_Lock); return mid; } diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 51b9437c3c7b..50592976dcb4 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -548,6 +548,7 @@ smb2_mid_entry_alloc(const struct smb2_sync_hdr *shdr, temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = le64_to_cpu(shdr->MessageId); temp->pid = current->pid; temp->command = shdr->Command; /* Always LE */ diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index fb57dfbfb749..208ecb830466 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -61,6 +61,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); memset(temp, 0, sizeof(struct mid_q_entry)); + kref_init(&temp->refcount); temp->mid = get_mid(smb_buffer); temp->pid = current->pid; temp->command = cpu_to_le16(smb_buffer->Command); @@ -82,6 +83,21 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server) return temp; } +static void _cifs_mid_q_entry_release(struct kref *refcount) +{ + struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry, + refcount); + + mempool_free(mid, cifs_mid_poolp); +} + +void cifs_mid_q_entry_release(struct mid_q_entry *midEntry) +{ + spin_lock(&GlobalMid_Lock); + kref_put(&midEntry->refcount, _cifs_mid_q_entry_release); + spin_unlock(&GlobalMid_Lock); +} + void DeleteMidQEntry(struct mid_q_entry *midEntry) { @@ -110,7 +126,7 @@ DeleteMidQEntry(struct mid_q_entry *midEntry) } } #endif - mempool_free(midEntry, cifs_mid_poolp); + cifs_mid_q_entry_release(midEntry); } void From 27c32b49c3dbfe1e5f57d2b61823bf9474ae0875 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 23 Jun 2018 14:52:23 -0300 Subject: [PATCH 276/382] cifs: Fix validation of signed data in smb3+ Fixes: c713c8770fa5 ("cifs: push rfc1002 generation down the stack") We failed to validate signed data returned by the server because __cifs_calc_signature() now expects to sign the actual data in iov but we were also passing down the rfc1002 length. Fix smb3_calc_signature() to calculate signature of rfc1002 length prior to passing only the actual data iov[1-N] to __cifs_calc_signature(). In addition, there are a few cases where no rfc1002 length is passed so we make sure there's one (iov_len == 4). Signed-off-by: Paulo Alcantara Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2transport.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 50592976dcb4..1af46ca5a951 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -408,12 +408,14 @@ generate_smb311signingkey(struct cifs_ses *ses) int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int rc = 0; + int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; unsigned char *sigptr = smb3_signature; struct kvec *iov = rqst->rq_iov; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; + struct shash_desc *shash = &server->secmech.sdesccmacaes->shash; + struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); if (!ses) { @@ -425,8 +427,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE); rc = crypto_shash_setkey(server->secmech.cmacaes, - ses->smb3signingkey, SMB2_CMACAES_SIZE); - + ses->smb3signingkey, SMB2_CMACAES_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__); return rc; @@ -437,15 +438,33 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) * so unlike smb2 case we do not have to check here if secmech are * initialized */ - rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash); + rc = crypto_shash_init(shash); if (rc) { cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__); return rc; } - rc = __cifs_calc_signature(rqst, server, sigptr, - &server->secmech.sdesccmacaes->shash); + /* + * For SMB2+, __cifs_calc_signature() expects to sign only the actual + * data, that is, iov[0] should not contain a rfc1002 length. + * + * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to + * __cifs_calc_signature(). + */ + drqst = *rqst; + if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { + rc = crypto_shash_update(shash, iov[0].iov_base, + iov[0].iov_len); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); + return rc; + } + drqst.rq_iov++; + drqst.rq_nvec--; + } + rc = __cifs_calc_signature(&drqst, server, sigptr, shash); if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); From 8de8c4608fe9edc046c31bf82b2b7ebc1daae015 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Sat, 23 Jun 2018 14:52:24 -0300 Subject: [PATCH 277/382] cifs: Fix validation of signed data in smb2 Fixes: c713c8770fa5 ("cifs: push rfc1002 generation down the stack") We failed to validate signed data returned by the server because __cifs_calc_signature() now expects to sign the actual data in iov but we were also passing down the rfc1002 length. Fix smb3_calc_signature() to calculate signature of rfc1002 length prior to passing only the actual data iov[1-N] to __cifs_calc_signature(). In addition, there are a few cases where no rfc1002 length is passed so we make sure there's one (iov_len == 4). Signed-off-by: Paulo Alcantara Reviewed-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/smb2transport.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 1af46ca5a951..719d55e63d88 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -173,6 +173,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct kvec *iov = rqst->rq_iov; struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base; struct cifs_ses *ses; + struct shash_desc *shash = &server->secmech.sdeschmacsha256->shash; + struct smb_rqst drqst; ses = smb2_find_smb_ses(server, shdr->SessionId); if (!ses) { @@ -190,21 +192,39 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) } rc = crypto_shash_setkey(server->secmech.hmacsha256, - ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); + ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { cifs_dbg(VFS, "%s: Could not update with response\n", __func__); return rc; } - rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); + rc = crypto_shash_init(shash); if (rc) { cifs_dbg(VFS, "%s: Could not init sha256", __func__); return rc; } - rc = __cifs_calc_signature(rqst, server, sigptr, - &server->secmech.sdeschmacsha256->shash); + /* + * For SMB2+, __cifs_calc_signature() expects to sign only the actual + * data, that is, iov[0] should not contain a rfc1002 length. + * + * Sign the rfc1002 length prior to passing the data (iov[1-N]) down to + * __cifs_calc_signature(). + */ + drqst = *rqst; + if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) { + rc = crypto_shash_update(shash, iov[0].iov_base, + iov[0].iov_len); + if (rc) { + cifs_dbg(VFS, "%s: Could not update with payload\n", + __func__); + return rc; + } + drqst.rq_iov++; + drqst.rq_nvec--; + } + rc = __cifs_calc_signature(&drqst, server, sigptr, shash); if (!rc) memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE); From 81f39f951b8098b1c59b01ad10d06d7dc01c7019 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 28 Jun 2018 10:47:14 +1000 Subject: [PATCH 278/382] cifs: fix SMB1 breakage SMB1 mounting broke in commit 35e2cc1ba755 ("cifs: Use correct packet length in SMB2_TRANSFORM header") Fix it and also rename smb2_rqst_len to smb_rqst_len to make it less unobvious that the function is also called from CIFS/SMB1 Good job by Paulo reviewing and cleaning up Ronnie's original patch. Signed-off-by: Ronnie Sahlberg Reviewed-by: Paulo Alcantara Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 2 +- fs/cifs/smb2proto.h | 4 ++-- fs/cifs/smbdirect.c | 5 +++-- fs/cifs/smbdirect.h | 4 ++-- fs/cifs/transport.c | 9 +++++---- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index e9216ce88796..04578f6e306b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2522,7 +2522,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, struct smb_rqst *new_rq, if (!tr_hdr) goto err_free_iov; - orig_len = smb2_rqst_len(old_rq, false); + orig_len = smb_rqst_len(server, old_rq); /* fill the 2nd iov with a transform header */ fill_transform_hdr(tr_hdr, orig_len, old_rq); diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 3ae208ac2a77..6e6a4f2ec890 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -113,8 +113,8 @@ extern int smb2_unlock_range(struct cifsFileInfo *cfile, extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile); extern void smb2_reconnect_server(struct work_struct *work); extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server); -extern unsigned long -smb2_rqst_len(struct smb_rqst *rqst, bool skip_rfc1002_marker); +extern unsigned long smb_rqst_len(struct TCP_Server_Info *server, + struct smb_rqst *rqst); /* * SMB2 Worker functions - most of protocol specific implementation details diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 6fd94d9ffac2..c55ea4e6201b 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -2083,8 +2083,9 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg) * rqst: the data to write * return value: 0 if successfully write, otherwise error code */ -int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) +int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) { + struct smbd_connection *info = server->smbd_conn; struct kvec vec; int nvecs; int size; @@ -2118,7 +2119,7 @@ int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and * ends at page boundary */ - buflen = smb2_rqst_len(rqst, true); + buflen = smb_rqst_len(server, rqst); if (buflen + sizeof(struct smbd_data_transfer) > info->max_fragmented_send_size) { diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index 1e419c21dc60..a11096254f29 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -292,7 +292,7 @@ void smbd_destroy(struct smbd_connection *info); /* Interface for carrying upper layer I/O through send/recv */ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); -int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst); +int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst); enum mr_state { MR_READY, @@ -332,7 +332,7 @@ static inline void *smbd_get_connection( static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } static inline void smbd_destroy(struct smbd_connection *info) {} static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; } -static inline int smbd_send(struct smbd_connection *info, struct smb_rqst *rqst) {return -1; } +static inline int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst) {return -1; } #endif #endif diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 208ecb830466..a341ec839c83 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -218,14 +218,15 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg, } unsigned long -smb2_rqst_len(struct smb_rqst *rqst, bool skip_rfc1002_marker) +smb_rqst_len(struct TCP_Server_Info *server, struct smb_rqst *rqst) { unsigned int i; struct kvec *iov; int nvec; unsigned long buflen = 0; - if (skip_rfc1002_marker && rqst->rq_iov[0].iov_len == 4) { + if (server->vals->header_preamble_size == 0 && + rqst->rq_nvec >= 2 && rqst->rq_iov[0].iov_len == 4) { iov = &rqst->rq_iov[1]; nvec = rqst->rq_nvec - 1; } else { @@ -276,7 +277,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, __be32 rfc1002_marker; if (cifs_rdma_enabled(server) && server->smbd_conn) { - rc = smbd_send(server->smbd_conn, rqst); + rc = smbd_send(server, rqst); goto smbd_done; } if (ssocket == NULL) @@ -287,7 +288,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, (char *)&val, sizeof(val)); for (j = 0; j < num_rqst; j++) - send_length += smb2_rqst_len(&rqst[j], true); + send_length += smb_rqst_len(server, &rqst[j]); rfc1002_marker = cpu_to_be32(send_length); /* Generate a rfc1002 marker for SMB2+ */ From 6aa0c114eceec8cc61715f74a4ce91b048d7561c Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 4 Jul 2018 14:16:16 -0300 Subject: [PATCH 279/382] cifs: Fix memory leak in smb2_set_ea() This patch fixes a memory leak when doing a setxattr(2) in SMB2+. Signed-off-by: Paulo Alcantara Cc: stable@vger.kernel.org Signed-off-by: Steve French Reviewed-by: Aurelien Aptel --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 04578f6e306b..1d5985bd760b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -856,6 +856,8 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, rc = SMB2_set_ea(xid, tcon, fid.persistent_fid, fid.volatile_fid, ea, len); + kfree(ea); + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); return rc; From f46ecbd97f508e68a7806291a139499794874f3d Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 5 Jul 2018 11:46:42 +0200 Subject: [PATCH 280/382] cifs: Fix slab-out-of-bounds in send_set_info() on SMB2 ACE setting A "small" CIFS buffer is not big enough in general to hold a setacl request for SMB2, and we end up overflowing the buffer in send_set_info(). For instance: # mount.cifs //127.0.0.1/test /mnt/test -o username=test,password=test,nounix,cifsacl # touch /mnt/test/acltest # getcifsacl /mnt/test/acltest REVISION:0x1 CONTROL:0x9004 OWNER:S-1-5-21-2926364953-924364008-418108241-1000 GROUP:S-1-22-2-1001 ACL:S-1-5-21-2926364953-924364008-418108241-1000:ALLOWED/0x0/0x1e01ff ACL:S-1-22-2-1001:ALLOWED/0x0/R ACL:S-1-22-2-1001:ALLOWED/0x0/R ACL:S-1-5-21-2926364953-924364008-418108241-1000:ALLOWED/0x0/0x1e01ff ACL:S-1-1-0:ALLOWED/0x0/R # setcifsacl -a "ACL:S-1-22-2-1004:ALLOWED/0x0/R" /mnt/test/acltest this setacl will cause the following KASAN splat: [ 330.777927] BUG: KASAN: slab-out-of-bounds in send_set_info+0x4dd/0xc20 [cifs] [ 330.779696] Write of size 696 at addr ffff88010d5e2860 by task setcifsacl/1012 [ 330.781882] CPU: 1 PID: 1012 Comm: setcifsacl Not tainted 4.18.0-rc2+ #2 [ 330.783140] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 330.784395] Call Trace: [ 330.784789] dump_stack+0xc2/0x16b [ 330.786777] print_address_description+0x6a/0x270 [ 330.787520] kasan_report+0x258/0x380 [ 330.788845] memcpy+0x34/0x50 [ 330.789369] send_set_info+0x4dd/0xc20 [cifs] [ 330.799511] SMB2_set_acl+0x76/0xa0 [cifs] [ 330.801395] set_smb2_acl+0x7ac/0xf30 [cifs] [ 330.830888] cifs_xattr_set+0x963/0xe40 [cifs] [ 330.840367] __vfs_setxattr+0x84/0xb0 [ 330.842060] __vfs_setxattr_noperm+0xe6/0x370 [ 330.843848] vfs_setxattr+0xc2/0xd0 [ 330.845519] setxattr+0x258/0x320 [ 330.859211] path_setxattr+0x15b/0x1b0 [ 330.864392] __x64_sys_setxattr+0xc0/0x160 [ 330.866133] do_syscall_64+0x14e/0x4b0 [ 330.876631] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 330.878503] RIP: 0033:0x7ff2e507db0a [ 330.880151] Code: 48 8b 0d 89 93 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 bc 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 56 93 2c 00 f7 d8 64 89 01 48 [ 330.885358] RSP: 002b:00007ffdc4903c18 EFLAGS: 00000246 ORIG_RAX: 00000000000000bc [ 330.887733] RAX: ffffffffffffffda RBX: 000055d1170de140 RCX: 00007ff2e507db0a [ 330.890067] RDX: 000055d1170de7d0 RSI: 000055d115b39184 RDI: 00007ffdc4904818 [ 330.892410] RBP: 0000000000000001 R08: 0000000000000000 R09: 000055d1170de7e4 [ 330.894785] R10: 00000000000002b8 R11: 0000000000000246 R12: 0000000000000007 [ 330.897148] R13: 000055d1170de0c0 R14: 0000000000000008 R15: 000055d1170de550 [ 330.901057] Allocated by task 1012: [ 330.902888] kasan_kmalloc+0xa0/0xd0 [ 330.904714] kmem_cache_alloc+0xc8/0x1d0 [ 330.906615] mempool_alloc+0x11e/0x380 [ 330.908496] cifs_small_buf_get+0x35/0x60 [cifs] [ 330.910510] smb2_plain_req_init+0x4a/0xd60 [cifs] [ 330.912551] send_set_info+0x198/0xc20 [cifs] [ 330.914535] SMB2_set_acl+0x76/0xa0 [cifs] [ 330.916465] set_smb2_acl+0x7ac/0xf30 [cifs] [ 330.918453] cifs_xattr_set+0x963/0xe40 [cifs] [ 330.920426] __vfs_setxattr+0x84/0xb0 [ 330.922284] __vfs_setxattr_noperm+0xe6/0x370 [ 330.924213] vfs_setxattr+0xc2/0xd0 [ 330.926008] setxattr+0x258/0x320 [ 330.927762] path_setxattr+0x15b/0x1b0 [ 330.929592] __x64_sys_setxattr+0xc0/0x160 [ 330.931459] do_syscall_64+0x14e/0x4b0 [ 330.933314] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 330.936843] Freed by task 0: [ 330.938588] (stack is not available) [ 330.941886] The buggy address belongs to the object at ffff88010d5e2800 which belongs to the cache cifs_small_rq of size 448 [ 330.946362] The buggy address is located 96 bytes inside of 448-byte region [ffff88010d5e2800, ffff88010d5e29c0) [ 330.950722] The buggy address belongs to the page: [ 330.952789] page:ffffea0004357880 count:1 mapcount:0 mapping:ffff880108fdca80 index:0x0 compound_mapcount: 0 [ 330.955665] flags: 0x17ffffc0008100(slab|head) [ 330.957760] raw: 0017ffffc0008100 dead000000000100 dead000000000200 ffff880108fdca80 [ 330.960356] raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 [ 330.963005] page dumped because: kasan: bad access detected [ 330.967039] Memory state around the buggy address: [ 330.969255] ffff88010d5e2880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 330.971833] ffff88010d5e2900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 330.974397] >ffff88010d5e2980: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc [ 330.976956] ^ [ 330.979226] ffff88010d5e2a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 330.981755] ffff88010d5e2a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 330.984225] ================================================================== Fix this by allocating a regular CIFS buffer in smb2_plain_req_init() if the request command is SMB2_SET_INFO. Reported-by: Jianhong Yin Fixes: 366ed846df60 ("cifs: Use smb 2 - 3 and cifsacl mount options setacl function") CC: Stable Signed-off-by: Stefano Brivio Reviewed-and-tested-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 810b85787c91..dbfb83ae6137 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -340,7 +340,10 @@ smb2_plain_req_init(__le16 smb2_command, struct cifs_tcon *tcon, return rc; /* BB eventually switch this to SMB2 specific small buf size */ - *request_buf = cifs_small_buf_get(); + if (smb2_command == SMB2_SET_INFO) + *request_buf = cifs_buf_get(); + else + *request_buf = cifs_small_buf_get(); if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ return -ENOMEM; @@ -3720,7 +3723,7 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov); - cifs_small_buf_release(req); + cifs_buf_release(req); rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base; if (rc != 0) { From 7ffbe65578b44fafdef577a360eb0583929f7c6e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 5 Jul 2018 13:46:34 -0300 Subject: [PATCH 281/382] cifs: Fix infinite loop when using hard mount option For every request we send, whether it is SMB1 or SMB2+, we attempt to reconnect tcon (cifs_reconnect_tcon or smb2_reconnect) before carrying out the request. So, while server->tcpStatus != CifsNeedReconnect, we wait for the reconnection to succeed on wait_event_interruptible_timeout(). If it returns, that means that either the condition was evaluated to true, or timeout elapsed, or it was interrupted by a signal. Since we're not handling the case where the process woke up due to a received signal (-ERESTARTSYS), the next call to wait_event_interruptible_timeout() will _always_ fail and we end up looping forever inside either cifs_reconnect_tcon() or smb2_reconnect(). Here's an example of how to trigger that: $ mount.cifs //foo/share /mnt/test -o username=foo,password=foo,vers=1.0,hard (break connection to server before executing bellow cmd) $ stat -f /mnt/test & sleep 140 [1] 2511 $ ps -aux -q 2511 USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND root 2511 0.0 0.0 12892 1008 pts/0 S 12:24 0:00 stat -f /mnt/test $ kill -9 2511 (wait for a while; process is stuck in the kernel) $ ps -aux -q 2511 USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND root 2511 83.2 0.0 12892 1008 pts/0 R 12:24 30:01 stat -f /mnt/test By using 'hard' mount point means that cifs.ko will keep retrying indefinitely, however we must allow the process to be killed otherwise it would hang the system. Signed-off-by: Paulo Alcantara Cc: stable@vger.kernel.org Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/cifssmb.c | 10 ++++++++-- fs/cifs/smb2pdu.c | 18 ++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d352da325de3..93408eab92e7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -157,8 +157,14 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) * greater than cifs socket timeout which is 7 seconds */ while (server->tcpStatus == CifsNeedReconnect) { - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? */ if (server->tcpStatus != CifsNeedReconnect) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index dbfb83ae6137..e31375ecaa6f 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -155,7 +155,7 @@ smb2_hdr_assemble(struct smb2_sync_hdr *shdr, __le16 smb2_cmd, static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) { - int rc = 0; + int rc; struct nls_table *nls_codepage; struct cifs_ses *ses; struct TCP_Server_Info *server; @@ -166,10 +166,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) * for those three - in the calling routine. */ if (tcon == NULL) - return rc; + return 0; if (smb2_command == SMB2_TREE_CONNECT) - return rc; + return 0; if (tcon->tidStatus == CifsExiting) { /* @@ -212,8 +212,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) return -EAGAIN; } - wait_event_interruptible_timeout(server->response_q, - (server->tcpStatus != CifsNeedReconnect), 10 * HZ); + rc = wait_event_interruptible_timeout(server->response_q, + (server->tcpStatus != CifsNeedReconnect), + 10 * HZ); + if (rc < 0) { + cifs_dbg(FYI, "%s: aborting reconnect due to a received" + " signal by the process\n", __func__); + return -ERESTARTSYS; + } /* are we still trying to reconnect? */ if (server->tcpStatus != CifsNeedReconnect) @@ -231,7 +237,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } if (!tcon->ses->need_reconnect && !tcon->need_reconnect) - return rc; + return 0; nls_codepage = load_nls_default(); From 729c0c9dd55204f0c9a823ac8a7bfa83d36c7e78 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 5 Jul 2018 15:10:02 +0200 Subject: [PATCH 282/382] cifs: Fix stack out-of-bounds in smb{2,3}_create_lease_buf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit smb{2,3}_create_lease_buf() store a lease key in the lease context for later usage on a lease break. In most paths, the key is currently sourced from data that happens to be on the stack near local variables for oplock in SMB2_open() callers, e.g. from open_shroot(), whereas smb2_open_file() properly allocates space on its stack for it. The address of those local variables holding the oplock is then passed to create_lease_buf handlers via SMB2_open(), and 16 bytes near oplock are used. This causes a stack out-of-bounds access as reported by KASAN on SMB2.1 and SMB3 mounts (first out-of-bounds access is shown here): [ 111.528823] BUG: KASAN: stack-out-of-bounds in smb3_create_lease_buf+0x399/0x3b0 [cifs] [ 111.530815] Read of size 8 at addr ffff88010829f249 by task mount.cifs/985 [ 111.532838] CPU: 3 PID: 985 Comm: mount.cifs Not tainted 4.18.0-rc3+ #91 [ 111.534656] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 111.536838] Call Trace: [ 111.537528] dump_stack+0xc2/0x16b [ 111.540890] print_address_description+0x6a/0x270 [ 111.542185] kasan_report+0x258/0x380 [ 111.544701] smb3_create_lease_buf+0x399/0x3b0 [cifs] [ 111.546134] SMB2_open+0x1ef8/0x4b70 [cifs] [ 111.575883] open_shroot+0x339/0x550 [cifs] [ 111.591969] smb3_qfs_tcon+0x32c/0x1e60 [cifs] [ 111.617405] cifs_mount+0x4f3/0x2fc0 [cifs] [ 111.674332] cifs_smb3_do_mount+0x263/0xf10 [cifs] [ 111.677915] mount_fs+0x55/0x2b0 [ 111.679504] vfs_kern_mount.part.22+0xaa/0x430 [ 111.684511] do_mount+0xc40/0x2660 [ 111.698301] ksys_mount+0x80/0xd0 [ 111.701541] do_syscall_64+0x14e/0x4b0 [ 111.711807] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 111.713665] RIP: 0033:0x7f372385b5fa [ 111.715311] Code: 48 8b 0d 99 78 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 66 78 2c 00 f7 d8 64 89 01 48 [ 111.720330] RSP: 002b:00007ffff27049d8 EFLAGS: 00000206 ORIG_RAX: 00000000000000a5 [ 111.722601] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f372385b5fa [ 111.724842] RDX: 000055c2ecdc73b2 RSI: 000055c2ecdc73f9 RDI: 00007ffff270580f [ 111.727083] RBP: 00007ffff2705804 R08: 000055c2ee976060 R09: 0000000000001000 [ 111.729319] R10: 0000000000000000 R11: 0000000000000206 R12: 00007f3723f4d000 [ 111.731615] R13: 000055c2ee976060 R14: 00007f3723f4f90f R15: 0000000000000000 [ 111.735448] The buggy address belongs to the page: [ 111.737420] page:ffffea000420a7c0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 111.739890] flags: 0x17ffffc0000000() [ 111.741750] raw: 0017ffffc0000000 0000000000000000 dead000000000200 0000000000000000 [ 111.744216] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 [ 111.746679] page dumped because: kasan: bad access detected [ 111.750482] Memory state around the buggy address: [ 111.752562] ffff88010829f100: 00 f2 f2 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 00 [ 111.754991] ffff88010829f180: 00 00 f2 f2 00 00 00 00 00 00 00 00 00 00 00 00 [ 111.757401] >ffff88010829f200: 00 00 00 00 00 f1 f1 f1 f1 01 f2 f2 f2 f2 f2 f2 [ 111.759801] ^ [ 111.762034] ffff88010829f280: f2 02 f2 f2 f2 f2 f2 f2 f2 00 00 00 00 00 00 00 [ 111.764486] ffff88010829f300: f2 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 111.766913] ================================================================== Lease keys are however already generated and stored in fid data on open and create paths: pass them down to the lease context creation handlers and use them. Suggested-by: Aurélien Aptel Reviewed-by: Aurelien Aptel Fixes: b8c32dbb0deb ("CIFS: Request SMB2.1 leases") Signed-off-by: Stefano Brivio Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 2 +- fs/cifs/smb2file.c | 11 ++++------- fs/cifs/smb2ops.c | 9 +++------ fs/cifs/smb2pdu.c | 7 ++++--- fs/cifs/smb2pdu.h | 6 ++---- 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a2962fd41c6f..c923c7854027 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -423,7 +423,7 @@ struct smb_version_operations { void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, bool *); /* create lease context buffer for CREATE request */ - char * (*create_lease_buf)(u8 *, u8); + char * (*create_lease_buf)(u8 *lease_key, u8 oplock); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 788412675723..4ed10dd086e6 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -41,7 +41,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, int rc; __le16 *smb2_path; struct smb2_file_all_info *smb2_data = NULL; - __u8 smb2_oplock[17]; + __u8 smb2_oplock; struct cifs_fid *fid = oparms->fid; struct network_resiliency_req nr_ioctl_req; @@ -59,12 +59,9 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, } oparms->desired_access |= FILE_READ_ATTRIBUTES; - *smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; + smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; - if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) - memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); - - rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL, + rc = SMB2_open(xid, oparms, smb2_path, &smb2_oplock, smb2_data, NULL, NULL); if (rc) goto out; @@ -101,7 +98,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, move_smb2_info_to_cifs(buf, smb2_data); } - *oplock = *smb2_oplock; + *oplock = smb2_oplock; out: kfree(smb2_data); kfree(smb2_path); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 1d5985bd760b..ea92a38b2f08 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2222,8 +2222,7 @@ smb2_create_lease_buf(u8 *lease_key, u8 oplock) if (!buf) return NULL; - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseState = map_oplock_to_lease(oplock); buf->ccontext.DataOffset = cpu_to_le16(offsetof @@ -2249,8 +2248,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock) if (!buf) return NULL; - buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key)); - buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8))); + memcpy(&buf->lcontext.LeaseKey, lease_key, SMB2_LEASE_KEY_SIZE); buf->lcontext.LeaseState = map_oplock_to_lease(oplock); buf->ccontext.DataOffset = cpu_to_le16(offsetof @@ -2287,8 +2285,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) return SMB2_OPLOCK_LEVEL_NOCHANGE; if (lease_key) - memcpy(lease_key, &lc->lcontext.LeaseKeyLow, - SMB2_LEASE_KEY_SIZE); + memcpy(lease_key, &lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); return le32_to_cpu(lc->lcontext.LeaseState); } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index e31375ecaa6f..3c92678cb45b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1716,12 +1716,12 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, static int add_lease_context(struct TCP_Server_Info *server, struct kvec *iov, - unsigned int *num_iovec, __u8 *oplock) + unsigned int *num_iovec, u8 *lease_key, __u8 *oplock) { struct smb2_create_req *req = iov[0].iov_base; unsigned int num = *num_iovec; - iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock); + iov[num].iov_base = server->ops->create_lease_buf(lease_key, *oplock); if (iov[num].iov_base == NULL) return -ENOMEM; iov[num].iov_len = server->vals->create_lease_size; @@ -2181,7 +2181,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; else { - rc = add_lease_context(server, iov, &n_iov, oplock); + rc = add_lease_context(server, iov, &n_iov, + oparms->fid->lease_key, oplock); if (rc) { cifs_small_buf_release(req); kfree(copy_path); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 824dddeee3f2..a671adcc44a6 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -678,16 +678,14 @@ struct create_context { #define SMB2_LEASE_KEY_SIZE 16 struct lease_context { - __le64 LeaseKeyLow; - __le64 LeaseKeyHigh; + u8 LeaseKey[SMB2_LEASE_KEY_SIZE]; __le32 LeaseState; __le32 LeaseFlags; __le64 LeaseDuration; } __packed; struct lease_context_v2 { - __le64 LeaseKeyLow; - __le64 LeaseKeyHigh; + u8 LeaseKey[SMB2_LEASE_KEY_SIZE]; __le32 LeaseState; __le32 LeaseFlags; __le64 LeaseDuration; From 7ec916f82c48dcfc115eee2e3e0e6d400e310fc5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 Jul 2018 13:29:55 -0600 Subject: [PATCH 283/382] Revert "iommu/intel-iommu: Enable CONFIG_DMA_DIRECT_OPS=y and clean up intel_{alloc,free}_coherent()" This commit may cause a less than required dma mask to be used for some allocations, which apparently leads to module load failures for iwlwifi sometimes. This reverts commit d657c5c73ca987214a6f9436e435b34fc60f332a. Signed-off-by: Christoph Hellwig Reported-by: Fabio Coatti Tested-by: Fabio Coatti --- drivers/iommu/Kconfig | 1 - drivers/iommu/intel-iommu.c | 62 +++++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index e055d228bfb9..689ffe538370 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -142,7 +142,6 @@ config DMAR_TABLE config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC) - select DMA_DIRECT_OPS select IOMMU_API select IOMMU_IOVA select NEED_DMA_MAP_STATE diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 14e4b3722428..b344a883f116 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -3713,30 +3712,61 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { - void *vaddr; + struct page *page = NULL; + int order; - vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs); - if (iommu_no_mapping(dev) || !vaddr) - return vaddr; + size = PAGE_ALIGN(size); + order = get_order(size); - *dma_handle = __intel_map_single(dev, virt_to_phys(vaddr), - PAGE_ALIGN(size), DMA_BIDIRECTIONAL, - dev->coherent_dma_mask); - if (!*dma_handle) - goto out_free_pages; - return vaddr; + if (!iommu_no_mapping(dev)) + flags &= ~(GFP_DMA | GFP_DMA32); + else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { + if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) + flags |= GFP_DMA; + else + flags |= GFP_DMA32; + } + + if (gfpflags_allow_blocking(flags)) { + unsigned int count = size >> PAGE_SHIFT; + + page = dma_alloc_from_contiguous(dev, count, order, flags); + if (page && iommu_no_mapping(dev) && + page_to_phys(page) + size > dev->coherent_dma_mask) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } + + if (!page) + page = alloc_pages(flags, order); + if (!page) + return NULL; + memset(page_address(page), 0, size); + + *dma_handle = __intel_map_single(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, + dev->coherent_dma_mask); + if (*dma_handle) + return page_address(page); + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, order); -out_free_pages: - dma_direct_free(dev, size, vaddr, *dma_handle, attrs); return NULL; } static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { - if (!iommu_no_mapping(dev)) - intel_unmap(dev, dma_handle, PAGE_ALIGN(size)); - dma_direct_free(dev, size, vaddr, dma_handle, attrs); + int order; + struct page *page = virt_to_page(vaddr); + + size = PAGE_ALIGN(size); + order = get_order(size); + + intel_unmap(dev, dma_handle, size); + if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) + __free_pages(page, order); } static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, From 0fa3ecd87848c9c93c2c828ef4c3a8ca36ce46c7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 3 Jul 2018 17:10:19 -0700 Subject: [PATCH 284/382] Fix up non-directory creation in SGID directories sgid directories have special semantics, making newly created files in the directory belong to the group of the directory, and newly created subdirectories will also become sgid. This is historically used for group-shared directories. But group directories writable by non-group members should not imply that such non-group members can magically join the group, so make sure to clear the sgid bit on non-directories for non-members (but remember that sgid without group execute means "mandatory locking", just to confuse things even more). Reported-by: Jann Horn Cc: Andy Lutomirski Cc: Al Viro Signed-off-by: Linus Torvalds --- fs/inode.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/inode.c b/fs/inode.c index 2c300e981796..8c86c809ca17 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1999,8 +1999,14 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_uid = current_fsuid(); if (dir && dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; + + /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(dir, CAP_FSETID)) + mode &= ~S_ISGID; } else inode->i_gid = current_fsgid(); inode->i_mode = mode; From 523402fa9101090c91d2033b7ebdfdcf65880488 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 5 Jul 2018 14:37:52 -0700 Subject: [PATCH 285/382] MIPS: Fix ioremap() RAM check We currently attempt to check whether a physical address range provided to __ioremap() may be in use by the page allocator by examining the value of PageReserved for each page in the region - lowmem pages not marked reserved are presumed to be in use by the page allocator, and requests to ioremap them fail. The way we check this has been broken since commit 92923ca3aace ("mm: meminit: only set page reserved in the memblock region"), because memblock will typically not have any knowledge of non-RAM pages and therefore those pages will not have the PageReserved flag set. Thus when we attempt to ioremap a region outside of RAM we incorrectly fail believing that the region is RAM that may be in use. In most cases ioremap() on MIPS will take a fast-path to use the unmapped kseg1 or xkphys virtual address spaces and never hit this path, so the only way to hit it is for a MIPS32 system to attempt to ioremap() an address range in lowmem with flags other than _CACHE_UNCACHED. Perhaps the most straightforward way to do this is using ioremap_uncached_accelerated(), which is how the problem was discovered. Fix this by making use of walk_system_ram_range() to test the address range provided to __ioremap() against only RAM pages, rather than all lowmem pages. This means that if we have a lowmem I/O region, which is very common for MIPS systems, we're free to ioremap() address ranges within it. A nice bonus is that the test is no longer limited to lowmem. The approach here matches the way x86 performed the same test after commit c81c8a1eeede ("x86, ioremap: Speed up check for RAM pages") until x86 moved towards a slightly more complicated check using walk_mem_res() for unrelated reasons with commit 0e4c12b45aa8 ("x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages"). Signed-off-by: Paul Burton Reported-by: Serge Semin Tested-by: Serge Semin Fixes: 92923ca3aace ("mm: meminit: only set page reserved in the memblock region") Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org # v4.2+ Patchwork: https://patchwork.linux-mips.org/patch/19786/ --- arch/mips/mm/ioremap.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1986e09fb457..1601d90b087b 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,20 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, return error; } +static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, + void *arg) +{ + unsigned long i; + + for (i = 0; i < nr_pages; i++) { + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return 1; + } + + return 0; +} + /* * Generic mapping function (not visible outside): */ @@ -116,8 +131,8 @@ static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags) { + unsigned long offset, pfn, last_pfn; struct vm_struct * area; - unsigned long offset; phys_addr_t last_addr; void * addr; @@ -137,18 +152,16 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long return (void __iomem *) CKSEG1ADDR(phys_addr); /* - * Don't allow anybody to remap normal RAM that we're using.. + * Don't allow anybody to remap RAM that may be allocated by the page + * allocator, since that could lead to races & data clobbering. */ - if (phys_addr < virt_to_phys(high_memory)) { - char *t_addr, *t_end; - struct page *page; - - t_addr = __va(phys_addr); - t_end = t_addr + (size - 1); - - for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++) - if(!PageReserved(page)) - return NULL; + pfn = PFN_DOWN(phys_addr); + last_pfn = PFN_DOWN(last_addr); + if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, + __ioremap_check_ram) == 1) { + WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", + &phys_addr, &last_addr); + return NULL; } /* From 33cc2c9667561b224215e6dfb5bf98e8fa17914e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 5 Jul 2018 14:58:49 -0700 Subject: [PATCH 286/382] acpi, nfit: Fix scrub idle detection The notification of scrub completion happens within the scrub workqueue. That can clearly race someone running scrub_show() and work_busy() before the workqueue has a chance to flush the recently completed work. Add a flag to reliably indicate the idle vs busy state. Without this change applications using poll(2) to wait for scrub-completion may falsely wakeup and read ARS as being busy even though the thread is going idle and then hang indefinitely. Fixes: bc6ba8085842 ("nfit, address-range-scrub: rework and simplify ARS...") Cc: Reported-by: Vishal Verma Tested-by: Vishal Verma Reported-by: Lukasz Dorau Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 44 ++++++++++++++++++++++++++++++---------- drivers/acpi/nfit/nfit.h | 1 + 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 471402cee1f1..b8040fed2a69 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1275,7 +1275,7 @@ static ssize_t scrub_show(struct device *dev, mutex_lock(&acpi_desc->init_mutex); rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, - work_busy(&acpi_desc->dwork.work) + acpi_desc->scrub_busy && !acpi_desc->cancel ? "+\n" : "\n"); mutex_unlock(&acpi_desc->init_mutex); } @@ -2941,6 +2941,32 @@ static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc, return 0; } +static void __sched_ars(struct acpi_nfit_desc *acpi_desc, unsigned int tmo) +{ + lockdep_assert_held(&acpi_desc->init_mutex); + + acpi_desc->scrub_busy = 1; + /* note this should only be set from within the workqueue */ + if (tmo) + acpi_desc->scrub_tmo = tmo; + queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ); +} + +static void sched_ars(struct acpi_nfit_desc *acpi_desc) +{ + __sched_ars(acpi_desc, 0); +} + +static void notify_ars_done(struct acpi_nfit_desc *acpi_desc) +{ + lockdep_assert_held(&acpi_desc->init_mutex); + + acpi_desc->scrub_busy = 0; + acpi_desc->scrub_count++; + if (acpi_desc->scrub_count_state) + sysfs_notify_dirent(acpi_desc->scrub_count_state); +} + static void acpi_nfit_scrub(struct work_struct *work) { struct acpi_nfit_desc *acpi_desc; @@ -2951,14 +2977,10 @@ static void acpi_nfit_scrub(struct work_struct *work) mutex_lock(&acpi_desc->init_mutex); query_rc = acpi_nfit_query_poison(acpi_desc); tmo = __acpi_nfit_scrub(acpi_desc, query_rc); - if (tmo) { - queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ); - acpi_desc->scrub_tmo = tmo; - } else { - acpi_desc->scrub_count++; - if (acpi_desc->scrub_count_state) - sysfs_notify_dirent(acpi_desc->scrub_count_state); - } + if (tmo) + __sched_ars(acpi_desc, tmo); + else + notify_ars_done(acpi_desc); memset(acpi_desc->ars_status, 0, acpi_desc->max_ars); mutex_unlock(&acpi_desc->init_mutex); } @@ -3039,7 +3061,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) break; } - queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0); + sched_ars(acpi_desc); return 0; } @@ -3241,7 +3263,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags) } } if (scheduled) { - queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0); + sched_ars(acpi_desc); dev_dbg(dev, "ars_scan triggered\n"); } mutex_unlock(&acpi_desc->init_mutex); diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 7d15856a739f..a97ff42fe311 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -203,6 +203,7 @@ struct acpi_nfit_desc { unsigned int max_ars; unsigned int scrub_count; unsigned int scrub_mode; + unsigned int scrub_busy:1; unsigned int cancel:1; unsigned long dimm_cmd_force_en; unsigned long bus_cmd_force_en; From 01b3cdfca263a17554f7b249d20a247b2a751521 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jul 2018 17:02:16 +0200 Subject: [PATCH 287/382] USB: serial: keyspan_pda: fix modem-status error handling Fix broken modem-status error handling which could lead to bits of slab data leaking to user space. Fixes: 3b36a8fd6777 ("usb: fix uninitialized variable warning in keyspan_pda") Cc: stable # 2.6.27 Signed-off-by: Johan Hovold --- drivers/usb/serial/keyspan_pda.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index 5169624d8b11..38d43c4b7ce5 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -369,8 +369,10 @@ static int keyspan_pda_get_modem_info(struct usb_serial *serial, 3, /* get pins */ USB_TYPE_VENDOR|USB_RECIP_INTERFACE|USB_DIR_IN, 0, 0, data, 1, 2000); - if (rc >= 0) + if (rc == 1) *value = *data; + else if (rc >= 0) + rc = -EIO; kfree(data); return rc; From 794744abfffef8b1f3c0c8a4896177d6d13d653d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 4 Jul 2018 17:02:17 +0200 Subject: [PATCH 288/382] USB: serial: mos7840: fix status-register error handling Add missing transfer-length sanity check to the status-register completion handler to avoid leaking bits of uninitialised slab data to user space. Fixes: 3f5429746d91 ("USB: Moschip 7840 USB-Serial Driver") Cc: stable # 2.6.19 Signed-off-by: Johan Hovold --- drivers/usb/serial/mos7840.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index fdceb46d9fc6..b580b4c7fa48 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -468,6 +468,9 @@ static void mos7840_control_callback(struct urb *urb) } dev_dbg(dev, "%s urb buffer size is %d\n", __func__, urb->actual_length); + if (urb->actual_length < 1) + goto out; + dev_dbg(dev, "%s mos7840_port->MsrLsr is %d port %d\n", __func__, mos7840_port->MsrLsr, mos7840_port->port_num); data = urb->transfer_buffer; From 1268ed0c474a5c8f165ef386f3310521b5e00e27 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Tue, 3 Jul 2018 16:01:55 -0700 Subject: [PATCH 289/382] x86/hyper-v: Fix the circular dependency in IPI enlightenment The IPI hypercalls depend on being able to map the Linux notion of CPU ID to the hypervisor's notion of the CPU ID. The array hv_vp_index[] provides this mapping. Code for populating this array depends on the IPI functionality. Break this circular dependency. [ tglx: Use a proper define instead of '-1' with a u32 variable as pointed out by Vitaly ] Fixes: 68bb7bfb7985 ("X86/Hyper-V: Enable IPI enlightenments") Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Cc: gregkh@linuxfoundation.org Cc: devel@linuxdriverproject.org Cc: olaf@aepfle.de Cc: apw@canonical.com Cc: jasowang@redhat.com Cc: hpa@zytor.com Cc: sthemmin@microsoft.com Cc: Michael.H.Kelley@microsoft.com Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180703230155.15160-1-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_apic.c | 5 +++++ arch/x86/hyperv/hv_init.c | 5 ++++- arch/x86/include/asm/mshyperv.h | 5 ++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index f68855499391..402338365651 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -114,6 +114,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); } + if (nr_bank < 0) + goto ipi_mask_ex_done; if (!nr_bank) ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; @@ -158,6 +160,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) for_each_cpu(cur_cpu, mask) { vcpu = hv_cpu_number_to_vp_number(cur_cpu); + if (vcpu == VP_INVAL) + goto ipi_mask_done; + /* * This particular version of the IPI hypercall can * only target upto 64 CPUs. diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 4c431e1c1eff..1ff420217298 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -265,7 +265,7 @@ void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; - int cpuhp; + int cpuhp, i; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -293,6 +293,9 @@ void __init hyperv_init(void) if (!hv_vp_index) return; + for (i = 0; i < num_possible_cpus(); i++) + hv_vp_index[i] = VP_INVAL; + hv_vp_assist_page = kcalloc(num_possible_cpus(), sizeof(*hv_vp_assist_page), GFP_KERNEL); if (!hv_vp_assist_page) { diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 3cd14311edfa..5a7375ed5f7c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -9,6 +9,8 @@ #include #include +#define VP_INVAL U32_MAX + struct ms_hyperv_info { u32 features; u32 misc_features; @@ -20,7 +22,6 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; - /* * Generate the guest ID. */ @@ -281,6 +282,8 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, */ for_each_cpu(cpu, cpus) { vcpu = hv_cpu_number_to_vp_number(cpu); + if (vcpu == VP_INVAL) + return -1; vcpu_bank = vcpu / 64; vcpu_offset = vcpu % 64; __set_bit(vcpu_offset, (unsigned long *) From c6b17f1020d956f4113d478cae6171b9093817ba Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 6 Jul 2018 15:14:11 +0800 Subject: [PATCH 290/382] ALSA: hda/realtek - two more lenovo models need fixup of MIC_LOCATION We have two new lenovo desktop models which need to apply the fixup of ALC294_FIXUP_LENOVO_MIC_LOCATION, and they have the same pin cfg as the machine with subsystem id:0x17aa3136, now use the pincfg table to apply the fixup for them. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5ad6c7e5f92e..7496be4491b1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6612,7 +6612,6 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), - SND_PCI_QUIRK(0x17aa, 0x3136, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), @@ -6796,6 +6795,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x1a, 0x02a11040}, {0x1b, 0x01014020}, {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION, + {0x14, 0x90170110}, + {0x19, 0x02a11020}, + {0x1a, 0x02a11030}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60140}, {0x14, 0x90170110}, From 09b1565324cba029dd0079b179e329813a1520c6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jul 2018 12:57:54 -0700 Subject: [PATCH 291/382] kbuild: update ARCH alias info for sh In Kbuild documentation, add alias for 64-bit sh ARCH ("sh64") to the list of ARCH aliases. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kbuild.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index 6c9c69ec3986..ec5890b51bd6 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -88,6 +88,7 @@ In most cases the name of the architecture is the same as the directory name found in the arch/ directory. But some architectures such as x86 and sparc have aliases. x86: i386 for 32 bit, x86_64 for 64 bit +sh: sh for 32 bit, sh64 for 64 bit sparc: sparc for 32 bit, sparc64 for 64 bit CROSS_COMPILE From 5ba800962a80d4158b73fb91a7779df7b770c750 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 5 Jul 2018 12:12:03 -0700 Subject: [PATCH 292/382] kbuild: update ARCH alias info for sparc The supported alias for building sparc 32-bit is "sparc32", not "sparc", so update the alias documentation for that. Just using "sparc" produces a 64-bit config file. Signed-off-by: Randy Dunlap Acked-by: David S. Miller Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kbuild.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index ec5890b51bd6..d605d506b334 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -89,7 +89,7 @@ directory name found in the arch/ directory. But some architectures such as x86 and sparc have aliases. x86: i386 for 32 bit, x86_64 for 64 bit sh: sh for 32 bit, sh64 for 64 bit -sparc: sparc for 32 bit, sparc64 for 64 bit +sparc: sparc32 for 32 bit, sparc64 for 64 bit CROSS_COMPILE -------------------------------------------------- From 3f9cdee5929b7d035e86302dcf08fbf3e80b0739 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jul 2018 12:59:16 -0700 Subject: [PATCH 293/382] kbuild: delete INSTALL_FW_PATH from kbuild documentation Removed Kbuild documentation for INSTALL_FW_PATH. The kbuild symbol INSTALL_FW_PATH was removed from Kbuild tools in September 2017 (for 4.14) but the symbol was not deleted from the kbuild documentation, so do that now. Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware") Signed-off-by: Randy Dunlap Cc: stable@vger.kernel.org # 4.14+ Cc: Greg Kroah-Hartman Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kbuild.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index d605d506b334..1eb59cba242c 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -149,15 +149,6 @@ stripped after they are installed. If INSTALL_MOD_STRIP is '1', then the default option --strip-debug will be used. Otherwise, INSTALL_MOD_STRIP value will be used as the options to the strip command. -INSTALL_FW_PATH --------------------------------------------------- -INSTALL_FW_PATH specifies where to install the firmware blobs. -The default value is: - - $(INSTALL_MOD_PATH)/lib/firmware - -The value can be overridden in which case the default value is ignored. - INSTALL_HDR_PATH -------------------------------------------------- INSTALL_HDR_PATH specifies where to install user space headers when From 452d4c8673112ee72de6042d5d95c761acc2d35a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jul 2018 15:49:56 -0700 Subject: [PATCH 294/382] kconfig: update user kconfig tools doc. Update Documentation/kbuild/kconfig.txt, which mostly contains user help for using the kernel config tools. - Add mention of 'nconfig' embedded help text. - Make the section on new config symbols readable. - Correct how to find menuconfig search help. - Add section on 'nconfig' usage. - Mention that gconfig has multiple viewing modes/options. Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig.txt | 51 +++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt index 7233118f3a05..68c82914c0f3 100644 --- a/Documentation/kbuild/kconfig.txt +++ b/Documentation/kbuild/kconfig.txt @@ -2,9 +2,9 @@ This file contains some assistance for using "make *config". Use "make help" to list all of the possible configuration targets. -The xconfig ('qconf') and menuconfig ('mconf') programs also -have embedded help text. Be sure to check it for navigation, -search, and other general help text. +The xconfig ('qconf'), menuconfig ('mconf'), and nconfig ('nconf') +programs also have embedded help text. Be sure to check that for +navigation, search, and other general help text. ====================================================================== General @@ -17,13 +17,16 @@ this happens, using a previously working .config file and running for you, so you may find that you need to see what NEW kernel symbols have been introduced. -To see a list of new config symbols when using "make oldconfig", use +To see a list of new config symbols, use cp user/some/old.config .config make listnewconfig and the config program will list any new symbols, one per line. +Alternatively, you can use the brute force method: + + make oldconfig scripts/diffconfig .config.old .config | less ______________________________________________________________________ @@ -160,7 +163,7 @@ Searching in menuconfig: This lists all config symbols that contain "hotplug", e.g., HOTPLUG_CPU, MEMORY_HOTPLUG. - For search help, enter / followed TAB-TAB-TAB (to highlight + For search help, enter / followed by TAB-TAB (to highlight ) and Enter. This will tell you that you can also use regular expressions (regexes) in the search string, so if you are not interested in MEMORY_HOTPLUG, you could try @@ -202,6 +205,39 @@ Example: make MENUCONFIG_MODE=single_menu menuconfig +====================================================================== +nconfig +-------------------------------------------------- + +nconfig is an alternate text-based configurator. It lists function +keys across the bottom of the terminal (window) that execute commands. +You can also just use the corresponding numeric key to execute the +commands unless you are in a data entry window. E.g., instead of F6 +for Save, you can just press 6. + +Use F1 for Global help or F3 for the Short help menu. + +Searching in nconfig: + + You can search either in the menu entry "prompt" strings + or in the configuration symbols. + + Use / to begin a search through the menu entries. This does + not support regular expressions. Use or for + Next hit and Previous hit, respectively. Use to + terminate the search mode. + + F8 (SymSearch) searches the configuration symbols for the + given string or regular expression (regex). + +NCONFIG_MODE +-------------------------------------------------- +This mode shows all sub-menus in one large tree. + +Example: + make NCONFIG_MODE=single_menu nconfig + + ====================================================================== xconfig -------------------------------------------------- @@ -230,8 +266,7 @@ gconfig Searching in gconfig: - None (gconfig isn't maintained as well as xconfig or menuconfig); - however, gconfig does have a few more viewing choices than - xconfig does. + There is no search command in gconfig. However, gconfig does + have several different viewing choices, modes, and options. ### From 00e0793f834cd233240b19532581e3205caaccd9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Jul 2018 19:47:39 -0700 Subject: [PATCH 295/382] kbuild: document the KBUILD_KCONFIG env. variable Add usage info for the Kbuild environment variable KBUILD_KCONFIG. Signed-off-by: Randy Dunlap Reviewed-by: Cao jin Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kbuild.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index 1eb59cba242c..114c7ce7b58d 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -50,6 +50,11 @@ LDFLAGS_MODULE -------------------------------------------------- Additional options used for $(LD) when linking modules. +KBUILD_KCONFIG +-------------------------------------------------- +Set the top-level Kconfig file to the value of this environment +variable. The default name is "Kconfig". + KBUILD_VERBOSE -------------------------------------------------- Set the kbuild verbosity. Can be assigned same values as "V=...". From 48f6e3cf5bc6dd0ee00405342ff310c3b1fedb35 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 5 Jul 2018 11:48:21 +0900 Subject: [PATCH 296/382] kbuild: do not drop -I without parameter The comment line for addtree says "skip if -I has no parameter". What it actually does is "drop if -I has no parameter". For example, if you have the compiler flag '-I foo' (a space between), it will be converted to 'foo'. This completely changes the meaning. What we want is, "do nothing" for -I without parameter so that '-I foo' is kept as-is. Signed-off-by: Masahiro Yamada --- scripts/Kbuild.include | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index c8156d61678c..86321f06461e 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -214,7 +214,7 @@ hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj # Prefix -I with $(srctree) if it is not an absolute path. # skip if -I has no parameter addtree = $(if $(patsubst -I%,%,$(1)), \ -$(if $(filter-out -I/% -I./% -I../%,$(1)),$(patsubst -I%,-I$(srctree)/%,$(1)),$(1))) +$(if $(filter-out -I/% -I./% -I../%,$(1)),$(patsubst -I%,-I$(srctree)/%,$(1)),$(1)),$(1)) # Find all -I options and call addtree flags = $(foreach o,$($(1)),$(if $(filter -I%,$(o)),$(call addtree,$(o)),$(o))) From bd412d81b7ea4cfa265e3d2309a166181ec7bdab Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Thu, 5 Jul 2018 12:33:07 +0900 Subject: [PATCH 297/382] kbuild: .PHONY is not a variable, but PHONY is .PHONY is a target, not a variable. Signed-off-by: Ulf Magnusson Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5ce55cbc543..5b26847909ec 100644 --- a/Makefile +++ b/Makefile @@ -1717,6 +1717,6 @@ endif # skip-makefile PHONY += FORCE FORCE: -# Declare the contents of the .PHONY variable as phony. We keep that +# Declare the contents of the PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) From 6916162c7308332d5a029521821f925466de311d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 5 Jul 2018 12:33:08 +0900 Subject: [PATCH 298/382] kbuild: remove duplicated comments about PHONY The comment is the same as in the top-level Makefile. Also, the comments contain typos: - the .PHONY variable -> the PHONY variable - se we can ... -> so we can ... Instead of fixing the typos, just remove the duplicated comments. Signed-off-by: Masahiro Yamada --- scripts/Makefile.build | 3 --- scripts/Makefile.clean | 3 --- scripts/Makefile.modbuiltin | 4 ---- scripts/Makefile.modinst | 4 ---- scripts/Makefile.modpost | 4 ---- scripts/Makefile.modsign | 3 --- 6 files changed, 21 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index e7889f486ca1..514ed63ff571 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -590,7 +590,4 @@ endif # We never want them to be removed automatically. .SECONDARY: $(targets) -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. - .PHONY: $(PHONY) diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index 808d09f27ad4..17ef94c635cd 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -88,7 +88,4 @@ PHONY += $(subdir-ymn) $(subdir-ymn): $(Q)$(MAKE) $(clean)=$@ -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. - .PHONY: $(PHONY) diff --git a/scripts/Makefile.modbuiltin b/scripts/Makefile.modbuiltin index a763b4775d06..40867a41615b 100644 --- a/scripts/Makefile.modbuiltin +++ b/scripts/Makefile.modbuiltin @@ -54,8 +54,4 @@ PHONY += $(subdir-ym) $(subdir-ym): $(Q)$(MAKE) $(modbuiltin)=$@ - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. - .PHONY: $(PHONY) diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 51ca0244fc8a..ff5ca9817a85 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -35,8 +35,4 @@ modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) $(modules): $(call cmd,modules_install,$(MODLIB)/$(modinst_dir)) - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable so we can use it in if_changed and friends. - .PHONY: $(PHONY) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index df4174405feb..dd92dbbbaa68 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -149,8 +149,4 @@ ifneq ($(cmd_files),) include $(cmd_files) endif - -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. - .PHONY: $(PHONY) diff --git a/scripts/Makefile.modsign b/scripts/Makefile.modsign index 171483bc0538..da56aa78d245 100644 --- a/scripts/Makefile.modsign +++ b/scripts/Makefile.modsign @@ -27,7 +27,4 @@ modinst_dir = $(if $(KBUILD_EXTMOD),$(ext-mod-dir),kernel/$(@D)) $(modules): $(call cmd,sign_ko,$(MODLIB)/$(modinst_dir)) -# Declare the contents of the .PHONY variable as phony. We keep that -# information in a variable se we can use it in if_changed and friends. - .PHONY: $(PHONY) From c0addc9a5ba88350a3d9365a73290e34dc238b70 Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 5 Jul 2018 16:31:42 +0300 Subject: [PATCH 299/382] docs: kernel-parameters.txt: document xhci-hcd.quirks parameter This parameter introduced several years ago in the XHCI host controller driver was somehow left undocumented. Add a few lines in the kernel parameters text. Signed-off-by: Laurentiu Tudor Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index efc7aa7a0670..533ff5c68970 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4846,3 +4846,8 @@ xirc2ps_cs= [NET,PCMCIA] Format: ,,,,,[,[,[,]]] + + xhci-hcd.quirks [USB,KNL] + A hex value specifying bitmask with supplemental xhci + host controller quirks. Meaning of each bit can be + consulted in header drivers/usb/host/xhci.h. From aaa8fee7dfc974d7f057bc9f1d8e305a114469ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 6 Jul 2018 15:58:10 +0200 Subject: [PATCH 300/382] usb/gadget: aspeed-vhub: add USB_LIBCOMPOSITE dependency Without that option, we run into a link failure: drivers/usb/gadget/udc/aspeed-vhub/hub.o: In function `ast_vhub_std_hub_request': hub.c:(.text+0x5b0): undefined reference to `usb_gadget_get_string' Fixes: 7ecca2a4080c ("usb/gadget: Add driver for Aspeed SoC virtual hub") Acked-by: Felipe Balbi Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/aspeed-vhub/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/aspeed-vhub/Kconfig b/drivers/usb/gadget/udc/aspeed-vhub/Kconfig index f0cdf89b8503..83ba8a2eb6af 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/Kconfig +++ b/drivers/usb/gadget/udc/aspeed-vhub/Kconfig @@ -2,6 +2,7 @@ config USB_ASPEED_VHUB tristate "Aspeed vHub UDC driver" depends on ARCH_ASPEED || COMPILE_TEST + depends on USB_LIBCOMPOSITE help USB peripheral controller for the Aspeed AST2500 family SoCs supporting the "vHub" functionality and USB2.0 From 313db3d6488bb03b61b99de9dbca061f1fd838e1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Jul 2018 12:48:53 +0300 Subject: [PATCH 301/382] xhci: xhci-mem: off by one in xhci_stream_id_to_ring() The > should be >= here so that we don't read one element beyond the end of the ep->stream_info->stream_rings[] array. Fixes: e9df17eb1408 ("USB: xhci: Correct assumptions about number of rings per endpoint.") Signed-off-by: Dan Carpenter Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 8a62eee9eee1..ef350c33dc4a 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -595,7 +595,7 @@ struct xhci_ring *xhci_stream_id_to_ring( if (!ep->stream_info) return NULL; - if (stream_id > ep->stream_info->num_streams) + if (stream_id >= ep->stream_info->num_streams) return NULL; return ep->stream_info->stream_rings[stream_id]; } From bba57eddadda936c94b5dccf73787cb9e159d0a5 Mon Sep 17 00:00:00 2001 From: Nico Sneck Date: Mon, 2 Jul 2018 19:26:07 +0300 Subject: [PATCH 302/382] usb: quirks: add delay quirks for Corsair Strafe Corsair Strafe appears to suffer from the same issues as the Corsair Strafe RGB. Apply the same quirks (control message delay and init delay) that the RGB version has to 1b1c:1b15. With these quirks in place the keyboard works correctly upon booting the system, and no longer requires reattaching the device. Signed-off-by: Nico Sneck Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index c55def2f1320..097057d2eacf 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -378,6 +378,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Corsair K70 RGB */ { USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Corsair Strafe */ + { USB_DEVICE(0x1b1c, 0x1b15), .driver_info = USB_QUIRK_DELAY_INIT | + USB_QUIRK_DELAY_CTRL_MSG }, + /* Corsair Strafe RGB */ { USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT | USB_QUIRK_DELAY_CTRL_MSG }, From 568cc2f07c8ea5f71a0486464bd9703e4671045f Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 3 Jul 2018 06:24:20 +0300 Subject: [PATCH 303/382] ARM: dts: armada-38x: use the new thermal binding Commit 2f28e4c24b10e (thermal: armada: Clarify control registers accesses) introduced the new thermal binding. The new binding extends the second registers field size to 8. Switch to the new binding to fix thermal reading values. Without this change the fix for errata #132698 introduced in commit 8c0b888f661 (thermal: armada: Change sensors trim default value) has no effect. Cc: stable@vger.kernel.org # v4.16+ Reviewed-by: Miquel Raynal Signed-off-by: Baruch Siach Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-38x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 18edc9bc7927..929459c42760 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -547,7 +547,7 @@ coredivclk: clock@e4250 { thermal: thermal@e8078 { compatible = "marvell,armada380-thermal"; - reg = <0xe4078 0x4>, <0xe4074 0x4>; + reg = <0xe4078 0x4>, <0xe4070 0x8>; status = "okay"; }; From f1e255d60ae66a9f672ff9a207ee6cd8e33d2679 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 6 Jul 2018 17:12:56 +0200 Subject: [PATCH 304/382] USB: yurex: fix out-of-bounds uaccess in read handler In general, accessing userspace memory beyond the length of the supplied buffer in VFS read/write handlers can lead to both kernel memory corruption (via kernel_read()/kernel_write(), which can e.g. be triggered via sys_splice()) and privilege escalation inside userspace. Fix it by using simple_read_from_buffer() instead of custom logic. Fixes: 6bc235a2e24a ("USB: add driver for Meywa-Denki & Kayac YUREX") Signed-off-by: Jann Horn Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/yurex.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c index 8abb6cbbd98a..3be40eaa1ac9 100644 --- a/drivers/usb/misc/yurex.c +++ b/drivers/usb/misc/yurex.c @@ -396,8 +396,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct usb_yurex *dev; - int retval = 0; - int bytes_read = 0; + int len = 0; char in_buffer[20]; unsigned long flags; @@ -405,26 +404,16 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count, mutex_lock(&dev->io_mutex); if (!dev->interface) { /* already disconnected */ - retval = -ENODEV; - goto exit; + mutex_unlock(&dev->io_mutex); + return -ENODEV; } spin_lock_irqsave(&dev->lock, flags); - bytes_read = snprintf(in_buffer, 20, "%lld\n", dev->bbu); + len = snprintf(in_buffer, 20, "%lld\n", dev->bbu); spin_unlock_irqrestore(&dev->lock, flags); - - if (*ppos < bytes_read) { - if (copy_to_user(buffer, in_buffer + *ppos, bytes_read - *ppos)) - retval = -EFAULT; - else { - retval = bytes_read - *ppos; - *ppos += bytes_read; - } - } - -exit: mutex_unlock(&dev->io_mutex); - return retval; + + return simple_read_from_buffer(buffer, count, ppos, in_buffer, len); } static ssize_t yurex_write(struct file *file, const char __user *user_buffer, From d59d2f9995d28974877750f429e821324bd603c7 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 6 Jul 2018 13:44:35 +0800 Subject: [PATCH 305/382] staging: r8822be: Fix RTL8822be can't find any wireless AP RTL8822be can't bring up properly on ASUS X530UN, and dmesg says: [ 8.591333] r8822be: module is from the staging directory, the quality is unknown, you have been warned. [ 8.593122] r8822be 0000:02:00.0: enabling device (0000 -> 0003) [ 8.669163] r8822be: Using firmware rtlwifi/rtl8822befw.bin [ 9.289939] r8822be: rtlwifi: wireless switch is on [ 10.056426] r8822be 0000:02:00.0 wlp2s0: renamed from wlan0 ... [ 11.952534] r8822be: halmac_init_hal failed [ 11.955933] r8822be: halmac_init_hal failed [ 11.956227] r8822be: halmac_init_hal failed [ 22.007942] r8822be: halmac_init_hal failed Jian-Hong reported it works if turn off ASPM with module parameter aspm=0. In order to fix this problem kindly, this commit don't turn off aspm but enlarge ASPM L1 latency to 7. Reported-by: Jian-Hong Pan Tested-by: Jian-Hong Pan Signed-off-by: Ping-Ke Shih Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtlwifi/rtl8822be/hw.c | 2 +- drivers/staging/rtlwifi/wifi.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rtlwifi/rtl8822be/hw.c b/drivers/staging/rtlwifi/rtl8822be/hw.c index 7947edb239a1..88ba5b2fea6a 100644 --- a/drivers/staging/rtlwifi/rtl8822be/hw.c +++ b/drivers/staging/rtlwifi/rtl8822be/hw.c @@ -803,7 +803,7 @@ static void _rtl8822be_enable_aspm_back_door(struct ieee80211_hw *hw) return; pci_read_config_byte(rtlpci->pdev, 0x70f, &tmp); - pci_write_config_byte(rtlpci->pdev, 0x70f, tmp | BIT(7)); + pci_write_config_byte(rtlpci->pdev, 0x70f, tmp | ASPM_L1_LATENCY << 3); pci_read_config_byte(rtlpci->pdev, 0x719, &tmp); pci_write_config_byte(rtlpci->pdev, 0x719, tmp | BIT(3) | BIT(4)); diff --git a/drivers/staging/rtlwifi/wifi.h b/drivers/staging/rtlwifi/wifi.h index 012fb618840b..a45f0eb69d3f 100644 --- a/drivers/staging/rtlwifi/wifi.h +++ b/drivers/staging/rtlwifi/wifi.h @@ -88,6 +88,7 @@ #define RTL_USB_MAX_RX_COUNT 100 #define QBSS_LOAD_SIZE 5 #define MAX_WMMELE_LENGTH 64 +#define ASPM_L1_LATENCY 7 #define TOTAL_CAM_ENTRY 32 From a0341fc1981a950c1e902ab901e98f60e0e243f3 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 7 Jul 2018 04:16:33 +0200 Subject: [PATCH 306/382] ibmasm: don't write out of bounds in read handler This read handler had a lot of custom logic and wrote outside the bounds of the provided buffer. This could lead to kernel and userspace memory corruption. Just use simple_read_from_buffer() with a stack buffer. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Jann Horn Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ibmasm/ibmasmfs.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index e05c3245930a..fa840666bdd1 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -507,35 +507,14 @@ static int remote_settings_file_close(struct inode *inode, struct file *file) static ssize_t remote_settings_file_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { void __iomem *address = (void __iomem *)file->private_data; - unsigned char *page; - int retval; int len = 0; unsigned int value; - - if (*offset < 0) - return -EINVAL; - if (count == 0 || count > 1024) - return 0; - if (*offset != 0) - return 0; - - page = (unsigned char *)__get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; + char lbuf[20]; value = readl(address); - len = sprintf(page, "%d\n", value); + len = snprintf(lbuf, sizeof(lbuf), "%d\n", value); - if (copy_to_user(buf, page, len)) { - retval = -EFAULT; - goto exit; - } - *offset += len; - retval = len; - -exit: - free_page((unsigned long)page); - return retval; + return simple_read_from_buffer(buf, count, offset, lbuf, len); } static ssize_t remote_settings_file_write(struct file *file, const char __user *ubuff, size_t count, loff_t *offset) From 0c1049dcb4ceec640d8bd797335bcbebdcab44d2 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 6 Jul 2018 22:15:00 +0200 Subject: [PATCH 307/382] ARM: pxa: irq: fix handling of ICMR registers in suspend/resume PXA3xx platforms have 56 interrupts that are stored in two ICMR registers. The code in pxa_irq_suspend() and pxa_irq_resume() however does a simple division by 32 which only leads to one register being saved at suspend and restored at resume time. The NAND interrupt setting, for instance, is lost. Fix this by using DIV_ROUND_UP() instead. Signed-off-by: Daniel Mack Signed-off-by: Robert Jarzmik --- arch/arm/mach-pxa/irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c index 9c10248fadcc..4e8c2116808e 100644 --- a/arch/arm/mach-pxa/irq.c +++ b/arch/arm/mach-pxa/irq.c @@ -185,7 +185,7 @@ static int pxa_irq_suspend(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); saved_icmr[i] = __raw_readl(base + ICMR); @@ -204,7 +204,7 @@ static void pxa_irq_resume(void) { int i; - for (i = 0; i < pxa_internal_irq_nr / 32; i++) { + for (i = 0; i < DIV_ROUND_UP(pxa_internal_irq_nr, 32); i++) { void __iomem *base = irq_base(i); __raw_writel(saved_icmr[i], base + ICMR); From 122c5770cff2c1df1a2384b68285be2812cd72c1 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Wed, 27 Jun 2018 20:56:18 -0500 Subject: [PATCH 308/382] fpga: altera-cvp: Fix an error handling path in 'altera_cvp_probe()' If 'fpga_mgr_create()' fails, we should release some resources, as done in the other error handling path of the function. Fixes: 7085e2a94f7d ("fpga: manager: change api, don't use drvdata") Signed-off-by: Christophe JAILLET Reviewed-by: Moritz Fischer Acked-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/altera-cvp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c index dd4edd8f22ce..7fa793672a7a 100644 --- a/drivers/fpga/altera-cvp.c +++ b/drivers/fpga/altera-cvp.c @@ -455,8 +455,10 @@ static int altera_cvp_probe(struct pci_dev *pdev, mgr = fpga_mgr_create(&pdev->dev, conf->mgr_name, &altera_cvp_ops, conf); - if (!mgr) - return -ENOMEM; + if (!mgr) { + ret = -ENOMEM; + goto err_unmap; + } pci_set_drvdata(pdev, mgr); From 9421e45f5ff3d558cf8b75a8cc0824530caf3453 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 6 Jul 2018 22:05:37 -0400 Subject: [PATCH 309/382] uio: use request_threaded_irq instead Prepraing for changing to use mutex lock. Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index e8f4ac9400ea..b4b2ae1e0473 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -902,8 +902,9 @@ int __uio_register_device(struct module *owner, * FDs at the time of unregister and therefore may not be * freed until they are released. */ - ret = request_irq(info->irq, uio_interrupt, - info->irq_flags, info->name, idev); + ret = request_threaded_irq(info->irq, NULL, uio_interrupt, + info->irq_flags, info->name, idev); + if (ret) goto err_request_irq; } From 543af5861f41af0a5d2432f6fb5976af50f9cee5 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 6 Jul 2018 22:05:38 -0400 Subject: [PATCH 310/382] uio: change to use the mutex lock instead of the spin lock We are hitting a regression with the following commit: commit a93e7b331568227500186a465fee3c2cb5dffd1f Author: Hamish Martin Date: Mon May 14 13:32:23 2018 +1200 uio: Prevent device destruction while fds are open The problem is the addition of spin_lock_irqsave in uio_write. This leads to hitting uio_write -> copy_from_user -> _copy_from_user -> might_fault and the logs filling up with sleeping warnings. I also noticed some uio drivers allocate memory, sleep, grab mutexes from callouts like open() and release and uio is now doing spin_lock_irqsave while calling them. Reported-by: Mike Christie CC: Hamish Martin Reviewed-by: Hamish Martin Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 32 +++++++++++++------------------- include/linux/uio_driver.h | 2 +- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index b4b2ae1e0473..655ade4fb3b1 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -433,7 +433,6 @@ static int uio_open(struct inode *inode, struct file *filep) struct uio_device *idev; struct uio_listener *listener; int ret = 0; - unsigned long flags; mutex_lock(&minor_lock); idev = idr_find(&uio_idr, iminor(inode)); @@ -460,10 +459,10 @@ static int uio_open(struct inode *inode, struct file *filep) listener->event_count = atomic_read(&idev->event); filep->private_data = listener; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (idev->info && idev->info->open) ret = idev->info->open(idev->info, inode); - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); if (ret) goto err_infoopen; @@ -495,12 +494,11 @@ static int uio_release(struct inode *inode, struct file *filep) int ret = 0; struct uio_listener *listener = filep->private_data; struct uio_device *idev = listener->dev; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (idev->info && idev->info->release) ret = idev->info->release(idev->info, inode); - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); module_put(idev->owner); kfree(listener); @@ -513,12 +511,11 @@ static __poll_t uio_poll(struct file *filep, poll_table *wait) struct uio_listener *listener = filep->private_data; struct uio_device *idev = listener->dev; __poll_t ret = 0; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (!idev->info || !idev->info->irq) ret = -EIO; - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); if (ret) return ret; @@ -537,12 +534,11 @@ static ssize_t uio_read(struct file *filep, char __user *buf, DECLARE_WAITQUEUE(wait, current); ssize_t retval = 0; s32 event_count; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (!idev->info || !idev->info->irq) retval = -EIO; - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); if (retval) return retval; @@ -592,9 +588,8 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, struct uio_device *idev = listener->dev; ssize_t retval; s32 irq_on; - unsigned long flags; - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); if (!idev->info || !idev->info->irq) { retval = -EIO; goto out; @@ -618,7 +613,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, retval = idev->info->irqcontrol(idev->info, irq_on); out: - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); return retval ? retval : sizeof(s32); } @@ -865,7 +860,7 @@ int __uio_register_device(struct module *owner, idev->owner = owner; idev->info = info; - spin_lock_init(&idev->info_lock); + mutex_init(&idev->info_lock); init_waitqueue_head(&idev->wait); atomic_set(&idev->event, 0); @@ -929,7 +924,6 @@ EXPORT_SYMBOL_GPL(__uio_register_device); void uio_unregister_device(struct uio_info *info) { struct uio_device *idev; - unsigned long flags; if (!info || !info->uio_dev) return; @@ -943,9 +937,9 @@ void uio_unregister_device(struct uio_info *info) if (info->irq && info->irq != UIO_IRQ_CUSTOM) free_irq(info->irq, idev); - spin_lock_irqsave(&idev->info_lock, flags); + mutex_lock(&idev->info_lock); idev->info = NULL; - spin_unlock_irqrestore(&idev->info_lock, flags); + mutex_unlock(&idev->info_lock); device_unregister(&idev->dev); diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 6c5f2074e14f..6f8b68cd460f 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -75,7 +75,7 @@ struct uio_device { struct fasync_struct *async_queue; wait_queue_head_t wait; struct uio_info *info; - spinlock_t info_lock; + struct mutex info_lock; struct kobject *map_dir; struct kobject *portio_dir; }; From 57c5f4df0a5a0ee83df799991251e2ee93a5e4e9 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 6 Jul 2018 22:05:39 -0400 Subject: [PATCH 311/382] uio: fix crash after the device is unregistered For the target_core_user use case, after the device is unregistered it maybe still opened in user space, then the kernel will crash, like: [ 251.163692] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 251.163820] IP: [] show_name+0x23/0x40 [uio] [ 251.163965] PGD 8000000062694067 PUD 62696067 PMD 0 [ 251.164097] Oops: 0000 [#1] SMP ... [ 251.165605] e1000 mptscsih mptbase drm_panel_orientation_quirks dm_mirror dm_region_hash dm_log dm_mod [ 251.166014] CPU: 0 PID: 13380 Comm: tcmu-runner Kdump: loaded Not tainted 3.10.0-916.el7.test.x86_64 #1 [ 251.166381] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 [ 251.166747] task: ffff971eb91db0c0 ti: ffff971e9e384000 task.ti: ffff971e9e384000 [ 251.167137] RIP: 0010:[] [] show_name+0x23/0x40 [uio] [ 251.167563] RSP: 0018:ffff971e9e387dc8 EFLAGS: 00010282 [ 251.167978] RAX: 0000000000000000 RBX: ffff971e9e3f8000 RCX: ffff971eb8368d98 [ 251.168408] RDX: ffff971e9e3f8000 RSI: ffffffffc0738084 RDI: ffff971e9e3f8000 [ 251.168856] RBP: ffff971e9e387dd0 R08: ffff971eb8bc0018 R09: 0000000000000000 [ 251.169296] R10: 0000000000001000 R11: ffffffffa09d444d R12: ffffffffa1076e80 [ 251.169750] R13: ffff971e9e387f18 R14: 0000000000000001 R15: ffff971e9cfb1c80 [ 251.170213] FS: 00007ff37d175880(0000) GS:ffff971ebb600000(0000) knlGS:0000000000000000 [ 251.170693] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 251.171248] CR2: 0000000000000008 CR3: 00000000001f6000 CR4: 00000000003607f0 [ 251.172071] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 251.172640] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 251.173236] Call Trace: [ 251.173789] [] dev_attr_show+0x23/0x60 [ 251.174356] [] ? mutex_lock+0x12/0x2f [ 251.174892] [] sysfs_kf_seq_show+0xcf/0x1f0 [ 251.175433] [] kernfs_seq_show+0x26/0x30 [ 251.175981] [] seq_read+0x110/0x3f0 [ 251.176609] [] kernfs_fop_read+0xf5/0x160 [ 251.177158] [] vfs_read+0x9f/0x170 [ 251.177707] [] SyS_read+0x7f/0xf0 [ 251.178268] [] system_call_fastpath+0x1c/0x21 [ 251.178823] Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 d3 e8 7e 96 56 e0 48 8b 80 d8 02 00 00 48 89 df 48 c7 c6 84 80 73 c0 <48> 8b 50 08 31 c0 e8 e2 67 44 e0 5b 48 98 5d c3 0f 1f 00 66 2e [ 251.180115] RIP [] show_name+0x23/0x40 [uio] [ 251.180820] RSP [ 251.181473] CR2: 0000000000000008 CC: Hamish Martin CC: Mike Christie Reviewed-by: Hamish Martin Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio.c | 104 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 88 insertions(+), 16 deletions(-) diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 655ade4fb3b1..5d421d7e8904 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -215,7 +215,20 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uio_device *idev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", idev->info->name); + int ret; + + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = -EINVAL; + dev_err(dev, "the device has been unregistered\n"); + goto out; + } + + ret = sprintf(buf, "%s\n", idev->info->name); + +out: + mutex_unlock(&idev->info_lock); + return ret; } static DEVICE_ATTR_RO(name); @@ -223,7 +236,20 @@ static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct uio_device *idev = dev_get_drvdata(dev); - return sprintf(buf, "%s\n", idev->info->version); + int ret; + + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = -EINVAL; + dev_err(dev, "the device has been unregistered\n"); + goto out; + } + + ret = sprintf(buf, "%s\n", idev->info->version); + +out: + mutex_unlock(&idev->info_lock); + return ret; } static DEVICE_ATTR_RO(version); @@ -415,11 +441,15 @@ EXPORT_SYMBOL_GPL(uio_event_notify); static irqreturn_t uio_interrupt(int irq, void *dev_id) { struct uio_device *idev = (struct uio_device *)dev_id; - irqreturn_t ret = idev->info->handler(irq, idev->info); + irqreturn_t ret; + mutex_lock(&idev->info_lock); + + ret = idev->info->handler(irq, idev->info); if (ret == IRQ_HANDLED) uio_event_notify(idev->info); + mutex_unlock(&idev->info_lock); return ret; } @@ -460,6 +490,12 @@ static int uio_open(struct inode *inode, struct file *filep) filep->private_data = listener; mutex_lock(&idev->info_lock); + if (!idev->info) { + mutex_unlock(&idev->info_lock); + ret = -EINVAL; + goto err_alloc_listener; + } + if (idev->info && idev->info->open) ret = idev->info->open(idev->info, inode); mutex_unlock(&idev->info_lock); @@ -590,6 +626,11 @@ static ssize_t uio_write(struct file *filep, const char __user *buf, s32 irq_on; mutex_lock(&idev->info_lock); + if (!idev->info) { + retval = -EINVAL; + goto out; + } + if (!idev->info || !idev->info->irq) { retval = -EIO; goto out; @@ -635,10 +676,20 @@ static vm_fault_t uio_vma_fault(struct vm_fault *vmf) struct page *page; unsigned long offset; void *addr; + int ret = 0; + int mi; - int mi = uio_find_mem_index(vmf->vma); - if (mi < 0) - return VM_FAULT_SIGBUS; + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = VM_FAULT_SIGBUS; + goto out; + } + + mi = uio_find_mem_index(vmf->vma); + if (mi < 0) { + ret = VM_FAULT_SIGBUS; + goto out; + } /* * We need to subtract mi because userspace uses offset = N*PAGE_SIZE @@ -653,7 +704,11 @@ static vm_fault_t uio_vma_fault(struct vm_fault *vmf) page = vmalloc_to_page(addr); get_page(page); vmf->page = page; - return 0; + +out: + mutex_unlock(&idev->info_lock); + + return ret; } static const struct vm_operations_struct uio_logical_vm_ops = { @@ -678,6 +733,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma) struct uio_device *idev = vma->vm_private_data; int mi = uio_find_mem_index(vma); struct uio_mem *mem; + if (mi < 0) return -EINVAL; mem = idev->info->mem + mi; @@ -719,30 +775,46 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma) vma->vm_private_data = idev; + mutex_lock(&idev->info_lock); + if (!idev->info) { + ret = -EINVAL; + goto out; + } + mi = uio_find_mem_index(vma); - if (mi < 0) - return -EINVAL; + if (mi < 0) { + ret = -EINVAL; + goto out; + } requested_pages = vma_pages(vma); actual_pages = ((idev->info->mem[mi].addr & ~PAGE_MASK) + idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT; - if (requested_pages > actual_pages) - return -EINVAL; + if (requested_pages > actual_pages) { + ret = -EINVAL; + goto out; + } if (idev->info->mmap) { ret = idev->info->mmap(idev->info, vma); - return ret; + goto out; } switch (idev->info->mem[mi].memtype) { case UIO_MEM_PHYS: - return uio_mmap_physical(vma); + ret = uio_mmap_physical(vma); + break; case UIO_MEM_LOGICAL: case UIO_MEM_VIRTUAL: - return uio_mmap_logical(vma); + ret = uio_mmap_logical(vma); + break; default: - return -EINVAL; + ret = -EINVAL; } + +out: + mutex_unlock(&idev->info_lock); + return 0; } static const struct file_operations uio_fops = { @@ -932,12 +1004,12 @@ void uio_unregister_device(struct uio_info *info) uio_free_minor(idev); + mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); if (info->irq && info->irq != UIO_IRQ_CUSTOM) free_irq(info->irq, idev); - mutex_lock(&idev->info_lock); idev->info = NULL; mutex_unlock(&idev->info_lock); From 007a74907deeceefef9dc3ec4679fbd7921eaa51 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 26 Jun 2018 14:46:35 +0300 Subject: [PATCH 312/382] thunderbolt: Notify userspace when boot_acl is changed The commit 9aaa3b8b4c56 ("thunderbolt: Add support for preboot ACL") introduced boot_acl attribute but missed the fact that now userspace needs to poll the attribute constantly to find out whether it has changed or not. Fix this by sending notification to the userspace whenever the boot_acl attribute is changed. Fixes: 9aaa3b8b4c56 ("thunderbolt: Add support for preboot ACL") Reported-and-tested-by: Christian Kellner Signed-off-by: Mika Westerberg Reviewed-by: Christian Kellner Acked-by: Yehezkel Bernat Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/domain.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c index 6281266b8ec0..a923ebdeb73c 100644 --- a/drivers/thunderbolt/domain.c +++ b/drivers/thunderbolt/domain.c @@ -213,6 +213,10 @@ static ssize_t boot_acl_store(struct device *dev, struct device_attribute *attr, goto err_free_acl; } ret = tb->cm_ops->set_boot_acl(tb, acl, tb->nboot_acl); + if (!ret) { + /* Notify userspace about the change */ + kobject_uevent(&tb->dev.kobj, KOBJ_CHANGE); + } mutex_unlock(&tb->lock); err_free_acl: From 87ed1405ef09d29a14df43295f7b6a93b63bfe6e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 18 Jun 2018 18:30:43 +0100 Subject: [PATCH 313/382] nvmem: Don't let a NULL cell_id for nvmem_cell_get() crash us In commit ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on Qcom chips") you can see a call like: devm_nvmem_cell_get(dev, NULL); Note that the cell ID passed to the function is NULL. This is because the qcom-qusb2 driver is expected to work only on systems where the PHY node is hooked up via device-tree and is nameless. This works OK for the most part. The first thing nvmem_cell_get() does is to call of_nvmem_cell_get() and there it's documented that a NULL name is fine. The problem happens when the call to of_nvmem_cell_get() returns -EINVAL. In such a case we'll fall back to nvmem_cell_get_from_list() and eventually might (if nvmem_cells isn't an empty list) crash with something that looks like: strcmp nvmem_find_cell __nvmem_device_get nvmem_cell_get_from_list nvmem_cell_get devm_nvmem_cell_get qusb2_phy_probe There are several different ways we could fix this problem: One could argue that perhaps the qcom-qusb2 driver should be changed to use of_nvmem_cell_get() which is allowed to have a NULL name. In that case, we'd need to add a patche to introduce devm_of_nvmem_cell_get() since the qcom-qusb2 driver is using devm managed resources. One could also argue that perhaps we could just add a name to qcom-qusb2. That would be OK but I believe it effectively changes the device tree bindings, so maybe it's a no-go. In this patch I have chosen to fix the problem by simply not crashing when a NULL cell_id is passed to nvmem_cell_get(). NOTE: that for the qcom-qusb2 driver the "nvmem-cells" property is defined to be optional and thus it's expected to be a common case that we would hit this crash and this is more than just a theoretical fix. Fixes: ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on Qcom chips") Signed-off-by: Douglas Anderson Signed-off-by: Srinivas Kandagatla Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index b5b0cdc21d01..514d1dfc5630 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -936,6 +936,10 @@ struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id) return cell; } + /* NULL cell_id only allowed for device tree; invalid otherwise */ + if (!cell_id) + return ERR_PTR(-EINVAL); + return nvmem_cell_get_from_list(cell_id); } EXPORT_SYMBOL_GPL(nvmem_cell_get); From 15279df6f26cf2013d713904b4a0c957ae8abb96 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 6 Jul 2018 23:50:03 +0200 Subject: [PATCH 314/382] x86/mtrr: Don't copy out-of-bounds data in mtrr_write Don't access the provided buffer out of bounds - this can cause a kernel out-of-bounds read when invoked through sys_splice() or other things that use kernel_write()/__kernel_write(). Fixes: 7f8ec5a4f01a ("x86/mtrr: Convert to use strncpy_from_user() helper") Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Cc: Andy Shevchenko Cc: "H. Peter Anvin" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180706215003.156702-1-jannh@google.com --- arch/x86/kernel/cpu/mtrr/if.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 4021d3859499..40eee6cc4124 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -106,7 +106,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) memset(line, 0, LINE_SIZE); - length = strncpy_from_user(line, buf, LINE_SIZE - 1); + len = min_t(size_t, len, LINE_SIZE - 1); + length = strncpy_from_user(line, buf, len); if (length < 0) return length; From 47a18a2dabba99c749b88867a9eef7ca48058e92 Mon Sep 17 00:00:00 2001 From: Adam Borowski Date: Sat, 7 Jul 2018 02:15:48 +0200 Subject: [PATCH 315/382] scripts: teach extract-vmlinux about LZ4 and ZSTD Note that the LZ4 signature is different than that of modern LZ4 as we use the "legacy" format which suffers from some downsides like inability to disable compression. Signed-off-by: Adam Borowski Signed-off-by: Masahiro Yamada --- scripts/extract-vmlinux | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/extract-vmlinux b/scripts/extract-vmlinux index 5061abcc2540..e6239f39abad 100755 --- a/scripts/extract-vmlinux +++ b/scripts/extract-vmlinux @@ -57,6 +57,8 @@ try_decompress '\3757zXZ\000' abcde unxz try_decompress 'BZh' xy bunzip2 try_decompress '\135\0\0\0' xxx unlzma try_decompress '\211\114\132' xy 'lzop -d' +try_decompress '\002!L\030' xxx 'lz4 -d' +try_decompress '(\265/\375' xxx unzstd # Bail out: echo "$me: Cannot find vmlinux." >&2 From 1e4b044d22517cae7047c99038abb444423243ca Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 8 Jul 2018 16:34:02 -0700 Subject: [PATCH 316/382] Linux 4.18-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d15ac32afbaf..925c55f2524f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Merciless Moray # *DOCUMENTATION* From fa85015c0d95884c8dc42f38e2f2d6137d436b67 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 9 Jul 2018 11:01:07 +0200 Subject: [PATCH 317/382] ACPICA: Clear status of all events when entering S5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 18996f2db918 (ACPICA: Events: Stop unconditionally clearing ACPI IRQs during suspend/resume) the status of ACPI events is not cleared any more when entering the ACPI S5 system state (power off) which causes some systems to power up immediately after turing off power in certain situations. That is a functional regression, so address it by making the code clear the status of all ACPI events again when entering S5 (for system-wide suspend or hibernation the clearing of the status of all events is not desirable, as it might cause the kernel to miss wakeup events sometimes). Fixes: 18996f2db918 (ACPICA: Events: Stop unconditionally clearing ACPI IRQs during suspend/resume) Reported-by: Takashi Iwai Tested-by: Thomas Hänig Cc: 4.17+ # 4.17+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/hwsleep.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c index fc0c2e2328cd..fe9d46d81750 100644 --- a/drivers/acpi/acpica/hwsleep.c +++ b/drivers/acpi/acpica/hwsleep.c @@ -51,16 +51,23 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state) return_ACPI_STATUS(status); } - /* - * 1) Disable all GPEs - * 2) Enable all wakeup GPEs - */ + /* Disable all GPEs */ status = acpi_hw_disable_all_gpes(); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } + /* + * If the target sleep state is S5, clear all GPEs and fixed events too + */ + if (sleep_state == ACPI_STATE_S5) { + status = acpi_hw_clear_acpi_status(); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + } acpi_gbl_system_awake_and_running = FALSE; + /* Enable all wakeup GPEs */ status = acpi_hw_enable_all_wakeup_gpes(); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); From 92748beac07c471d995fbec642b63572dc01b3dc Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 4 Jul 2018 17:07:45 +0200 Subject: [PATCH 318/382] mmc: sdhci-esdhc-imx: allow 1.8V modes without 100/200MHz pinctrl states If pinctrl nodes for 100/200MHz are missing, the controller should not select any mode which need signal frequencies 100MHz or higher. To prevent such speed modes the driver currently uses the quirk flag SDHCI_QUIRK2_NO_1_8_V. This works nicely for SD cards since 1.8V signaling is required for all faster modes and slower modes use 3.3V signaling only. However, there are eMMC modes which use 1.8V signaling and run below 100MHz, e.g. DDR52 at 1.8V. With using SDHCI_QUIRK2_NO_1_8_V this mode is prevented. When using a fixed 1.8V regulator as vqmmc-supply the stack has no valid mode to use. In this tenuous situation the kernel continuously prints voltage switching errors: mmc1: Switching to 3.3V signalling voltage failed Avoid using SDHCI_QUIRK2_NO_1_8_V and prevent faster modes by altering the SDHCI capability register. With that the stack is able to select 1.8V modes even if no faster pinctrl states are available: # cat /sys/kernel/debug/mmc1/ios ... timing spec: 8 (mmc DDR52) signal voltage: 1 (1.80 V) ... Link: http://lkml.kernel.org/r/20180628081331.13051-1-stefan@agner.ch Signed-off-by: Stefan Agner Fixes: ad93220de7da ("mmc: sdhci-esdhc-imx: change pinctrl state according to uhs mode") Cc: # v4.13+ Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index d6aef70d34fa..4eb3d29ecde1 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -312,6 +312,15 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg) if (imx_data->socdata->flags & ESDHC_FLAG_HS400) val |= SDHCI_SUPPORT_HS400; + + /* + * Do not advertise faster UHS modes if there are no + * pinctrl states for 100MHz/200MHz. + */ + if (IS_ERR_OR_NULL(imx_data->pins_100mhz) || + IS_ERR_OR_NULL(imx_data->pins_200mhz)) + val &= ~(SDHCI_SUPPORT_SDR50 | SDHCI_SUPPORT_DDR50 + | SDHCI_SUPPORT_SDR104 | SDHCI_SUPPORT_HS400); } } @@ -1158,18 +1167,6 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, ESDHC_PINCTRL_STATE_100MHZ); imx_data->pins_200mhz = pinctrl_lookup_state(imx_data->pinctrl, ESDHC_PINCTRL_STATE_200MHZ); - if (IS_ERR(imx_data->pins_100mhz) || - IS_ERR(imx_data->pins_200mhz)) { - dev_warn(mmc_dev(host->mmc), - "could not get ultra high speed state, work on normal mode\n"); - /* - * fall back to not supporting uhs by specifying no - * 1.8v quirk - */ - host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; - } - } else { - host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V; } /* call to generic mmc_of_parse to support additional capabilities */ From ef6eaf27274c0351f7059163918f3795da13199c Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 22 Jun 2018 12:25:49 -0400 Subject: [PATCH 319/382] HID: i2c-hid: Fix "incomplete report" noise Commit ac75a041048b ("HID: i2c-hid: fix size check and type usage") started writing messages when the ret_size is <= 2 from i2c_master_recv. However, my device i2c-DLL07D1 returns 2 for a short period of time (~0.5s) after I stop moving the pointing stick or touchpad. It varies, but you get ~50 messages each time which spams the log hard. [ 95.925055] i2c_hid i2c-DLL07D1:01: i2c_hid_get_input: incomplete report (83/2) This has also been observed with a i2c-ALP0017. [ 1781.266353] i2c_hid i2c-ALP0017:00: i2c_hid_get_input: incomplete report (30/2) Only print the message when ret_size is totally invalid and less than 2 to cut down on the log spam. Fixes: ac75a041048b ("HID: i2c-hid: fix size check and type usage") Reported-by: John Smith Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Signed-off-by: Jiri Kosina --- drivers/hid/i2c-hid/i2c-hid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index c1652bb7bd15..eae0cb3ddec6 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -484,7 +484,7 @@ static void i2c_hid_get_input(struct i2c_hid *ihid) return; } - if ((ret_size > size) || (ret_size <= 2)) { + if ((ret_size > size) || (ret_size < 2)) { dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n", __func__, size, ret_size); return; From 4f65245f2d178b9cba48350620d76faa4a098841 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 29 Jun 2018 17:08:44 -0500 Subject: [PATCH 320/382] HID: hiddev: fix potential Spectre v1 uref->field_index, uref->usage_index, finfo.field_index and cinfo.index can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/hid/usbhid/hiddev.c:473 hiddev_ioctl_usage() warn: potential spectre issue 'report->field' (local cap) drivers/hid/usbhid/hiddev.c:477 hiddev_ioctl_usage() warn: potential spectre issue 'field->usage' (local cap) drivers/hid/usbhid/hiddev.c:757 hiddev_ioctl() warn: potential spectre issue 'report->field' (local cap) drivers/hid/usbhid/hiddev.c:801 hiddev_ioctl() warn: potential spectre issue 'hid->collection' (local cap) Fix this by sanitizing such structure fields before using them to index report->field, field->usage and hid->collection Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hiddev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index e3ce233f8bdc..23872d08308c 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "usbhid.h" #ifdef CONFIG_USB_DYNAMIC_MINORS @@ -469,10 +470,14 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (uref->field_index >= report->maxfield) goto inval; + uref->field_index = array_index_nospec(uref->field_index, + report->maxfield); field = report->field[uref->field_index]; if (uref->usage_index >= field->maxusage) goto inval; + uref->usage_index = array_index_nospec(uref->usage_index, + field->maxusage); uref->usage_code = field->usage[uref->usage_index].hid; @@ -499,6 +504,8 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd, if (uref->field_index >= report->maxfield) goto inval; + uref->field_index = array_index_nospec(uref->field_index, + report->maxfield); field = report->field[uref->field_index]; @@ -753,6 +760,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (finfo.field_index >= report->maxfield) break; + finfo.field_index = array_index_nospec(finfo.field_index, + report->maxfield); field = report->field[finfo.field_index]; memset(&finfo, 0, sizeof(finfo)); @@ -797,6 +806,8 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (cinfo.index >= hid->maxcollection) break; + cinfo.index = array_index_nospec(cinfo.index, + hid->maxcollection); cinfo.type = hid->collection[cinfo.index].type; cinfo.usage = hid->collection[cinfo.index].usage; From 54836e2d03e76d80aec3399368ffaf5b7caadd1b Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Tue, 3 Jul 2018 09:55:43 +0100 Subject: [PATCH 321/382] i2c: tegra: Fix NACK error handling On Tegra30 Cardhu the PCA9546 I2C mux is not ACK'ing I2C commands on resume from suspend (which is caused by the reset signal for the I2C mux not being configured correctl). However, this NACK is causing the Tegra30 to hang on resuming from suspend which is not expected as we detect NACKs and handle them. The hang observed appears to occur when resetting the I2C controller to recover from the NACK. Commit 77821b4678f9 ("i2c: tegra: proper handling of error cases") added additional error handling for some error cases including NACK, however, it appears that this change conflicts with an early fix by commit f70893d08338 ("i2c: tegra: Add delay before resetting the controller after NACK"). After commit 77821b4678f9 was made we now disable 'packet mode' before the delay from commit f70893d08338 happens. Testing shows that moving the delay to before disabling 'packet mode' fixes the hang observed on Tegra30. The delay was added to give the I2C controller chance to send a stop condition and so it makes sense to move this to before we disable packet mode. Please note that packet mode is always enabled for Tegra. Fixes: 77821b4678f9 ("i2c: tegra: proper handling of error cases") Signed-off-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Wolfram Sang Cc: stable@vger.kernel.org --- drivers/i2c/busses/i2c-tegra.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 5fccd1f1bca8..797def5319f1 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -545,6 +545,14 @@ static int tegra_i2c_disable_packet_mode(struct tegra_i2c_dev *i2c_dev) { u32 cnfg; + /* + * NACK interrupt is generated before the I2C controller generates + * the STOP condition on the bus. So wait for 2 clock periods + * before disabling the controller so that the STOP condition has + * been delivered properly. + */ + udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate)); + cnfg = i2c_readl(i2c_dev, I2C_CNFG); if (cnfg & I2C_CNFG_PACKET_MODE_EN) i2c_writel(i2c_dev, cnfg & ~I2C_CNFG_PACKET_MODE_EN, I2C_CNFG); @@ -706,15 +714,6 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, if (likely(i2c_dev->msg_err == I2C_ERR_NONE)) return 0; - /* - * NACK interrupt is generated before the I2C controller generates - * the STOP condition on the bus. So wait for 2 clock periods - * before resetting the controller so that the STOP condition has - * been delivered properly. - */ - if (i2c_dev->msg_err == I2C_ERR_NO_ACK) - udelay(DIV_ROUND_UP(2 * 1000000, i2c_dev->bus_clk_rate)); - tegra_i2c_init(i2c_dev); if (i2c_dev->msg_err == I2C_ERR_NO_ACK) { if (msg->flags & I2C_M_IGNORE_NAK) From bdf33113d89f70186ffb6674e925fa9b8a0266b1 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Mon, 9 Jul 2018 13:15:32 +0100 Subject: [PATCH 322/382] Update TDA998x maintainer entry Update my TDA998x HDMI encoder MAINTAINERS entry to include the dt-bindings header, and a keyword pattern to catch patches containing the DT compatible. Also change the status to "maintained" rather than "supported". Signed-off-by: Russell King Signed-off-by: Linus Torvalds --- MAINTAINERS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96e98e206b0d..19f2d12ace6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10213,11 +10213,13 @@ F: sound/soc/codecs/sgtl5000* NXP TDA998X DRM DRIVER M: Russell King -S: Supported +S: Maintained T: git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-devel T: git git://git.armlinux.org.uk/~rmk/linux-arm.git drm-tda998x-fixes F: drivers/gpu/drm/i2c/tda998x_drv.c F: include/drm/i2c/tda998x.h +F: include/dt-bindings/display/tda998x.h +K: "nxp,tda998x" NXP TFA9879 DRIVER M: Peter Rosin From dc81aab1be9fac2e11f31fe7538a50705eba08cf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 6 Jul 2018 09:28:42 +0200 Subject: [PATCH 323/382] MAINTAINERS: update drm tree Mail to dri-devel went out, linux-next was updated, but we forgot this one here. Cc: David Airlie Signed-off-by: Daniel Vetter Acked-by: Alex Deucher Acked-by: Rodrigo Vivi Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20180706072842.9009-1-daniel.vetter@ffwll.ch --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 96e98e206b0d..ad0753fb5357 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -581,7 +581,7 @@ W: https://www.infradead.org/~dhowells/kafs/ AGPGART DRIVER M: David Airlie -T: git git://people.freedesktop.org/~airlied/linux (part of drm maint) +T: git git://anongit.freedesktop.org/drm/drm S: Maintained F: drivers/char/agp/ F: include/linux/agp* @@ -4630,7 +4630,7 @@ F: include/uapi/drm/vmwgfx_drm.h DRM DRIVERS M: David Airlie L: dri-devel@lists.freedesktop.org -T: git git://people.freedesktop.org/~airlied/linux +T: git git://anongit.freedesktop.org/drm/drm B: https://bugs.freedesktop.org/ C: irc://chat.freenode.net/dri-devel S: Maintained From 96f95a17c1cfe65a002e525114d96616e91a8f2d Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 9 Jul 2018 13:09:56 -0700 Subject: [PATCH 324/382] Revert "arm64: Use aarch64elf and aarch64elfb emulation mode variants" This reverts commit 38fc4248677552ce35efc09902fdcb06b61d7ef9. Distributions such as Fedora and Debian do not package the ELF linker scripts with their toolchains, resulting in kernel build failures such as: | CHK include/generated/compile.h | LD [M] arch/arm64/crypto/sha512-ce.o | aarch64-linux-gnu-ld: cannot open linker script file ldscripts/aarch64elf.xr: No such file or directory | make[1]: *** [scripts/Makefile.build:530: arch/arm64/crypto/sha512-ce.o] Error 1 | make: *** [Makefile:1029: arch/arm64/crypto] Error 2 Revert back to the linux targets for now, adding a comment to the Makefile so we don't accidentally break this in the future. Cc: Paul Kocialkowski Cc: Fixes: 38fc42486775 ("arm64: Use aarch64elf and aarch64elfb emulation mode variants") Tested-by: Kevin Hilman Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7976d2d242fa..e7101b19d590 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -60,13 +60,15 @@ ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB -LDFLAGS += -EB -maarch64elfb +# We must use the linux target here, since distributions don't tend to package +# the ELF linker scripts with binutils, and this results in a build failure. +LDFLAGS += -EB -maarch64linuxb UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL -LDFLAGS += -EL -maarch64elf +LDFLAGS += -EL -maarch64linux # See comment above UTS_MACHINE := aarch64 endif From 5b5ccbc2b041f98f26b984e013d303b7f9e6fb8e Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 9 Jul 2018 16:45:35 +0100 Subject: [PATCH 325/382] Revert "tick: Prefer a lower rating device only if it's CPU local device" This reverts commit 1332a90558013ae4242e3dd7934bdcdeafb06c0d. The original issue was not because of incorrect checking of cpumask for both new and old tick device. It was incorrectly analysed was due to the misunderstanding of the comment and misinterpretation of the return value from tick_check_preferred. The main issue is with the clockevent driver that sets the cpumask to cpu_all_mask instead of cpu_possible_mask. Signed-off-by: Sudeep Holla Signed-off-by: Thomas Gleixner Tested-by: Kevin Hilman Tested-by: Martin Blumenstingl Cc: linux-arm-kernel@lists.infradead.org Cc: Marc Zyngier Link: https://lkml.kernel.org/r/1531151136-18297-1-git-send-email-sudeep.holla@arm.com --- kernel/time/tick-common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b7005dd21ec1..14de3727b18e 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -277,8 +277,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev, */ return !curdev || newdev->rating > curdev->rating || - (!cpumask_equal(curdev->cpumask, newdev->cpumask) && - !tick_check_percpu(curdev, newdev, smp_processor_id())); + !cpumask_equal(curdev->cpumask, newdev->cpumask); } /* From 5e18e412973d6bb1804de1d4d30a891c774b006e Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 9 Jul 2018 16:45:36 +0100 Subject: [PATCH 326/382] clocksource: arm_arch_timer: Set arch_mem_timer cpumask to cpu_possible_mask Currently, arch_mem_timer cpumask is set to cpu_all_mask which should be fine. However, cpu_possible_mask is more accurate and if there are other clockevent source in the system which are set to cpu_possible_mask, then having cpu_all_mask may result in issue. E.g. on a platform with arm,sp804 timer with rating 300 and cpu_possible_mask and this arch_mem_timer timer with rating 400 and cpu_all_mask, tick_check_preferred may choose both preferred as the cpumasks are not equal though they must be. This issue was root caused incorrectly initially and a fix was merged as commit 1332a9055801 ("tick: Prefer a lower rating device only if it's CPU local device"). Signed-off-by: Sudeep Holla Signed-off-by: Thomas Gleixner Tested-by: Kevin Hilman Tested-by: Martin Blumenstingl Cc: linux-arm-kernel@lists.infradead.org Cc: Marc Zyngier Cc: Mark Rutland Link: https://lkml.kernel.org/r/1531151136-18297-2-git-send-email-sudeep.holla@arm.com --- drivers/clocksource/arm_arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 57cb2f00fc07..d8c7f5750cdb 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -735,7 +735,7 @@ static void __arch_timer_setup(unsigned type, clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; clk->rating = 400; - clk->cpumask = cpu_all_mask; + clk->cpumask = cpu_possible_mask; if (arch_timer_mem_use_virtual) { clk->set_state_shutdown = arch_timer_shutdown_virt_mem; clk->set_state_oneshot_stopped = arch_timer_shutdown_virt_mem; From e96d71359e9bbea846a2111e4469a03a055dfa6f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:50 -0400 Subject: [PATCH 327/382] rseq: Use __u64 for rseq_cs fields, validate user inputs Change the rseq ABI so rseq_cs start_ip, post_commit_offset and abort_ip fields are seen as 64-bit fields by both 32-bit and 64-bit kernels rather that ignoring the 32 upper bits on 32-bit kernels. This ensures we have a consistent behavior for a 32-bit binary executed on 32-bit kernels and in compat mode on 64-bit kernels. Validating the value of abort_ip field to be below TASK_SIZE ensures the kernel don't return to an invalid address when returning to userspace after an abort. I don't fully trust each architecture code to consistently deal with invalid return addresses. Validating the value of the start_ip and post_commit_offset fields prevents overflow on arithmetic performed on those values, used to check whether abort_ip is within the rseq critical section. If validation fails, the process is killed with a segmentation fault. When the signature encountered before abort_ip does not match the expected signature, return -EINVAL rather than -EPERM to be consistent with other input validation return codes from rseq_get_rseq_cs(). Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-2-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 6 +++--- kernel/rseq.c | 14 ++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index d620fa43756c..519ad6e176d1 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -52,10 +52,10 @@ struct rseq_cs { __u32 version; /* enum rseq_cs_flags */ __u32 flags; - LINUX_FIELD_u32_u64(start_ip); + __u64 start_ip; /* Offset from start_ip. */ - LINUX_FIELD_u32_u64(post_commit_offset); - LINUX_FIELD_u32_u64(abort_ip); + __u64 post_commit_offset; + __u64 abort_ip; } __attribute__((aligned(4 * sizeof(__u64)))); /* diff --git a/kernel/rseq.c b/kernel/rseq.c index 22b6acf1ad63..16b38c5342f9 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -130,14 +130,20 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) urseq_cs = (struct rseq_cs __user *)ptr; if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) return -EFAULT; - if (rseq_cs->version > 0) - return -EINVAL; + if (rseq_cs->start_ip >= TASK_SIZE || + rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE || + rseq_cs->abort_ip >= TASK_SIZE || + rseq_cs->version > 0) + return -EINVAL; + /* Check for overflow. */ + if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip) + return -EINVAL; /* Ensure that abort_ip is not in the critical section. */ if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset) return -EINVAL; - usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32)); + usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32)); ret = get_user(sig, usig); if (ret) return ret; @@ -146,7 +152,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) printk_ratelimited(KERN_WARNING "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n", sig, current->rseq_sig, current->pid, usig); - return -EPERM; + return -EINVAL; } return 0; } From 8f28177014925f968baf45fc833c25848faf8c1c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:51 -0400 Subject: [PATCH 328/382] rseq: Use get_user/put_user rather than __get_user/__put_user __get_user()/__put_user() is used to read values for address ranges that were already checked with access_ok() on rseq registration. It has been recognized that __get_user/__put_user are optimizing the wrong thing. Replace them by get_user/put_user across rseq instead. If those end up showing up in benchmarks, the proper approach would be to use user_access_begin() / unsafe_{get,put}_user() / user_access_end() anyway. Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: linux-arm-kernel@lists.infradead.org Link: https://lkml.kernel.org/r/20180709195155.7654-3-mathieu.desnoyers@efficios.com --- kernel/rseq.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/kernel/rseq.c b/kernel/rseq.c index 16b38c5342f9..2c8463acb50d 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -85,9 +85,9 @@ static int rseq_update_cpu_id(struct task_struct *t) { u32 cpu_id = raw_smp_processor_id(); - if (__put_user(cpu_id, &t->rseq->cpu_id_start)) + if (put_user(cpu_id, &t->rseq->cpu_id_start)) return -EFAULT; - if (__put_user(cpu_id, &t->rseq->cpu_id)) + if (put_user(cpu_id, &t->rseq->cpu_id)) return -EFAULT; trace_rseq_update(t); return 0; @@ -100,14 +100,14 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) /* * Reset cpu_id_start to its initial state (0). */ - if (__put_user(cpu_id_start, &t->rseq->cpu_id_start)) + if (put_user(cpu_id_start, &t->rseq->cpu_id_start)) return -EFAULT; /* * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming * in after unregistration can figure out that rseq needs to be * registered again. */ - if (__put_user(cpu_id, &t->rseq->cpu_id)) + if (put_user(cpu_id, &t->rseq->cpu_id)) return -EFAULT; return 0; } @@ -120,7 +120,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) u32 sig; int ret; - ret = __get_user(ptr, &t->rseq->rseq_cs); + ret = get_user(ptr, &t->rseq->rseq_cs); if (ret) return ret; if (!ptr) { @@ -163,7 +163,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags) int ret; /* Get thread flags. */ - ret = __get_user(flags, &t->rseq->flags); + ret = get_user(flags, &t->rseq->flags); if (ret) return ret; @@ -203,7 +203,7 @@ static int clear_rseq_cs(struct task_struct *t) * * Set rseq_cs to NULL with single-copy atomicity. */ - return __put_user(0UL, &t->rseq->rseq_cs); + return put_user(0UL, &t->rseq->rseq_cs); } /* From 0fb9a1abc8c97f858997e962694eb36b4517144e Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:52 -0400 Subject: [PATCH 329/382] rseq: uapi: Update uapi comments Update rseq uapi header comments to reflect that user-space need to do thread-local loads/stores from/to the struct rseq fields. As a consequence of this added requirement, the kernel does not need to perform loads/stores with single-copy atomicity. Update the comment associated to the "flags" fields to describe more accurately that it's only useful to facilitate single-stepping through rseq critical sections with debuggers. Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-4-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 69 ++++++++++++++++++++------------------- kernel/rseq.c | 2 +- 2 files changed, 37 insertions(+), 34 deletions(-) diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index 519ad6e176d1..bf4188c13bec 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -67,28 +67,30 @@ struct rseq_cs { struct rseq { /* * Restartable sequences cpu_id_start field. Updated by the - * kernel, and read by user-space with single-copy atomicity - * semantics. Aligned on 32-bit. Always contains a value in the - * range of possible CPUs, although the value may not be the - * actual current CPU (e.g. if rseq is not initialized). This - * CPU number value should always be compared against the value - * of the cpu_id field before performing a rseq commit or - * returning a value read from a data structure indexed using - * the cpu_id_start value. + * kernel. Read by user-space with single-copy atomicity + * semantics. This field should only be read by the thread which + * registered this data structure. Aligned on 32-bit. Always + * contains a value in the range of possible CPUs, although the + * value may not be the actual current CPU (e.g. if rseq is not + * initialized). This CPU number value should always be compared + * against the value of the cpu_id field before performing a rseq + * commit or returning a value read from a data structure indexed + * using the cpu_id_start value. */ __u32 cpu_id_start; /* - * Restartable sequences cpu_id field. Updated by the kernel, - * and read by user-space with single-copy atomicity semantics. - * Aligned on 32-bit. Values RSEQ_CPU_ID_UNINITIALIZED and - * RSEQ_CPU_ID_REGISTRATION_FAILED have a special semantic: the - * former means "rseq uninitialized", and latter means "rseq - * initialization failed". This value is meant to be read within - * rseq critical sections and compared with the cpu_id_start - * value previously read, before performing the commit instruction, - * or read and compared with the cpu_id_start value before returning - * a value loaded from a data structure indexed using the - * cpu_id_start value. + * Restartable sequences cpu_id field. Updated by the kernel. + * Read by user-space with single-copy atomicity semantics. This + * field should only be read by the thread which registered this + * data structure. Aligned on 32-bit. Values + * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED + * have a special semantic: the former means "rseq uninitialized", + * and latter means "rseq initialization failed". This value is + * meant to be read within rseq critical sections and compared + * with the cpu_id_start value previously read, before performing + * the commit instruction, or read and compared with the + * cpu_id_start value before returning a value loaded from a data + * structure indexed using the cpu_id_start value. */ __u32 cpu_id; /* @@ -105,27 +107,28 @@ struct rseq { * targeted by the rseq_cs. Also needs to be set to NULL by user-space * before reclaiming memory that contains the targeted struct rseq_cs. * - * Read and set by the kernel with single-copy atomicity semantics. - * Set by user-space with single-copy atomicity semantics. Aligned - * on 64-bit. + * Read and set by the kernel. Set by user-space with single-copy + * atomicity semantics. This field should only be updated by the + * thread which registered this data structure. Aligned on 64-bit. */ LINUX_FIELD_u32_u64(rseq_cs); /* - * - RSEQ_DISABLE flag: + * Restartable sequences flags field. + * + * This field should only be updated by the thread which + * registered this data structure. Read by the kernel. + * Mainly used for single-stepping through rseq critical sections + * with debuggers. * - * Fallback fast-track flag for single-stepping. - * Set by user-space if lack of progress is detected. - * Cleared by user-space after rseq finish. - * Read by the kernel. * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT - * Inhibit instruction sequence block restart and event - * counter increment on preemption for this thread. + * Inhibit instruction sequence block restart on preemption + * for this thread. * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL - * Inhibit instruction sequence block restart and event - * counter increment on signal delivery for this thread. + * Inhibit instruction sequence block restart on signal + * delivery for this thread. * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE - * Inhibit instruction sequence block restart and event - * counter increment on migration for this thread. + * Inhibit instruction sequence block restart on migration for + * this thread. */ __u32 flags; } __attribute__((aligned(4 * sizeof(__u64)))); diff --git a/kernel/rseq.c b/kernel/rseq.c index 2c8463acb50d..2a7748675be7 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -201,7 +201,7 @@ static int clear_rseq_cs(struct task_struct *t) * of code outside of the rseq assembly block. This performs * a lazy clear of the rseq_cs field. * - * Set rseq_cs to NULL with single-copy atomicity. + * Set rseq_cs to NULL. */ return put_user(0UL, &t->rseq->rseq_cs); } From ec9c82e03a744e5698bd95eab872855861a821fa Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:53 -0400 Subject: [PATCH 330/382] rseq: uapi: Declare rseq_cs field as union, update includes Declaring the rseq_cs field as a union between __u64 and two __u32 allows both 32-bit and 64-bit kernels to read the full __u64, and therefore validate that a 32-bit user-space cleared the upper 32 bits, thus ensuring a consistent behavior between native 32-bit kernels and 32-bit compat tasks on 64-bit kernels. Check that the rseq_cs value read is < TASK_SIZE. The asm/byteorder.h header needs to be included by rseq.h, now that it is not using linux/types_32_64.h anymore. Considering that only __32 and __u64 types are declared in linux/rseq.h, the linux/types.h header should always be included for both kernel and user-space code: including stdint.h is just for u64 and u32, which are not used in this header at all. Use copy_from_user()/clear_user() to interact with a 64-bit field, because arm32 does not implement 64-bit __get_user, and ppc32 does not 64-bit get_user. Considering that the rseq_cs pointer does not need to be loaded/stored with single-copy atomicity from the kernel anymore, we can simply use copy_from_user()/clear_user(). Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-5-mathieu.desnoyers@efficios.com --- include/uapi/linux/rseq.h | 27 +++++++++++++++++++-------- kernel/rseq.c | 15 +++++++++------ tools/testing/selftests/rseq/rseq.h | 11 ++++++++++- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h index bf4188c13bec..9a402fdb60e9 100644 --- a/include/uapi/linux/rseq.h +++ b/include/uapi/linux/rseq.h @@ -10,13 +10,8 @@ * Copyright (c) 2015-2018 Mathieu Desnoyers */ -#ifdef __KERNEL__ -# include -#else -# include -#endif - -#include +#include +#include enum rseq_cpu_id_state { RSEQ_CPU_ID_UNINITIALIZED = -1, @@ -111,7 +106,23 @@ struct rseq { * atomicity semantics. This field should only be updated by the * thread which registered this data structure. Aligned on 64-bit. */ - LINUX_FIELD_u32_u64(rseq_cs); + union { + __u64 ptr64; +#ifdef __LP64__ + __u64 ptr; +#else + struct { +#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) + __u32 padding; /* Initialized to zero. */ + __u32 ptr32; +#else /* LITTLE */ + __u32 ptr32; + __u32 padding; /* Initialized to zero. */ +#endif /* ENDIAN */ + } ptr; +#endif + } rseq_cs; + /* * Restartable sequences flags field. * diff --git a/kernel/rseq.c b/kernel/rseq.c index 2a7748675be7..c6242d8594dc 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -115,19 +115,20 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t) static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) { struct rseq_cs __user *urseq_cs; - unsigned long ptr; + u64 ptr; u32 __user *usig; u32 sig; int ret; - ret = get_user(ptr, &t->rseq->rseq_cs); - if (ret) - return ret; + if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr))) + return -EFAULT; if (!ptr) { memset(rseq_cs, 0, sizeof(*rseq_cs)); return 0; } - urseq_cs = (struct rseq_cs __user *)ptr; + if (ptr >= TASK_SIZE) + return -EINVAL; + urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr; if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) return -EFAULT; @@ -203,7 +204,9 @@ static int clear_rseq_cs(struct task_struct *t) * * Set rseq_cs to NULL. */ - return put_user(0UL, &t->rseq->rseq_cs); + if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64))) + return -EFAULT; + return 0; } /* diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index a4684112676c..f2073cfa4448 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -133,6 +133,15 @@ static inline uint32_t rseq_current_cpu(void) return cpu; } +static inline void rseq_clear_rseq_cs(void) +{ +#ifdef __LP64__ + __rseq_abi.rseq_cs.ptr = 0; +#else + __rseq_abi.rseq_cs.ptr.ptr32 = 0; +#endif +} + /* * rseq_prepare_unload() should be invoked by each thread using rseq_finish*() * at least once between their last rseq_finish*() and library unload of the @@ -143,7 +152,7 @@ static inline uint32_t rseq_current_cpu(void) */ static inline void rseq_prepare_unload(void) { - __rseq_abi.rseq_cs = 0; + rseq_clear_rseq_cs(); } #endif /* RSEQ_H_ */ From 4f4c0acdf4652a964da869d578a3c8bf6df14ce2 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:54 -0400 Subject: [PATCH 331/382] rseq: Remove unused types_32_64.h uapi header This header was introduced in the 4.18 merge window, and rseq does not need it anymore. Nuke it before the final release. Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-6-mathieu.desnoyers@efficios.com --- include/uapi/linux/types_32_64.h | 50 -------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 include/uapi/linux/types_32_64.h diff --git a/include/uapi/linux/types_32_64.h b/include/uapi/linux/types_32_64.h deleted file mode 100644 index 0a87ace34a57..000000000000 --- a/include/uapi/linux/types_32_64.h +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_TYPES_32_64_H -#define _UAPI_LINUX_TYPES_32_64_H - -/* - * linux/types_32_64.h - * - * Integer type declaration for pointers across 32-bit and 64-bit systems. - * - * Copyright (c) 2015-2018 Mathieu Desnoyers - */ - -#ifdef __KERNEL__ -# include -#else -# include -#endif - -#include - -#ifdef __BYTE_ORDER -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LINUX_BYTE_ORDER_BIG_ENDIAN -# else -# define LINUX_BYTE_ORDER_LITTLE_ENDIAN -# endif -#else -# ifdef __BIG_ENDIAN -# define LINUX_BYTE_ORDER_BIG_ENDIAN -# else -# define LINUX_BYTE_ORDER_LITTLE_ENDIAN -# endif -#endif - -#ifdef __LP64__ -# define LINUX_FIELD_u32_u64(field) __u64 field -# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) field = (intptr_t)v -#else -# ifdef LINUX_BYTE_ORDER_BIG_ENDIAN -# define LINUX_FIELD_u32_u64(field) __u32 field ## _padding, field -# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ - field ## _padding = 0, field = (intptr_t)v -# else -# define LINUX_FIELD_u32_u64(field) __u32 field, field ## _padding -# define LINUX_FIELD_u32_u64_INIT_ONSTACK(field, v) \ - field = (intptr_t)v, field ## _padding = 0 -# endif -#endif - -#endif /* _UAPI_LINUX_TYPES_32_64_H */ From 8a46580128a02bdc18d7dcc0cba19d3cea4fb9c4 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 9 Jul 2018 15:51:55 -0400 Subject: [PATCH 332/382] rseq/selftests: cleanup: Update comment above rseq_prepare_unload rseq as it was merged does not have rseq_finish_*() in the user-space selftests anymore. Update the rseq_prepare_unload() helper comment to adapt to this reality. Signed-off-by: Mathieu Desnoyers Signed-off-by: Thomas Gleixner Cc: linux-api@vger.kernel.org Cc: Peter Zijlstra Cc: "Paul E . McKenney" Cc: Boqun Feng Cc: Andy Lutomirski Cc: Dave Watson Cc: Paul Turner Cc: Andrew Morton Cc: Russell King Cc: "H . Peter Anvin" Cc: Andi Kleen Cc: Chris Lameter Cc: Ben Maurer Cc: Steven Rostedt Cc: Josh Triplett Cc: Linus Torvalds Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Kerrisk Cc: Joel Fernandes Cc: "Paul E. McKenney" Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180709195155.7654-7-mathieu.desnoyers@efficios.com --- tools/testing/selftests/rseq/rseq.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index f2073cfa4448..86ce22417e0d 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -143,12 +143,13 @@ static inline void rseq_clear_rseq_cs(void) } /* - * rseq_prepare_unload() should be invoked by each thread using rseq_finish*() - * at least once between their last rseq_finish*() and library unload of the - * library defining the rseq critical section (struct rseq_cs). This also - * applies to use of rseq in code generated by JIT: rseq_prepare_unload() - * should be invoked at least once by each thread using rseq_finish*() before - * reclaim of the memory holding the struct rseq_cs. + * rseq_prepare_unload() should be invoked by each thread executing a rseq + * critical section at least once between their last critical section and + * library unload of the library defining the rseq critical section + * (struct rseq_cs). This also applies to use of rseq in code generated by + * JIT: rseq_prepare_unload() should be invoked at least once by each + * thread executing a rseq critical section before reclaim of the memory + * holding the struct rseq_cs. */ static inline void rseq_prepare_unload(void) { From cea394772d3c41d04cb71a032f6ed878392bd134 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 18 Jun 2018 14:33:03 +0100 Subject: [PATCH 333/382] ARM: 8775/1: NOMMU: Use instr_sync instead of plain isb in common code Greg reported that commit 3c24121039c9d ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init") is causing breakage for the old Versatile platform in no-MMU mode (with out-of-tree patches): AS arch/arm/kernel/head-nommu.o arch/arm/kernel/head-nommu.S: Assembler messages: arch/arm/kernel/head-nommu.S:180: Error: selected processor does not support `isb' in ARM mode scripts/Makefile.build:417: recipe for target 'arch/arm/kernel/head-nommu.o' failed make[2]: *** [arch/arm/kernel/head-nommu.o] Error 1 Makefile:1034: recipe for target 'arch/arm/kernel' failed make[1]: *** [arch/arm/kernel] Error 2 Since the code is common for all NOMMU builds usage of the isb was a bad idea (please, note that isb also used in MPU related code which is fine because MPU has dependency on CPU_V7/CPU_V7M), instead use more robust instr_sync assembler macro. Fixes: 3c24121039c9 ("ARM: 8756/1: NOMMU: Postpone MPU activation till __after_proc_init") Reported-by: Greg Ungerer Tested-by: Greg Ungerer Signed-off-by: Vladimir Murzin Signed-off-by: Russell King --- arch/arm/kernel/head-nommu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index dd546d65a383..7a9b86978ee1 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -177,7 +177,7 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1]) bic r0, r0, #CR_I #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg - isb + instr_sync #elif defined (CONFIG_CPU_V7M) #ifdef CONFIG_ARM_MPU ldreq r3, [r12, MPU_CTRL] From e296701800f30d260a66f8aa1971b5b1bc3d2f81 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 11 Jul 2018 11:02:35 +0200 Subject: [PATCH 334/382] efi/x86: Fix mixed mode reboot loop by removing pointless call to PciIo->Attributes() Hans de Goede reported that his mixed EFI mode Bay Trail tablet would not boot at all any more, but enter a reboot loop without any logs printed by the kernel. Unbreak 64-bit Linux/x86 on 32-bit UEFI: When it was first introduced, the EFI stub code that copies the contents of PCI option ROMs originally only intended to do so if the EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM attribute was *not* set. The reason was that the UEFI spec permits PCI option ROM images to be provided by the platform directly, rather than via the ROM BAR, and in this case, the OS can only access them at runtime if they are preserved at boot time by copying them from the areas described by PciIo->RomImage and PciIo->RomSize. However, it implemented this check erroneously, as can be seen in commit: dd5fc854de5fd ("EFI: Stash ROMs if they're not in the PCI BAR") which introduced: if (!attributes & EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM) continue; and given that the numeric value of EFI_PCI_IO_ATTRIBUTE_EMBEDDED_ROM is 0x4000, this condition never becomes true, and so the option ROMs were copied unconditionally. This was spotted and 'fixed' by commit: 886d751a2ea99a160 ("x86, efi: correct precedence of operators in setup_efi_pci") but inadvertently inverted the logic at the same time, defeating the purpose of the code, since it now only preserves option ROM images that can be read from the ROM BAR as well. Unsurprisingly, this broke some systems, and so the check was removed entirely in the following commit: 739701888f5d ("x86, efi: remove attribute check from setup_efi_pci") It is debatable whether this check should have been included in the first place, since the option ROM image provided to the UEFI driver by the firmware may be different from the one that is actually present in the card's flash ROM, and so whatever PciIo->RomImage points at should be preferred regardless of whether the attribute is set. As this was the only use of the attributes field, we can remove the call to PciIo->Attributes() entirely, which is especially nice because its prototype involves uint64_t type by-value arguments which the EFI mixed mode has trouble dealing with. Any mixed mode system with PCI is likely to be affected. Tested-by: Wilfried Klaebe Tested-by: Hans de Goede Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180711090235.9327-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index e57665b4ba1c..e98522ea6f09 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -114,18 +114,12 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) struct pci_setup_rom *rom = NULL; efi_status_t status; unsigned long size; - uint64_t attributes, romsize; + uint64_t romsize; void *romimage; - status = efi_call_proto(efi_pci_io_protocol, attributes, pci, - EfiPciIoAttributeOperationGet, 0ULL, - &attributes); - if (status != EFI_SUCCESS) - return status; - /* - * Some firmware images contain EFI function pointers at the place where the - * romimage and romsize fields are supposed to be. Typically the EFI + * Some firmware images contain EFI function pointers at the place where + * the romimage and romsize fields are supposed to be. Typically the EFI * code is mapped at high addresses, translating to an unrealistically * large romsize. The UEFI spec limits the size of option ROMs to 16 * MiB so we reject any ROMs over 16 MiB in size to catch this. From 877cc639686b68c7de179a485544f4761e376b30 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 8 May 2018 21:27:43 +0000 Subject: [PATCH 335/382] perf tools: Generate a Python script compatible with Python 2 and 3 When generating a Python script with "perf script -g python", produce one that is compatible with Python 2 and 3. The difference between the two generated scripts is: --- python2-perf-script.py 2018-05-08 15:35:00.865889705 -0400 +++ python3-perf-script.py 2018-05-08 15:34:49.019789564 -0400 @@ -7,6 +7,8 @@ # be retrieved using Python functions of the form common_*(context). # See the perf-script-python Documentation for the list of available functions. +from __future__ import print_function + import os import sys @@ -18,10 +20,10 @@ def trace_begin(): - print "in trace_begin" + print("in trace_begin") def trace_end(): - print "in trace_end" + print("in trace_end") def raw_syscalls__sys_enter(event_name, context, common_cpu, common_secs, common_nsecs, common_pid, common_comm, @@ -29,26 +31,26 @@ print_header(event_name, common_cpu, common_secs, common_nsecs, common_pid, common_comm) - print "id=%d, args=%s" % \ - (id, args) + print("id=%d, args=%s" % \ + (id, args)) - print 'Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}' + print('Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}') for node in common_callchain: if 'sym' in node: - print "\t[%x] %s" % (node['ip'], node['sym']['name']) + print("\t[%x] %s" % (node['ip'], node['sym']['name'])) else: - print " [%x]" % (node['ip']) + print(" [%x]" % (node['ip'])) - print "\n" + print() def trace_unhandled(event_name, context, event_fields_dict, perf_sample_dict): - print get_dict_as_string(event_fields_dict) - print 'Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}' + print(get_dict_as_string(event_fields_dict)) + print('Sample: {'+get_dict_as_string(perf_sample_dict['sample'], ', ')+'}') def print_header(event_name, cpu, secs, nsecs, pid, comm): - print "%-20s %5u %05u.%09u %8u %-20s " % \ - (event_name, cpu, secs, nsecs, pid, comm), + print("%-20s %5u %05u.%09u %8u %-20s " % \ + (event_name, cpu, secs, nsecs, pid, comm), end="") def get_dict_as_string(a_dict, delimiter=' '): return delimiter.join(['%s=%s'%(k,str(v))for k,v in sorted(a_dict.items())]) Signed-off-by: Jeremy Cline Cc: Alexander Shishkin Cc: Herton Krzesinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/0100016341a7278a-d178c724-2b0f-49ca-be93-80a7d51aaa0d-000000@email.amazonses.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripting-engines/trace-event-python.c | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 46e9e19ab1ac..8b2eb6dbff4d 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1627,6 +1627,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "# See the perf-script-python Documentation for the list " "of available functions.\n\n"); + fprintf(ofp, "from __future__ import print_function\n\n"); fprintf(ofp, "import os\n"); fprintf(ofp, "import sys\n\n"); @@ -1636,10 +1637,10 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "from Core import *\n\n\n"); fprintf(ofp, "def trace_begin():\n"); - fprintf(ofp, "\tprint \"in trace_begin\"\n\n"); + fprintf(ofp, "\tprint(\"in trace_begin\")\n\n"); fprintf(ofp, "def trace_end():\n"); - fprintf(ofp, "\tprint \"in trace_end\"\n\n"); + fprintf(ofp, "\tprint(\"in trace_end\")\n\n"); while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "def %s__%s(", event->system, event->name); @@ -1675,7 +1676,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) "common_secs, common_nsecs,\n\t\t\t" "common_pid, common_comm)\n\n"); - fprintf(ofp, "\t\tprint \""); + fprintf(ofp, "\t\tprint(\""); not_first = 0; count = 0; @@ -1736,31 +1737,31 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "%s", f->name); } - fprintf(ofp, ")\n\n"); + fprintf(ofp, "))\n\n"); - fprintf(ofp, "\t\tprint 'Sample: {'+" - "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); + fprintf(ofp, "\t\tprint('Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n"); fprintf(ofp, "\t\tfor node in common_callchain:"); fprintf(ofp, "\n\t\t\tif 'sym' in node:"); - fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])"); + fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))"); fprintf(ofp, "\n\t\t\telse:"); - fprintf(ofp, "\n\t\t\t\tprint \"\t[%%x]\" %% (node['ip'])\n\n"); - fprintf(ofp, "\t\tprint \"\\n\"\n\n"); + fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n"); + fprintf(ofp, "\t\tprint()\n\n"); } fprintf(ofp, "def trace_unhandled(event_name, context, " "event_fields_dict, perf_sample_dict):\n"); - fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n"); - fprintf(ofp, "\t\tprint 'Sample: {'+" - "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); + fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n"); + fprintf(ofp, "\t\tprint('Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n"); fprintf(ofp, "def print_header(" "event_name, cpu, secs, nsecs, pid, comm):\n" - "\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" - "(event_name, cpu, secs, nsecs, pid, comm),\n\n"); + "\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" + "(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n"); fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n" "\treturn delimiter.join" From 770d2f86c0051d4f2c0ab9d74d68434cb383241d Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 8 May 2018 21:27:45 +0000 Subject: [PATCH 336/382] perf scripts python: Add Python 3 support to Core.py Support both Python 2 and Python 3 in Core.py. This should have no functional change. Signed-off-by: Jeremy Cline Cc: Alexander Shishkin Cc: Herton Krzesinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/0100016341a72ebe-e572899e-f445-4765-98f0-c314935727f9-000000@email.amazonses.com Signed-off-by: Arnaldo Carvalho de Melo --- .../Perf-Trace-Util/lib/Perf/Trace/Core.py | 38 ++++++++----------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py index 38dfb720fb6f..54ace2f6bc36 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py @@ -31,10 +31,8 @@ def flag_str(event_name, field_name, value): string = "" if flag_fields[event_name][field_name]: - print_delim = 0 - keys = flag_fields[event_name][field_name]['values'].keys() - keys.sort() - for idx in keys: + print_delim = 0 + for idx in sorted(flag_fields[event_name][field_name]['values']): if not value and not idx: string += flag_fields[event_name][field_name]['values'][idx] break @@ -51,14 +49,12 @@ def symbol_str(event_name, field_name, value): string = "" if symbolic_fields[event_name][field_name]: - keys = symbolic_fields[event_name][field_name]['values'].keys() - keys.sort() - for idx in keys: + for idx in sorted(symbolic_fields[event_name][field_name]['values']): if not value and not idx: - string = symbolic_fields[event_name][field_name]['values'][idx] + string = symbolic_fields[event_name][field_name]['values'][idx] break - if (value == idx): - string = symbolic_fields[event_name][field_name]['values'][idx] + if (value == idx): + string = symbolic_fields[event_name][field_name]['values'][idx] break return string @@ -74,19 +70,17 @@ def trace_flag_str(value): string = "" print_delim = 0 - keys = trace_flags.keys() + for idx in trace_flags: + if not value and not idx: + string += "NONE" + break - for idx in keys: - if not value and not idx: - string += "NONE" - break - - if idx and (value & idx) == idx: - if print_delim: - string += " | "; - string += trace_flags[idx] - print_delim = 1 - value &= ~idx + if idx and (value & idx) == idx: + if print_delim: + string += " | "; + string += trace_flags[idx] + print_delim = 1 + value &= ~idx return string From 2ab89262ff8895b8476b97345507c676fe3081fa Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 8 May 2018 21:27:45 +0000 Subject: [PATCH 337/382] perf scripts python: Add Python 3 support to SchedGui.py Fix a single syntax error in SchedGui.py to support both Python 2 and Python 3. This should have no functional change. Signed-off-by: Jeremy Cline Cc: Alexander Shishkin Cc: Herton Krzesinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/0100016341a72d26-75729663-fe55-4309-8c9b-302e065ed2f1-000000@email.amazonses.com Signed-off-by: Arnaldo Carvalho de Melo --- .../scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py index fdd92f699055..cac7b2542ee8 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py @@ -11,7 +11,7 @@ try: import wx except ImportError: - raise ImportError, "You need to install the wxpython lib for this script" + raise ImportError("You need to install the wxpython lib for this script") class RootFrame(wx.Frame): From c45b168effb12719f6dfc8d2c20ba8c057e8c16b Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 8 May 2018 21:27:46 +0000 Subject: [PATCH 338/382] perf scripts python: Add Python 3 support to Util.py Support both Python 2 and Python 3 in Util.py. The dict class no longer has a ``has_key`` method and print is now a function rather than a statement. This should have no functional change. Signed-off-by: Jeremy Cline Cc: Alexander Shishkin Cc: Herton Krzesinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/0100016341a730c6-8db8b9b1-da2d-4ee3-96bf-47e0ae9796bd-000000@email.amazonses.com Signed-off-by: Arnaldo Carvalho de Melo --- .../python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index f6c84966e4f8..7384dcb628c4 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -5,6 +5,7 @@ # This software may be distributed under the terms of the GNU General # Public License ("GPL") version 2 as published by the Free Software # Foundation. +from __future__ import print_function import errno, os @@ -33,7 +34,7 @@ def nsecs_str(nsecs): return str def add_stats(dict, key, value): - if not dict.has_key(key): + if key not in dict: dict[key] = (value, value, value, 1) else: min, max, avg, count = dict[key] @@ -72,10 +73,10 @@ try: except: if not audit_package_warned: audit_package_warned = True - print "Install the audit-libs-python package to get syscall names.\n" \ - "For example:\n # apt-get install python-audit (Ubuntu)" \ - "\n # yum install audit-libs-python (Fedora)" \ - "\n etc.\n" + print("Install the audit-libs-python package to get syscall names.\n" + "For example:\n # apt-get install python-audit (Ubuntu)" + "\n # yum install audit-libs-python (Fedora)" + "\n etc.\n") def syscall_name(id): try: From 8c1c1ab2d2a77cb50841822168e56d11c4ebfd6e Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 8 May 2018 21:27:47 +0000 Subject: [PATCH 339/382] perf scripts python: Add Python 3 support to sched-migration.py Support both Python 2 and Python 3 in the sched-migration.py script. This should have no functional change. Signed-off-by: Jeremy Cline Cc: Alexander Shishkin Cc: Herton Krzesinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/0100016341a737a5-44ec436f-3440-4cac-a03f-ddfa589bf308-000000@email.amazonses.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/sched-migration.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py index de66cb3b72c9..3473e7f66081 100644 --- a/tools/perf/scripts/python/sched-migration.py +++ b/tools/perf/scripts/python/sched-migration.py @@ -9,13 +9,17 @@ # This software is distributed under the terms of the GNU General # Public License ("GPL") version 2 as published by the Free Software # Foundation. - +from __future__ import print_function import os import sys from collections import defaultdict -from UserList import UserList +try: + from UserList import UserList +except ImportError: + # Python 3: UserList moved to the collections package + from collections import UserList sys.path.append(os.environ['PERF_EXEC_PATH'] + \ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') @@ -300,7 +304,7 @@ class TimeSliceList(UserList): if i == -1: return - for i in xrange(i, len(self.data)): + for i in range(i, len(self.data)): timeslice = self.data[i] if timeslice.start > end: return @@ -336,8 +340,8 @@ class SchedEventProxy: on_cpu_task = self.current_tsk[headers.cpu] if on_cpu_task != -1 and on_cpu_task != prev_pid: - print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \ - (headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid) + print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \ + headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid) threads[prev_pid] = prev_comm threads[next_pid] = next_comm From 12aa6c7389a321b4b5d921f89c3f83b9750598f7 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 8 May 2018 21:27:48 +0000 Subject: [PATCH 340/382] perf scripts python: Add Python 3 support to EventClass.py Support both Python 2 and Python 3 in EventClass.py. ``print`` is now a function rather than a statement. This should have no functional change. Signed-off-by: Jeremy Cline Cc: Alexander Shishkin Cc: Herton Krzesinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/0100016341a73aac-e0734bdc-dcab-4c61-8333-d8be97524aa0-000000@email.amazonses.com Signed-off-by: Arnaldo Carvalho de Melo --- .../python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py index 81a56cd2b3c1..21a7a1298094 100755 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -8,6 +8,7 @@ # PerfEvent is the base class for all perf event sample, PebsEvent # is a HW base Intel x86 PEBS event, and user could add more SW/HW # event classes based on requirements. +from __future__ import print_function import struct @@ -44,7 +45,8 @@ class PerfEvent(object): PerfEvent.event_num += 1 def show(self): - print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso) + print("PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % + (self.name, self.symbol, self.comm, self.dso)) # # Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer From 508ef3e737bbb9858a30e7bb153507429ea89644 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 29 Jun 2018 12:46:33 -0500 Subject: [PATCH 341/382] perf test shell: Replace '|&' with '2>&1 |' to work with more shells Since we do not specify bash (and/or zsh) as a requirement, use the standard error redirection that is more widely supported. BEFORE: $ sudo perf test -v 62 62: Check open filename arg using perf trace + vfs_getname: --- start --- test child forked, pid 27305 ./tests/shell/trace+probe_vfs_getname.sh: 20: ./tests/shell/trace+probe_vfs_getname.sh: Syntax error: "&" unexpected test child finished with -2 ---- end ---- Check open filename arg using perf trace + vfs_getname: Skip AFTER: $ sudo perf test -v 62 64: Check open filename arg using perf trace + vfs_getname : --- start --- test child forked, pid 23008 Added new event: probe:vfs_getname (on getname_flags:72 with pathname=result->name:string) You can now use it in all perf tools, such as: perf record -e probe:vfs_getname -aR sleep 1 0.361 ( 0.008 ms): touch/23032 openat(dfd: CWD, filename: /tmp/temporary_file.VEh0n, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: IRUGO|IWUGO) = 4 test child finished with 0 ---- end ---- Check open filename arg using perf trace + vfs_getname: Ok Similar to commit 35435cd06081, with the same title. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180629124633.0a9f4bea54b8d2c28f265de2@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 55ad9793d544..4ce276efe6b4 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,7 +17,7 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } From 98c6c8a1d00fba66625311730f80c6ceaa9c1d19 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 29 Jun 2018 12:46:43 -0500 Subject: [PATCH 342/382] perf test shell: Make perf's inet_pton test more portable Debian based systems such as Ubuntu have dash as their default shell. Even if the normal or root user's shell is bash, certain scripts still call /bin/sh, which points to dash, so we fix this perf test by rewriting it in a more portable way. BEFORE: $ sudo perf test -v 64 64: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 31942 ./tests/shell/record+probe_libc_inet_pton.sh: 18: ./tests/shell/record+probe_libc_inet_pton.sh: expected[0]=ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\): not found ./tests/shell/record+probe_libc_inet_pton.sh: 19: ./tests/shell/record+probe_libc_inet_pton.sh: expected[1]=.*inet_pton\+0x[[:xdigit:]]+[[:space:]]\(/lib/x86_64-linux-gnu/libc-2.27.so|inlined\)$: not found ./tests/shell/record+probe_libc_inet_pton.sh: 29: ./tests/shell/record+probe_libc_inet_pton.sh: expected[2]=getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\(/lib/x86_64-linux-gnu/libc-2.27.so\)$: not found ./tests/shell/record+probe_libc_inet_pton.sh: 30: ./tests/shell/record+probe_libc_inet_pton.sh: expected[3]=.*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$: not found ping 31963 [004] 83577.670613: probe_libc:inet_pton: (7fe15f87f4b0) ./tests/shell/record+probe_libc_inet_pton.sh: 39: ./tests/shell/record+probe_libc_inet_pton.sh: Bad substitution ./tests/shell/record+probe_libc_inet_pton.sh: 41: ./tests/shell/record+probe_libc_inet_pton.sh: Bad substitution test child finished with -2 ---- end ---- probe libc's inet_pton & backtrace it with ping: Skip AFTER: $ sudo perf test -v 64 64: probe libc's inet_pton & backtrace it with ping : --- start --- test child forked, pid 32277 ping 32295 [001] 83679.690020: probe_libc:inet_pton: (7ff244f504b0) 7ff244f504b0 __GI___inet_pton+0x0 (/lib/x86_64-linux-gnu/libc-2.27.so) 7ff244f14ce4 getaddrinfo+0x124 (/lib/x86_64-linux-gnu/libc-2.27.so) 556ac036b57d _init+0xb75 (/bin/ping) test child finished with 0 ---- end ---- probe libc's inet_pton & backtrace it with ping: Ok Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180629124643.2089b3ce59960eba34e87b27@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../shell/record+probe_libc_inet_pton.sh | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 263057039693..94e513e62b34 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -14,35 +14,40 @@ libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1 nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254 trace_libc_inet_pton_backtrace() { - idx=0 - expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" - expected[1]=".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" + + expected=`mktemp -u /tmp/expected.XXX` + + echo "ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" > $expected + echo ".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' - expected[2]="gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" - expected[3]="(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" - expected[4]="main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; *) eventattr='max-stack=3' - expected[2]="getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" - expected[3]=".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" + echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; esac - file=`mktemp -u /tmp/perf.data.XXX` + perf_data=`mktemp -u /tmp/perf.data.XXX` + perf_script=`mktemp -u /tmp/perf.script.XXX` + perf record -e probe_libc:inet_pton/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 + perf script -i $perf_data > $perf_script - perf record -e probe_libc:inet_pton/$eventattr/ -o $file ping -6 -c 1 ::1 > /dev/null 2>&1 - perf script -i $file | while read line ; do + exec 3<$perf_script + exec 4<$expected + while read line <&3 && read -r pattern <&4; do + [ -z "$pattern" ] && break echo $line - echo "$line" | egrep -q "${expected[$idx]}" + echo "$line" | egrep -q "$pattern" if [ $? -ne 0 ] ; then - printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line" + printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line" exit 1 fi - let idx+=1 - [ -z "${expected[$idx]}" ] && break done # If any statements are executed from this point onwards, @@ -58,6 +63,6 @@ skip_if_no_perf_probe && \ perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace err=$? -rm -f ${file} +rm -f ${perf_data} ${perf_script} ${expected} perf probe -q -d probe_libc:inet_pton exit $err From f6432b9f65001651412dbc3589d251534822d4ab Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 29 Jun 2018 12:46:52 -0500 Subject: [PATCH 343/382] perf llvm-utils: Remove bashism from kernel include fetch script Like system(), popen() calls /bin/sh, which may/may not be bash. Script when run on dash and encounters the line, yields: exit: Illegal number: -1 checkbashisms report on script content: possible bashism (exit|return with negative status code): exit -1 Remove the bashism and use the more portable non-zero failure status code 1. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180629124652.8d0af7e2281fd3fd8262cacc@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/llvm-utils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 976e658e38dc..5e94857dfca2 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -266,16 +266,16 @@ static const char *kinc_fetch_script = "#!/usr/bin/env sh\n" "if ! test -d \"$KBUILD_DIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "TMPDIR=`mktemp -d`\n" "if test -z \"$TMPDIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "cat << EOF > $TMPDIR/Makefile\n" "obj-y := dummy.o\n" From db8fec583f250557ddd6def1505a6c466c9747aa Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Fri, 29 Jun 2018 12:46:58 -0500 Subject: [PATCH 344/382] perf test shell: Prevent temporary editor files from being considered test scripts Allows a perf shell test developer to concurrently edit and run their test scripts, avoiding perf test attempts to execute their editor temporary files, such as seen here: $ sudo taskset -c 0 ./perf test -vvvvvvvv -F 63 63: 0VIM 8.0 : --- start --- sh: 1: ./tests/shell/.record+probe_libc_inet_pton.sh.swp: Permission denied ---- end ---- 0VIM 8.0: FAILED! Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180629124658.15a506b41fc4539c08eb9426@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 2bde505e2e7e..dd850a26d579 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -422,7 +422,7 @@ static const char *shell_test__description(char *description, size_t size, #define for_each_shell_test(dir, base, ent) \ while ((ent = readdir(dir)) != NULL) \ - if (!is_directory(base, ent)) + if (!is_directory(base, ent) && ent->d_name[0] != '.') static const char *shell_tests__dir(char *path, size_t size) { From a09603f851045b031e990d2d663958ccb49db525 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Jul 2018 15:42:01 +0200 Subject: [PATCH 345/382] perf tools: Fix compilation errors on gcc8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are getting following warnings on gcc8 that break compilation: $ make CC jvmti/jvmti_agent.o jvmti/jvmti_agent.c: In function ‘jvmti_open’: jvmti/jvmti_agent.c:252:35: error: ‘/jit-’ directive output may be truncated \ writing 5 bytes into a region of size between 1 and 4096 [-Werror=format-truncation=] snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); There's no point in checking the result of snprintf call in jvmti_open, the following open call will fail in case the name is mangled or too long. Using tools/lib/ function scnprintf that touches the return value from the snprintf() calls and thus get rid of those warnings. $ make DEBUG=1 CC arch/x86/util/perf_regs.o arch/x86/util/perf_regs.c: In function ‘arch_sdt_arg_parse_op’: arch/x86/util/perf_regs.c:229:4: error: ‘strncpy’ output truncated before terminating nul copying 2 bytes from a string of the same length [-Werror=stringop-truncation] strncpy(prefix, "+0", 2); ^~~~~~~~~~~~~~~~~~~~~~~~ Using scnprintf instead of the strncpy (which we know is safe in here) to get rid of that warning. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180702134202.17745-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/perf_regs.c | 2 +- tools/perf/jvmti/jvmti_agent.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 4b2caf6d48e7..fead6b3b4206 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -226,7 +226,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) else if (rm[2].rm_so != rm[2].rm_eo) prefix[0] = '+'; else - strncpy(prefix, "+0", 2); + scnprintf(prefix, sizeof(prefix), "+0"); } /* Rename register */ diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 0c6d1002b524..ac1bcdc17dae 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -35,6 +35,7 @@ #include #include /* for gettid() */ #include +#include #include "jvmti_agent.h" #include "../util/jitdump.h" @@ -249,7 +250,7 @@ void *jvmti_open(void) /* * jitdump file name */ - snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) From c818cc063089e78bc31845d1eb6a1f0bedee1eae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 Jul 2018 15:42:02 +0200 Subject: [PATCH 346/382] perf stat: Fix --interval_clear option Currently we display extra header line, like: # perf stat -I 1000 -a --interval-clear # time counts unit events insn per cycle branch-misses of all branches 2.964917103 3855.349912 cpu-clock (msec) # 3.855 CPUs utilized 2.964917103 23,993 context-switches # 0.006 M/sec 2.964917103 1,301 cpu-migrations # 0.329 K/sec ... Fixing the condition and getting proper: # perf stat -I 1000 -a --interval-clear # time counts unit events 2.359048938 1432.492228 cpu-clock (msec) # 1.432 CPUs utilized 2.359048938 7,613 context-switches # 0.002 M/sec 2.359048938 419 cpu-migrations # 0.133 K/sec ... Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 9660e08ee8cb ("perf stat: Add --interval-clear option") Link: http://lkml.kernel.org/r/20180702134202.17745-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 22547a490e1f..05be023c3f0e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1742,7 +1742,7 @@ static void print_interval(char *prefix, struct timespec *ts) } } - if ((num_print_interval == 0 && metric_only) || interval_clear) + if ((num_print_interval == 0 || interval_clear) && metric_only) print_metric_headers(" ", true); if (++num_print_interval == 25) num_print_interval = 0; From db0ba84c04ef2cf293aaada5ae97531127844d9d Mon Sep 17 00:00:00 2001 From: Janne Huttunen Date: Mon, 9 Jul 2018 13:59:50 +0300 Subject: [PATCH 347/382] perf script python: Fix dict reference counting The dictionaries are attached to the parameter tuple that steals the references and takes care of releasing them when appropriate. The code should not decrement the reference counts explicitly. E.g. if libpython has been built with reference debugging enabled, the superfluous DECREFs will trigger this error when running perf script: Fatal Python error: Objects/tupleobject.c:238 object at 0x7f10f2041b40 has negative ref count -1 Aborted (core dumped) If the reference debugging is not enabled, the superfluous DECREFs might cause the dict objects to be silently released while they are still in use. This may trigger various other assertions or just cause perf crashes and/or weird and unexpected data changes in the stored Python objects. Signed-off-by: Janne Huttunen Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jaroslav Skarvada Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1531133990-17485-1-git-send-email-janne.huttunen@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/scripting-engines/trace-event-python.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 8b2eb6dbff4d..bc32e57d17be 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -908,14 +908,11 @@ static void python_process_tracepoint(struct perf_sample *sample, if (_PyTuple_Resize(&t, n) == -1) Py_FatalError("error resizing Python tuple"); - if (!dict) { + if (!dict) call_object(handler, t, handler_name); - } else { + else call_object(handler, t, default_handler_name); - Py_DECREF(dict); - } - Py_XDECREF(all_entries_dict); Py_DECREF(t); } @@ -1235,7 +1232,6 @@ static void python_process_general_event(struct perf_sample *sample, call_object(handler, t, handler_name); - Py_DECREF(dict); Py_DECREF(t); } From 32aa928a7b817140c84987b726d5014911808fa4 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 10 Jul 2018 11:46:12 -0400 Subject: [PATCH 348/382] perf tools: Use python-config --includes rather than --cflags Builds started failing in Fedora on Python 3.7 with: `.gnu.debuglto_.debug_macro' referenced in section `.gnu.debuglto_.debug_macro' of util/scripting-engines/trace-event-python.o: defined in discarded section In Fedora, Python 3.7 added -flto to the list of --cflags and since it was only applied to util/scripting-engines/trace-event-python.c and scripts/python/Perf-Trace-Util/Context.c, linking failed. It's not the first time the addition of flags has broken builds: commit c6707fdef7e2 ("perf tools: Fix up build in hardnened environments") appears to have fixed a similar problem. "python-config --includes" provides the proper -I flags and doesn't introduce additional CFLAGS. Signed-off-by: Jeremy Cline Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180710154612.6285-1-jcline@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b5ac356ba323..f5a3b402589e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -207,8 +207,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) + PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif From 70dbcc2254fa2a9add74a122b9dac954c4736e01 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Wed, 11 Jul 2018 10:46:03 -0400 Subject: [PATCH 349/382] bsg: fix bogus EINVAL on non-data commands Fix a regression introduced in Linux kernel 4.17 where sending a SCSI command that does not transfer data (such as TEST UNIT READY) via /dev/bsg/* results in EINVAL. Fixes: 17cb960f29c2 ("bsg: split handling of SCSI CDBs vs transport requeues") Cc: # 4.17+ Reviewed-by: Christoph Hellwig Signed-off-by: Tony Battersby Signed-off-by: Jens Axboe --- block/bsg.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/bsg.c b/block/bsg.c index 66602c489956..3da540faf673 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -267,8 +267,6 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode) } else if (hdr->din_xfer_len) { ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr->din_xferp), hdr->din_xfer_len, GFP_KERNEL); - } else { - ret = blk_rq_map_user(q, rq, NULL, NULL, 0, GFP_KERNEL); } if (ret) From 2fd8eb4ad87104c54800ef3cea498c92eb15c78a Mon Sep 17 00:00:00 2001 From: Yandong Zhao Date: Wed, 11 Jul 2018 19:06:28 +0800 Subject: [PATCH 350/382] arm64: neon: Fix function may_use_simd() return error status It does not matter if the caller of may_use_simd() migrates to another cpu after the call, but it is still important that the kernel_neon_busy percpu instance that is read matches the cpu the task is running on at the time of the read. This means that raw_cpu_read() is not sufficient. kernel_neon_busy may appear true if the caller migrates during the execution of raw_cpu_read() and the next task to be scheduled in on the initial cpu calls kernel_neon_begin(). This patch replaces raw_cpu_read() with this_cpu_read() to protect against this race. Cc: Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON") Acked-by: Ard Biesheuvel Reviewed-by: Dave Martin Reviewed-by: Mark Rutland Signed-off-by: Yandong Zhao Signed-off-by: Will Deacon --- arch/arm64/include/asm/simd.h | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h index fa8b3fe932e6..6495cc51246f 100644 --- a/arch/arm64/include/asm/simd.h +++ b/arch/arm64/include/asm/simd.h @@ -29,20 +29,15 @@ DECLARE_PER_CPU(bool, kernel_neon_busy); static __must_check inline bool may_use_simd(void) { /* - * The raw_cpu_read() is racy if called with preemption enabled. - * This is not a bug: kernel_neon_busy is only set when - * preemption is disabled, so we cannot migrate to another CPU - * while it is set, nor can we migrate to a CPU where it is set. - * So, if we find it clear on some CPU then we're guaranteed to - * find it clear on any CPU we could migrate to. - * - * If we are in between kernel_neon_begin()...kernel_neon_end(), - * the flag will be set, but preemption is also disabled, so we - * can't migrate to another CPU and spuriously see it become - * false. + * kernel_neon_busy is only set while preemption is disabled, + * and is clear whenever preemption is enabled. Since + * this_cpu_read() is atomic w.r.t. preemption, kernel_neon_busy + * cannot change under our feet -- if it's set we cannot be + * migrated, and if it's clear we cannot be migrated to a CPU + * where it is set. */ return !in_irq() && !irqs_disabled() && !in_nmi() && - !raw_cpu_read(kernel_neon_busy); + !this_cpu_read(kernel_neon_busy); } #else /* ! CONFIG_KERNEL_MODE_NEON */ From ee6581ceba7f8314b81b2f2a81f1cf3f67c679e2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 Jul 2018 10:10:11 -0700 Subject: [PATCH 351/382] nfit: fix unchecked dereference in acpi_nfit_ctl Incremental patch to fix the unchecked dereference in acpi_nfit_ctl. Reported by Dan Carpenter: "acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always return a value" from Jun 28, 2018, leads to the following Smatch complaint: drivers/acpi/nfit/core.c:578 acpi_nfit_ctl() warn: variable dereferenced before check 'cmd_rc' (see line 411) drivers/acpi/nfit/core.c 410 411 *cmd_rc = -EINVAL; ^^^^^^^^^^^^^^^^^^ Patch adds unchecked dereference. Fixes: c1985cefd844 ("acpi/nfit: fix cmd_rc for acpi_nfit_ctl to always return a value") Signed-off-by: Dave Jiang --- drivers/acpi/nfit/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index b8040fed2a69..7c479002e798 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -408,7 +408,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, const guid_t *guid; int rc, i; - *cmd_rc = -EINVAL; + if (cmd_rc) + *cmd_rc = -EINVAL; func = cmd; if (cmd == ND_CMD_CALL) { call_pkg = buf; @@ -519,7 +520,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, * If we return an error (like elsewhere) then caller wouldn't * be able to rely upon data returned to make calculation. */ - *cmd_rc = 0; + if (cmd_rc) + *cmd_rc = 0; return 0; } From 498e8bf51c633cc4496343e6113f340f8e9301ae Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 6 Jul 2018 23:50:06 +0300 Subject: [PATCH 352/382] sample: vfio-mdev: avoid deadlock in mdev_access() mdev_access() calls mbochs_get_page() with mdev_state->ops_lock held, while mbochs_get_page() locks the mutex by itself. It leads to unavoidable deadlock. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Alex Williamson --- samples/vfio-mdev/mbochs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index d5d5a499160c..2535c3677c7b 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -178,6 +178,8 @@ static const char *vbe_name(u32 index) return "(invalid)"; } +static struct page *__mbochs_get_page(struct mdev_state *mdev_state, + pgoff_t pgoff); static struct page *mbochs_get_page(struct mdev_state *mdev_state, pgoff_t pgoff); @@ -394,7 +396,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) { pos -= MBOCHS_MMIO_BAR_OFFSET; poff = pos & ~PAGE_MASK; - pg = mbochs_get_page(mdev_state, pos >> PAGE_SHIFT); + pg = __mbochs_get_page(mdev_state, pos >> PAGE_SHIFT); map = kmap(pg); if (is_write) memcpy(map + poff, buf, count); From 0fc8c3581dd42bc8f530314ca86db2d861485731 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 9 Jul 2018 16:19:06 +0200 Subject: [PATCH 353/382] tracing/kprobe: Release kprobe print_fmt properly We don't release tk->tp.call.print_fmt when destroying local uprobe. Also there's missing print_fmt kfree in create_local_trace_kprobe error path. Link: http://lkml.kernel.org/r/20180709141906.2390-1-jolsa@kernel.org Cc: stable@vger.kernel.org Fixes: e12f03d7031a ("perf/core: Implement the 'perf_kprobe' PMU") Acked-by: Song Liu Acked-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_kprobe.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index daa81571b22a..21f718472942 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1480,8 +1480,10 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs, } ret = __register_trace_kprobe(tk); - if (ret < 0) + if (ret < 0) { + kfree(tk->tp.call.print_fmt); goto error; + } return &tk->tp.call; error: @@ -1501,6 +1503,8 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call) } __unregister_trace_kprobe(tk); + + kfree(tk->tp.call.print_fmt); free_trace_kprobe(tk); } #endif /* CONFIG_PERF_EVENTS */ From d63c46734c545ad0488761059004a65c46efdde3 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 10 Jul 2018 11:56:50 +0300 Subject: [PATCH 354/382] RDMA/mlx5: Fix memory leak in mlx5_ib_create_srq() error path Fix memory leak in the error path of mlx5_ib_create_srq() by making sure to free the allocated srq. Fixes: c2b37f76485f ("IB/mlx5: Fix integer overflows in mlx5_ib_create_srq") Signed-off-by: Kamal Heib Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/srq.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 0af7b7905550..f5de5adc9b1a 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -266,18 +266,24 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, desc_size = sizeof(struct mlx5_wqe_srq_next_seg) + srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg); - if (desc_size == 0 || srq->msrq.max_gs > desc_size) - return ERR_PTR(-EINVAL); + if (desc_size == 0 || srq->msrq.max_gs > desc_size) { + err = -EINVAL; + goto err_srq; + } desc_size = roundup_pow_of_two(desc_size); desc_size = max_t(size_t, 32, desc_size); - if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) - return ERR_PTR(-EINVAL); + if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg)) { + err = -EINVAL; + goto err_srq; + } srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) / sizeof(struct mlx5_wqe_data_seg); srq->msrq.wqe_shift = ilog2(desc_size); buf_size = srq->msrq.max * desc_size; - if (buf_size < desc_size) - return ERR_PTR(-EINVAL); + if (buf_size < desc_size) { + err = -EINVAL; + goto err_srq; + } in.type = init_attr->srq_type; if (pd->uobject) From b4c7e2bd2eb4764afe3af9409ff3b1b87116fa30 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 10 Jul 2018 08:22:40 +0100 Subject: [PATCH 355/382] ARM: 8780/1: ftrace: Only set kernel memory back to read-only after boot Dynamic ftrace requires modifying the code segments that are usually set to read-only. To do this, a per arch function is called both before and after the ftrace modifications are performed. The "before" function will set kernel code text to read-write to allow for ftrace to make the modifications, and the "after" function will set the kernel code text back to "read-only" to keep the kernel code text protected. The issue happens when dynamic ftrace is tested at boot up. The test is done before the kernel code text has been set to read-only. But the "before" and "after" calls are still performed. The "after" call will change the kernel code text to read-only prematurely, and other boot code that expects this code to be read-write will fail. The solution is to add a variable that is set when the kernel code text is expected to be converted to read-only, and make the ftrace "before" and "after" calls do nothing if that variable is not yet set. This is similar to the x86 solution from commit 162396309745 ("ftrace, x86: make kernel text writable only for conversions"). Link: http://lkml.kernel.org/r/20180620212906.24b7b66e@vmware.local.home Reported-by: Stefan Agner Tested-by: Stefan Agner Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Russell King --- arch/arm/mm/init.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c186474422f3..0cc8e04295a4 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -736,20 +736,29 @@ static int __mark_rodata_ro(void *unused) return 0; } +static int kernel_set_to_readonly __read_mostly; + void mark_rodata_ro(void) { + kernel_set_to_readonly = 1; stop_machine(__mark_rodata_ro, NULL, NULL); debug_checkwx(); } void set_kernel_text_rw(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false, current->active_mm); } void set_kernel_text_ro(void) { + if (!kernel_set_to_readonly) + return; + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true, current->active_mm); } From dad59262b79b0588cfeeda19fd88307e3e91c0ea Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Wed, 11 Jul 2018 18:09:45 -0400 Subject: [PATCH 356/382] ALSA: hda/ca0132: Add Recon3Di quirk for Gigabyte G1.Sniper Z97 These motherboards have Sound Core3D and apparently "support" Recon3Di. Added to the quirk list as QUIRK_R3DI. Issue report, PCI Subsystem ID, and testing by a contributor on IRC who wished to remain anonymous. Signed-off-by: Alastair Bridgewater Reviewed-by: Connor McAdams Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 4ff5320378e2..f5203f681b6e 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1048,6 +1048,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1102, 0x0010, "Sound Blaster Z", QUIRK_SBZ), SND_PCI_QUIRK(0x1102, 0x0023, "Sound Blaster Z", QUIRK_SBZ), SND_PCI_QUIRK(0x1458, 0xA016, "Recon3Di", QUIRK_R3DI), + SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI), SND_PCI_QUIRK(0x1458, 0xA036, "Recon3Di", QUIRK_R3DI), {} }; From c5a59d2477abf04e1b77152bef49383fd212da8d Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Wed, 11 Jul 2018 18:09:46 -0400 Subject: [PATCH 357/382] ALSA: hda/ca0132: Update a pci quirk device name The PCI subsystem in question for this quirk rule has been identified as a Gigabyte GA-Z170X-Gaming 7 motherboard. Set the device name appropriately. Signed-off-by: Alastair Bridgewater Reviewed-by: Connor McAdams Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index f5203f681b6e..321e95c409c1 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1049,7 +1049,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1102, 0x0023, "Sound Blaster Z", QUIRK_SBZ), SND_PCI_QUIRK(0x1458, 0xA016, "Recon3Di", QUIRK_R3DI), SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI), - SND_PCI_QUIRK(0x1458, 0xA036, "Recon3Di", QUIRK_R3DI), + SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI), {} }; From e69b5d308da72cbf4e7911c3979f9a46d28532af Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 2 Jul 2018 12:00:18 +0200 Subject: [PATCH 358/382] xen: remove global bit from __default_kernel_pte_mask for pv guests When removing the global bit from __supported_pte_mask do the same for __default_kernel_pte_mask in order to avoid the WARN_ONCE() in check_pgprot() when setting a kernel pte before having called init_mem_mapping(). Cc: # 4.17 Reported-by: Michael Young Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 8d4e2e1ae60b..4816b6f82a9a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1229,6 +1229,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Prevent unwanted bits from being set in PTEs. */ __supported_pte_mask &= ~_PAGE_GLOBAL; + __default_kernel_pte_mask &= ~_PAGE_GLOBAL; /* * Prevent page tables from being allocated in highmem, even From 2f8b5b21830aea95989a6e67d8a971297272a086 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 10 Jul 2018 14:47:25 -0500 Subject: [PATCH 359/382] ARM: DRA7/OMAP5: Enable ACTLR[0] (Enable invalidates of BTB) for secondary cores Call secure services to enable ACTLR[0] (Enable invalidates of BTB with ICIALLU) when branch hardening is enabled for kernel. On GP devices OMAP5/DRA7, there is no possibility to update secure side since "secure world" is ROM and there are no override mechanisms possible. On HS devices, appropriate PPA should do the workarounds as well. However, the configuration is only done for secondary core, since it is expected that firmware/bootloader will have enabled the required configuration for the primary boot core (note: bootloaders typically will NOT enable secondary processors, since it has no need to do so). Signed-off-by: Nishanth Menon Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/omap-smp.c | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 69df3620eca5..1c73694c871a 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -109,6 +109,45 @@ void omap5_erratum_workaround_801819(void) static inline void omap5_erratum_workaround_801819(void) { } #endif +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +/* + * Configure ACR and enable ACTLR[0] (Enable invalidates of BTB with + * ICIALLU) to activate the workaround for secondary Core. + * NOTE: it is assumed that the primary core's configuration is done + * by the boot loader (kernel will detect a misconfiguration and complain + * if this is not done). + * + * In General Purpose(GP) devices, ACR bit settings can only be done + * by ROM code in "secure world" using the smc call and there is no + * option to update the "firmware" on such devices. This also works for + * High security(HS) devices, as a backup option in case the + * "update" is not done in the "security firmware". + */ +static void omap5_secondary_harden_predictor(void) +{ + u32 acr, acr_mask; + + asm volatile ("mrc p15, 0, %0, c1, c0, 1" : "=r" (acr)); + + /* + * ACTLR[0] (Enable invalidates of BTB with ICIALLU) + */ + acr_mask = BIT(0); + + /* Do we already have it done.. if yes, skip expensive smc */ + if ((acr & acr_mask) == acr_mask) + return; + + acr |= acr_mask; + omap_smc1(OMAP5_DRA7_MON_SET_ACR_INDEX, acr); + + pr_debug("%s: ARM ACR setup for CVE_2017_5715 applied on CPU%d\n", + __func__, smp_processor_id()); +} +#else +static inline void omap5_secondary_harden_predictor(void) { } +#endif + static void omap4_secondary_init(unsigned int cpu) { /* @@ -131,6 +170,8 @@ static void omap4_secondary_init(unsigned int cpu) set_cntfreq(); /* Configure ACR to disable streaming WA for 801819 */ omap5_erratum_workaround_801819(); + /* Enable ACR to allow for ICUALLU workaround */ + omap5_secondary_harden_predictor(); } /* From 923847413f7316b5ced3491769b3fefa6c56a79a Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Wed, 11 Jul 2018 12:54:54 -0500 Subject: [PATCH 360/382] ARM: dts: am3517.dtsi: Disable reference to OMAP3 OTG controller The AM3517 has a different OTG controller location than the OMAP3, which is included from omap3.dtsi. This results in a hwmod error. Since the AM3517 has a different OTG controller address, this patch disabes one that is isn't available. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi index 4b6062b631b1..23ea381d363f 100644 --- a/arch/arm/boot/dts/am3517.dtsi +++ b/arch/arm/boot/dts/am3517.dtsi @@ -91,6 +91,11 @@ hecc: can@5c050000 { }; }; +/* Table Table 5-79 of the TRM shows 480ab000 is reserved */ +&usb_otg_hs { + status = "disabled"; +}; + &iva { status = "disabled"; }; From 9feeb638cde083c737e295c0547f1b4f28e99583 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Tue, 5 Jun 2018 19:00:22 +0200 Subject: [PATCH 361/382] tools build: fix # escaping in .cmd files for future Make In 2016 GNU Make made a backwards incompatible change to the way '#' characters were handled in Makefiles when used inside functions or macros: http://git.savannah.gnu.org/cgit/make.git/commit/?id=c6966b323811c37acedff05b57 Due to this change, when attempting to run `make prepare' I get a spurious make syntax error: /home/earnest/linux/tools/objtool/.fixdep.o.cmd:1: *** missing separator. Stop. When inspecting `.fixdep.o.cmd' it includes two lines which use unescaped comment characters at the top: \# cannot find fixdep (/home/earnest/linux/tools/objtool//fixdep) \# using basic dep data This is because `tools/build/Build.include' prints these '\#' characters: printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ This completes commit 9564a8cf422d ("Kbuild: fix # escaping in .cmd files for future Make"). Link: https://bugzilla.kernel.org/show_bug.cgi?id=197847 Cc: Randy Dunlap Cc: Rasmus Villemoes Cc: stable@vger.kernel.org Signed-off-by: Paul Menzel Signed-off-by: Masahiro Yamada --- tools/build/Build.include | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/build/Build.include b/tools/build/Build.include index a4bbb984941d..d9048f145f97 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -63,8 +63,8 @@ dep-cmd = $(if $(wildcard $(fixdep)), $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \ rm -f $(depfile); \ mv -f $(dot-target).tmp $(dot-target).cmd, \ - printf '\# cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ - printf '\# using basic dep data\n\n' >> $(dot-target).cmd; \ + printf '$(pound) cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \ + printf '$(pound) using basic dep data\n\n' >> $(dot-target).cmd; \ cat $(depfile) >> $(dot-target).cmd; \ printf '\n%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) From 6fdbd824fd7a3876aac43d32fdf1f30b9ef72ce4 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 9 Jul 2018 17:45:56 -0700 Subject: [PATCH 362/382] tools: build: Fixup host c flags Commit 0c3b7e42616f ("tools build: Add support for host programs format") introduced host_c_flags which referenced CHOSTFLAGS. The actual name of the variable is HOSTCFLAGS. Fix this up. Fixes: 0c3b7e42616f ("tools build: Add support for host programs format") Signed-off-by: Laura Abbott Acked-by: Jiri Olsa Signed-off-by: Masahiro Yamada --- tools/build/Build.include | 2 +- tools/perf/pmu-events/Build | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/build/Build.include b/tools/build/Build.include index d9048f145f97..950c1504ca37 100644 --- a/tools/build/Build.include +++ b/tools/build/Build.include @@ -98,4 +98,4 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX ### ## HOSTCC C flags -host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CHOSTFLAGS) -D"BUILD_STR(s)=\#s" $(CHOSTFLAGS_$(basetarget).o) $(CHOSTFLAGS_$(obj)) +host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj)) diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 17783913d330..215ba30b8534 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,7 +1,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o -CHOSTFLAGS_jevents.o = -I$(srctree)/tools/include +HOSTCFLAGS_jevents.o = -I$(srctree)/tools/include pmu-events-y += pmu-events.o JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ From 8b247a92ebd0cda7dec49a6f771d9c4950f3d3ad Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 9 Jul 2018 17:45:57 -0700 Subject: [PATCH 363/382] tools: build: Use HOSTLDFLAGS with fixdep The final link of fixdep uses LDFLAGS but not the existing HOSTLDFLAGS. Fix this. Signed-off-by: Laura Abbott Acked-by: Jiri Olsa Signed-off-by: Masahiro Yamada --- tools/build/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/build/Makefile b/tools/build/Makefile index 5eb4b5ad79cb..5edf65e684ab 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -43,7 +43,7 @@ $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep $(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o - $(QUIET_LINK)$(HOSTCC) $(LDFLAGS) -o $@ $< + $(QUIET_LINK)$(HOSTCC) $(HOSTLDFLAGS) -o $@ $< FORCE: From e23ba825db245724fec08d7285bc0272a57d38d4 Mon Sep 17 00:00:00 2001 From: Constantine Shulyupin Date: Wed, 11 Jul 2018 21:36:42 +0300 Subject: [PATCH 364/382] scripts/tags.sh: add __ro_after_init Signed-off-by: Constantine Shulyupin Signed-off-by: Masahiro Yamada --- scripts/tags.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tags.sh b/scripts/tags.sh index 66f08bb1cce9..412a70cce558 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -245,7 +245,7 @@ exuberant() { setup_regex exuberant asm c all_target_sources | xargs $1 -a \ - -I __initdata,__exitdata,__initconst, \ + -I __initdata,__exitdata,__initconst,__ro_after_init \ -I __initdata_memblock \ -I __refdata,__attribute,__maybe_unused,__always_unused \ -I __acquires,__releases,__deprecated \ From 6d79a7b424a5630a6fcab31fd7c38af4ea9c9a0f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Jul 2018 19:38:36 +0900 Subject: [PATCH 365/382] kbuild: suppress warnings from 'getconf LFS_*' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suppress warnings for systems that do not recognize LFS_*. getconf: no such configuration parameter `LFS_CFLAGS' getconf: no such configuration parameter `LFS_LDFLAGS' getconf: no such configuration parameter `LFS_LIBS' Fixes: d7f14c66c273 ("kbuild: Enable Large File Support for hostprogs") Reported-by: Chen Feng Signed-off-by: Masahiro Yamada Acked-by: Uwe Kleine-König --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 5b26847909ec..4a457302e3d4 100644 --- a/Makefile +++ b/Makefile @@ -353,9 +353,9 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ else if [ -x /bin/bash ]; then echo /bin/bash; \ else echo sh; fi ; fi) -HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS) -HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS) -HOST_LFS_LIBS := $(shell getconf LFS_LIBS) +HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS 2>/dev/null) +HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS 2>/dev/null) +HOST_LFS_LIBS := $(shell getconf LFS_LIBS 2>/dev/null) HOSTCC = gcc HOSTCXX = g++ From abe41184abac487264a4904bfcff2d5500dccce6 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 10 Jul 2018 23:42:15 +0200 Subject: [PATCH 366/382] i2c: recovery: if possible send STOP with recovery pulses I2C clients may misunderstand recovery pulses if they can't read SDA to bail out early. In the worst case, as a write operation. To avoid that and if we can write SDA, try to send STOP to avoid the misinterpretation. Signed-off-by: Wolfram Sang Reviewed-by: Peter Rosin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core-base.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 31d16ada6e7d..301285c54603 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -198,7 +198,16 @@ int i2c_generic_scl_recovery(struct i2c_adapter *adap) val = !val; bri->set_scl(adap, val); - ndelay(RECOVERY_NDELAY); + + /* + * If we can set SDA, we will always create STOP here to ensure + * the additional pulses will do no harm. This is achieved by + * letting SDA follow SCL half a cycle later. + */ + ndelay(RECOVERY_NDELAY / 2); + if (bri->set_sda) + bri->set_sda(adap, val); + ndelay(RECOVERY_NDELAY / 2); } /* check if recovery actually succeeded */ From f8494fa3dd10b52eab47a9666a8bc34719a129aa Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 25 Jun 2018 17:08:22 -0700 Subject: [PATCH 367/382] tracing: Reorder display of TGID to be after PID Currently ftrace displays data in trace output like so: _-----=> irqs-off / _----=> need-resched | / _---=> hardirq/softirq || / _--=> preempt-depth ||| / delay TASK-PID CPU TGID |||| TIMESTAMP FUNCTION | | | | |||| | | bash-1091 [000] ( 1091) d..2 28.313544: sched_switch: However Android's trace visualization tools expect a slightly different format due to an out-of-tree patch patch that was been carried for a decade, notice that the TGID and CPU fields are reversed: _-----=> irqs-off / _----=> need-resched | / _---=> hardirq/softirq || / _--=> preempt-depth ||| / delay TASK-PID TGID CPU |||| TIMESTAMP FUNCTION | | | | |||| | | bash-1091 ( 1091) [002] d..2 64.965177: sched_switch: From kernel v4.13 onwards, during which TGID was introduced, tracing with systrace on all Android kernels will break (most Android kernels have been on 4.9 with Android patches, so this issues hasn't been seen yet). From v4.13 onwards things will break. The chrome browser's tracing tools also embed the systrace viewer which uses the legacy TGID format and updates to that are known to be difficult to make. Considering this, I suggest we make this change to the upstream kernel and backport it to all Android kernels. I believe this feature is merged recently enough into the upstream kernel that it shouldn't be a problem. Also logically, IMO it makes more sense to group the TGID with the TASK-PID and the CPU after these. Link: http://lkml.kernel.org/r/20180626000822.113931-1-joel@joelfernandes.org Cc: jreck@google.com Cc: tkjos@google.com Cc: stable@vger.kernel.org Fixes: 441dae8f2f29 ("tracing: Add support for display of tgid in trace output") Signed-off-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace_output.c | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f054bd6a1c66..87cf25171fb8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3365,8 +3365,8 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m, print_event_info(buf, m); - seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); - seq_printf(m, "# | | | %s | |\n", tgid ? " | " : ""); + seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); + seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, @@ -3386,9 +3386,9 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file tgid ? tgid_space : space); seq_printf(m, "# %s||| / delay\n", tgid ? tgid_space : space); - seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", + seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : space); - seq_printf(m, "# | | | %s|||| | |\n", + seq_printf(m, "# | | %s | |||| | |\n", tgid ? " | " : space); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 90db994ac900..1c8e30fda46a 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -594,8 +594,7 @@ int trace_print_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d [%03d] ", - comm, entry->pid, iter->cpu); + trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { unsigned int tgid = trace_find_tgid(entry->pid); @@ -606,6 +605,8 @@ int trace_print_context(struct trace_iterator *iter) trace_seq_printf(s, "(%5d) ", tgid); } + trace_seq_printf(s, "[%03d] ", iter->cpu); + if (tr->trace_flags & TRACE_ITER_IRQ_INFO) trace_print_lat_fmt(s, entry); From 0ce0bba4e5e0eb9b753bb821785de5d23c494392 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 12 Jul 2018 17:40:34 +0200 Subject: [PATCH 368/382] xen: setup pv irq ops vector earlier Setting pv_irq_ops for Xen PV domains should be done as early as possible in order to support e.g. very early printk() usage. The same applies to xen_vcpu_info_reset(0), as it is needed for the pv irq ops. Move the call of xen_setup_machphys_mapping() after initializing the pv functions as it contains a WARN_ON(), too. Remove the no longer necessary conditional in xen_init_irq_ops() from PVH V1 times to make clear this is a PV only function. Cc: # 4.14 Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 24 +++++++++++------------- arch/x86/xen/irq.c | 4 +--- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 4816b6f82a9a..439a94bf89ad 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1207,12 +1207,20 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_setup_features(); - xen_setup_machphys_mapping(); - /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops.patch = paravirt_patch_default; pv_cpu_ops = xen_cpu_ops; + xen_init_irq_ops(); + + /* + * Setup xen_vcpu early because it is needed for + * local_irq_disable(), irqs_disabled(), e.g. in printk(). + * + * Don't do the full vcpu_info placement stuff until we have + * the cpu_possible_mask and a non-dummy shared_info. + */ + xen_vcpu_info_reset(0); x86_platform.get_nmi_reason = xen_get_nmi_reason; @@ -1225,6 +1233,7 @@ asmlinkage __visible void __init xen_start_kernel(void) * Set up some pagetable state before starting to set any ptes. */ + xen_setup_machphys_mapping(); xen_init_mmu_ops(); /* Prevent unwanted bits from being set in PTEs. */ @@ -1250,20 +1259,9 @@ asmlinkage __visible void __init xen_start_kernel(void) get_cpu_cap(&boot_cpu_data); x86_configure_nx(); - xen_init_irq_ops(); - /* Let's presume PV guests always boot on vCPU with id 0. */ per_cpu(xen_vcpu_id, 0) = 0; - /* - * Setup xen_vcpu early because idt_setup_early_handler needs it for - * local_irq_disable(), irqs_disabled(). - * - * Don't do the full vcpu_info placement stuff until we have - * the cpu_possible_mask and a non-dummy shared_info. - */ - xen_vcpu_info_reset(0); - idt_setup_early_handler(); xen_init_capabilities(); diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 74179852e46c..7515a19fd324 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -128,8 +128,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { void __init xen_init_irq_ops(void) { - /* For PVH we use default pv_irq_ops settings. */ - if (!xen_feature(XENFEAT_hvm_callback_vector)) - pv_irq_ops = xen_irq_ops; + pv_irq_ops = xen_irq_ops; x86_init.irqs.intr_init = xen_init_IRQ; } From fd6792bb022e43faa0c4a45b6f25285e21206f9d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 12 Jul 2018 12:22:44 +0200 Subject: [PATCH 369/382] rtc: fix alarm read and set offset The offset needs to be added after reading the alarm value. It also needs to be subtracted after the now < alarm test. Tested-by: Jon Hunter Signed-off-by: Alexandre Belloni --- drivers/rtc/interface.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 6d4012dd6922..bac1eeb3d312 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -265,8 +265,10 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) return err; /* full-function RTCs won't have such missing fields */ - if (rtc_valid_tm(&alarm->time) == 0) + if (rtc_valid_tm(&alarm->time) == 0) { + rtc_add_offset(rtc, &alarm->time); return 0; + } /* get the "after" timestamp, to detect wrapped fields */ err = rtc_read_time(rtc, &now); @@ -409,7 +411,6 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) if (err) return err; - rtc_subtract_offset(rtc, &alarm->time); scheduled = rtc_tm_to_time64(&alarm->time); /* Make sure we're not setting alarms in the past */ @@ -426,6 +427,8 @@ static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) * over right here, before we set the alarm. */ + rtc_subtract_offset(rtc, &alarm->time); + if (!rtc->ops) err = -ENODEV; else if (!rtc->ops->set_alarm) @@ -467,7 +470,6 @@ int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) mutex_unlock(&rtc->ops_lock); - rtc_add_offset(rtc, &alarm->time); return err; } EXPORT_SYMBOL_GPL(rtc_set_alarm); From e181ae0c5db9544de9c53239eb22bc012ce75033 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Sat, 14 Jul 2018 09:15:07 -0400 Subject: [PATCH 370/382] mm: zero unavailable pages before memmap init We must zero struct pages for memory that is not backed by physical memory, or kernel does not have access to. Recently, there was a change which zeroed all memmap for all holes in e820. Unfortunately, it introduced a bug that is discussed here: https://www.spinics.net/lists/linux-mm/msg156764.html Linus, also saw this bug on his machine, and confirmed that reverting commit 124049decbb1 ("x86/e820: put !E820_TYPE_RAM regions into memblock.reserved") fixes the issue. The problem is that we incorrectly zero some struct pages after they were setup. The fix is to zero unavailable struct pages prior to initializing of struct pages. A more detailed fix should come later that would avoid double zeroing cases: one in __init_single_page(), the other one in zero_resv_unavail(). Fixes: 124049decbb1 ("x86/e820: put !E820_TYPE_RAM regions into memblock.reserved") Signed-off-by: Pavel Tatashin Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1521100f1e63..5d800d61ddb7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6847,6 +6847,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); + zero_resv_unavail(); for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); free_area_init_node(nid, NULL, @@ -6857,7 +6858,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) node_set_state(nid, N_MEMORY); check_for_memory(pgdat, nid); } - zero_resv_unavail(); } static int __init cmdline_parse_core(char *p, unsigned long *core, @@ -7033,9 +7033,9 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) void __init free_area_init(unsigned long *zones_size) { + zero_resv_unavail(); free_area_init_node(0, zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL); - zero_resv_unavail(); } static int page_alloc_cpu_dead(unsigned int cpu) From bce73e4842390f7b7309c8e253e139db71288ac3 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 13 Jul 2018 16:58:52 -0700 Subject: [PATCH 371/382] mm: do not drop unused pages when userfaultd is running KVM guests on s390 can notify the host of unused pages. This can result in pte_unused callbacks to be true for KVM guest memory. If a page is unused (checked with pte_unused) we might drop this page instead of paging it. This can have side-effects on userfaultd, when the page in question was already migrated: The next access of that page will trigger a fault and a user fault instead of faulting in a new and empty zero page. As QEMU does not expect a userfault on an already migrated page this migration will fail. The most straightforward solution is to ignore the pte_unused hint if a userfault context is active for this VMA. Link: http://lkml.kernel.org/r/20180703171854.63981-1-borntraeger@de.ibm.com Signed-off-by: Christian Borntraeger Cc: Martin Schwidefsky Cc: Andrea Arcangeli Cc: Mike Rapoport Cc: Janosch Frank Cc: David Hildenbrand Cc: Cornelia Huck Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/rmap.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/rmap.c b/mm/rmap.c index 6db729dc4c50..eb477809a5c0 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -64,6 +64,7 @@ #include #include #include +#include #include @@ -1481,11 +1482,16 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, set_pte_at(mm, address, pvmw.pte, pteval); } - } else if (pte_unused(pteval)) { + } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan * will take care of the rest. + * A future reference will then fault in a new zero + * page. When userfaultfd is active, we must not drop + * this page though, as its main user (postcopy + * migration) will not expect userfaults on already + * copied pages. */ dec_mm_counter(mm, mm_counter(page)); /* We have to invalidate as we cleared the pte */ From e70cc2bd579e8a9d6d153762f0fe294d0e652ff0 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 13 Jul 2018 16:58:56 -0700 Subject: [PATCH 372/382] fs/proc/task_mmu.c: fix Locked field in /proc/pid/smaps* Thomas reports: "While looking around in /proc on my v4.14.52 system I noticed that all processes got a lot of "Locked" memory in /proc/*/smaps. A lot more memory than a regular user can usually lock with mlock(). Commit 493b0e9d945f (in v4.14-rc1) seems to have changed the behavior of "Locked". Before that commit the code was like this. Notice the VM_LOCKED check. (vma->vm_flags & VM_LOCKED) ? (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); After that commit Locked is now the same as Pss: (unsigned long)(mss->pss >> (10 + PSS_SHIFT))); This looks like a mistake." Indeed, the commit has added mss->pss_locked with the correct value that depends on VM_LOCKED, but forgot to actually use it. Fix it. Link: http://lkml.kernel.org/r/ebf6c7fb-fec3-6a26-544f-710ed193c154@suse.cz Fixes: 493b0e9d945f ("mm: add /proc/pid/smaps_rollup") Signed-off-by: Vlastimil Babka Reported-by: Thomas Lindroth Cc: Alexey Dobriyan Cc: Daniel Colascione Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e9679016271f..dfd73a4616ce 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -831,7 +831,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) SEQ_PUT_DEC(" kB\nSwap: ", mss->swap); SEQ_PUT_DEC(" kB\nSwapPss: ", mss->swap_pss >> PSS_SHIFT); - SEQ_PUT_DEC(" kB\nLocked: ", mss->pss >> PSS_SHIFT); + SEQ_PUT_DEC(" kB\nLocked: ", + mss->pss_locked >> PSS_SHIFT); seq_puts(m, " kB\n"); } if (!rollup_mode) { From 02f51d45937f7bc7f4dee21e9f85b2d5eac37104 Mon Sep 17 00:00:00 2001 From: Tomas Bortoli Date: Fri, 13 Jul 2018 16:58:59 -0700 Subject: [PATCH 373/382] autofs: fix slab out of bounds read in getname_kernel() The autofs subsystem does not check that the "path" parameter is present for all cases where it is required when it is passed in via the "param" struct. In particular it isn't checked for the AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ioctl command. To solve it, modify validate_dev_ioctl(function to check that a path has been provided for ioctl commands that require it. Link: http://lkml.kernel.org/r/153060031527.26631.18306637892746301555.stgit@pluto.themaw.net Signed-off-by: Tomas Bortoli Signed-off-by: Ian Kent Reported-by: syzbot+60c837b428dc84e83a93@syzkaller.appspotmail.com Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/autofs/dev-ioctl.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index ea4ca1445ab7..86eafda4a652 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -135,6 +135,15 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) cmd); goto out; } + } else { + unsigned int inr = _IOC_NR(cmd); + + if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD || + inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD || + inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) { + err = -EINVAL; + goto out; + } } err = 0; @@ -271,7 +280,8 @@ static int autofs_dev_ioctl_openmount(struct file *fp, dev_t devid; int err, fd; - /* param->path has already been checked */ + /* param->path has been checked in validate_dev_ioctl() */ + if (!param->openmount.devid) return -EINVAL; @@ -433,10 +443,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, dev_t devid; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ devid = sbi->sb->s_dev; @@ -521,10 +528,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, unsigned int devid, magic; int err = -ENOENT; - if (param->size <= AUTOFS_DEV_IOCTL_SIZE) { - err = -EINVAL; - goto out; - } + /* param->path has been checked in validate_dev_ioctl() */ name = param->path; type = param->ismountpoint.in.type; From a90744bac57c3c07d0d4422af62f3e44549ade30 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 13 Jul 2018 16:59:03 -0700 Subject: [PATCH 374/382] mm: allow arch to supply p??_free_tlb functions The mmu_gather APIs keep track of the invalidated address range including the span covered by invalidated page table pages. Ranges covered by page tables but not ptes (and therefore no TLBs) still need to be invalidated because some architectures (x86) can cache intermediate page table entries, and invalidate those with normal TLB invalidation instructions to be almost-backward-compatible. Architectures which don't cache intermediate page table entries, or which invalidate these caches separately from TLB invalidation, do not require TLB invalidation range expanded over page tables. Allow architectures to supply their own p??_free_tlb functions, which can avoid the __tlb_adjust_range. Link: http://lkml.kernel.org/r/20180703013131.2807-1-npiggin@gmail.com Signed-off-by: Nicholas Piggin Reviewed-by: Andrew Morton Cc: "Aneesh Kumar K. V" Cc: Minchan Kim Cc: Mel Gorman Cc: Nadav Amit Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/tlb.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index faddde44de8c..3063125197ad 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -265,33 +265,41 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, * For now w.r.t page table cache, mark the range_size as PAGE_SIZE */ +#ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pte_free_tlb(tlb, ptep, address); \ } while (0) +#endif +#ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) +#endif #ifndef __ARCH_HAS_4LEVEL_HACK +#ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif +#endif #ifndef __ARCH_HAS_5LEVEL_HACK +#ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif +#endif #define tlb_migrate_finish(mm) do {} while (0) From c290fba8c4ce6530cd941ea14db5a4ac2f77183f Mon Sep 17 00:00:00 2001 From: piaojun Date: Fri, 13 Jul 2018 16:59:06 -0700 Subject: [PATCH 375/382] net/9p/client.c: put refcount of trans_mod in error case in parse_opts() In my testing, the second mount will fail after umounting successfully. The reason is that we put refcount of trans_mod in the correct case rather than the error case in parse_opts() at last. That will cause the refcount decrease to -1, and when we try to get trans_mod again in try_module_get(), we could only increase refcount to 0 which will cause failure as follows: parse_opts v9fs_get_trans_by_name try_module_get : return NULL to caller which cause error So we should put refcount of trans_mod in error case. Link: http://lkml.kernel.org/r/5B3F39A0.2030509@huawei.com Fixes: 9421c3e64137ec ("net/9p/client.c: fix potential refcnt problem of trans module") Signed-off-by: Jun Piao Reviewed-by: Yiwen Jiang Reviewed-by: Greg Kurz Reviewed-by: Dominique Martinet Tested-by: Dominique Martinet Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/9p/client.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/9p/client.c b/net/9p/client.c index 18c5271910dc..5c1343195292 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -225,7 +225,8 @@ static int parse_opts(char *opts, struct p9_client *clnt) } free_and_return: - v9fs_put_trans(clnt->trans_mod); + if (ret) + v9fs_put_trans(clnt->trans_mod); kfree(tmp_options); return ret; } From fa8cbda88db12e632a8987c94b66f5caf25bcec4 Mon Sep 17 00:00:00 2001 From: Philipp Rudo Date: Fri, 13 Jul 2018 16:59:09 -0700 Subject: [PATCH 376/382] x86/purgatory: add missing FORCE to Makefile target - Build the kernel without the fix - Add some flag to the purgatories KBUILD_CFLAGS,I used -fno-asynchronous-unwind-tables - Re-build the kernel When you look at makes output you see that sha256.o is not re-build in the last step. Also readelf -S still shows the .eh_frame section for sha256.o. With the fix sha256.o is rebuilt in the last step. Without FORCE make does not detect changes only made to the command line options. So object files might not be re-built even when they should be. Fix this by adding FORCE where it is missing. Link: http://lkml.kernel.org/r/20180704110044.29279-2-prudo@linux.ibm.com Fixes: df6f2801f511 ("kernel/kexec_file.c: move purgatories sha256 to common code") Signed-off-by: Philipp Rudo Acked-by: Dave Young Cc: Ingo Molnar Cc: Thomas Gleixner Cc: [4.17+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 2e9ee023e6bc..81a8e33115ad 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string targets += $(purgatory-y) PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) -$(obj)/sha256.o: $(srctree)/lib/sha256.c +$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE $(call if_changed_rule,cc_o_c) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib From 24962af7e1041b7e50c1bc71d8d10dc678c556b5 Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Fri, 13 Jul 2018 16:59:13 -0700 Subject: [PATCH 377/382] fs, elf: make sure to page align bss in load_elf_library The current code does not make sure to page align bss before calling vm_brk(), and this can lead to a VM_BUG_ON() in __mm_populate() due to the requested lenght not being correctly aligned. Let us make sure to align it properly. Kees: only applicable to CONFIG_USELIB kernels: 32-bit and configured for libc5. Link: http://lkml.kernel.org/r/20180705145539.9627-1-osalvador@techadventures.net Signed-off-by: Oscar Salvador Reported-by: syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com Tested-by: Tetsuo Handa Acked-by: Kees Cook Cc: Michal Hocko Cc: Nicolas Pitre Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0ac456b52bdd..816cc921cf36 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1259,9 +1259,8 @@ static int load_elf_library(struct file *file) goto out_free_ph; } - len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + - ELF_MIN_ALIGN - 1); - bss = eppnt->p_memsz + eppnt->p_vaddr; + len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr); + bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr); if (bss > len) { error = vm_brk(len, bss - len); if (error) From e3d301cae0092062cbcd6b4e7ceebbab9d87e263 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 13 Jul 2018 16:59:16 -0700 Subject: [PATCH 378/382] mm/memblock.c: do not complain about top-down allocations for !MEMORY_HOTREMOVE Mike Rapoport is converting architectures from bootmem to nobootmem allocator. While doing so for m68k Geert has noticed that he gets a scary looking warning: WARNING: CPU: 0 PID: 0 at mm/memblock.c:230 memblock_find_in_range_node+0x11c/0x1be memblock: bottom-up allocation failed, memory hotunplug may be affected Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.18.0-rc3-atari-01343-gf2fb5f2e09a97a3c-dirty #7 Call Trace: __warn+0xa8/0xc2 kernel_pg_dir+0x0/0x1000 netdev_lower_get_next+0x2/0x22 warn_slowpath_fmt+0x2e/0x36 memblock_find_in_range_node+0x11c/0x1be memblock_find_in_range_node+0x11c/0x1be memblock_find_in_range_node+0x0/0x1be vprintk_func+0x66/0x6e memblock_virt_alloc_internal+0xd0/0x156 netdev_lower_get_next+0x2/0x22 netdev_lower_get_next+0x2/0x22 kernel_pg_dir+0x0/0x1000 memblock_virt_alloc_try_nid_nopanic+0x58/0x7a netdev_lower_get_next+0x2/0x22 kernel_pg_dir+0x0/0x1000 kernel_pg_dir+0x0/0x1000 EXPTBL+0x234/0x400 EXPTBL+0x234/0x400 alloc_node_mem_map+0x4a/0x66 netdev_lower_get_next+0x2/0x22 free_area_init_node+0xe2/0x29e EXPTBL+0x234/0x400 paging_init+0x430/0x462 kernel_pg_dir+0x0/0x1000 printk+0x0/0x1a EXPTBL+0x234/0x400 setup_arch+0x1b8/0x22c start_kernel+0x4a/0x40a _sinittext+0x344/0x9e8 The warning is basically saying that a top-down allocation can break memory hotremove because memblock allocation is not movable. But m68k doesn't even support MEMORY_HOTREMOVE so there is no point to warn about it. Make the warning conditional only to configurations that care. Link: http://lkml.kernel.org/r/20180706061750.GH32658@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Reviewed-by: Andrew Morton Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Greg Ungerer Cc: Sam Creasey Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 03d48d8835ba..11e46f83e1ad 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -227,7 +227,8 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, * so we use WARN_ONCE() here to see the stack trace if * fail happens. */ - WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n"); + WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE), + "memblock: bottom-up allocation failed, memory hotremove may be affected\n"); } return __memblock_find_range_top_down(start, end, size, align, nid, From bb177a732c4369bb58a1fe1df8f552b6f0f7db5f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 13 Jul 2018 16:59:20 -0700 Subject: [PATCH 379/382] mm: do not bug_on on incorrect length in __mm_populate() syzbot has noticed that a specially crafted library can easily hit VM_BUG_ON in __mm_populate kernel BUG at mm/gup.c:1242! invalid opcode: 0000 [#1] SMP CPU: 2 PID: 9667 Comm: a.out Not tainted 4.18.0-rc3 #644 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017 RIP: 0010:__mm_populate+0x1e2/0x1f0 Code: 55 d0 65 48 33 14 25 28 00 00 00 89 d8 75 21 48 83 c4 20 5b 41 5c 41 5d 41 5e 41 5f 5d c3 e8 75 18 f1 ff 0f 0b e8 6e 18 f1 ff <0f> 0b 31 db eb c9 e8 93 06 e0 ff 0f 1f 00 55 48 89 e5 53 48 89 fb Call Trace: vm_brk_flags+0xc3/0x100 vm_brk+0x1f/0x30 load_elf_library+0x281/0x2e0 __ia32_sys_uselib+0x170/0x1e0 do_fast_syscall_32+0xca/0x420 entry_SYSENTER_compat+0x70/0x7f The reason is that the length of the new brk is not page aligned when we try to populate the it. There is no reason to bug on that though. do_brk_flags already aligns the length properly so the mapping is expanded as it should. All we need is to tell mm_populate about it. Besides that there is absolutely no reason to to bug_on in the first place. The worst thing that could happen is that the last page wouldn't get populated and that is far from putting system into an inconsistent state. Fix the issue by moving the length sanitization code from do_brk_flags up to vm_brk_flags. The only other caller of do_brk_flags is brk syscall entry and it makes sure to provide the proper length so t here is no need for sanitation and so we can use do_brk_flags without it. Also remove the bogus BUG_ONs. [osalvador@techadventures.net: fix up vm_brk_flags s@request@len@] Link: http://lkml.kernel.org/r/20180706090217.GI32658@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: syzbot Tested-by: Tetsuo Handa Reviewed-by: Oscar Salvador Cc: Zi Yan Cc: "Aneesh Kumar K.V" Cc: Dan Williams Cc: "Kirill A. Shutemov" Cc: Michael S. Tsirkin Cc: Al Viro Cc: "Huang, Ying" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 2 -- mm/mmap.c | 29 ++++++++++++----------------- 2 files changed, 12 insertions(+), 19 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index b70d7ba7cc13..fc5f98069f4e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1238,8 +1238,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors) int locked = 0; long ret = 0; - VM_BUG_ON(start & ~PAGE_MASK); - VM_BUG_ON(len != PAGE_ALIGN(len)); end = start + len; for (nstart = start; nstart < end; nstart = nend) { diff --git a/mm/mmap.c b/mm/mmap.c index d1eb87ef4b1a..5801b5f0a634 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -186,8 +186,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf); - +static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, + struct list_head *uf); SYSCALL_DEFINE1(brk, unsigned long, brk) { unsigned long retval; @@ -245,7 +245,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto out; /* Ok, looks good - let it rip. */ - if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0) + if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0) goto out; set_brk: @@ -2929,21 +2929,14 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf) +static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; - unsigned long len; struct rb_node **rb_link, *rb_parent; pgoff_t pgoff = addr >> PAGE_SHIFT; int error; - len = PAGE_ALIGN(request); - if (len < request) - return -ENOMEM; - if (!len) - return 0; - /* Until we need other flags, refuse anything except VM_EXEC. */ if ((flags & (~VM_EXEC)) != 0) return -EINVAL; @@ -3015,18 +3008,20 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long return 0; } -static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf) -{ - return do_brk_flags(addr, len, 0, uf); -} - -int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags) +int vm_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) { struct mm_struct *mm = current->mm; + unsigned long len; int ret; bool populate; LIST_HEAD(uf); + len = PAGE_ALIGN(request); + if (len < request) + return -ENOMEM; + if (!len) + return 0; + if (down_write_killable(&mm->mmap_sem)) return -EINTR; From ffe075132af8b7967089c361e506d4fa747efd14 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 13 Jul 2018 16:59:23 -0700 Subject: [PATCH 380/382] checkpatch: fix duplicate invalid vsprintf pointer extension '%p' messages Multiline statements with invalid %p uses produce multiple warnings. Fix that. e.g.: $ cat t_block.c void foo(void) { MY_DEBUG(drv->foo, "%pk", foo->boo); } $ ./scripts/checkpatch.pl -f t_block.c WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 #1: FILE: t_block.c:1: +void foo(void) WARNING: Invalid vsprintf pointer extension '%pk' #3: FILE: t_block.c:3: + MY_DEBUG(drv->foo, + "%pk", + foo->boo); WARNING: Invalid vsprintf pointer extension '%pk' #3: FILE: t_block.c:3: + MY_DEBUG(drv->foo, + "%pk", + foo->boo); total: 0 errors, 3 warnings, 6 lines checked NOTE: For some of the reported defects, checkpatch may be able to mechanically convert to the typical style using --fix or --fix-inplace. t_block.c has style problems, please review. NOTE: If any of the errors are false positives, please report them to the maintainer, see CHECKPATCH in MAINTAINERS. Link: http://lkml.kernel.org/r/9e8341bbe4c9877d159cb512bb701043cbfbb10b.camel@perches.com Signed-off-by: Joe Perches Cc: "Tobin C. Harding" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a9c05506e325..447857ffaf6b 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5813,14 +5813,14 @@ sub process { defined $stat && $stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s && $1 !~ /^_*volatile_*$/) { - my $specifier; - my $extension; - my $bad_specifier = ""; my $stat_real; my $lc = $stat =~ tr@\n@@; $lc = $lc + $linenr; for (my $count = $linenr; $count <= $lc; $count++) { + my $specifier; + my $extension; + my $bad_specifier = ""; my $fmt = get_quoted_string($lines[$count - 1], raw_line($count, 0)); $fmt =~ s/%%//g; From fe10e398e860955bac4d28ec031b701d358465e4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 13 Jul 2018 16:59:27 -0700 Subject: [PATCH 381/382] reiserfs: fix buffer overflow with long warning messages ReiserFS prepares log messages into a 1024-byte buffer with no bounds checks. Long messages, such as the "unknown mount option" warning when userspace passes a crafted mount options string, overflow this buffer. This causes KASAN to report a global-out-of-bounds write. Fix it by truncating messages to the buffer size. Link: http://lkml.kernel.org/r/20180707203621.30922-1-ebiggers3@gmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+b890b3335a4d8c608963@syzkaller.appspotmail.com Signed-off-by: Eric Biggers Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/prints.c | 141 +++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 60 deletions(-) diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index 7e288d97adcb..9fed1c05f1f4 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -76,83 +76,99 @@ static char *le_type(struct reiserfs_key *key) } /* %k */ -static void sprintf_le_key(char *buf, struct reiserfs_key *key) +static int scnprintf_le_key(char *buf, size_t size, struct reiserfs_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", le32_to_cpu(key->k_dir_id), - le32_to_cpu(key->k_objectid), le_offset(key), - le_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + le32_to_cpu(key->k_dir_id), + le32_to_cpu(key->k_objectid), le_offset(key), + le_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } /* %K */ -static void sprintf_cpu_key(char *buf, struct cpu_key *key) +static int scnprintf_cpu_key(char *buf, size_t size, struct cpu_key *key) { if (key) - sprintf(buf, "[%d %d %s %s]", key->on_disk_key.k_dir_id, - key->on_disk_key.k_objectid, reiserfs_cpu_offset(key), - cpu_type(key)); + return scnprintf(buf, size, "[%d %d %s %s]", + key->on_disk_key.k_dir_id, + key->on_disk_key.k_objectid, + reiserfs_cpu_offset(key), cpu_type(key)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_de_head(char *buf, struct reiserfs_de_head *deh) +static int scnprintf_de_head(char *buf, size_t size, + struct reiserfs_de_head *deh) { if (deh) - sprintf(buf, - "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", - deh_offset(deh), deh_dir_id(deh), deh_objectid(deh), - deh_location(deh), deh_state(deh)); + return scnprintf(buf, size, + "[offset=%d dir_id=%d objectid=%d location=%d state=%04x]", + deh_offset(deh), deh_dir_id(deh), + deh_objectid(deh), deh_location(deh), + deh_state(deh)); else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_item_head(char *buf, struct item_head *ih) +static int scnprintf_item_head(char *buf, size_t size, struct item_head *ih) { if (ih) { - strcpy(buf, - (ih_version(ih) == KEY_FORMAT_3_6) ? "*3.6* " : "*3.5*"); - sprintf_le_key(buf + strlen(buf), &(ih->ih_key)); - sprintf(buf + strlen(buf), ", item_len %d, item_location %d, " - "free_space(entry_count) %d", - ih_item_len(ih), ih_location(ih), ih_free_space(ih)); + char *p = buf; + char * const end = buf + size; + + p += scnprintf(p, end - p, "%s", + (ih_version(ih) == KEY_FORMAT_3_6) ? + "*3.6* " : "*3.5*"); + + p += scnprintf_le_key(p, end - p, &ih->ih_key); + + p += scnprintf(p, end - p, + ", item_len %d, item_location %d, free_space(entry_count) %d", + ih_item_len(ih), ih_location(ih), + ih_free_space(ih)); + return p - buf; } else - sprintf(buf, "[NULL]"); + return scnprintf(buf, size, "[NULL]"); } -static void sprintf_direntry(char *buf, struct reiserfs_dir_entry *de) +static int scnprintf_direntry(char *buf, size_t size, + struct reiserfs_dir_entry *de) { char name[20]; memcpy(name, de->de_name, de->de_namelen > 19 ? 19 : de->de_namelen); name[de->de_namelen > 19 ? 19 : de->de_namelen] = 0; - sprintf(buf, "\"%s\"==>[%d %d]", name, de->de_dir_id, de->de_objectid); + return scnprintf(buf, size, "\"%s\"==>[%d %d]", + name, de->de_dir_id, de->de_objectid); } -static void sprintf_block_head(char *buf, struct buffer_head *bh) +static int scnprintf_block_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, "level=%d, nr_items=%d, free_space=%d rdkey ", - B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); + return scnprintf(buf, size, + "level=%d, nr_items=%d, free_space=%d rdkey ", + B_LEVEL(bh), B_NR_ITEMS(bh), B_FREE_SPACE(bh)); } -static void sprintf_buffer_head(char *buf, struct buffer_head *bh) +static int scnprintf_buffer_head(char *buf, size_t size, struct buffer_head *bh) { - sprintf(buf, - "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", - bh->b_bdev, bh->b_size, - (unsigned long long)bh->b_blocknr, atomic_read(&(bh->b_count)), - bh->b_state, bh->b_page, - buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", - buffer_dirty(bh) ? "DIRTY" : "CLEAN", - buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); + return scnprintf(buf, size, + "dev %pg, size %zd, blocknr %llu, count %d, state 0x%lx, page %p, (%s, %s, %s)", + bh->b_bdev, bh->b_size, + (unsigned long long)bh->b_blocknr, + atomic_read(&(bh->b_count)), + bh->b_state, bh->b_page, + buffer_uptodate(bh) ? "UPTODATE" : "!UPTODATE", + buffer_dirty(bh) ? "DIRTY" : "CLEAN", + buffer_locked(bh) ? "LOCKED" : "UNLOCKED"); } -static void sprintf_disk_child(char *buf, struct disk_child *dc) +static int scnprintf_disk_child(char *buf, size_t size, struct disk_child *dc) { - sprintf(buf, "[dc_number=%d, dc_size=%u]", dc_block_number(dc), - dc_size(dc)); + return scnprintf(buf, size, "[dc_number=%d, dc_size=%u]", + dc_block_number(dc), dc_size(dc)); } static char *is_there_reiserfs_struct(char *fmt, int *what) @@ -189,55 +205,60 @@ static void prepare_error_buf(const char *fmt, va_list args) char *fmt1 = fmt_buf; char *k; char *p = error_buf; + char * const end = &error_buf[sizeof(error_buf)]; int what; spin_lock(&error_lock); - strcpy(fmt1, fmt); + if (WARN_ON(strscpy(fmt_buf, fmt, sizeof(fmt_buf)) < 0)) { + strscpy(error_buf, "format string too long", end - error_buf); + goto out_unlock; + } while ((k = is_there_reiserfs_struct(fmt1, &what)) != NULL) { *k = 0; - p += vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); switch (what) { case 'k': - sprintf_le_key(p, va_arg(args, struct reiserfs_key *)); + p += scnprintf_le_key(p, end - p, + va_arg(args, struct reiserfs_key *)); break; case 'K': - sprintf_cpu_key(p, va_arg(args, struct cpu_key *)); + p += scnprintf_cpu_key(p, end - p, + va_arg(args, struct cpu_key *)); break; case 'h': - sprintf_item_head(p, va_arg(args, struct item_head *)); + p += scnprintf_item_head(p, end - p, + va_arg(args, struct item_head *)); break; case 't': - sprintf_direntry(p, - va_arg(args, - struct reiserfs_dir_entry *)); + p += scnprintf_direntry(p, end - p, + va_arg(args, struct reiserfs_dir_entry *)); break; case 'y': - sprintf_disk_child(p, - va_arg(args, struct disk_child *)); + p += scnprintf_disk_child(p, end - p, + va_arg(args, struct disk_child *)); break; case 'z': - sprintf_block_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_block_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'b': - sprintf_buffer_head(p, - va_arg(args, struct buffer_head *)); + p += scnprintf_buffer_head(p, end - p, + va_arg(args, struct buffer_head *)); break; case 'a': - sprintf_de_head(p, - va_arg(args, - struct reiserfs_de_head *)); + p += scnprintf_de_head(p, end - p, + va_arg(args, struct reiserfs_de_head *)); break; } - p += strlen(p); fmt1 = k + 2; } - vsprintf(p, fmt1, args); + p += vscnprintf(p, end - p, fmt1, args); +out_unlock: spin_unlock(&error_lock); } From 9d3cce1e8b8561fed5f383d22a4d6949db4eadbe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Jul 2018 12:49:31 -0700 Subject: [PATCH 382/382] Linux 4.18-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2d80fbbe51a8..a89d8a0d3ee1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Merciless Moray # *DOCUMENTATION*