From e1465d125d2189e667029b9fa8a6f455180fbcf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Wed, 30 Nov 2016 15:54:02 -0800 Subject: [PATCH 1/7] mm, thp: propagation of conditional compilation in khugepaged.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b46e756f5e47 ("thp: extract khugepaged from mm/huge_memory.c") moved code from huge_memory.c to khugepaged.c. Some of this code should be compiled only when CONFIG_SYSFS is enabled but the condition around this code was not moved into khugepaged.c. The result is a compilation error when CONFIG_SYSFS is disabled: mm/built-in.o: In function `khugepaged_defrag_store': khugepaged.c:(.text+0x2d095): undefined reference to `single_hugepage_flag_store' mm/built-in.o: In function `khugepaged_defrag_show': khugepaged.c:(.text+0x2d0ab): undefined reference to `single_hugepage_flag_show' This commit adds the #ifdef CONFIG_SYSFS around the code related to sysfs. Link: http://lkml.kernel.org/r/20161114203448.24197-1-jeremy.lefaure@lse.epita.fr Signed-off-by: Jérémy Lefaure Acked-by: Kirill A. Shutemov Acked-by: Hillf Danton Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/khugepaged.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 728d7790dc2d..87e1a7ca3846 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -103,6 +103,7 @@ static struct khugepaged_scan khugepaged_scan = { .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head), }; +#ifdef CONFIG_SYSFS static ssize_t scan_sleep_millisecs_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -295,6 +296,7 @@ struct attribute_group khugepaged_attr_group = { .attrs = khugepaged_attr, .name = "khugepaged", }; +#endif /* CONFIG_SYSFS */ #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB) From 655548bf6271b212cd1e4c259da9dbe616348d38 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 30 Nov 2016 15:54:05 -0800 Subject: [PATCH 2/7] thp: fix corner case of munlock() of PTE-mapped THPs The following program triggers BUG() in munlock_vma_pages_range(): // autogenerated by syzkaller (http://github.com/google/syzkaller) #include int main() { mmap((void*)0x20105000ul, 0xc00000ul, 0x2ul, 0x2172ul, -1, 0); mremap((void*)0x201fd000ul, 0x4000ul, 0xc00000ul, 0x3ul, 0x203f0000ul); return 0; } The test-case constructs the situation when munlock_vma_pages_range() finds PTE-mapped THP-head in the middle of page table and, by mistake, skips HPAGE_PMD_NR pages after that. As result, on the next iteration it hits the middle of PMD-mapped THP and gets upset seeing mlocked tail page. The solution is only skip HPAGE_PMD_NR pages if the THP was mlocked during munlock_vma_page(). It would guarantee that the page is PMD-mapped as we never mlock PTE-mapeed THPs. Fixes: e90309c9f772 ("thp: allow mlocked THP again") Link: http://lkml.kernel.org/r/20161115132703.7s7rrgmwttegcdh4@black.fi.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Cc: Konstantin Khlebnikov Cc: Andrey Ryabinin Cc: syzkaller Cc: Andrea Arcangeli Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mlock.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 145a4258ddbc..cdbed8aaa426 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -190,10 +190,13 @@ unsigned int munlock_vma_page(struct page *page) */ spin_lock_irq(zone_lru_lock(zone)); - nr_pages = hpage_nr_pages(page); - if (!TestClearPageMlocked(page)) + if (!TestClearPageMlocked(page)) { + /* Potentially, PTE-mapped THP: do not skip the rest PTEs */ + nr_pages = 1; goto unlock_out; + } + nr_pages = hpage_nr_pages(page); __mod_zone_page_state(zone, NR_MLOCK, -nr_pages); if (__munlock_isolate_lru_page(page, true)) { From 529e71e16403830ae0d737a66c55c5f360f3576b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Nov 2016 15:54:08 -0800 Subject: [PATCH 3/7] zram: fix unbalanced idr management at hot removal The zram hot removal code calls idr_remove() even when zram_remove() returns an error (typically -EBUSY). This results in a leftover at the device release, eventually leading to a crash when the module is reloaded. As described in the bug report below, the following procedure would cause an Oops with zram: - provision three zram devices via modprobe zram num_devices=3 - configure a size for each device + echo "1G" > /sys/block/$zram_name/disksize - mkfs and mount zram0 only - attempt to hot remove all three devices + echo 2 > /sys/class/zram-control/hot_remove + echo 1 > /sys/class/zram-control/hot_remove + echo 0 > /sys/class/zram-control/hot_remove - zram0 removal fails with EBUSY, as expected - unmount zram0 - try zram0 hot remove again + echo 0 > /sys/class/zram-control/hot_remove - fails with ENODEV (unexpected) - unload zram kernel module + completes successfully - zram0 device node still exists - attempt to mount /dev/zram0 + mount command is killed + following BUG is encountered BUG: unable to handle kernel paging request at ffffffffa0002ba0 IP: get_disk+0x16/0x50 Oops: 0000 [#1] SMP CPU: 0 PID: 252 Comm: mount Not tainted 4.9.0-rc6 #176 Call Trace: exact_lock+0xc/0x20 kobj_lookup+0xdc/0x160 get_gendisk+0x2f/0x110 __blkdev_get+0x10c/0x3c0 blkdev_get+0x19d/0x2e0 blkdev_open+0x56/0x70 do_dentry_open.isra.19+0x1ff/0x310 vfs_open+0x43/0x60 path_openat+0x2c9/0xf30 do_filp_open+0x79/0xd0 do_sys_open+0x114/0x1e0 SyS_open+0x19/0x20 entry_SYSCALL_64_fastpath+0x13/0x94 This patch adds the proper error check in hot_remove_store() not to call idr_remove() unconditionally. Fixes: 17ec4cd98578 ("zram: don't call idr_remove() from zram_remove()") Bugzilla: https://bugzilla.opensuse.org/show_bug.cgi?id=1010970 Link: http://lkml.kernel.org/r/20161121132140.12683-1-tiwai@suse.de Signed-off-by: Takashi Iwai Reviewed-by: David Disseldorp Reported-by: David Disseldorp Tested-by: David Disseldorp Acked-by: Minchan Kim Acked-by: Sergey Senozhatsky Cc: [4.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 04365b17ee67..5163c8f918cb 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1403,7 +1403,8 @@ static ssize_t hot_remove_store(struct class *class, zram = idr_find(&zram_index_idr, dev_id); if (zram) { ret = zram_remove(zram); - idr_remove(&zram_index_idr, dev_id); + if (!ret) + idr_remove(&zram_index_idr, dev_id); } else { ret = -ENODEV; } From f8ff04e2be0815b34d11a72d08473a383a3c9eb5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 30 Nov 2016 15:54:10 -0800 Subject: [PATCH 4/7] lib/debugobjects: export for use in modules Drivers, or other modules, that use a mixture of objects (especially objects embedded within other objects) would like to take advantage of the debugobjects facilities to help catch misuse. Currently, the debugobjects interface is only available to builtin drivers and requires a set of EXPORT_SYMBOL_GPL for use by modules. I am using the debugobjects in i915.ko to try and catch some invalid operations on embedded objects. The problem currently only presents itself across module unload so forcing i915 to be builtin is not an option. Link: http://lkml.kernel.org/r/20161122143039.6433-1-chris@chris-wilson.co.uk Signed-off-by: Chris Wilson Cc: "Du, Changbin" Cc: Thomas Gleixner Cc: Christian Borntraeger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/debugobjects.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index a8e12601eb37..056052dc8e91 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -362,6 +362,7 @@ void debug_object_init(void *addr, struct debug_obj_descr *descr) __debug_object_init(addr, descr, 0); } +EXPORT_SYMBOL_GPL(debug_object_init); /** * debug_object_init_on_stack - debug checks when an object on stack is @@ -376,6 +377,7 @@ void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) __debug_object_init(addr, descr, 1); } +EXPORT_SYMBOL_GPL(debug_object_init_on_stack); /** * debug_object_activate - debug checks when an object is activated @@ -449,6 +451,7 @@ int debug_object_activate(void *addr, struct debug_obj_descr *descr) } return 0; } +EXPORT_SYMBOL_GPL(debug_object_activate); /** * debug_object_deactivate - debug checks when an object is deactivated @@ -496,6 +499,7 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) raw_spin_unlock_irqrestore(&db->lock, flags); } +EXPORT_SYMBOL_GPL(debug_object_deactivate); /** * debug_object_destroy - debug checks when an object is destroyed @@ -542,6 +546,7 @@ void debug_object_destroy(void *addr, struct debug_obj_descr *descr) out_unlock: raw_spin_unlock_irqrestore(&db->lock, flags); } +EXPORT_SYMBOL_GPL(debug_object_destroy); /** * debug_object_free - debug checks when an object is freed @@ -582,6 +587,7 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr) out_unlock: raw_spin_unlock_irqrestore(&db->lock, flags); } +EXPORT_SYMBOL_GPL(debug_object_free); /** * debug_object_assert_init - debug checks when object should be init-ed @@ -626,6 +632,7 @@ void debug_object_assert_init(void *addr, struct debug_obj_descr *descr) raw_spin_unlock_irqrestore(&db->lock, flags); } +EXPORT_SYMBOL_GPL(debug_object_assert_init); /** * debug_object_active_state - debug checks object usage state machine @@ -673,6 +680,7 @@ debug_object_active_state(void *addr, struct debug_obj_descr *descr, raw_spin_unlock_irqrestore(&db->lock, flags); } +EXPORT_SYMBOL_GPL(debug_object_active_state); #ifdef CONFIG_DEBUG_OBJECTS_FREE static void __debug_check_no_obj_freed(const void *address, unsigned long size) From 045d599a286bc01daa3510d59272440a17b23c2e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 30 Nov 2016 15:54:13 -0800 Subject: [PATCH 5/7] kasan: update kasan_global for gcc 7 kasan_global struct is part of compiler/runtime ABI. gcc revision 241983 has added a new field to kasan_global struct. Update kernel definition of kasan_global struct to include the new field. Without this patch KASAN is broken with gcc 7. Link: http://lkml.kernel.org/r/1479219743-28682-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 4 +++- mm/kasan/kasan.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 432f5c97e18f..928e5ca0caee 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -263,7 +263,9 @@ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */ -#if GCC_VERSION >= 50000 +#if GCC_VERSION >= 70000 +#define KASAN_ABI_VERSION 5 +#elif GCC_VERSION >= 50000 #define KASAN_ABI_VERSION 4 #elif GCC_VERSION >= 40902 #define KASAN_ABI_VERSION 3 diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index e5c2181fee6f..03f4545b103d 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -53,6 +53,9 @@ struct kasan_global { #if KASAN_ABI_VERSION >= 4 struct kasan_source_location *location; #endif +#if KASAN_ABI_VERSION >= 5 + char *odr_indicator; +#endif }; /** From 828347f8f9a558cf1af2faa46387a26564f2ac3e Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 30 Nov 2016 15:54:16 -0800 Subject: [PATCH 6/7] kasan: support use-after-scope detection Gcc revision 241896 implements use-after-scope detection. Will be available in gcc 7. Support it in KASAN. Gcc emits 2 new callbacks to poison/unpoison large stack objects when they go in/out of scope. Implement the callbacks and add a test. [dvyukov@google.com: v3] Link: http://lkml.kernel.org/r/1479998292-144502-1-git-send-email-dvyukov@google.com Link: http://lkml.kernel.org/r/1479226045-145148-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: [4.0+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 29 +++++++++++++++++++++++++++++ mm/kasan/kasan.c | 19 +++++++++++++++++++ mm/kasan/kasan.h | 1 + mm/kasan/report.c | 3 +++ 4 files changed, 52 insertions(+) diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 5e51872b3fc1..fbdf87920093 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -20,6 +20,11 @@ #include #include +/* + * Note: test functions are marked noinline so that their names appear in + * reports. + */ + static noinline void __init kmalloc_oob_right(void) { char *ptr; @@ -411,6 +416,29 @@ static noinline void __init copy_user_test(void) kfree(kmem); } +static noinline void __init use_after_scope_test(void) +{ + volatile char *volatile p; + + pr_info("use-after-scope on int\n"); + { + int local = 0; + + p = (char *)&local; + } + p[0] = 1; + p[3] = 1; + + pr_info("use-after-scope on array\n"); + { + char local[1024] = {0}; + + p = local; + } + p[0] = 1; + p[1023] = 1; +} + static int __init kmalloc_tests_init(void) { kmalloc_oob_right(); @@ -436,6 +464,7 @@ static int __init kmalloc_tests_init(void) kasan_global_oob(); ksize_unpoisons_memory(); copy_user_test(); + use_after_scope_test(); return -EAGAIN; } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 70c009741aab..0e9505f66ec1 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -764,6 +764,25 @@ EXPORT_SYMBOL(__asan_storeN_noabort); void __asan_handle_no_return(void) {} EXPORT_SYMBOL(__asan_handle_no_return); +/* Emitted by compiler to poison large objects when they go out of scope. */ +void __asan_poison_stack_memory(const void *addr, size_t size) +{ + /* + * Addr is KASAN_SHADOW_SCALE_SIZE-aligned and the object is surrounded + * by redzones, so we simply round up size to simplify logic. + */ + kasan_poison_shadow(addr, round_up(size, KASAN_SHADOW_SCALE_SIZE), + KASAN_USE_AFTER_SCOPE); +} +EXPORT_SYMBOL(__asan_poison_stack_memory); + +/* Emitted by compiler to unpoison large objects when they go into scope. */ +void __asan_unpoison_stack_memory(const void *addr, size_t size) +{ + kasan_unpoison_shadow(addr, size); +} +EXPORT_SYMBOL(__asan_unpoison_stack_memory); + #ifdef CONFIG_MEMORY_HOTPLUG static int kasan_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 03f4545b103d..1c260e6b3b3c 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -21,6 +21,7 @@ #define KASAN_STACK_MID 0xF2 #define KASAN_STACK_RIGHT 0xF3 #define KASAN_STACK_PARTIAL 0xF4 +#define KASAN_USE_AFTER_SCOPE 0xF8 /* Don't break randconfig/all*config builds */ #ifndef KASAN_ABI_VERSION diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 24c1211fe9d5..073325aedc68 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -90,6 +90,9 @@ static void print_error_description(struct kasan_access_info *info) case KASAN_KMALLOC_FREE: bug_type = "use-after-free"; break; + case KASAN_USE_AFTER_SCOPE: + bug_type = "use-after-scope"; + break; } pr_err("BUG: KASAN: %s in %pS at addr %p\n", From 5cbc198ae08d84bd416b672ad8bd1222acd0855c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 30 Nov 2016 15:54:19 -0800 Subject: [PATCH 7/7] mm: fix false-positive WARN_ON() in truncate/invalidate for hugetlb Hugetlb pages have ->index in size of the huge pages (PMD_SIZE or PUD_SIZE), not in PAGE_SIZE as other types of pages. This means we cannot user page_to_pgoff() to check whether we've got the right page for the radix-tree index. Let's introduce page_to_index() which would return radix-tree index for given page. We will be able to get rid of this once hugetlb will be switched to multi-order entries. Fixes: fc127da085c2 ("truncate: handle file thp") Link: http://lkml.kernel.org/r/20161123093053.mjbnvn5zwxw5e6lk@black.fi.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Doug Nelson Tested-by: Doug Nelson Reviewed-by: Naoya Horiguchi Cc: [4.8+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 21 +++++++++++++++------ mm/truncate.c | 8 ++++---- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index dd15d39e1985..7dbe9148b2f8 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -374,16 +374,13 @@ static inline struct page *read_mapping_page(struct address_space *mapping, } /* - * Get the offset in PAGE_SIZE. - * (TODO: hugepage should have ->index in PAGE_SIZE) + * Get index of the page with in radix-tree + * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) */ -static inline pgoff_t page_to_pgoff(struct page *page) +static inline pgoff_t page_to_index(struct page *page) { pgoff_t pgoff; - if (unlikely(PageHeadHuge(page))) - return page->index << compound_order(page); - if (likely(!PageTransTail(page))) return page->index; @@ -396,6 +393,18 @@ static inline pgoff_t page_to_pgoff(struct page *page) return pgoff; } +/* + * Get the offset in PAGE_SIZE. + * (TODO: hugepage should have ->index in PAGE_SIZE) + */ +static inline pgoff_t page_to_pgoff(struct page *page) +{ + if (unlikely(PageHeadHuge(page))) + return page->index << compound_order(page); + + return page_to_index(page); +} + /* * Return byte-offset into filesystem object for page. */ diff --git a/mm/truncate.c b/mm/truncate.c index a01cce450a26..8d8c62d89e6d 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -283,7 +283,7 @@ void truncate_inode_pages_range(struct address_space *mapping, if (!trylock_page(page)) continue; - WARN_ON(page_to_pgoff(page) != index); + WARN_ON(page_to_index(page) != index); if (PageWriteback(page)) { unlock_page(page); continue; @@ -371,7 +371,7 @@ void truncate_inode_pages_range(struct address_space *mapping, } lock_page(page); - WARN_ON(page_to_pgoff(page) != index); + WARN_ON(page_to_index(page) != index); wait_on_page_writeback(page); truncate_inode_page(mapping, page); unlock_page(page); @@ -492,7 +492,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, if (!trylock_page(page)) continue; - WARN_ON(page_to_pgoff(page) != index); + WARN_ON(page_to_index(page) != index); /* Middle of THP: skip */ if (PageTransTail(page)) { @@ -612,7 +612,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, } lock_page(page); - WARN_ON(page_to_pgoff(page) != index); + WARN_ON(page_to_index(page) != index); if (page->mapping != mapping) { unlock_page(page); continue;