From a6e2f029ae34f41adb6ae3812c32c5d326e1abd2 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 12:48:40 -0400 Subject: [PATCH 001/240] Make asm/word-at-a-time.h available on all architectures Added the x86 implementation of word-at-a-time to the generic version, which previously only supported big-endian. Omitted the x86-specific load_unaligned_zeropad(), which in any case is also not present for the existing BE-only implementation of a word-at-a-time, and is only used under CONFIG_DCACHE_WORD_ACCESS. Added as a "generic-y" to the Kbuilds of all architectures that didn't previously have it. Signed-off-by: Chris Metcalf --- arch/arc/include/asm/Kbuild | 1 + arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/nios2/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/word-at-a-time.h | 80 +++++++++++++++++++++++++--- 22 files changed, 93 insertions(+), 8 deletions(-) diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 1a80cc91a03b..c8f07ce1c757 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -47,4 +47,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 1d66afdfac07..519810d0d5e1 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -19,4 +19,5 @@ generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 07051a63415d..c80181e4454f 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -45,4 +45,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 7aeb32272975..2f697020a8fd 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -58,4 +58,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index d294f6aaff1d..09916c8073ac 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -25,4 +25,5 @@ generic-y += sections.h generic-y += topology.h generic-y += trace_clock.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 30edce31e5c2..2c987dc05af4 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -6,3 +6,4 @@ generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 5ade4a163558..0988816dded0 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -57,4 +57,5 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index ccff13d33fa2..dc05773e1f11 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h +generic-y += word-at-a-time.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index ba1cdc018731..219e54b33685 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += module.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 199320f3c345..611c0df2be39 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -53,4 +53,5 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 9989ddb169ca..cefeabae24cc 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -9,3 +9,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += syscalls.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 7fe5c61a3cb8..dee810fa973e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -17,4 +17,5 @@ generic-y += serial.h generic-y += trace_clock.h generic-y += ucontext.h generic-y += user.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index de30b0c88796..27cbc0267b9c 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 434639d510b3..e22478929719 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -60,4 +60,5 @@ generic-y += types.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 050712e1ce41..d9c0b44191b0 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += preempt.h generic-y += rwsem.h generic-y += trace_clock.h generic-y += vtime.h +generic-y += word-at-a-time.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index dc5385ebb071..9e8089b44f46 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -5,3 +5,4 @@ generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h +generic-y += word-at-a-time.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 138fb3db45ba..ff19975beb33 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -12,3 +12,4 @@ generic-y += sections.h generic-y += trace_clock.h generic-y += xor.h generic-y += serial.h +generic-y += word-at-a-time.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index d53654488c2c..ea59330b4d90 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -38,4 +38,5 @@ generic-y += termbits.h generic-y += termios.h generic-y += trace_clock.h generic-y += types.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 3d63ff6f583f..33c1d3e0caad 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -24,4 +24,5 @@ generic-y += preempt.h generic-y += switch_to.h generic-y += topology.h generic-y += trace_clock.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index d12b377b5a8b..932070cd754a 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -61,4 +61,5 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 14d15bf1a95b..85acffd26217 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -28,4 +28,5 @@ generic-y += statfs.h generic-y += termios.h generic-y += topology.h generic-y += trace_clock.h +generic-y += word-at-a-time.h generic-y += xor.h diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h index 94f9ea8abcae..011dde083f23 100644 --- a/include/asm-generic/word-at-a-time.h +++ b/include/asm-generic/word-at-a-time.h @@ -1,15 +1,10 @@ #ifndef _ASM_WORD_AT_A_TIME_H #define _ASM_WORD_AT_A_TIME_H -/* - * This says "generic", but it's actually big-endian only. - * Little-endian can use more efficient versions of these - * interfaces, see for example - * arch/x86/include/asm/word-at-a-time.h - * for those. - */ - #include +#include + +#ifdef __BIG_ENDIAN struct word_at_a_time { const unsigned long high_bits, low_bits; @@ -53,4 +48,73 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct #define zero_bytemask(mask) (~1ul << __fls(mask)) #endif +#else + +/* + * The optimal byte mask counting is probably going to be something + * that is architecture-specific. If you have a reliably fast + * bit count instruction, that might be better than the multiply + * and shift, for example. + */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. + */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#endif + +/* Return nonzero if it has a zero */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +/* The mask we created is directly usable as a bytemask */ +#define zero_bytemask(mask) (mask) + +static inline unsigned long find_zero(unsigned long mask) +{ + return count_masked_bytes(mask); +} + +#endif /* __BIG_ENDIAN */ + #endif /* _ASM_WORD_AT_A_TIME_H */ From 8f6429c7cb59f28433253575cc8e3262eed63592 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Jul 2015 19:40:12 +0100 Subject: [PATCH 002/240] iommu/iova: Avoid over-allocating when size-aligned Currently, allocating a size-aligned IOVA region quietly adjusts the actual allocation size in the process, returning a rounded-up power-of-two-sized allocation. This results in mismatched behaviour in the IOMMU driver if the original size was not a power of two, where the original size is mapped, but the rounded-up IOVA size is unmapped. Whilst some IOMMUs will happily unmap already-unmapped pages, others consider this an error, so fix it by computing the necessary alignment padding without altering the actual allocation size. Also clean up by making pad_size unsigned, since its callers always pass unsigned values and negative padding makes little sense here anyway. Signed-off-by: Robin Murphy Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 2 ++ drivers/iommu/iova.c | 23 ++++++----------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a98a7b27aca1..92101597cede 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3233,6 +3233,8 @@ static struct iova *intel_alloc_iova(struct device *dev, /* Restrict dma_mask to the width that the iommu can handle */ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); + /* Ensure we reserve the whole size-aligned region */ + nrpages = __roundup_pow_of_two(nrpages); if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { /* diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b7c3d923f3e1..29f2efcf668e 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -120,19 +120,14 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) } } -/* Computes the padding size required, to make the - * the start address naturally aligned on its size +/* + * Computes the padding size required, to make the start address + * naturally aligned on the power-of-two order of its size */ -static int -iova_get_pad_size(int size, unsigned int limit_pfn) +static unsigned int +iova_get_pad_size(unsigned int size, unsigned int limit_pfn) { - unsigned int pad_size = 0; - unsigned int order = ilog2(size); - - if (order) - pad_size = (limit_pfn + 1) % (1 << order); - - return pad_size; + return (limit_pfn + 1 - size) & (__roundup_pow_of_two(size) - 1); } static int __alloc_and_insert_iova_range(struct iova_domain *iovad, @@ -265,12 +260,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (!new_iova) return NULL; - /* If size aligned is set then round the size to - * to next power of two. - */ - if (size_aligned) - size = __roundup_pow_of_two(size); - ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); From ae1ff3d623905947158fd3394854c23026337810 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 13 Jul 2015 14:31:28 +0300 Subject: [PATCH 003/240] iommu: iova: Move iova cache management to the iova library This is necessary to separate intel-iommu from the iova library. Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 6 +-- drivers/iommu/iova.c | 83 ++++++++++++++++++++++--------------- include/linux/iova.h | 4 +- 3 files changed, 54 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 92101597cede..2d5cf39e1053 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3743,7 +3743,7 @@ static inline int iommu_devinfo_cache_init(void) static int __init iommu_init_mempool(void) { int ret; - ret = iommu_iova_cache_init(); + ret = iova_cache_get(); if (ret) return ret; @@ -3757,7 +3757,7 @@ static int __init iommu_init_mempool(void) kmem_cache_destroy(iommu_domain_cache); domain_error: - iommu_iova_cache_destroy(); + iova_cache_put(); return -ENOMEM; } @@ -3766,7 +3766,7 @@ static void __init iommu_exit_mempool(void) { kmem_cache_destroy(iommu_devinfo_cache); kmem_cache_destroy(iommu_domain_cache); - iommu_iova_cache_destroy(); + iova_cache_put(); } static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 29f2efcf668e..ed95f7a0fad3 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -20,40 +20,6 @@ #include #include -static struct kmem_cache *iommu_iova_cache; - -int iommu_iova_cache_init(void) -{ - int ret = 0; - - iommu_iova_cache = kmem_cache_create("iommu_iova", - sizeof(struct iova), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!iommu_iova_cache) { - pr_err("Couldn't create iova cache\n"); - ret = -ENOMEM; - } - - return ret; -} - -void iommu_iova_cache_destroy(void) -{ - kmem_cache_destroy(iommu_iova_cache); -} - -struct iova *alloc_iova_mem(void) -{ - return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); -} - -void free_iova_mem(struct iova *iova) -{ - kmem_cache_free(iommu_iova_cache, iova); -} - void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn, unsigned long pfn_32bit) @@ -237,6 +203,55 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova) rb_insert_color(&iova->node, root); } +static struct kmem_cache *iova_cache; +static unsigned int iova_cache_users; +static DEFINE_MUTEX(iova_cache_mutex); + +struct iova *alloc_iova_mem(void) +{ + return kmem_cache_alloc(iova_cache, GFP_ATOMIC); +} +EXPORT_SYMBOL(alloc_iova_mem); + +void free_iova_mem(struct iova *iova) +{ + kmem_cache_free(iova_cache, iova); +} +EXPORT_SYMBOL(free_iova_mem); + +int iova_cache_get(void) +{ + mutex_lock(&iova_cache_mutex); + if (!iova_cache_users) { + iova_cache = kmem_cache_create( + "iommu_iova", sizeof(struct iova), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!iova_cache) { + mutex_unlock(&iova_cache_mutex); + printk(KERN_ERR "Couldn't create iova cache\n"); + return -ENOMEM; + } + } + + iova_cache_users++; + mutex_unlock(&iova_cache_mutex); + + return 0; +} + +void iova_cache_put(void) +{ + mutex_lock(&iova_cache_mutex); + if (WARN_ON(!iova_cache_users)) { + mutex_unlock(&iova_cache_mutex); + return; + } + iova_cache_users--; + if (!iova_cache_users) + kmem_cache_destroy(iova_cache); + mutex_unlock(&iova_cache_mutex); +} + /** * alloc_iova - allocates an iova * @iovad: - iova domain in question diff --git a/include/linux/iova.h b/include/linux/iova.h index 3920a19d8194..92f7177db2ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -68,8 +68,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -int iommu_iova_cache_init(void); -void iommu_iova_cache_destroy(void); +int iova_cache_get(void); +void iova_cache_put(void); struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); From 9b41760b03816b34f4c9eee2cbb8fda8439920fc Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 13 Jul 2015 14:31:29 +0300 Subject: [PATCH 004/240] iommu: iova: Export symbols Use EXPORT_SYMBOL_GPL() to export the iova library symbols. The symbols include: init_iova_domain(); iova_cache_get(); iova_cache_put(); iova_cache_init(); alloc_iova(); find_iova(); __free_iova(); free_iova(); put_iova_domain(); reserve_iova(); copy_reserved_iova(); Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse --- drivers/iommu/iova.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index ed95f7a0fad3..cbd74c79b212 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -38,6 +38,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; } +EXPORT_SYMBOL_GPL(init_iova_domain); static struct rb_node * __get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) @@ -238,6 +239,7 @@ int iova_cache_get(void) return 0; } +EXPORT_SYMBOL_GPL(iova_cache_get); void iova_cache_put(void) { @@ -251,6 +253,7 @@ void iova_cache_put(void) kmem_cache_destroy(iova_cache); mutex_unlock(&iova_cache_mutex); } +EXPORT_SYMBOL_GPL(iova_cache_put); /** * alloc_iova - allocates an iova @@ -285,6 +288,7 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, return new_iova; } +EXPORT_SYMBOL_GPL(alloc_iova); /** * find_iova - find's an iova for a given pfn @@ -325,6 +329,7 @@ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return NULL; } +EXPORT_SYMBOL_GPL(find_iova); /** * __free_iova - frees the given iova @@ -343,6 +348,7 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); free_iova_mem(iova); } +EXPORT_SYMBOL_GPL(__free_iova); /** * free_iova - finds and frees the iova for a given pfn @@ -360,6 +366,7 @@ free_iova(struct iova_domain *iovad, unsigned long pfn) __free_iova(iovad, iova); } +EXPORT_SYMBOL_GPL(free_iova); /** * put_iova_domain - destroys the iova doamin @@ -382,6 +389,7 @@ void put_iova_domain(struct iova_domain *iovad) } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } +EXPORT_SYMBOL_GPL(put_iova_domain); static int __is_range_overlap(struct rb_node *node, @@ -471,6 +479,7 @@ reserve_iova(struct iova_domain *iovad, spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return iova; } +EXPORT_SYMBOL_GPL(reserve_iova); /** * copy_reserved_iova - copies the reserved between domains @@ -497,6 +506,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) } spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } +EXPORT_SYMBOL_GPL(copy_reserved_iova); struct iova * split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, From 15bbdec3931e617231c12b0920e497e87ec8c2c6 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 13 Jul 2015 14:31:30 +0300 Subject: [PATCH 005/240] iommu: Make the iova library a module The iova library has use outside the intel-iommu driver, thus make it a module. Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 2 +- drivers/iommu/iova.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index f1fb1d3ccc56..0a141cd44ddd 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -42,7 +42,7 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST endmenu config IOMMU_IOVA - bool + tristate config OF_IOMMU def_bool y diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index cbd74c79b212..fa0adef32bd6 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -18,6 +18,7 @@ */ #include +#include #include void @@ -548,3 +549,6 @@ split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, free_iova_mem(prev); return NULL; } + +MODULE_AUTHOR("Anil S Keshavamurthy "); +MODULE_LICENSE("GPL"); From 135919a3a80565070b9645009e65f73e72c661c0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 9 Sep 2015 13:35:05 -0400 Subject: [PATCH 006/240] intel_idle: Skylake Client Support - updated Addition of PC9 state, and minor tweaks to existing PC6 and PC8 states. Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3a3738fe016b..cd4510a63375 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -620,7 +620,7 @@ static struct cpuidle_state skl_cstates[] = { .name = "C6-SKL", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 75, + .exit_latency = 85, .target_residency = 200, .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, @@ -636,10 +636,18 @@ static struct cpuidle_state skl_cstates[] = { .name = "C8-SKL", .desc = "MWAIT 0x40", .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 174, + .exit_latency = 200, .target_residency = 800, .enter = &intel_idle, .enter_freeze = intel_idle_freeze, }, + { + .name = "C9-SKL", + .desc = "MWAIT 0x50", + .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 480, + .target_residency = 5000, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, { .name = "C10-SKL", .desc = "MWAIT 0x60", From 30035e45753b708e7d47a98398500ca005e02b86 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 12:52:04 -0400 Subject: [PATCH 007/240] string: provide strscpy() The strscpy() API is intended to be used instead of strlcpy(), and instead of most uses of strncpy(). - Unlike strlcpy(), it doesn't read from memory beyond (src + size). - Unlike strlcpy() or strncpy(), the API provides an easy way to check for destination buffer overflow: an -E2BIG error return value. - The provided implementation is robust in the face of the source buffer being asynchronously changed during the copy, unlike the current implementation of strlcpy(). - Unlike strncpy(), the destination buffer will be NUL-terminated if the string in the source buffer is too long. - Also unlike strncpy(), the destination buffer will not be updated beyond the NUL termination, avoiding strncpy's behavior of zeroing the entire tail end of the destination buffer. (A memset() after the strscpy() can be used if this behavior is desired.) - The implementation should be reasonably performant on all platforms since it uses the asm/word-at-a-time.h API rather than simple byte copy. Kernel-to-kernel string copy is not considered to be performance critical in any case. Signed-off-by: Chris Metcalf --- include/linux/string.h | 3 ++ lib/string.c | 88 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index a8d90db9c4b0..9ef7795e65e4 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -25,6 +25,9 @@ extern char * strncpy(char *,const char *, __kernel_size_t); #ifndef __HAVE_ARCH_STRLCPY size_t strlcpy(char *, const char *, size_t); #endif +#ifndef __HAVE_ARCH_STRSCPY +ssize_t __must_check strscpy(char *, const char *, size_t); +#endif #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif diff --git a/lib/string.c b/lib/string.c index 13d1e84ddb80..8dbb7b1eab50 100644 --- a/lib/string.c +++ b/lib/string.c @@ -27,6 +27,10 @@ #include #include +#include +#include +#include + #ifndef __HAVE_ARCH_STRNCASECMP /** * strncasecmp - Case insensitive, length-limited string comparison @@ -146,6 +150,90 @@ size_t strlcpy(char *dest, const char *src, size_t size) EXPORT_SYMBOL(strlcpy); #endif +#ifndef __HAVE_ARCH_STRSCPY +/** + * strscpy - Copy a C-string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: Size of destination buffer + * + * Copy the string, or as much of it as fits, into the dest buffer. + * The routine returns the number of characters copied (not including + * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough. + * The behavior is undefined if the string buffers overlap. + * The destination buffer is always NUL terminated, unless it's zero-sized. + * + * Preferred to strlcpy() since the API doesn't require reading memory + * from the src string beyond the specified "count" bytes, and since + * the return value is easier to error-check than strlcpy()'s. + * In addition, the implementation is robust to the string changing out + * from underneath it, unlike the current strlcpy() implementation. + * + * Preferred to strncpy() since it always returns a valid string, and + * doesn't unnecessarily force the tail of the destination buffer to be + * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy() + * with an overflow test, then just memset() the tail of the dest buffer. + */ +ssize_t strscpy(char *dest, const char *src, size_t count) +{ + const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; + size_t max = count; + long res = 0; + + if (count == 0) + return -E2BIG; + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + /* + * If src is unaligned, don't cross a page boundary, + * since we don't know if the next page is mapped. + */ + if ((long)src & (sizeof(long) - 1)) { + size_t limit = PAGE_SIZE - ((long)src & (PAGE_SIZE - 1)); + if (limit < max) + max = limit; + } +#else + /* If src or dest is unaligned, don't do word-at-a-time. */ + if (((long) dest | (long) src) & (sizeof(long) - 1)) + max = 0; +#endif + + while (max >= sizeof(unsigned long)) { + unsigned long c, data; + + c = *(unsigned long *)(src+res); + *(unsigned long *)(dest+res) = c; + if (has_zero(c, &data, &constants)) { + data = prep_zero_mask(c, data, &constants); + data = create_zero_mask(data); + return res + find_zero(data); + } + res += sizeof(unsigned long); + count -= sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (count) { + char c; + + c = src[res]; + dest[res] = c; + if (!c) + return res; + res++; + count--; + } + + /* Hit buffer length without finding a NUL; force NUL-termination. */ + if (res) + dest[res-1] = '\0'; + + return -E2BIG; +} +EXPORT_SYMBOL(strscpy); +#endif + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another From 30059d494a72603d066baf55c748803df968aa08 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 13:07:38 -0400 Subject: [PATCH 008/240] tile: use global strscpy() rather than private copy Now that strscpy() is a standard API, remove the local copy. Signed-off-by: Chris Metcalf --- arch/tile/gxio/mpipe.c | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c index ee186e13dfe6..f102048d9c0e 100644 --- a/arch/tile/gxio/mpipe.c +++ b/arch/tile/gxio/mpipe.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -29,32 +30,6 @@ /* HACK: Avoid pointless "shadow" warnings. */ #define link link_shadow -/** - * strscpy - Copy a C-string into a sized buffer, but only if it fits - * @dest: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Use this routine to avoid copying too-long strings. - * The routine returns the total number of bytes copied - * (including the trailing NUL) or zero if the buffer wasn't - * big enough. To ensure that programmers pay attention - * to the return code, the destination has a single NUL - * written at the front (if size is non-zero) when the - * buffer is not big enough. - */ -static size_t strscpy(char *dest, const char *src, size_t size) -{ - size_t len = strnlen(src, size) + 1; - if (len > size) { - if (size) - dest[0] = '\0'; - return 0; - } - memcpy(dest, src, len); - return len; -} - int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index) { char file[32]; @@ -540,7 +515,7 @@ int gxio_mpipe_link_instance(const char *link_name) if (!context) return GXIO_ERR_NO_DEVICE; - if (strscpy(name.name, link_name, sizeof(name.name)) == 0) + if (strscpy(name.name, link_name, sizeof(name.name)) < 0) return GXIO_ERR_NO_DEVICE; return gxio_mpipe_info_instance_aux(context, name); @@ -559,7 +534,7 @@ int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac) rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac); if (rv >= 0) { - if (strscpy(link_name, name.name, sizeof(name.name)) == 0) + if (strscpy(link_name, name.name, sizeof(name.name)) < 0) return GXIO_ERR_INVAL_MEMORY_SIZE; memcpy(link_mac, mac.mac, sizeof(mac.mac)); } @@ -576,7 +551,7 @@ int gxio_mpipe_link_open(gxio_mpipe_link_t *link, _gxio_mpipe_link_name_t name; int rv; - if (strscpy(name.name, link_name, sizeof(name.name)) == 0) + if (strscpy(name.name, link_name, sizeof(name.name)) < 0) return GXIO_ERR_NO_DEVICE; rv = gxio_mpipe_link_open_aux(context, name, flags); From 02d8dabc50f94353075f2f62b1047c1306e8bf92 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 3 Sep 2015 15:23:40 +0200 Subject: [PATCH 009/240] perf stat: Fix per-pkg event reporting bug Per-pkg events need to be captured once per processor socket. The code in check_per_pkg() ensures only one value per processor package is used. However there is a problem with this function in case the first CPU of the package does not measure anything for the per-pkg event, but other CPUs do. Consider the following: $ create cgroup FOO; echo $$ >FOO/tasks; taskset -c 1 noploop & $ perf stat -a -I 1000 -e intel_cqm/llc_occupancy/ -G FOO sleep 100 1.00000 Bytes intel_cqm/llc_occupancy/ FOO The reason for this is that CPU0 in the cgroup has nothing running on it. Yet check_per_plg() will mark socket0 as processed and no other event value will be considered for the socket. This patch fixes the problem by having check_per_pkg() only consider events which actually ran. Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441286620-10117-1-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 415c359de465..2d065d065b67 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -196,7 +196,8 @@ static void zero_per_pkg(struct perf_evsel *counter) memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); } -static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) +static int check_per_pkg(struct perf_evsel *counter, + struct perf_counts_values *vals, int cpu, bool *skip) { unsigned long *mask = counter->per_pkg_mask; struct cpu_map *cpus = perf_evsel__cpus(counter); @@ -218,6 +219,17 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) counter->per_pkg_mask = mask; } + /* + * we do not consider an event that has not run as a good + * instance to mark a package as used (skip=1). Otherwise + * we may run into a situation where the first CPU in a package + * is not running anything, yet the second is, and this function + * would mark the package as used after the first CPU and would + * not read the values from the second CPU. + */ + if (!(vals->run && vals->ena)) + return 0; + s = cpu_map__get_socket(cpus, cpu); if (s < 0) return -1; @@ -235,7 +247,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel static struct perf_counts_values zero; bool skip = false; - if (check_per_pkg(evsel, cpu, &skip)) { + if (check_per_pkg(evsel, count, cpu, &skip)) { pr_err("failed to read per-pkg counter\n"); return -1; } From 179f36dde3cec0f9f05a757b68f6a58e4edbcc95 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Sep 2015 11:30:20 -0300 Subject: [PATCH 010/240] Revert "perf symbols: Fix mismatched declarations for elf_getphdrnum" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f785f2357673d520a0b7b468973cdd197f336494. We have a test to check if elf_getphdrnum() is present, so, if it fails, we'll get: [acme@rhel5 linux]$ cat /tmp/build/perf/feature/test-libelf-getphdrnum.make.output cc1: warnings being treated as errors test-libelf-getphdrnum.c: In function ‘main’: test-libelf-getphdrnum.c:7: warning: implicit declaration of function ‘elf_getphdrnum’ [acme@rhel5 linux]$ And this block will not be compiled: #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT static int elf_getphdrnum(Elf *elf, size_t *dst) ... #endif So, if elf_getphdrnum() is being defined somewhere, there is a problem with the test that is not detecting that function, go fix it. Reported-by: Vinson Lee Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Naveen N. Rao" Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Stephane Eranian Cc: Victor Kamensky Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-qn459fal6acvcvm50i8zxx9k@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 53bb5f59ec58..f78ea3dc4c08 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, #endif #ifndef HAVE_ELF_GETPHDRNUM_SUPPORT -int elf_getphdrnum(Elf *elf, size_t *dst) +static int elf_getphdrnum(Elf *elf, size_t *dst) { GElf_Ehdr gehdr; GElf_Ehdr *ehdr; From f8ac8606fd3cd72183de8eec2b151ff05040c70f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Sep 2015 12:20:28 -0300 Subject: [PATCH 011/240] tools build: Add test for presence of numa_num_possible_cpus() in libnuma The existing numa test checks only if numa.h and numa_available() are present, but that can be satisfied with an old libnuma that is not enough for the 'perf bench numa' entry, so add a test to check for that: [acme@rhel5 linux]$ make NO_AUXTRACE=1 NO_LIBPERL=1 -C tools/perf O=/tmp/build/perf install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j2' parallel build Auto-detecting system features: ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ OFF ] ... libperl: [ on ] config/Makefile:577: Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8 INSTALL binaries This fixes the build on old systems such as RHEL/CentOS 5.11. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Naveen N. Rao" Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Stephane Eranian Cc: Victor Kamensky Cc: Vinson Lee Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-zqriqkezppi2de2iyjin1tnc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 2 ++ tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-numa_num_possible_cpus.c | 6 ++++++ tools/perf/config/Makefile | 11 ++++++++--- 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 tools/build/feature/test-numa_num_possible_cpus.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 2975632d51e2..970242098a7c 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -41,6 +41,7 @@ FEATURE_TESTS ?= \ libelf-getphdrnum \ libelf-mmap \ libnuma \ + numa_num_possible_cpus \ libperl \ libpython \ libpython-version \ @@ -61,6 +62,7 @@ FEATURE_DISPLAY ?= \ libbfd \ libelf \ libnuma \ + numa_num_possible_cpus \ libperl \ libpython \ libslang \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 74ca42093d70..e13a42bd0274 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -19,6 +19,7 @@ FILES= \ test-libelf-getphdrnum.bin \ test-libelf-mmap.bin \ test-libnuma.bin \ + test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ test-libpython-version.bin \ @@ -87,6 +88,9 @@ test-libelf-getphdrnum.bin: test-libnuma.bin: $(BUILD) -lnuma +test-numa_num_possible_cpus.bin: + $(BUILD) -lnuma + test-libunwind.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 84689a67814a..7a8cdbad3e1b 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -77,6 +77,10 @@ # include "test-libnuma.c" #undef main +#define main main_test_numa_num_possible_cpus +# include "test-numa_num_possible_cpus.c" +#undef main + #define main main_test_timerfd # include "test-timerfd.c" #undef main @@ -136,6 +140,7 @@ int main(int argc, char *argv[]) main_test_libbfd(); main_test_backtrace(); main_test_libnuma(); + main_test_numa_num_possible_cpus(); main_test_timerfd(); main_test_stackprotector_all(); main_test_libdw_dwarf_unwind(); diff --git a/tools/build/feature/test-numa_num_possible_cpus.c b/tools/build/feature/test-numa_num_possible_cpus.c new file mode 100644 index 000000000000..2606e94b0659 --- /dev/null +++ b/tools/build/feature/test-numa_num_possible_cpus.c @@ -0,0 +1,6 @@ +#include + +int main(void) +{ + return numa_num_possible_cpus(); +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 827557fc7511..053e65b04dc3 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -573,9 +573,14 @@ ifndef NO_LIBNUMA msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); NO_LIBNUMA := 1 else - CFLAGS += -DHAVE_LIBNUMA_SUPPORT - EXTLIBS += -lnuma - $(call detected,CONFIG_NUMA) + ifeq ($(feature-numa_num_possible_cpus), 0) + msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8); + NO_LIBNUMA := 1 + else + CFLAGS += -DHAVE_LIBNUMA_SUPPORT + EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) + endif endif endif From b0063dbfb031a7c728ed0d9533257e8329292cf1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Sep 2015 12:54:30 -0300 Subject: [PATCH 012/240] tools build: Add test for presence of __get_cpuid() gcc builtin The auxtrace code needed by Intel PT uses the __get_cpuid() gcc builtin, that is not present in old systems, breaking the build. Add a test to check for that builtin and disable AUXTRACE in those systems. [acme@rhel5 linux]$ make NO_LIBPERL=1 -C tools/perf O=/tmp/build/perf install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j2' parallel build Auto-detecting system features: ... lzma: [ on ] ... get_cpuid: [ OFF ] config/Makefile:630: Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc MKDIR /tmp/build/perf/util/ This fixes the build on old systems such as RHEL/CentOS 5.11. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: "Naveen N. Rao" Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Stephane Eranian Cc: Victor Kamensky Cc: Vinson Lee Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-d4puslul0jltoodzpx9r4sje@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 6 ++++-- tools/build/feature/Makefile | 6 +++++- tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-get_cpuid.c | 7 +++++++ tools/perf/config/Makefile | 9 +++++++-- 5 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 tools/build/feature/test-get_cpuid.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 970242098a7c..c8fe6d177119 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -52,7 +52,8 @@ FEATURE_TESTS ?= \ timerfd \ libdw-dwarf-unwind \ zlib \ - lzma + lzma \ + get_cpuid FEATURE_DISPLAY ?= \ dwarf \ @@ -69,7 +70,8 @@ FEATURE_DISPLAY ?= \ libunwind \ libdw-dwarf-unwind \ zlib \ - lzma + lzma \ + get_cpuid # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index e13a42bd0274..e43a2971bf56 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -35,7 +35,8 @@ FILES= \ test-compile-x32.bin \ test-zlib.bin \ test-lzma.bin \ - test-bpf.bin + test-bpf.bin \ + test-get_cpuid.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config @@ -166,6 +167,9 @@ test-zlib.bin: test-lzma.bin: $(BUILD) -llzma +test-get_cpuid.bin: + $(BUILD) + test-bpf.bin: $(BUILD) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 7a8cdbad3e1b..33cf6f20bd4e 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -121,6 +121,10 @@ # include "test-lzma.c" #undef main +#define main main_test_get_cpuid +# include "test-get_cpuid.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -148,6 +152,7 @@ int main(int argc, char *argv[]) main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_lzma(); + main_test_get_cpuid(); return 0; } diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c new file mode 100644 index 000000000000..d7a2c407130d --- /dev/null +++ b/tools/build/feature/test-get_cpuid.c @@ -0,0 +1,7 @@ +#include + +int main(void) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 053e65b04dc3..38a08539f4bf 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -626,8 +626,13 @@ ifdef LIBBABELTRACE endif ifndef NO_AUXTRACE - $(call detected,CONFIG_AUXTRACE) - CFLAGS += -DHAVE_AUXTRACE_SUPPORT + ifeq ($(feature-get_cpuid), 0) + msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); + NO_AUXTRACE := 1 + else + $(call detected,CONFIG_AUXTRACE) + CFLAGS += -DHAVE_AUXTRACE_SUPPORT + endif endif # Among the variables below, these: From bf6445631c6f00882b25516a174d5073ce0c6f81 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Thu, 17 Sep 2015 12:08:53 +0200 Subject: [PATCH 013/240] perf tools: Bool functions shouldn't return -1 Returning a negative value for a boolean function seem to have the undesired effect of returning true. Replace -1 by false in a bool-returning function. The diff of the .s file before and after the change (for x86_64): 3907c3907 < movl $1, %ebx --- > xorl %ebx, %ebx while if -1 is replaced by true, the diff is empty. This issue was found by the following Coccinelle semantic patch: @@ identifier f; constant C; typedef bool; @@ bool f (...){ <+... * return -C; ...+> } Signed-off-by: Peter Senna Tschudin Cc: Jiri Olsa Cc: Kan Liang Cc: Matt Fleming Cc: Milos Vyletel Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1442484533-19742-1-git-send-email-peter.senna@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7acafb3c5592..c2cd9bf2348b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -709,7 +709,7 @@ bool find_process(const char *name) dir = opendir(procfs__mountpoint()); if (!dir) - return -1; + return false; /* Walk through the directory. */ while (ret && (d = readdir(dir)) != NULL) { From de9b8f5dcbd94bfb1d249907a635f1fb1968e19c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 13 Aug 2015 23:09:29 +0200 Subject: [PATCH 014/240] sched: Fix crash trying to dequeue/enqueue the idle thread Sasha reports that his virtual machine tries to schedule the idle thread since commit 6c37067e2786 ("sched: Change the sched_class::set_cpus_allowed() calling context"). Hit trace shows this happening from idle_thread_get()->init_idle(), which is the _second_ init_idle() invocation on that task_struct, the first being done through idle_init()->fork_idle(). (this code is insane...) Because we call init_idle() twice in a row, its ->sched_class == &idle_sched_class and ->on_rq = TASK_ON_RQ_QUEUED. This means do_set_cpus_allowed() think we're queued and will call dequeue_task(), which is implemented with BUG() for the idle class, seeing how dequeueing the idle task is a daft thing. Aside of the whole insanity of calling init_idle() _twice_, change the code to call set_cpus_allowed_common() instead as this is 'obviously' before the idle task gets ran etc.. Reported-by: Sasha Levin Tested-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 6c37067e2786 ("sched: Change the sched_class::set_cpus_allowed() calling context") Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 97d276ff1edb..f0d043ec0182 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4927,7 +4927,15 @@ void init_idle(struct task_struct *idle, int cpu) idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); - do_set_cpus_allowed(idle, cpumask_of(cpu)); +#ifdef CONFIG_SMP + /* + * Its possible that init_idle() gets called multiple times on a task, + * in that case do_set_cpus_allowed() will not do the right thing. + * + * And since this is boot we can forgo the serialization. + */ + set_cpus_allowed_common(idle, cpumask_of(cpu)); +#endif /* * We're having a chicken and egg problem, even though we are * holding rq->lock, the cpu isn't yet set to this cpu so the @@ -4944,7 +4952,7 @@ void init_idle(struct task_struct *idle, int cpu) rq->curr = rq->idle = idle; idle->on_rq = TASK_ON_RQ_QUEUED; -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP idle->on_cpu = 1; #endif raw_spin_unlock(&rq->lock); @@ -4959,7 +4967,7 @@ void init_idle(struct task_struct *idle, int cpu) idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); vtime_init_idle(idle, cpu); -#if defined(CONFIG_SMP) +#ifdef CONFIG_SMP sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); #endif } From 5e176213a6b2bc5146820c79542d37290434a3c4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 14 Sep 2015 14:47:02 -0700 Subject: [PATCH 015/240] perf/x86/intel: Make the CYCLE_ACTIVITY.* constraint on Broadwell more specific The counter constraint for CYCLE_ACTIVITY.* on Broadwell covered all CYCLE_ACTIVITY.* sub events, and forced them on counter 2. But actually only one sub event (umask 8) needs to be on counter 2, all others do not have any constraint. Only force that subevent. This fixes groups with multiple CYCLE_ACTIVITY.* events, for example: % perf stat -x, -e '{cpu/event=0xa3,umask=0x6,cmask=6/,\ cpu/event=0xa2,umask=0x8/,\ cpu/event=0xa3,umask=0x4,cmask=4/,cpu/event=0xb1,umask=0x1,cmask=1/}' true 122150,,cpu/event=0xa3,umask=0x6,cmask=6/,846486,100.00 16483,,cpu/event=0xa2,umask=0x8/,846486,100.00 252280,,cpu/event=0xa3,umask=0x4,cmask=4/,846486,100.00 233604,,cpu/event=0xb1,umask=0x1,cmask=1/,846486,100.00 % Without this patch the third result would be Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1442267222-16464-1-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 3fefebfbdf4b..1d84b41ba932 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -250,7 +250,7 @@ struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */ - INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */ + INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */ EVENT_CONSTRAINT_END }; From d0dc8494cd6904f8ad035d9ad97f313948f35d0c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 9 Sep 2015 14:53:59 -0700 Subject: [PATCH 016/240] perf/x86/intel/pebs: Add PEBS frontend profiling for Skylake Skylake has a new FRONTEND_LATENCY PEBS event to accurately profile frontend problems (like ITLB or decoding issues). The new event is configured through a separate MSR, which selects a range of sub events. Define the extra MSR as a extra reg and export support for it through sysfs. To avoid duplicating the existing tables use a new function to add new entries to existing tables. Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1435707205-6676-4-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 2 ++ arch/x86/kernel/cpu/perf_event.h | 1 + arch/x86/kernel/cpu/perf_event_intel.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c1c0a1c14344..54390bc140dd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -141,6 +141,8 @@ #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define MSR_PEBS_FRONTEND 0x000003f7 + #define MSR_IA32_POWER_CTL 0x000001fc #define MSR_IA32_MC0_CTL 0x00000400 diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 5edf6d868fc1..165be83a7fa4 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -47,6 +47,7 @@ enum extra_reg_type { EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ + EXTRA_REG_FE = 4, /* fe_* */ EXTRA_REG_MAX /* number of entries needed */ }; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1d84b41ba932..ef74c9d05f82 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -205,6 +205,7 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x3fff17, FE), EVENT_EXTRA_END }; @@ -2891,6 +2892,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63"); PMU_FORMAT_ATTR(ldlat, "config1:0-15"); +PMU_FORMAT_ATTR(frontend, "config1:0-23"); + static struct attribute *intel_arch3_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -2907,6 +2910,11 @@ static struct attribute *intel_arch3_formats_attr[] = { NULL, }; +static struct attribute *skl_format_attr[] = { + &format_attr_frontend.attr, + NULL, +}; + static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -3516,7 +3524,8 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr, + skl_format_attr); WARN_ON(!x86_pmu.format_attrs); x86_pmu.cpu_events = hsw_events_attrs; pr_cont("Skylake events, "); From dfe1f3cb312624928052413928d88b0ee3492216 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 9 Sep 2015 14:54:00 -0700 Subject: [PATCH 017/240] perf/x86/intel: Fix Skylake FRONTEND MSR extrareg mask Stephane pointed out that the extrareg mask was one bit too short. The bubble width field was truncated by one bit. Fix that here. Also add some extra comments on the reserved bits inside the event select code. Reported-by: Stephane Eranian Signed-off-by: Andi Kleen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1441835640-21347-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ef74c9d05f82..f63360be2238 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -205,7 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), - INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x3fff17, FE), + /* + * Note the low 8 bits eventsel code is not a continuous field, containing + * some #GPing bits. These are masked out. + */ + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), EVENT_EXTRA_END }; From f55fc2a57cc9ca3b1bb4fb8eb25b6e1989e5b993 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Sep 2015 19:06:33 +0200 Subject: [PATCH 018/240] perf: Restructure perf syscall point of no return The exclusive_event_installable() stuff only works because its exclusive with the grouping bits. Rework the code such that there is a sane place to error out before we go do things we cannot undo. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 49 +++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f548f69c4299..39679f749500 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8297,13 +8297,30 @@ SYSCALL_DEFINE5(perf_event_open, if (move_group) { gctx = group_leader->ctx; + mutex_lock_double(&gctx->mutex, &ctx->mutex); + } else { + mutex_lock(&ctx->mutex); + } + /* + * Must be under the same ctx::mutex as perf_install_in_context(), + * because we need to serialize with concurrent event creation. + */ + if (!exclusive_event_installable(event, ctx)) { + /* exclusive and group stuff are assumed mutually exclusive */ + WARN_ON_ONCE(move_group); + + err = -EBUSY; + goto err_locked; + } + + WARN_ON_ONCE(ctx->parent_ctx); + + if (move_group) { /* * See perf_event_ctx_lock() for comments on the details * of swizzling perf_event::ctx. */ - mutex_lock_double(&gctx->mutex, &ctx->mutex); - perf_remove_from_context(group_leader, false); list_for_each_entry(sibling, &group_leader->sibling_list, @@ -8311,13 +8328,7 @@ SYSCALL_DEFINE5(perf_event_open, perf_remove_from_context(sibling, false); put_ctx(gctx); } - } else { - mutex_lock(&ctx->mutex); - } - WARN_ON_ONCE(ctx->parent_ctx); - - if (move_group) { /* * Wait for everybody to stop referencing the events through * the old lists, before installing it on new lists. @@ -8349,22 +8360,20 @@ SYSCALL_DEFINE5(perf_event_open, perf_event__state_init(group_leader); perf_install_in_context(ctx, group_leader, group_leader->cpu); get_ctx(ctx); - } - if (!exclusive_event_installable(event, ctx)) { - err = -EBUSY; - mutex_unlock(&ctx->mutex); - fput(event_file); - goto err_context; + /* + * Now that all events are installed in @ctx, nothing + * references @gctx anymore, so drop the last reference we have + * on it. + */ + put_ctx(gctx); } perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); - if (move_group) { + if (move_group) mutex_unlock(&gctx->mutex); - put_ctx(gctx); - } mutex_unlock(&ctx->mutex); put_online_cpus(); @@ -8391,6 +8400,12 @@ SYSCALL_DEFINE5(perf_event_open, fd_install(event_fd, event_file); return event_fd; +err_locked: + if (move_group) + mutex_unlock(&gctx->mutex); + mutex_unlock(&ctx->mutex); +/* err_file: */ + fput(event_file); err_context: perf_unpin_context(ctx); put_ctx(ctx); From a723968c0ed36db676478c3d26078f13484fe01c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Sep 2015 19:06:33 +0200 Subject: [PATCH 019/240] perf: Fix u16 overflows Vince reported that its possible to overflow the various size fields and get weird stuff if you stick too many events in a group. Put a lid on this by requiring the fixed record size not exceed 16k. This is still a fair amount of events (silly amount really) and leaves plenty room for callchains and stack dwarves while also avoiding overflowing the u16 variables. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 50 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 39679f749500..dbb5329b6a3a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event) PERF_EVENT_STATE_INACTIVE; } -/* - * Called at perf_event creation and when events are attached/detached from a - * group. - */ -static void perf_event__read_size(struct perf_event *event) +static void __perf_event_read_size(struct perf_event *event, int nr_siblings) { int entry = sizeof(u64); /* value */ int size = 0; @@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event) entry += sizeof(u64); if (event->attr.read_format & PERF_FORMAT_GROUP) { - nr += event->group_leader->nr_siblings; + nr += nr_siblings; size += sizeof(u64); } @@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event) event->read_size = size; } -static void perf_event__header_size(struct perf_event *event) +static void __perf_event_header_size(struct perf_event *event, u64 sample_type) { struct perf_sample_data *data; - u64 sample_type = event->attr.sample_type; u16 size = 0; - perf_event__read_size(event); - if (sample_type & PERF_SAMPLE_IP) size += sizeof(data->ip); @@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event) event->header_size = size; } +/* + * Called at perf_event creation and when events are attached/detached from a + * group. + */ +static void perf_event__header_size(struct perf_event *event) +{ + __perf_event_read_size(event, + event->group_leader->nr_siblings); + __perf_event_header_size(event, event->attr.sample_type); +} + static void perf_event__id_header_size(struct perf_event *event) { struct perf_sample_data *data; @@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event) event->id_header_size = size; } +static bool perf_event_validate_size(struct perf_event *event) +{ + /* + * The values computed here will be over-written when we actually + * attach the event. + */ + __perf_event_read_size(event, event->group_leader->nr_siblings + 1); + __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); + perf_event__id_header_size(event); + + /* + * Sum the lot; should not exceed the 64k limit we have on records. + * Conservative limit to allow for callchains and other variable fields. + */ + if (event->read_size + event->header_size + + event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) + return false; + + return true; +} + static void perf_group_attach(struct perf_event *event) { struct perf_event *group_leader = event->group_leader, *pos; @@ -8302,6 +8327,11 @@ SYSCALL_DEFINE5(perf_event_open, mutex_lock(&ctx->mutex); } + if (!perf_event_validate_size(event)) { + err = -E2BIG; + goto err_locked; + } + /* * Must be under the same ctx::mutex as perf_install_in_context(), * because we need to serialize with concurrent event creation. From f73e22ab450140830005581c2c7ec389791a1b8d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Sep 2015 20:48:22 +0200 Subject: [PATCH 020/240] perf: Fix races in computing the header sizes There are two races with the current code: - Another event can join the group and compute a larger header_size concurrently, if the smaller store wins we'll have an incorrect header_size set. - We compute the header_size after the event becomes active, therefore its possible to use the size before its computed. Remedy the first by moving the computation inside the ctx::mutex lock, and the second by placing it _before_ perf_install_in_context(). Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index dbb5329b6a3a..b11756f9b6dc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8399,6 +8399,15 @@ SYSCALL_DEFINE5(perf_event_open, put_ctx(gctx); } + /* + * Precalculate sample_data sizes; do while holding ctx::mutex such + * that we're serialized against further additions and before + * perf_install_in_context() which is the point the event is active and + * can use these values. + */ + perf_event__header_size(event); + perf_event__id_header_size(event); + perf_install_in_context(ctx, event, event->cpu); perf_unpin_context(ctx); @@ -8414,12 +8423,6 @@ SYSCALL_DEFINE5(perf_event_open, list_add_tail(&event->owner_entry, ¤t->perf_event_list); mutex_unlock(¤t->perf_event_mutex); - /* - * Precalculate sample_data sizes - */ - perf_event__header_size(event); - perf_event__id_header_size(event); - /* * Drop the reference on the group_event after placing the * new event on the sibling_list. This ensures destruction From 381c02f6d8ccad8ed574630f879c40fb59715124 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Sep 2015 18:18:49 +0100 Subject: [PATCH 021/240] perf record: Avoid infinite loop at buildid processing with no samples If a session contains no events, we can get stuck in an infinite loop in __perf_session__process_events, with a non-zero file_size and data_offset, but a zero data_size. In this case, we can mmap the entirety of the file (consisting of the file and attribute headers), and fetch_mmaped_event will correctly refuse to read any (unmapped and non-existent) event headers. This causes __perf_session__process_events to unmap the file and retry with the exact same parameters, getting stuck in an infinite loop. This has been observed to result in an exit-time hang when counting rare/unschedulable events with perf record, and can be triggered artificially with the script below: ---- #!/bin/sh printf "REPRO: launching perf\n"; ./perf record -e software/config=9/ sleep 1 & PERF_PID=$!; sleep 0.002; kill -2 $PERF_PID; printf "REPRO: waiting for perf (%d) to exit...\n" "$PERF_PID"; wait $PERF_PID; printf "REPRO: perf exited\n"; ---- To avoid this, have __perf_session__process_events bail out early when the file has no data (i.e. it has no events). Commiter note: I only managed to reproduce this when setting /proc/sys/kernel/kptr_restrict to '1' and changing the code to purposefully not process any samples and no synthesized samples, i.e. kptr_restrict prevents 'record' from synthesizing the kernel mmaps for vmlinux + modules and since it is a workload started from perf, we don't synthesize mmap/comm records for existing threads. Adrian Hunter managed to reproduce it in his environment tho. Signed-off-by: Mark Rutland Tested-by: Arnaldo Carvalho de Melo Tested-by: Adrian Hunter Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1442423929-12253-1-git-send-email-mark.rutland@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8a4537ee9bc3..fc3f7c922f99 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1580,7 +1580,10 @@ static int __perf_session__process_events(struct perf_session *session, file_offset = page_offset; head = data_offset - page_offset; - if (data_size && (data_offset + data_size < file_size)) + if (data_size == 0) + goto out; + + if (data_offset + data_size < file_size) file_size = data_offset + data_size; ui_progress__init(&prog, file_size, "Processing events..."); From b0379d7aa415249ce1dab8dd2554f8845d6822ab Mon Sep 17 00:00:00 2001 From: Dudley Du Date: Sat, 19 Sep 2015 10:39:32 -0700 Subject: [PATCH 022/240] Input: cyapa - fix address of Gen3 devices in device tree documentation All of the Gen3 touchpads are fixed with I2C address 0x67, so correct the reg value description from 0x24 to 0x67. Signed-off-by: Dudley Du Signed-off-by: Dmitry Torokhov --- Documentation/devicetree/bindings/input/cypress,cyapa.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/input/cypress,cyapa.txt b/Documentation/devicetree/bindings/input/cypress,cyapa.txt index 635a3b036630..8d91ba9ff2fd 100644 --- a/Documentation/devicetree/bindings/input/cypress,cyapa.txt +++ b/Documentation/devicetree/bindings/input/cypress,cyapa.txt @@ -25,7 +25,7 @@ Example: /* Cypress Gen3 touchpad */ touchpad@67 { compatible = "cypress,cyapa"; - reg = <0x24>; + reg = <0x67>; interrupt-parent = <&gpio>; interrupts = <2 IRQ_TYPE_EDGE_FALLING>; /* GPIO 2 */ wakeup-source; From b1452723cf23c908eed2bf6baf0c23943eb0e8bf Mon Sep 17 00:00:00 2001 From: Daniel Martin Date: Sat, 19 Sep 2015 11:27:19 -0700 Subject: [PATCH 023/240] Input: fix typo in MT documentation Section "Event Computation" had this: ... ABS_MT_TOOL_X := C_X ABS_MT_TOOL_X := C_Y Replace the second ABS_MT_TOOL_X with ABS_MT_TOOL_Y. Signed-off-by: Daniel Martin Acked-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- Documentation/input/multi-touch-protocol.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt index b85d000faeb4..c51f1146f3bd 100644 --- a/Documentation/input/multi-touch-protocol.txt +++ b/Documentation/input/multi-touch-protocol.txt @@ -361,7 +361,7 @@ For win8 devices with both T and C coordinates, the position mapping is ABS_MT_POSITION_X := T_X ABS_MT_POSITION_Y := T_Y ABS_MT_TOOL_X := C_X - ABS_MT_TOOL_X := C_Y + ABS_MT_TOOL_Y := C_Y Unfortunately, there is not enough information to specify both the touching ellipse and the tool ellipse, so one has to resort to approximations. One From b9ab471b71900ca8a670ecf4f1cc65b626953655 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 14 Sep 2015 10:38:31 -0700 Subject: [PATCH 024/240] Input: pm8941-pwrkey - remove unneded semicolon It's not needed and is just creating a null statement so remove it. Signed-off-by: Javier Martinez Canillas Signed-off-by: Dmitry Torokhov --- drivers/input/misc/pm8941-pwrkey.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/pm8941-pwrkey.c b/drivers/input/misc/pm8941-pwrkey.c index 867db8a91372..e317b75357a0 100644 --- a/drivers/input/misc/pm8941-pwrkey.c +++ b/drivers/input/misc/pm8941-pwrkey.c @@ -93,7 +93,7 @@ static int pm8941_reboot_notify(struct notifier_block *nb, default: reset_type = PON_PS_HOLD_TYPE_HARD_RESET; break; - }; + } error = regmap_update_bits(pwrkey->regmap, pwrkey->baseaddr + PON_PS_HOLD_RST_CTL, From 86a39bffc4e94f80527c14fe00a3acb432910ef3 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 14 Sep 2015 10:38:39 -0700 Subject: [PATCH 025/240] Input: mms114 - remove unneded semicolons They aren't needed and are just creating null statements so remove it. Signed-off-by: Javier Martinez Canillas Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/mms114.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index 7cce87650fc8..1fafc9f57af6 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -394,12 +394,12 @@ static struct mms114_platform_data *mms114_parse_dt(struct device *dev) if (of_property_read_u32(np, "x-size", &pdata->x_size)) { dev_err(dev, "failed to get x-size property\n"); return NULL; - }; + } if (of_property_read_u32(np, "y-size", &pdata->y_size)) { dev_err(dev, "failed to get y-size property\n"); return NULL; - }; + } of_property_read_u32(np, "contact-threshold", &pdata->contact_threshold); From 46b018fa95003f8159f5fcf9b8cd89acaea34e31 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 5 Sep 2015 10:19:38 -0700 Subject: [PATCH 026/240] Input: walkera0701 - fix abs() calculations on 64 bit values abs() function can not be used with 64 bit values, so let's switch to abs64(). From include/linux/kernel.h: /* * abs() handles unsigned and signed longs, ints, shorts and chars. * For all input types abs() returns a signed long. * abs() should not be used for 64-bit types (s64, u64, long long) * - use abs64() for those. */ Reported-by: Joe Perches Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/walkera0701.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/input/joystick/walkera0701.c b/drivers/input/joystick/walkera0701.c index b76ac580703c..a8bc2fe170dd 100644 --- a/drivers/input/joystick/walkera0701.c +++ b/drivers/input/joystick/walkera0701.c @@ -150,7 +150,7 @@ static void walkera0701_irq_handler(void *handler_data) if (w->counter == 24) { /* full frame */ walkera0701_parse_frame(w); w->counter = NO_SYNC; - if (abs(pulse_time - SYNC_PULSE) < RESERVE) /* new frame sync */ + if (abs64(pulse_time - SYNC_PULSE) < RESERVE) /* new frame sync */ w->counter = 0; } else { if ((pulse_time > (ANALOG_MIN_PULSE - RESERVE) @@ -161,7 +161,7 @@ static void walkera0701_irq_handler(void *handler_data) } else w->counter = NO_SYNC; } - } else if (abs(pulse_time - SYNC_PULSE - BIN0_PULSE) < + } else if (abs64(pulse_time - SYNC_PULSE - BIN0_PULSE) < RESERVE + BIN1_PULSE - BIN0_PULSE) /* frame sync .. */ w->counter = 0; From 6cc527b05847984990a09ef028b2f670bbc72c46 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 14 Sep 2015 10:36:35 -0700 Subject: [PATCH 027/240] Input: imx6ul_tsc - propagate the errors imx6ul_adc_init() may fail in two cases, so we should better propagate the errors and make sure that the callers of this function also check and propagate the errors accordingly. Signed-off-by: Fabio Estevam Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/imx6ul_tsc.c | 28 +++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/input/touchscreen/imx6ul_tsc.c b/drivers/input/touchscreen/imx6ul_tsc.c index ff0b75813daa..0a96b76f6021 100644 --- a/drivers/input/touchscreen/imx6ul_tsc.c +++ b/drivers/input/touchscreen/imx6ul_tsc.c @@ -94,7 +94,7 @@ struct imx6ul_tsc { * TSC module need ADC to get the measure value. So * before config TSC, we should initialize ADC module. */ -static void imx6ul_adc_init(struct imx6ul_tsc *tsc) +static int imx6ul_adc_init(struct imx6ul_tsc *tsc) { int adc_hc = 0; int adc_gc; @@ -122,17 +122,23 @@ static void imx6ul_adc_init(struct imx6ul_tsc *tsc) timeout = wait_for_completion_timeout (&tsc->completion, ADC_TIMEOUT); - if (timeout == 0) + if (timeout == 0) { dev_err(tsc->dev, "Timeout for adc calibration\n"); + return -ETIMEDOUT; + } adc_gs = readl(tsc->adc_regs + REG_ADC_GS); - if (adc_gs & ADC_CALF) + if (adc_gs & ADC_CALF) { dev_err(tsc->dev, "ADC calibration failed\n"); + return -EINVAL; + } /* TSC need the ADC work in hardware trigger */ adc_cfg = readl(tsc->adc_regs + REG_ADC_CFG); adc_cfg |= ADC_HARDWARE_TRIGGER; writel(adc_cfg, tsc->adc_regs + REG_ADC_CFG); + + return 0; } /* @@ -188,11 +194,17 @@ static void imx6ul_tsc_set(struct imx6ul_tsc *tsc) writel(start, tsc->tsc_regs + REG_TSC_FLOW_CONTROL); } -static void imx6ul_tsc_init(struct imx6ul_tsc *tsc) +static int imx6ul_tsc_init(struct imx6ul_tsc *tsc) { - imx6ul_adc_init(tsc); + int err; + + err = imx6ul_adc_init(tsc); + if (err) + return err; imx6ul_tsc_channel_config(tsc); imx6ul_tsc_set(tsc); + + return 0; } static void imx6ul_tsc_disable(struct imx6ul_tsc *tsc) @@ -311,9 +323,7 @@ static int imx6ul_tsc_open(struct input_dev *input_dev) return err; } - imx6ul_tsc_init(tsc); - - return 0; + return imx6ul_tsc_init(tsc); } static void imx6ul_tsc_close(struct input_dev *input_dev) @@ -491,7 +501,7 @@ static int __maybe_unused imx6ul_tsc_resume(struct device *dev) goto out; } - imx6ul_tsc_init(tsc); + retval = imx6ul_tsc_init(tsc); } out: From 3905de62b2624a4574776b3b7ddfa97758b75edc Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 14 Sep 2015 10:37:08 -0700 Subject: [PATCH 028/240] Input: imx6ul_tsc - check for negative return value We should check for negative values returned by platform_get_irq(). Signed-off-by: Fabio Estevam Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/imx6ul_tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/imx6ul_tsc.c b/drivers/input/touchscreen/imx6ul_tsc.c index 0a96b76f6021..4957d8b6ca59 100644 --- a/drivers/input/touchscreen/imx6ul_tsc.c +++ b/drivers/input/touchscreen/imx6ul_tsc.c @@ -416,7 +416,7 @@ static int imx6ul_tsc_probe(struct platform_device *pdev) } adc_irq = platform_get_irq(pdev, 1); - if (adc_irq <= 0) { + if (adc_irq < 0) { dev_err(&pdev->dev, "no adc irq resource?\n"); return adc_irq; } From 5eab3cf3e48cf658f3432e8ba31436d5a4f6a219 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 14 Sep 2015 10:37:31 -0700 Subject: [PATCH 029/240] Input: imx6ul_tsc - use the preferred method for kzalloc() According to Documentation/CodingStyle: "The preferred form for passing a size of a struct is the following: p = kmalloc(sizeof(*p), ...);" so do as suggested. Signed-off-by: Fabio Estevam Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/imx6ul_tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/imx6ul_tsc.c b/drivers/input/touchscreen/imx6ul_tsc.c index 4957d8b6ca59..67c73f2e7e3a 100644 --- a/drivers/input/touchscreen/imx6ul_tsc.c +++ b/drivers/input/touchscreen/imx6ul_tsc.c @@ -347,7 +347,7 @@ static int imx6ul_tsc_probe(struct platform_device *pdev) int tsc_irq; int adc_irq; - tsc = devm_kzalloc(&pdev->dev, sizeof(struct imx6ul_tsc), GFP_KERNEL); + tsc = devm_kzalloc(&pdev->dev, sizeof(*tsc), GFP_KERNEL); if (!tsc) return -ENOMEM; From 002801fc5372ecb725f0d7939f88ca752ce1d499 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 14 Sep 2015 10:37:55 -0700 Subject: [PATCH 030/240] Input: imx6ul_tsc - fix controller name We should better write "Touchscreen". Signed-off-by: Fabio Estevam Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/imx6ul_tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/imx6ul_tsc.c b/drivers/input/touchscreen/imx6ul_tsc.c index 67c73f2e7e3a..8275267eac25 100644 --- a/drivers/input/touchscreen/imx6ul_tsc.c +++ b/drivers/input/touchscreen/imx6ul_tsc.c @@ -355,7 +355,7 @@ static int imx6ul_tsc_probe(struct platform_device *pdev) if (!input_dev) return -ENOMEM; - input_dev->name = "iMX6UL TouchScreen Controller"; + input_dev->name = "iMX6UL Touchscreen Controller"; input_dev->id.bustype = BUS_HOST; input_dev->open = imx6ul_tsc_open; From 3245acbce952cda75b2fafa8e40452ef0c4756c1 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 17 Sep 2015 18:09:11 +0200 Subject: [PATCH 031/240] hwmon: (abx500) Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Guenter Roeck --- drivers/hwmon/abx500.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/abx500.c b/drivers/hwmon/abx500.c index 6cb89c0ebab6..1fd46859ed29 100644 --- a/drivers/hwmon/abx500.c +++ b/drivers/hwmon/abx500.c @@ -470,6 +470,7 @@ static const struct of_device_id abx500_temp_match[] = { { .compatible = "stericsson,abx500-temp" }, {}, }; +MODULE_DEVICE_TABLE(of, abx500_temp_match); #endif static struct platform_driver abx500_temp_driver = { From fe5152882a45a2545d44d39fe29badc8e4f66ad3 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 17 Sep 2015 18:09:28 +0200 Subject: [PATCH 032/240] hwmon: (gpio-fan) Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Guenter Roeck --- drivers/hwmon/gpio-fan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c index a3dae6d0082a..82de3deeb18a 100644 --- a/drivers/hwmon/gpio-fan.c +++ b/drivers/hwmon/gpio-fan.c @@ -539,6 +539,7 @@ static const struct of_device_id of_gpio_fan_match[] = { { .compatible = "gpio-fan", }, {}, }; +MODULE_DEVICE_TABLE(of, of_gpio_fan_match); #endif /* CONFIG_OF_GPIO */ static int gpio_fan_probe(struct platform_device *pdev) From f491e70ccffa5d19aa51c958909320fa1f3905ed Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 17 Sep 2015 18:09:55 +0200 Subject: [PATCH 033/240] hwmon: (pwm-fan) Fix module autoload for OF platform driver This platform driver has a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Signed-off-by: Guenter Roeck --- drivers/hwmon/pwm-fan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 2d9a712699ff..3e23003f78b0 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -323,6 +323,7 @@ static const struct of_device_id of_pwm_fan_match[] = { { .compatible = "pwm-fan", }, {}, }; +MODULE_DEVICE_TABLE(of, of_pwm_fan_match); static struct platform_driver pwm_fan_driver = { .probe = pwm_fan_probe, From 66e8c57da6bf6b847a48a5a6fda59512f733ed78 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 25 Aug 2015 20:45:18 +0200 Subject: [PATCH 034/240] rcu: Change _wait_rcu_gp() to work around GCC bug 67055 Code like this in inline functions confuses some recent versions of gcc: const int n = const-expr; whatever_t array[n]; For more details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67055#c13 This compiler bug results in the following failure after 114b7fd4b (rcu: Create rcu_sync infrastructure): In file included from include/linux/rcupdate.h:429:0, from include/linux/rcu_sync.h:5, from kernel/rcu/sync.c:1: include/linux/rcutiny.h: In function 'rcu_barrier_sched': include/linux/rcutiny.h:55:20: internal compiler error: Segmentation fault static inline void rcu_barrier_sched(void) This commit therefore eliminates the constant local variable in favor of direct use of the expression. Reported-and-tested-by: Mark Salter Reported-by: Guenter Roeck Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff476515f716..581abf848566 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -230,12 +230,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, struct rcu_synchronize *rs_array); #define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - const int __n = ARRAY_SIZE(__crcu_array); \ - struct rcu_synchronize __rs_array[__n]; \ - \ - __wait_rcu_gp(checktiny, __n, __crcu_array, __rs_array); \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ } while (0) #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) From 19a5ecde086a6a5287978b12ae948fa691b197b7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 20 Sep 2015 21:01:22 -0700 Subject: [PATCH 035/240] rcu: Suppress lockdep false positive for rcp->exp_funnel_mutex In kernels built with CONFIG_PREEMPT=y, synchronize_rcu_expedited() invokes synchronize_sched_expedited() while holding RCU-preempt's root rcu_node structure's ->exp_funnel_mutex, which is acquired after the rcu_data structure's ->exp_funnel_mutex. The first thing that synchronize_sched_expedited() will do is acquire RCU-sched's rcu_data structure's ->exp_funnel_mutex. There is no danger of an actual deadlock because the locking order is always from RCU-preempt's expedited mutexes to those of RCU-sched. Unfortunately, lockdep considers both rcu_data structures' ->exp_funnel_mutex to be in the same lock class and therefore reports a deadlock cycle. This commit silences this false positive by placing RCU-sched's rcu_data structures' ->exp_funnel_mutex locks into their own lock class. Reported-by: Sasha Levin Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9f75f25cc5d9..775d36cc0050 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3868,6 +3868,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) static void __init rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) { + static struct lock_class_key rcu_exp_sched_rdp_class; unsigned long flags; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); @@ -3883,6 +3884,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) mutex_init(&rdp->exp_funnel_mutex); rcu_boot_init_nocb_percpu_data(rdp); raw_spin_unlock_irqrestore(&rnp->lock, flags); + if (rsp == &rcu_sched_state) + lockdep_set_class_and_name(&rdp->exp_funnel_mutex, + &rcu_exp_sched_rdp_class, + "rcu_data_exp_sched"); } /* From 9d7b03f863f2fc8bd80e83d3a2d7e521094d290e Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Sat, 19 Sep 2015 09:49:45 -0700 Subject: [PATCH 036/240] Input: elan_i2c - don't require known iap version The Asus X456UA has an ELAN1000 touchpad with IAP version 0xe. This is unknown to elan_get_fwinfo() so driver probe fails and I am left with an unusable touchpad. However, the fwinfo is not required for general driver usage, it is only needed if the user decides to upload new firmware. Adjust the driver so that we do not abort probe when we encounter unexpected IAP version, but rather warn user that firmware update feature of the driver will not work. Signed-off-by: Daniel Drake Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index fa945304b9a5..7cad81942b8c 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -266,11 +266,10 @@ static int elan_query_device_info(struct elan_tp_data *data) error = elan_get_fwinfo(data->iap_version, &data->fw_validpage_count, &data->fw_signature_address); - if (error) { - dev_err(&data->client->dev, - "unknown iap version %d\n", data->iap_version); - return error; - } + if (error) + dev_warn(&data->client->dev, + "unexpected iap version %#04x (ic type: %#04x), firmware update will not work\n", + data->iap_version, data->ic_type); return 0; } @@ -486,6 +485,9 @@ static ssize_t elan_sysfs_update_fw(struct device *dev, const u8 *fw_signature; static const u8 signature[] = {0xAA, 0x55, 0xCC, 0x33, 0xFF, 0xFF}; + if (data->fw_validpage_count == 0) + return -EINVAL; + /* Look for a firmware with the product id appended. */ fw_name = kasprintf(GFP_KERNEL, ETP_FW_NAME, data->product_id); if (!fw_name) { From c84333a1c55a2d3329e8b472f59e9d1ccf5ddc1b Mon Sep 17 00:00:00 2001 From: Duson Lin Date: Mon, 21 Sep 2015 09:24:46 -0700 Subject: [PATCH 037/240] Input: elan_i2c - add ic type 0x03 The 0x03 is valid 3000 serial ic type too. Signed-off-by: Duson Lin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 7cad81942b8c..0f8ab3170055 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -102,6 +102,7 @@ static int elan_get_fwinfo(u8 iap_version, u16 *validpage_count, *validpage_count = 512; break; case 0x09: + case 0x03: *validpage_count = 768; break; case 0x0D: From ed75a14eb554995c522a3b861d4c86a7eddb2ad7 Mon Sep 17 00:00:00 2001 From: Duson Lin Date: Mon, 21 Sep 2015 09:26:46 -0700 Subject: [PATCH 038/240] Input: elan_i2c - expand maximum product_id form 0xFF to 0xFFFF In order to support more projects in the future, we expand the maximum product_id value form 0xFF to 0xFFFF. Signed-off by: Duson Lin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c.h | 2 +- drivers/input/mouse/elan_i2c_core.c | 4 ++-- drivers/input/mouse/elan_i2c_i2c.c | 4 ++-- drivers/input/mouse/elan_i2c_smbus.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h index 73670f2aebfd..c0ec26118732 100644 --- a/drivers/input/mouse/elan_i2c.h +++ b/drivers/input/mouse/elan_i2c.h @@ -60,7 +60,7 @@ struct elan_transport_ops { int (*get_sm_version)(struct i2c_client *client, u8* ic_type, u8 *version); int (*get_checksum)(struct i2c_client *client, bool iap, u16 *csum); - int (*get_product_id)(struct i2c_client *client, u8 *id); + int (*get_product_id)(struct i2c_client *client, u16 *id); int (*get_max)(struct i2c_client *client, unsigned int *max_x, unsigned int *max_y); diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 0f8ab3170055..b51062622050 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -40,7 +40,7 @@ #include "elan_i2c.h" #define DRIVER_NAME "elan_i2c" -#define ELAN_DRIVER_VERSION "1.6.0" +#define ELAN_DRIVER_VERSION "1.6.1" #define ETP_MAX_PRESSURE 255 #define ETP_FWIDTH_REDUCE 90 #define ETP_FINGER_WIDTH 15 @@ -76,7 +76,7 @@ struct elan_tp_data { unsigned int x_res; unsigned int y_res; - u8 product_id; + u16 product_id; u8 fw_version; u8 sm_version; u8 iap_version; diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index 683c840c9dd7..a679e56c44cd 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -276,7 +276,7 @@ static int elan_i2c_get_sm_version(struct i2c_client *client, return 0; } -static int elan_i2c_get_product_id(struct i2c_client *client, u8 *id) +static int elan_i2c_get_product_id(struct i2c_client *client, u16 *id) { int error; u8 val[3]; @@ -287,7 +287,7 @@ static int elan_i2c_get_product_id(struct i2c_client *client, u8 *id) return error; } - *id = val[0]; + *id = le16_to_cpup((__le16 *)val); return 0; } diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c index ff36a366b2aa..cb6aecbc1dc2 100644 --- a/drivers/input/mouse/elan_i2c_smbus.c +++ b/drivers/input/mouse/elan_i2c_smbus.c @@ -183,7 +183,7 @@ static int elan_smbus_get_sm_version(struct i2c_client *client, return 0; } -static int elan_smbus_get_product_id(struct i2c_client *client, u8 *id) +static int elan_smbus_get_product_id(struct i2c_client *client, u16 *id) { int error; u8 val[3]; @@ -195,7 +195,7 @@ static int elan_smbus_get_product_id(struct i2c_client *client, u8 *id) return error; } - *id = val[1]; + *id = be16_to_cpup((__be16 *)val); return 0; } From 72d4736253af74147b1fa68145b2f4c61d1f37e1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 19 Sep 2015 11:22:57 -0700 Subject: [PATCH 039/240] Input: uinput - fix crash when using ABS events Commit b6d30968d86c45a7bb599eaca13ff048d3fa576c (Input: uinput - switch to using for_each_set_bit()) switched driver to use for_each_set_bit(). However during initial write of the uinput structure that contains min/max data for all possible axes none of them are reflected in dev->absbit yet and so we were skipping over all of them and were not allocating absinfo memory which caused crash later when driver tried to sens EV_ABS events: <1>[ 15.064330] BUG: unable to handle kernel NULL pointer dereference at 0000000000000024 <1>[ 15.064336] IP: [] input_handle_event+0x232/0x4e0 <4>[ 15.064343] PGD 0 <4>[ 15.064345] Oops: 0000 [#1] SMP Fixes: b6d30968d86c45a7bb599eaca13ff048d3fa576c Cc: stable@vger.kernel.org Reported-by: Stephen Chandler Paul Tested-by: Stephen Chandler Paul Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 345df9b03aed..5adbcedcb81c 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -414,7 +414,7 @@ static int uinput_setup_device(struct uinput_device *udev, dev->id.product = user_dev->id.product; dev->id.version = user_dev->id.version; - for_each_set_bit(i, dev->absbit, ABS_CNT) { + for (i = 0; i < ABS_CNT; i++) { input_abs_set_max(dev, i, user_dev->absmax[i]); input_abs_set_min(dev, i, user_dev->absmin[i]); input_abs_set_fuzz(dev, i, user_dev->absfuzz[i]); From c2e4b24ff848bb180f9b9cd873a38327cd219ad2 Mon Sep 17 00:00:00 2001 From: Kapileshwar Singh Date: Tue, 22 Sep 2015 14:22:03 +0100 Subject: [PATCH 040/240] tools lib traceevent: Fix string handling in heterogeneous arch environments When a trace recorded on a 32-bit device is processed with a 64-bit binary, the higher 32-bits of the address need to ignored. The lack of this results in the output of the 64-bit pointer value to the trace as the 32-bit address lookup fails in find_printk(). Before: burn-1778 [003] 548.600305: bputs: 0xc0046db2s: 2cec5c058d98c After: burn-1778 [003] 548.600305: bputs: 0xc0046db2s: RT throttling activated The problem occurs in PRINT_FIELD when the field is recognized as a pointer to a string (of the type const char *) Heterogeneous architectures cases below can arise and should be handled: * Traces recorded using 32-bit addresses processed on a 64-bit machine * Traces recorded using 64-bit addresses processed on a 32-bit machine Reported-by: Juri Lelli Signed-off-by: Kapileshwar Singh Reviewed-by: Steven Rostedt Cc: David Ahern Cc: Javi Merino Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1442928123-13824-1-git-send-email-kapileshwar.singh@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 4d885934b919..cf42b090477b 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3795,7 +3795,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct format_field *field; struct printk_map *printk; long long val, fval; - unsigned long addr; + unsigned long long addr; char *str; unsigned char *hex; int print; @@ -3828,13 +3828,30 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, */ if (!(field->flags & FIELD_IS_ARRAY) && field->size == pevent->long_size) { - addr = *(unsigned long *)(data + field->offset); + + /* Handle heterogeneous recording and processing + * architectures + * + * CASE I: + * Traces recorded on 32-bit devices (32-bit + * addressing) and processed on 64-bit devices: + * In this case, only 32 bits should be read. + * + * CASE II: + * Traces recorded on 64 bit devices and processed + * on 32-bit devices: + * In this case, 64 bits must be read. + */ + addr = (pevent->long_size == 8) ? + *(unsigned long long *)(data + field->offset) : + (unsigned long long)*(unsigned int *)(data + field->offset); + /* Check if it matches a print format */ printk = find_printk(pevent, addr); if (printk) trace_seq_puts(s, printk->printk); else - trace_seq_printf(s, "%lx", addr); + trace_seq_printf(s, "%llx", addr); break; } str = malloc(len + 1); From fc2ca674470bbfe11d72a20a3f19fd3dc43bfca0 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 30 Aug 2015 21:19:58 -0700 Subject: [PATCH 041/240] MIPS: Fix console output for Fulong2e system Commit 3adeb2566b9b ("MIPS: Loongson: Improve LEFI firmware interface") made the number of UARTs dynamic if LEFI_FIRMWARE_INTERFACE is configured. Unfortunately, it did not initialize the number of UARTs if LEFI_FIRMWARE_INTERFACE is not configured. As a result, the Fulong2e system has no console. Fixes: 3adeb2566b9b ("MIPS: Loongson: Improve LEFI firmware interface") Acked-by: Huacai Chen Signed-off-by: Guenter Roeck Tested-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11076/ Signed-off-by: Ralf Baechle --- arch/mips/loongson64/common/env.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index f6c44dd332e2..d6d07ad56180 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -64,6 +64,9 @@ void __init prom_init_env(void) } if (memsize == 0) memsize = 256; + + loongson_sysconf.nr_uarts = 1; + pr_info("memsize=%u, highmemsize=%u\n", memsize, highmemsize); #else struct boot_params *boot_p; From 88d3426942d748b90b051b7ef2d5d765f5f3054c Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 3 Sep 2015 08:36:35 +0200 Subject: [PATCH 042/240] MIPS: bootmem: Fix mapstart calculation for contiguous maps Commit a6335fa1 fixed the case with gap between initrd and next usable PFN zone, but broken the case when initrd is combined with usable memory into one region (in add_memory_region()). Restore the fixup initially brought in by f9a7febd. ---- error message ---- Unpacking initramfs... Initramfs unpacking failed: junk in compressed archive BUG: Bad page state in process swapper pfn:00261 page:81004c20 count:0 mapcount:-127 mapping: (null) index:0x2 flags: 0x0() page dumped because: nonzero mapcount CPU: 0 PID: 1 Comm: swapper Not tainted 4.2.0+ #1782 ----------------------- Signed-off-by: Alexander Sverdlin Reported-by: Tony Wu Tested-by: Tony Wu Cc: David Daney Cc: Zubair Lutfullah Kakakhel Cc: Huacai Chen Cc: Joe Perches Cc: Steven J. Hill Cc: Aaro Koskinen Cc: stable@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11086/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 35b8316002f8..479515109e5b 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -338,7 +338,7 @@ static void __init bootmem_init(void) if (end <= reserved_end) continue; #ifdef CONFIG_BLK_DEV_INITRD - /* mapstart should be after initrd_end */ + /* Skip zones before initrd and initrd itself */ if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) continue; #endif @@ -371,6 +371,14 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } +#ifdef CONFIG_BLK_DEV_INITRD + /* + * mapstart should be after initrd_end + */ + if (initrd_end) + mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); +#endif + /* * Initialize the boot-time allocator with low memory only. */ From faa9724a674e5e52316bb0d173aed16bd17d536c Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sat, 5 Sep 2015 18:46:56 +0200 Subject: [PATCH 043/240] MIPS: BPF: Avoid unreachable code on little endian On little endian, avoid generating the big endian version of the code by using #else in addition to #ifdef #endif. Also fix one alignment issue wrt delay slot. Signed-off-by: Aurelien Jarno Reviewed-by: Markos Chandras Cc: stable@vger.kernel.org # v4.2+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11097/ Signed-off-by: Ralf Baechle --- arch/mips/net/bpf_jit_asm.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S index e92726099be0..4f54cb18f104 100644 --- a/arch/mips/net/bpf_jit_asm.S +++ b/arch/mips/net/bpf_jit_asm.S @@ -151,9 +151,10 @@ NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) wsbh t0, $r_s0 jr $r_ra rotr $r_A, t0, 16 -#endif +#else jr $r_ra - move $r_A, $r_s0 + move $r_A, $r_s0 +#endif END(bpf_slow_path_word) @@ -162,9 +163,10 @@ NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) #ifdef CONFIG_CPU_LITTLE_ENDIAN jr $r_ra wsbh $r_A, $r_s0 -#endif +#else jr $r_ra move $r_A, $r_s0 +#endif END(bpf_slow_path_half) From b259e51f2e29390518021f9b8df55a3de42f371b Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Sat, 5 Sep 2015 18:46:57 +0200 Subject: [PATCH 044/240] MIPS: BPF: Fix build on pre-R2 little endian CPUs The rotr, seh and wsbh instructions have been introduced with the R2 ISA. Thus the current BPF code fails to build on pre-R2 little endian CPUs: CC arch/mips/net/bpf_jit.o AS arch/mips/net/bpf_jit_asm.o /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S: Assembler messages: /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S:67: Error: opcode not supported on this processor: mips32 (mips32) `wsbh $8,$19' /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S:68: Error: opcode not supported on this processor: mips32 (mips32) `rotr $19,$8,16' /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S:83: Error: opcode not supported on this processor: mips32 (mips32) `wsbh $8,$19' /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S:84: Error: opcode not supported on this processor: mips32 (mips32) `seh $19,$8' /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S:151: Error: opcode not supported on this processor: mips32 (mips32) `wsbh $8,$12' /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S:153: Error: opcode not supported on this processor: mips32 (mips32) `rotr $19,$8,16' /home/aurel32/linux-4.2/arch/mips/net/bpf_jit_asm.S:164: Error: opcode not supported on this processor: mips32 (mips32) `wsbh $19,$12' /home/aurel32/linux-4.2/scripts/Makefile.build:294: recipe for target 'arch/mips/net/bpf_jit_asm.o' failed Fix that by providing equivalent code for these CPUs. Signed-off-by: Aurelien Jarno Reviewed-by: Markos Chandras Cc: stable@vger.kernel.org # v4.2+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11098/ Signed-off-by: Ralf Baechle --- arch/mips/net/bpf_jit_asm.S | 42 +++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S index 4f54cb18f104..dabf4179cd7e 100644 --- a/arch/mips/net/bpf_jit_asm.S +++ b/arch/mips/net/bpf_jit_asm.S @@ -64,8 +64,20 @@ sk_load_word_positive: PTR_ADDU t1, $r_skb_data, offset lw $r_A, 0(t1) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A rotr $r_A, t0, 16 +# else + sll t0, $r_A, 24 + srl t1, $r_A, 24 + srl t2, $r_A, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_A, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + or $r_A, t0, t1 +# endif #endif jr $r_ra move $r_ret, zero @@ -80,8 +92,16 @@ sk_load_half_positive: PTR_ADDU t1, $r_skb_data, offset lh $r_A, 0(t1) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A seh $r_A, t0 +# else + sll t0, $r_A, 24 + andi t1, $r_A, 0xff00 + sra t0, t0, 16 + srl t1, t1, 8 + or $r_A, t0, t1 +# endif #endif jr $r_ra move $r_ret, zero @@ -148,9 +168,22 @@ sk_load_byte_positive: NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) bpf_slow_path_common(4) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_s0 jr $r_ra rotr $r_A, t0, 16 +# else + sll t0, $r_s0, 24 + srl t1, $r_s0, 24 + srl t2, $r_s0, 8 + or t0, t0, t1 + andi t2, t2, 0xff00 + andi t1, $r_s0, 0xff00 + or t0, t0, t2 + sll t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif #else jr $r_ra move $r_A, $r_s0 @@ -161,8 +194,17 @@ NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) bpf_slow_path_common(2) #ifdef CONFIG_CPU_LITTLE_ENDIAN +# if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) jr $r_ra wsbh $r_A, $r_s0 +# else + sll t0, $r_s0, 8 + andi t1, $r_s0, 0xff00 + andi t0, t0, 0xff00 + srl t1, t1, 8 + jr $r_ra + or $r_A, t0, t1 +# endif #else jr $r_ra move $r_A, $r_s0 From 84dedd71cf3bc61cc65ca43a48f7252344a1bb68 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Sat, 19 Sep 2015 06:26:19 +0200 Subject: [PATCH 045/240] MIPS: ATH79: Set missing irq ack handler for ar7100-misc-intc irq chip The irq ack handler was forgotten while introducing OF support. Only ar71xx and ar933x based devices require it. Signed-off-by: Alexander Couzens Acked-by: Alban Bedel Acked-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Rob Herring Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Cc: devicetree@vger.kernel.org Cc: Jason Cooper Cc: Marc Zyngier Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11163/ Signed-off-by: Ralf Baechle --- arch/mips/ath79/irq.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index 15ecb4831e12..bf1651cc7e64 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -293,8 +293,16 @@ static int __init ath79_misc_intc_of_init( return 0; } -IRQCHIP_DECLARE(ath79_misc_intc, "qca,ar7100-misc-intc", - ath79_misc_intc_of_init); + +static int __init ar7100_misc_intc_of_init( + struct device_node *node, struct device_node *parent) +{ + ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask; + return ath79_misc_intc_of_init(node, parent); +} + +IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", + ar7100_misc_intc_of_init); static int __init ar79_cpu_intc_of_init( struct device_node *node, struct device_node *parent) From 19446da415e0f01d56364b700fe984cda78bca50 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Sat, 19 Sep 2015 06:26:20 +0200 Subject: [PATCH 046/240] MIPS: ATH79: Add irq chip ar7240-misc-intc The ar7240 misc irq chip use ack handler instead of ack_mask handler. All new ath79 chips use the ar7240 misc irq chip Signed-off-by: Alexander Couzens Acked-by: Alban Bedel Acked-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Rob Herring Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Cc: devicetree@vger.kernel.org Cc: Jason Cooper Cc: Marc Zyngier Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11164/ Signed-off-by: Ralf Baechle --- .../qca,ath79-misc-intc.txt | 20 +++++++++++++++++-- arch/mips/ath79/irq.c | 10 ++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt index 391717a68f3b..ec96b1f01478 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/qca,ath79-misc-intc.txt @@ -4,8 +4,8 @@ The MISC interrupt controller is a secondary controller for lower priority interrupt. Required Properties: -- compatible: has to be "qca,-cpu-intc", "qca,ar7100-misc-intc" - as fallback +- compatible: has to be "qca,-cpu-intc", "qca,ar7100-misc-intc" or + "qca,-cpu-intc", "qca,ar7240-misc-intc" - reg: Base address and size of the controllers memory area - interrupt-parent: phandle of the parent interrupt controller. - interrupts: Interrupt specifier for the controllers interrupt. @@ -13,6 +13,9 @@ Required Properties: - #interrupt-cells : Specifies the number of cells needed to encode interrupt source, should be 1 +Compatible fallback depends on the SoC. Use ar7100 for ar71xx and ar913x, +use ar7240 for all other SoCs. + Please refer to interrupts.txt in this directory for details of the common Interrupt Controllers bindings used by client devices. @@ -28,3 +31,16 @@ Example: interrupt-controller; #interrupt-cells = <1>; }; + +Another example: + + interrupt-controller@18060010 { + compatible = "qca,ar9331-misc-intc", qca,ar7240-misc-intc"; + reg = <0x18060010 0x4>; + + interrupt-parent = <&cpuintc>; + interrupts = <6>; + + interrupt-controller; + #interrupt-cells = <1>; + }; diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c index bf1651cc7e64..eeb3953ed8ac 100644 --- a/arch/mips/ath79/irq.c +++ b/arch/mips/ath79/irq.c @@ -304,6 +304,16 @@ static int __init ar7100_misc_intc_of_init( IRQCHIP_DECLARE(ar7100_misc_intc, "qca,ar7100-misc-intc", ar7100_misc_intc_of_init); +static int __init ar7240_misc_intc_of_init( + struct device_node *node, struct device_node *parent) +{ + ath79_misc_irq_chip.irq_ack = ar724x_misc_irq_ack; + return ath79_misc_intc_of_init(node, parent); +} + +IRQCHIP_DECLARE(ar7240_misc_intc, "qca,ar7240-misc-intc", + ar7240_misc_intc_of_init); + static int __init ar79_cpu_intc_of_init( struct device_node *node, struct device_node *parent) { From 2f6f31363cb7890784458d7805140687b4de5b59 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 17 Sep 2015 17:49:20 +0100 Subject: [PATCH 047/240] MIPS: cpu-features: Add cpu_has_ftlb Add cpu_has_ftlb, which specifies that an FTLB is present in addition to the VTLB, probed based on whether Config.MT == 4 (rather than 1 for standard JTLB). This is necessary since MIPS release 6 removes Config4.MMUExtDef, so the presence of the FTLB fields in Config4 must be determined from Config.MT instead. Signed-off-by: James Hogan Cc: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11159/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/cpu-features.h | 3 +++ arch/mips/include/asm/cpu.h | 1 + arch/mips/include/asm/mipsregs.h | 2 ++ arch/mips/kernel/cpu-probe.c | 8 +++++--- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index 9801ac982655..fe67f12ac239 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -20,6 +20,9 @@ #ifndef cpu_has_tlb #define cpu_has_tlb (cpu_data[0].options & MIPS_CPU_TLB) #endif +#ifndef cpu_has_ftlb +#define cpu_has_ftlb (cpu_data[0].options & MIPS_CPU_FTLB) +#endif #ifndef cpu_has_tlbinv #define cpu_has_tlbinv (cpu_data[0].options & MIPS_CPU_TLBINV) #endif diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index cd89e9855775..82ad15f11049 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -385,6 +385,7 @@ enum cpu_type_enum { #define MIPS_CPU_CDMM 0x4000000000ull /* CPU has Common Device Memory Map */ #define MIPS_CPU_BP_GHIST 0x8000000000ull /* R12K+ Branch Prediction Global History */ #define MIPS_CPU_SP 0x10000000000ull /* Small (1KB) page support */ +#define MIPS_CPU_FTLB 0x20000000000ull /* CPU has Fixed-page-size TLB */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index d3cd8eac81e3..c64781cf649f 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -487,6 +487,8 @@ /* Bits specific to the MIPS32/64 PRA. */ #define MIPS_CONF_MT (_ULCAST_(7) << 7) +#define MIPS_CONF_MT_TLB (_ULCAST_(1) << 7) +#define MIPS_CONF_MT_FTLB (_ULCAST_(4) << 7) #define MIPS_CONF_AR (_ULCAST_(7) << 10) #define MIPS_CONF_AT (_ULCAST_(3) << 13) #define MIPS_CONF_M (_ULCAST_(1) << 31) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 571a8e6ea5bd..397551cf2e7b 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -410,16 +410,18 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, int enable) static inline unsigned int decode_config0(struct cpuinfo_mips *c) { unsigned int config0; - int isa; + int isa, mt; config0 = read_c0_config(); /* * Look for Standard TLB or Dual VTLB and FTLB */ - if ((((config0 & MIPS_CONF_MT) >> 7) == 1) || - (((config0 & MIPS_CONF_MT) >> 7) == 4)) + mt = config0 & MIPS_CONF_MT; + if (mt == MIPS_CONF_MT_TLB) c->options |= MIPS_CPU_TLB; + else if (mt == MIPS_CONF_MT_FTLB) + c->options |= MIPS_CPU_TLB | MIPS_CPU_FTLB; isa = (config0 & MIPS_CONF_AT) >> 13; switch (isa) { From 43d104db596977a8fddc1e71245859a7fe85a658 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 17 Sep 2015 17:49:21 +0100 Subject: [PATCH 048/240] MIPS: Fix FTLB detection for R6 R6 removed the Config4.MMUExtDef field, with the low 16 bits only allowed to contain FTLB fields, and commit e87569cd6c57 ("MIPS: cpu-probe: Fix VTLB/FTLB configuration for R6") updated the probing of this field to assume an FTLB is always present for R6. However the FTLB may still be absent. The presence of those fields is actually specified by the MMU type in the Config.MT field, so use that (the new cpu_has_ftlb) to determine whether the FTLB is actually present. Fixes: e87569cd6c57 ("MIPS: cpu-probe: Fix VTLB/FTLB configuration for R6") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11160/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cpu-probe.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 397551cf2e7b..09a51d091941 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -561,15 +561,18 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) if (cpu_has_tlb) { if (((config4 & MIPS_CONF4_IE) >> 29) == 2) c->options |= MIPS_CPU_TLBINV; + /* - * This is a bit ugly. R6 has dropped that field from - * config4 and the only valid configuration is VTLB+FTLB so - * set a good value for mmuextdef for that case. + * R6 has dropped the MMUExtDef field from config4. + * On R6 the fields always describe the FTLB, and only if it is + * present according to Config.MT. */ - if (cpu_has_mips_r6) + if (!cpu_has_mips_r6) + mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF; + else if (cpu_has_ftlb) mmuextdef = MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT; else - mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF; + mmuextdef = 0; switch (mmuextdef) { case MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT: From 21199f27b430576552b26210b3194a363d7f05cd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Sep 2015 16:10:40 +0200 Subject: [PATCH 049/240] locking/lockdep: Fix hlock->pin_count reset on lock stack rebuilds Various people reported hitting the "unpinning an unpinned lock" warning. As it turns out there are 2 places where we take a lock out of the middle of a stack, and in those cases it would fail to preserve the pin_count when rebuilding the lock stack. Reported-by: Sasha Levin Reported-by: Tim Spriggs Tested-by: Sasha Levin Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davej@codemonkey.org.uk Link: http://lkml.kernel.org/r/20150916141040.GA11639@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 8acfbf773e06..4e49cc4c9952 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock); static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, struct lockdep_map *nest_lock, unsigned long ip, - int references) + int references, int pin_count) { struct task_struct *curr = current; struct lock_class *class = NULL; @@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->waittime_stamp = 0; hlock->holdtime_stamp = lockstat_clock(); #endif - hlock->pin_count = 0; + hlock->pin_count = pin_count; if (check && !mark_irqflags(curr, hlock)) return 0; @@ -3343,7 +3343,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3433,7 +3433,7 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references)) + hlock->references, hlock->pin_count)) return 0; } @@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0); + irqs_disabled_flags(flags), nest_lock, ip, 0, 0); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } From a7adb91b13c104e5ad950fbe1795aa2722f2ea0a Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Tue, 22 Sep 2015 10:51:36 -0700 Subject: [PATCH 050/240] x86/cpufeatures: Correct spelling of the HWP_NOTIFY flag Because noitification just isn't right. Signed-off-by: Kristen Carlson Accardi Acked-by: Rafael J. Wysocki Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: rjw@rjwysocki.net Link: http://lkml.kernel.org/r/1442944296-11737-1-git-send-email-kristen@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 2 +- arch/x86/kernel/cpu/scattered.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e6cf2ad350d1..9727b3b48bd1 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -193,7 +193,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */ -#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ +#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */ #define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */ #define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */ #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 3d423a101fae..608fb26c7254 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -37,7 +37,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, { X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 }, { X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 }, - { X86_FEATURE_HWP_NOITFY, CR_EAX, 8, 0x00000006, 0 }, + { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 }, { X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 }, { X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 }, { X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 }, From f929d42ceb18a8acfd47e0e7b7d90b5d49bd9258 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 4 Sep 2015 12:08:07 +0200 Subject: [PATCH 051/240] xen/blkback: free requests on disconnection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is due to commit 86839c56dee28c315a4c19b7bfee450ccd84cd25 "xen/block: add multi-page ring support" When using an guest under UEFI - after the domain is destroyed the following warning comes from blkback. ------------[ cut here ]------------ WARNING: CPU: 2 PID: 95 at /home/julien/works/linux/drivers/block/xen-blkback/xenbus.c:274 xen_blkif_deferred_free+0x1f4/0x1f8() Modules linked in: CPU: 2 PID: 95 Comm: kworker/2:1 Tainted: G W 4.2.0 #85 Hardware name: APM X-Gene Mustang board (DT) Workqueue: events xen_blkif_deferred_free Call trace: [] dump_backtrace+0x0/0x124 [] show_stack+0x10/0x1c [] dump_stack+0x78/0x98 [] warn_slowpath_common+0x9c/0xd4 [] warn_slowpath_null+0x14/0x20 [] xen_blkif_deferred_free+0x1f0/0x1f8 [] process_one_work+0x160/0x3b4 [] worker_thread+0x140/0x494 [] kthread+0xd8/0xf0 ---[ end trace 6f859b7883c88cdd ]--- Request allocation has been moved to connect_ring, which is called every time blkback connects to the frontend (this can happen multiple times during a blkback instance life cycle). On the other hand, request freeing has not been moved, so it's only called when destroying the backend instance. Due to this mismatch, blkback can allocate the request pool multiple times, without freeing it. In order to fix it, move the freeing of requests to xen_blkif_disconnect to restore the symmetry between request allocation and freeing. Reported-by: Julien Grall Signed-off-by: Roger Pau Monné Tested-by: Julien Grall Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Cc: xen-devel@lists.xenproject.org CC: stable@vger.kernel.org # 4.2 Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 40 ++++++++++++++++-------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index deb3f001791f..767657565de6 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -212,6 +212,9 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref, static int xen_blkif_disconnect(struct xen_blkif *blkif) { + struct pending_req *req, *n; + int i = 0, j; + if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); wake_up(&blkif->shutdown_wq); @@ -238,25 +241,6 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) /* Remove all persistent grants and the cache of ballooned pages. */ xen_blkbk_free_caches(blkif); - return 0; -} - -static void xen_blkif_free(struct xen_blkif *blkif) -{ - struct pending_req *req, *n; - int i = 0, j; - - xen_blkif_disconnect(blkif); - xen_vbd_free(&blkif->vbd); - - /* Make sure everything is drained before shutting down */ - BUG_ON(blkif->persistent_gnt_c != 0); - BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); - BUG_ON(blkif->free_pages_num != 0); - BUG_ON(!list_empty(&blkif->persistent_purge_list)); - BUG_ON(!list_empty(&blkif->free_pages)); - BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); - /* Check that there is no request in use */ list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { list_del(&req->free_list); @@ -272,6 +256,24 @@ static void xen_blkif_free(struct xen_blkif *blkif) } WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); + blkif->nr_ring_pages = 0; + + return 0; +} + +static void xen_blkif_free(struct xen_blkif *blkif) +{ + + xen_blkif_disconnect(blkif); + xen_vbd_free(&blkif->vbd); + + /* Make sure everything is drained before shutting down */ + BUG_ON(blkif->persistent_gnt_c != 0); + BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); + BUG_ON(blkif->free_pages_num != 0); + BUG_ON(!list_empty(&blkif->persistent_purge_list)); + BUG_ON(!list_empty(&blkif->free_pages)); + BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); kmem_cache_free(xen_blkif_cachep, blkif); } From bda4e0fb3126aca15586d165b5a15a37edc0a984 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Sep 2015 08:18:17 -0600 Subject: [PATCH 052/240] NVMe: Set affinity after allocating request queues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The asynchronous namespace scanning caused affinity hints to be set before its tagset initialized, so there was no cpu mask to set the hint. This patch moves the affinity hint setting to after namespaces are scanned. Reported-by: 김경산 Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index b97fc3fe0916..30758bdf69ea 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -2439,6 +2439,22 @@ static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn) list_sort(NULL, &dev->namespaces, ns_cmp); } +static void nvme_set_irq_hints(struct nvme_dev *dev) +{ + struct nvme_queue *nvmeq; + int i; + + for (i = 0; i < dev->online_queues; i++) { + nvmeq = dev->queues[i]; + + if (!nvmeq->tags || !(*nvmeq->tags)) + continue; + + irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, + blk_mq_tags_cpumask(*nvmeq->tags)); + } +} + static void nvme_dev_scan(struct work_struct *work) { struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work); @@ -2450,6 +2466,7 @@ static void nvme_dev_scan(struct work_struct *work) return; nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn)); kfree(ctrl); + nvme_set_irq_hints(dev); } /* @@ -2953,22 +2970,6 @@ static const struct file_operations nvme_dev_fops = { .compat_ioctl = nvme_dev_ioctl, }; -static void nvme_set_irq_hints(struct nvme_dev *dev) -{ - struct nvme_queue *nvmeq; - int i; - - for (i = 0; i < dev->online_queues; i++) { - nvmeq = dev->queues[i]; - - if (!nvmeq->tags || !(*nvmeq->tags)) - continue; - - irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, - blk_mq_tags_cpumask(*nvmeq->tags)); - } -} - static int nvme_dev_start(struct nvme_dev *dev) { int result; @@ -3010,8 +3011,6 @@ static int nvme_dev_start(struct nvme_dev *dev) if (result) goto free_tags; - nvme_set_irq_hints(dev); - dev->event_limit = 1; return result; @@ -3062,7 +3061,6 @@ static int nvme_dev_resume(struct nvme_dev *dev) } else { nvme_unfreeze_queues(dev); nvme_dev_add(dev); - nvme_set_irq_hints(dev); } return 0; } From 680168a58a9315e1301f4ebb062244470d4919b0 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 22 Sep 2015 09:35:31 -0700 Subject: [PATCH 053/240] PM / OPP: of_property_count_u32_elems() can return errors of_property_count_u32_elems() will never return 0, but a -ve error value of a positive count. And so the current !count check is wrong. Also, a missing "opp-microvolt" property isn't a problem and so we need to do of_find_property() separately to confirm that. Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings) Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 28cd75c535b0..1194669c2bb5 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -892,10 +892,17 @@ static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev) u32 microvolt[3] = {0}; int count, ret; - count = of_property_count_u32_elems(opp->np, "opp-microvolt"); - if (!count) + /* Missing property isn't a problem, but an invalid entry is */ + if (!of_find_property(opp->np, "opp-microvolt", NULL)) return 0; + count = of_property_count_u32_elems(opp->np, "opp-microvolt"); + if (count < 0) { + dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n", + __func__, count); + return count; + } + /* There can be one or three elements here */ if (count != 1 && count != 3) { dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n", From a8360062ccfb4b891d3013d0e55826c8bcb02bfb Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 18 Sep 2015 03:08:40 +0200 Subject: [PATCH 054/240] PCI / PM: Update runtime PM documentation for PCI devices Section 3.2 "Device Runtime Power Management" of pci.txt has become outdated, so update it to correctly reflect the current code flow. Also update the comment in local_pci_probe() to document the fact that pm_runtime_put_noidle() is not the only runtime PM helper function that can be used to decrement the device's runtime PM usage counter in .probe(). Signed-off-by: Rafael J. Wysocki Acked-by: Alan Stern --- Documentation/power/pci.txt | 51 +++++++++++++++++++++++++++---------- drivers/pci/pci-driver.c | 7 ++--- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt index 62328d76b55b..b0e911e0e8f5 100644 --- a/Documentation/power/pci.txt +++ b/Documentation/power/pci.txt @@ -979,20 +979,45 @@ every time right after the runtime_resume() callback has returned (alternatively, the runtime_suspend() callback will have to check if the device should really be suspended and return -EAGAIN if that is not the case). -The runtime PM of PCI devices is disabled by default. It is also blocked by -pci_pm_init() that runs the pm_runtime_forbid() helper function. If a PCI -driver implements the runtime PM callbacks and intends to use the runtime PM -framework provided by the PM core and the PCI subsystem, it should enable this -feature by executing the pm_runtime_enable() helper function. However, the -driver should not call the pm_runtime_allow() helper function unblocking -the runtime PM of the device. Instead, it should allow user space or some -platform-specific code to do that (user space can do it via sysfs), although -once it has called pm_runtime_enable(), it must be prepared to handle the +The runtime PM of PCI devices is enabled by default by the PCI core. PCI +device drivers do not need to enable it and should not attempt to do so. +However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid() +helper function. In addition to that, the runtime PM usage counter of +each PCI device is incremented by local_pci_probe() before executing the +probe callback provided by the device's driver. + +If a PCI driver implements the runtime PM callbacks and intends to use the +runtime PM framework provided by the PM core and the PCI subsystem, it needs +to decrement the device's runtime PM usage counter in its probe callback +function. If it doesn't do that, the counter will always be different from +zero for the device and it will never be runtime-suspended. The simplest +way to do that is by calling pm_runtime_put_noidle(), but if the driver +wants to schedule an autosuspend right away, for example, it may call +pm_runtime_put_autosuspend() instead for this purpose. Generally, it +just needs to call a function that decrements the devices usage counter +from its probe routine to make runtime PM work for the device. + +It is important to remember that the driver's runtime_suspend() callback +may be executed right after the usage counter has been decremented, because +user space may already have cuased the pm_runtime_allow() helper function +unblocking the runtime PM of the device to run via sysfs, so the driver must +be prepared to cope with that. + +The driver itself should not call pm_runtime_allow(), though. Instead, it +should let user space or some platform-specific code do that (user space can +do it via sysfs as stated above), but it must be prepared to handle the runtime PM of the device correctly as soon as pm_runtime_allow() is called -(which may happen at any time). [It also is possible that user space causes -pm_runtime_allow() to be called via sysfs before the driver is loaded, so in -fact the driver has to be prepared to handle the runtime PM of the device as -soon as it calls pm_runtime_enable().] +(which may happen at any time, even before the driver is loaded). + +When the driver's remove callback runs, it has to balance the decrementation +of the device's runtime PM usage counter at the probe time. For this reason, +if it has decremented the counter in its probe callback, it must run +pm_runtime_get_noresume() in its remove callback. [Since the core carries +out a runtime resume of the device and bumps up the device's usage counter +before running the driver's remove callback, the runtime PM of the device +is effectively disabled for the duration of the remove execution and all +runtime PM helper functions incrementing the device's usage counter are +then effectively equivalent to pm_runtime_get_noresume().] The runtime PM framework works by processing requests to suspend or resume devices, or to check if they are idle (in which cases it is reasonable to diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index dd652f2ae03d..108a3118ace7 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -299,9 +299,10 @@ static long local_pci_probe(void *_ddi) * Unbound PCI devices are always put in D0, regardless of * runtime PM status. During probe, the device is set to * active and the usage count is incremented. If the driver - * supports runtime PM, it should call pm_runtime_put_noidle() - * in its probe routine and pm_runtime_get_noresume() in its - * remove routine. + * supports runtime PM, it should call pm_runtime_put_noidle(), + * or any other runtime PM helper function decrementing the usage + * count, in its probe routine and pm_runtime_get_noresume() in + * its remove routine. */ pm_runtime_get_sync(dev); pci_dev->driver = pci_drv; From aebf5a67db8dbacbc624b9c652b81f5460b15eff Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Mon, 21 Sep 2015 11:06:32 +0200 Subject: [PATCH 055/240] dmaengine: pxa_dma: fix initial list move Since the commit to have an allocated list of virtual descriptors was reverted, the pxa_dma driver is broken, as it assumes the descriptor is placed on the allocated list upon allocation. Fix the issue in pxa_dma by making an allocated virtual descriptor a singleton. Fixes: 8c8fe97b2b8a ("Revert "dmaengine: virt-dma: don't always free descriptor upon completion"") Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul --- drivers/dma/pxa_dma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 5cb61ce01036..c6723ecd5848 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -887,6 +887,7 @@ pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, struct dma_async_tx_descriptor *tx; struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + INIT_LIST_HEAD(&vd->node); tx = vchan_tx_prep(vc, vd, tx_flags); tx->tx_submit = pxad_tx_submit; dev_dbg(&chan->vc.chan.dev->device, From c1492b4c541e3a382b60f1b5879cd3c4d246ad31 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 24 Sep 2015 16:00:17 +0200 Subject: [PATCH 056/240] dmaengine: xgene-dma: fix handling xgene_dma_get_ring_size result The function can return negative value. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Signed-off-by: Vinod Koul --- drivers/dma/xgene-dma.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index b23e8d52d126..4c5dddaad2d5 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -1421,15 +1421,18 @@ static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan, struct xgene_dma_ring *ring, enum xgene_dma_ring_cfgsize cfgsize) { + int ret; + /* Setup DMA ring descriptor variables */ ring->pdma = chan->pdma; ring->cfgsize = cfgsize; ring->num = chan->pdma->ring_num++; ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num); - ring->size = xgene_dma_get_ring_size(chan, cfgsize); - if (ring->size <= 0) - return ring->size; + ret = xgene_dma_get_ring_size(chan, cfgsize); + if (ret <= 0) + return ret; + ring->size = ret; /* Allocate memory for DMA ring descriptor */ ring->desc_vaddr = dma_zalloc_coherent(chan->dev, ring->size, From 0b23a1ece9be2c3e04c3b8d3594a1ada1fa1ae50 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 14 Sep 2015 11:55:36 +0300 Subject: [PATCH 057/240] dmaengine: idma64: improve residue estimation The residue calculation may provide a wrong estimation when the transfer is started. There are possible scenarios we have to separate: 1) the transfer is not started yet; residue is equal to the total length; 2) the transfer is just started (first chunk is ongoing); residue is equal to the total length without already transfered bytes; 3) the transfer is ongoing and we already sent few chunks of data; residue is equal to the total length without fully transfered chunks and already sent bytes. Mistakenly the calculation in cases 2) and 3) was done in the similar way and the result is equal to -bytes that have been transfered, i.e. quite big since size_t type can't keep negative values. Rewrite the calculation algorithm to be one pass and have a correct result. Besides above in case user asks for a status of the active DMA descriptor without pausing an ongoing transfer the residue will be estimated based on the register value, though it's still racy. Since the transfer is active the value is continuously being changed. Here we have to read two registers at a time. To minimize an error make those reads close to each other. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/idma64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index 18c14e1f1414..48d6d9e94f67 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -355,23 +355,23 @@ static size_t idma64_active_desc_size(struct idma64_chan *idma64c) struct idma64_desc *desc = idma64c->desc; struct idma64_hw_desc *hw; size_t bytes = desc->length; - u64 llp; - u32 ctlhi; + u64 llp = channel_readq(idma64c, LLP); + u32 ctlhi = channel_readl(idma64c, CTL_HI); unsigned int i = 0; - llp = channel_readq(idma64c, LLP); do { hw = &desc->hw[i]; - } while ((hw->llp != llp) && (++i < desc->ndesc)); + if (hw->llp == llp) + break; + bytes -= hw->len; + } while (++i < desc->ndesc); if (!i) return bytes; - do { - bytes -= desc->hw[--i].len; - } while (i); + /* The current chunk is not fully transfered yet */ + bytes += desc->hw[--i].len; - ctlhi = channel_readl(idma64c, CTL_HI); return bytes - IDMA64C_CTLH_BLOCK_TS(ctlhi); } From 7e5560a5648ab2bce7199c73b9c2a51b846f5541 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 24 Sep 2015 04:48:53 -0700 Subject: [PATCH 058/240] perf/x86: Change test_aperfmperf() and test_intel() to static Fixes the following sparse warnings: arch/x86/kernel/cpu/perf_event_msr.c:13:6: warning: symbol 'test_aperfmperf' was not declared. Should it be static? arch/x86/kernel/cpu/perf_event_msr.c:18:6: warning: symbol 'test_intel' was not declared. Should it be static? Signed-off-by: Geliang Tang Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/4588e8ab09638458f2451af572827108be3b4a36.1443123796.git.geliangtang@163.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_msr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c index 086b12eae794..f32ac13934f2 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/kernel/cpu/perf_event_msr.c @@ -10,12 +10,12 @@ enum perf_msr_id { PERF_MSR_EVENT_MAX, }; -bool test_aperfmperf(int idx) +static bool test_aperfmperf(int idx) { return boot_cpu_has(X86_FEATURE_APERFMPERF); } -bool test_intel(int idx) +static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || boot_cpu_data.x86 != 6) From 266fa2b22294909ddf6e7d2f8acfe07adf9fd978 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Sep 2015 11:24:18 -0300 Subject: [PATCH 059/240] perf probe: Use existing routine to look for a kernel module by dso->short_name We have map_groups__find_by_name() to look at the list of modules that are in place for a given machine, so use it instead of traversing the machine dso list, which also includes DSOs for userspace. When merging the user and kernel DSO lists a bug was introduced where 'perf probe' stopped being able to add probes to modules using its short name: # perf probe -m usbnet --add usbnet_start_xmit usbnet_start_xmit is out of .text, skip it. Error: Failed to add events. # With this fix it works again: # perf probe -m usbnet --add usbnet_start_xmit Added new event: probe:usbnet_start_xmit (on usbnet_start_xmit in usbnet) You can now use it in all perf tools, such as: perf record -e probe:usbnet_start_xmit -aR sleep 1 # Reported-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Fixes: 3d39ac538629 ("perf machine: No need to have two DSOs lists") Link: http://lkml.kernel.org/r/20150924015008.GE1897@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index eb5f18b75402..c6f9af78f6f5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -270,12 +270,13 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) int ret = 0; if (module) { - list_for_each_entry(dso, &host_machine->dsos.head, node) { - if (!dso->kernel) - continue; - if (strncmp(dso->short_name + 1, module, - dso->short_name_len - 2) == 0) - goto found; + char module_name[128]; + + snprintf(module_name, sizeof(module_name), "[%s]", module); + map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); + if (map) { + dso = map->dso; + goto found; } pr_debug("Failed to find module %s.\n", module); return -ENOENT; From 597ee40722bf05195f91a41e88e15b79bdab152c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 24 Sep 2015 13:05:21 +0300 Subject: [PATCH 060/240] perf intel-pt: Remove no_force_psb from documentation no_force_psb was dropped as a late change to the kernel driver. Consequently, remove it from the documentation. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443089122-19082-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-pt.txt | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 4a0501d7a3b4..c94c9de3173e 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -364,21 +364,6 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc CYC packets are not requested by default. -no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR. - - It stops the driver resetting the byte count to zero whenever - enabling the trace (for example on context switches) which in - turn results in no PSB being forced. However some processors - will produce a PSB anyway. - - In any case, there is still a PSB when the trace is enabled for - the first time. - - no_force_psb can be used to slightly decrease the trace size but - may make it harder for the decoder to recover from errors. - - no_force_psb is not selected by default. - new snapshot option ------------------- From b5cabbcbd157a4bf5a92dfc85134999a3b55342d Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 24 Sep 2015 13:05:22 +0300 Subject: [PATCH 061/240] perf tools: Fix copying of /proc/kcore A copy of /proc/kcore containing the kernel text can be made to the buildid cache. e.g. perf buildid-cache -v -k /proc/kcore To workaround objdump limitations, a copy is also made when annotating against /proc/kcore. The copying process stops working from libelf about v1.62 onwards (the problem was found with v1.63). The cause is that a call to gelf_getphdr() in kcore__add_phdr() fails because additional validation has been added to gelf_getphdr(). The use of gelf_getphdr() is a misguided attempt to get default initialization of the Gelf_Phdr structure. That should not be necessary because every member of the Gelf_Phdr structure is subsequently assigned. So just remove the call to gelf_getphdr(). Similarly, a call to gelf_getehdr() in gelf_kcore__init() can be removed also. Committer notes: Note to stable@kernel.org, from Adrian in the cover letter for this patchkit: The "Fix copying of /proc/kcore" problem goes back to v3.13 if you think it is important enough for stable. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1443089122-19082-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index f78ea3dc4c08..475d88d0a1c9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1271,8 +1271,6 @@ static int kcore__open(struct kcore *kcore, const char *filename) static int kcore__init(struct kcore *kcore, char *filename, int elfclass, bool temp) { - GElf_Ehdr *ehdr; - kcore->elfclass = elfclass; if (temp) @@ -1289,9 +1287,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass, if (!gelf_newehdr(kcore->elf, elfclass)) goto out_end; - ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); - if (!ehdr) - goto out_end; + memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr)); return 0; @@ -1348,23 +1344,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count) static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, u64 addr, u64 len) { - GElf_Phdr gphdr; - GElf_Phdr *phdr; + GElf_Phdr phdr = { + .p_type = PT_LOAD, + .p_flags = PF_R | PF_W | PF_X, + .p_offset = offset, + .p_vaddr = addr, + .p_paddr = 0, + .p_filesz = len, + .p_memsz = len, + .p_align = page_size, + }; - phdr = gelf_getphdr(kcore->elf, idx, &gphdr); - if (!phdr) - return -1; - - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R | PF_W | PF_X; - phdr->p_offset = offset; - phdr->p_vaddr = addr; - phdr->p_paddr = 0; - phdr->p_filesz = len; - phdr->p_memsz = len; - phdr->p_align = page_size; - - if (!gelf_update_phdr(kcore->elf, idx, phdr)) + if (!gelf_update_phdr(kcore->elf, idx, &phdr)) return -1; return 0; From bb6c96d72879fe1f674a804eb95b891def4ace61 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 24 Sep 2015 10:34:21 +0300 Subject: [PATCH 062/240] xprtrdma: Replace global lkey with lkey local to PD The core API has changed so that devices that do not have a global DMA lkey automatically create an mr, per-PD, and make that lkey available. The global DMA lkey interface is going away in favor of the per-PD DMA lkey. The per-PD DMA lkey is always available. Convert xprtrdma to use the device's per-PD DMA lkey for regbufs, no matter which memory registration scheme is in use. Signed-off-by: Chuck Lever Signed-off-by: Sagi Grimberg Cc: linux-nfs Acked-by: Anna Schumaker Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/fmr_ops.c | 19 ------------------- net/sunrpc/xprtrdma/frwr_ops.c | 5 ----- net/sunrpc/xprtrdma/physical_ops.c | 10 +--------- net/sunrpc/xprtrdma/verbs.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 5 files changed, 2 insertions(+), 35 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index cb25c89da623..f1e8dafbd507 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -39,25 +39,6 @@ static int fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr *devattr = &ia->ri_devattr; - struct ib_mr *mr; - - /* Obtain an lkey to use for the regbufs, which are - * protected from remote access. - */ - if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { - ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; - } else { - mr = ib_get_dma_mr(ia->ri_pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(mr)) { - pr_err("%s: ib_get_dma_mr for failed with %lX\n", - __func__, PTR_ERR(mr)); - return -ENOMEM; - } - ia->ri_dma_lkey = ia->ri_dma_mr->lkey; - ia->ri_dma_mr = mr; - } - return 0; } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index d6653f5d0830..5318951b3b53 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -189,11 +189,6 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct ib_device_attr *devattr = &ia->ri_devattr; int depth, delta; - /* Obtain an lkey to use for the regbufs, which are - * protected from remote access. - */ - ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; - ia->ri_max_frmr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, devattr->max_fast_reg_page_list_len); diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c index 72cf8b15bbb4..617b76f22154 100644 --- a/net/sunrpc/xprtrdma/physical_ops.c +++ b/net/sunrpc/xprtrdma/physical_ops.c @@ -23,7 +23,6 @@ static int physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_mr *mr; /* Obtain an rkey to use for RPC data payloads. @@ -37,15 +36,8 @@ physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, __func__, PTR_ERR(mr)); return -ENOMEM; } + ia->ri_dma_mr = mr; - - /* Obtain an lkey to use for regbufs. - */ - if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) - ia->ri_dma_lkey = ia->ri_device->local_dma_lkey; - else - ia->ri_dma_lkey = ia->ri_dma_mr->lkey; - return 0; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 682996779970..eb081ad05e33 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1252,7 +1252,7 @@ rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags) goto out_free; iov->length = size; - iov->lkey = ia->ri_dma_lkey; + iov->lkey = ia->ri_pd->local_dma_lkey; rb->rg_size = size; rb->rg_owner = NULL; return rb; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 02512221b8bc..c09414e6f91b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -65,7 +65,6 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_dma_mr; - u32 ri_dma_lkey; struct completion ri_done; int ri_async_rc; unsigned int ri_max_frmr_depth; From 3cffd930171518821595839c5ce7036894ef0d74 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 24 Sep 2015 10:34:22 +0300 Subject: [PATCH 063/240] IB/iser: Add module parameter for always register memory This module parameter forces memory registration even for a continuous memory region. It is true by default as sending an all-physical rkey with remote permissions might be insecure. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/iser/iscsi_iser.c | 5 +++++ drivers/infiniband/ulp/iser/iscsi_iser.h | 1 + drivers/infiniband/ulp/iser/iser_memory.c | 18 ++++++++++++------ drivers/infiniband/ulp/iser/iser_verbs.c | 21 +++++++++++++-------- 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 1ace5d83a4d7..f58ff96b6cbb 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -97,6 +97,11 @@ unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS; module_param_named(max_sectors, iser_max_sectors, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command (default:1024"); +bool iser_always_reg = true; +module_param_named(always_register, iser_always_reg, bool, S_IRUGO); +MODULE_PARM_DESC(always_register, + "Always register memory, even for continuous memory regions (default:true)"); + bool iser_pi_enable = false; module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO); MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)"); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 86f6583485ef..a5edd6ede692 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -611,6 +611,7 @@ extern int iser_debug_level; extern bool iser_pi_enable; extern int iser_pi_guard; extern unsigned int iser_max_sectors; +extern bool iser_always_reg; int iser_assign_reg_ops(struct iser_device *device); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2493cc748db8..4c46d67d37a1 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -803,11 +803,12 @@ static int iser_reg_prot_sg(struct iscsi_iser_task *task, struct iser_data_buf *mem, struct iser_fr_desc *desc, + bool use_dma_key, struct iser_mem_reg *reg) { struct iser_device *device = task->iser_conn->ib_conn.device; - if (mem->dma_nents == 1) + if (use_dma_key) return iser_reg_dma(device, mem, reg); return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg); @@ -817,11 +818,12 @@ static int iser_reg_data_sg(struct iscsi_iser_task *task, struct iser_data_buf *mem, struct iser_fr_desc *desc, + bool use_dma_key, struct iser_mem_reg *reg) { struct iser_device *device = task->iser_conn->ib_conn.device; - if (mem->dma_nents == 1) + if (use_dma_key) return iser_reg_dma(device, mem, reg); return device->reg_ops->reg_mem(task, mem, &desc->rsc, reg); @@ -836,14 +838,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, struct iser_mem_reg *reg = &task->rdma_reg[dir]; struct iser_mem_reg *data_reg; struct iser_fr_desc *desc = NULL; + bool use_dma_key; int err; err = iser_handle_unaligned_buf(task, mem, dir); if (unlikely(err)) return err; - if (mem->dma_nents != 1 || - scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) { + use_dma_key = (mem->dma_nents == 1 && !iser_always_reg && + scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL); + + if (!use_dma_key) { desc = device->reg_ops->reg_desc_get(ib_conn); reg->mem_h = desc; } @@ -853,7 +858,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, else data_reg = &task->desc.data_reg; - err = iser_reg_data_sg(task, mem, desc, data_reg); + err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg); if (unlikely(err)) goto err_reg; @@ -866,7 +871,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, if (unlikely(err)) goto err_reg; - err = iser_reg_prot_sg(task, mem, desc, prot_reg); + err = iser_reg_prot_sg(task, mem, desc, + use_dma_key, prot_reg); if (unlikely(err)) goto err_reg; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ae70cc1463ac..85132d867bc8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -133,11 +133,15 @@ static int iser_create_device_ib_res(struct iser_device *device) (unsigned long)comp); } - device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - if (IS_ERR(device->mr)) - goto dma_mr_err; + if (!iser_always_reg) { + int access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + + device->mr = ib_get_dma_mr(device->pd, access); + if (IS_ERR(device->mr)) + goto dma_mr_err; + } INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, iser_event_handler); @@ -147,7 +151,8 @@ static int iser_create_device_ib_res(struct iser_device *device) return 0; handler_err: - ib_dereg_mr(device->mr); + if (device->mr) + ib_dereg_mr(device->mr); dma_mr_err: for (i = 0; i < device->comps_used; i++) tasklet_kill(&device->comps[i].tasklet); @@ -173,7 +178,6 @@ static int iser_create_device_ib_res(struct iser_device *device) static void iser_free_device_ib_res(struct iser_device *device) { int i; - BUG_ON(device->mr == NULL); for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; @@ -184,7 +188,8 @@ static void iser_free_device_ib_res(struct iser_device *device) } (void)ib_unregister_event_handler(&device->event_handler); - (void)ib_dereg_mr(device->mr); + if (device->mr) + (void)ib_dereg_mr(device->mr); ib_dealloc_pd(device->pd); kfree(device->comps); From c6790aa9f4fdc26b1246ba36da2fd749663beb65 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 24 Sep 2015 10:34:23 +0300 Subject: [PATCH 064/240] IB/mlx5: Remove support for IB_DEVICE_LOCAL_DMA_LKEY Commit 96249d70dd70 ("IB/core: Guarantee that a local_dma_lkey is available") allows ULPs that make use of the local dma key to keep working as before by allocating a DMA MR with local permissions and converted these consumers to use the MR associated with the PD rather then device->local_dma_lkey. ConnectIB has some known issues with memory registration using the local_dma_lkey (SEND, RDMA, RECV seems to work ok). Thus don't expose support for it (remove device->local_dma_lkey setting), and take advantage of the above commit such that no regression is introduced to working systems. The local_dma_lkey support will be restored in CX4 depending on FW capability query. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 10 +-------- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 22 -------------------- include/linux/mlx5/device.h | 11 ---------- include/linux/mlx5/driver.h | 1 - 4 files changed, 1 insertion(+), 43 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 41d6911e244e..0ab9625911a1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -245,7 +245,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (MLX5_CAP_GEN(mdev, apm)) props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; - props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; if (MLX5_CAP_GEN(mdev, xrc)) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; @@ -1245,18 +1244,10 @@ static int create_dev_resources(struct mlx5_ib_resources *devr) struct ib_srq_init_attr attr; struct mlx5_ib_dev *dev; struct ib_cq_init_attr cq_attr = {.cqe = 1}; - u32 rsvd_lkey; int ret = 0; dev = container_of(devr, struct mlx5_ib_dev, devr); - ret = mlx5_core_query_special_context(dev->mdev, &rsvd_lkey); - if (ret) { - pr_err("Failed to query special context %d\n", ret); - return ret; - } - dev->ib_dev.local_dma_lkey = rsvd_lkey; - devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); if (IS_ERR(devr->p0)) { ret = PTR_ERR(devr->p0); @@ -1418,6 +1409,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index aa0d5ffe92d8..9335e5ae18cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -200,25 +200,3 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) return err; } - -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey) -{ - struct mlx5_cmd_query_special_contexts_mbox_in in; - struct mlx5_cmd_query_special_contexts_mbox_out out; - int err; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - *rsvd_lkey = be32_to_cpu(out.resd_lkey); - - return err; -} -EXPORT_SYMBOL(mlx5_core_query_special_context); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..250b1ff8b48d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -402,17 +402,6 @@ struct mlx5_cmd_teardown_hca_mbox_out { u8 rsvd[8]; }; -struct mlx5_cmd_query_special_contexts_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_special_contexts_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 resd_lkey; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27b53f9a24ad..8b6d6f2154a4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -845,7 +845,6 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey); struct mlx5_profile { u64 mask; From 81fb5e26a9d05674c048803a20cb8f08a1b1c9b8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 24 Sep 2015 10:34:24 +0300 Subject: [PATCH 065/240] IB/mlx5: Remove pa_lkey usages Since mlx5 driver cannot rely on registration using the reserved lkey (global_dma_lkey) it used to allocate a private physical address lkey for each allocated pd. Commit 96249d70dd70 ("IB/core: Guarantee that a local_dma_lkey is available") just does it in the core layer so we can go ahead and use that. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 57 ---------------------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 - drivers/infiniband/hw/mlx5/qp.c | 4 +- 3 files changed, 1 insertion(+), 62 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0ab9625911a1..f1ccd40beae9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -794,53 +794,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm return 0; } -static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) -{ - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *seg; - struct mlx5_core_mr mr; - int err; - - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - seg = &in->seg; - seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA; - seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - seg->start_addr = 0; - - err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in), - NULL, NULL, NULL); - if (err) { - mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); - goto err_in; - } - - kfree(in); - *key = mr.key; - - return 0; - -err_in: - kfree(in); - - return err; -} - -static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) -{ - struct mlx5_core_mr mr; - int err; - - memset(&mr, 0, sizeof(mr)); - mr.key = key; - err = mlx5_core_destroy_mkey(dev->mdev, &mr); - if (err) - mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key); -} - static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -866,13 +819,6 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, kfree(pd); return ERR_PTR(-EFAULT); } - } else { - err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); - if (err) { - mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); - kfree(pd); - return ERR_PTR(err); - } } return &pd->ibpd; @@ -883,9 +829,6 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - if (!pd->uobject) - free_pa_mkey(mdev, mpd->pa_lkey); - mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); kfree(mpd); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index bb8cda79e881..22123b79d550 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -103,7 +103,6 @@ static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibuconte struct mlx5_ib_pd { struct ib_pd ibpd; u32 pdn; - u32 pa_lkey; }; /* Use macros here so that don't have to duplicate @@ -213,7 +212,6 @@ struct mlx5_ib_qp { int uuarn; int create_type; - u32 pa_lkey; /* Store signature errors */ bool signature_en; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c745c6c5e10d..6f521a3418e8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -925,8 +925,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, err = create_kernel_qp(dev, init_attr, qp, &in, &inlen); if (err) mlx5_ib_dbg(dev, "err %d\n", err); - else - qp->pa_lkey = to_mpd(pd)->pa_lkey; } if (err) @@ -2045,7 +2043,7 @@ static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg, mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm); dseg->addr = cpu_to_be64(mfrpl->map); dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64)); - dseg->lkey = cpu_to_be32(pd->pa_lkey); + dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } static __be32 send_ieth(struct ib_send_wr *wr) From 94c4554ba07adbdde396748ee7ae01e86cf2d8d7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Sep 2015 16:30:08 +0100 Subject: [PATCH 066/240] KEYS: Fix race between key destruction and finding a keyring by name There appears to be a race between: (1) key_gc_unused_keys() which frees key->security and then calls keyring_destroy() to unlink the name from the name list (2) find_keyring_by_name() which calls key_permission(), thus accessing key->security, on a key before checking to see whether the key usage is 0 (ie. the key is dead and might be cleaned up). Fix this by calling ->destroy() before cleaning up the core key data - including key->security. Reported-by: Petr Matousek Signed-off-by: David Howells --- security/keys/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/keys/gc.c b/security/keys/gc.c index c7952375ac53..39eac1fd5706 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -134,6 +134,10 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kdebug("- %u", key->serial); key_check(key); + /* Throw away the key data */ + if (key->type->destroy) + key->type->destroy(key); + security_key_free(key); /* deal with the user's key tracking and quota */ @@ -148,10 +152,6 @@ static noinline void key_gc_unused_keys(struct list_head *keys) if (test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) atomic_dec(&key->user->nikeys); - /* now throw away the key memory */ - if (key->type->destroy) - key->type->destroy(key); - key_user_put(key->user); kfree(key->description); From 292c6091353475d94e2cfb49c29906e88ee967ba Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Sep 2015 16:31:45 +0100 Subject: [PATCH 067/240] KEYS: Remove unnecessary header #inclusions from extract-cert.c Remove headers #included unnecessarily from extract-cert.c lest they cause compilation of the tool to fail against an older OpenSSL library. Signed-off-by: David Howells Acked-by: David Woodhouse --- scripts/extract-cert.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 6ce5945a0b89..b071bf476fea 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -17,13 +17,9 @@ #include #include #include -#include #include -#include #include -#include #include -#include #include #include From e7c87bef7de2417b219d4dbfe8d33a0098a8df54 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Sep 2015 16:31:46 +0100 Subject: [PATCH 068/240] X.509: Don't strip leading 00's from key ID when constructing key description Don't strip leading zeros from the crypto key ID when using it to construct the struct key description as the signature in kernels up to and including 4.2 matched this aspect of the key. This means that 1 in 256 keys won't actually match if their key ID begins with 00. The key ID is stored in the module signature as binary and so must be converted to text in order to invoke request_key() - but it isn't stripped at this point. Something like this is likely to be observed in dmesg when the key is loaded: [ 1.572423] Loaded X.509 cert 'Build time autogenerated kernel key: 62a7c3d2da278be024da4af8652c071f3fea33' followed by this when we try and use it: [ 1.646153] Request for unknown module key 'Build time autogenerated kernel key: 0062a7c3d2da278be024da4af8652c071f3fea33' err -11 The 'Loaded' line should show an extra '00' on the front of the hex string. This problem should not affect 4.3-rc1 and onwards because there the key should be matched on one of its auxiliary identities rather than the key struct's description string. Reported-by: Arjan van de Ven Reported-by: Andy Whitcroft Signed-off-by: David Howells --- crypto/asymmetric_keys/x509_public_key.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 6d88dd15c98d..197096632412 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -332,10 +332,6 @@ static int x509_key_preparse(struct key_preparsed_payload *prep) srlen = cert->raw_serial_size; q = cert->raw_serial; } - if (srlen > 1 && *q == 0) { - srlen--; - q++; - } ret = -ENOMEM; desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); From 283e8ba2dfde54f8f27d7d0f459a07de79a39d55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Sep 2015 16:31:46 +0100 Subject: [PATCH 069/240] MODSIGN: Change from CMS to PKCS#7 signing if the openssl is too old The sign-file.c program actually uses CMS rather than PKCS#7 to sign a file since that allows the target X.509 certificate to be specified by subjectKeyId rather than by issuer + serialNumber. However, older versions of the OpenSSL crypto library (such as may be found in CentOS 5.11) don't support CMS. Assume everything prior to OpenSSL-1.0.0 doesn't support CMS and switch to using PKCS#7 in that case. Further, the pre-1.0.0 OpenSSL only supports PKCS#7 signing with SHA1, so give an error from the sign-file script if the caller requests anything other than SHA1. The compiler gives the following error with an OpenSSL crypto library that's too old: HOSTCC scripts/sign-file scripts/sign-file.c:23:25: fatal error: openssl/cms.h: No such file or directory #include Reported-by: Vinson Lee Signed-off-by: David Howells Acked-by: David Woodhouse --- Documentation/Changes | 2 +- scripts/sign-file.c | 94 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/Documentation/Changes b/Documentation/Changes index 6d8863004858..f447f0516f07 100644 --- a/Documentation/Changes +++ b/Documentation/Changes @@ -43,7 +43,7 @@ o udev 081 # udevd --version o grub 0.93 # grub --version || grub-install --version o mcelog 0.6 # mcelog --version o iptables 1.4.2 # iptables -V -o openssl & libcrypto 1.0.1k # openssl version +o openssl & libcrypto 1.0.0 # openssl version Kernel compilation diff --git a/scripts/sign-file.c b/scripts/sign-file.c index c3899ca4811c..250a7a645033 100755 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -20,13 +20,34 @@ #include #include #include +#include #include #include #include -#include #include #include +/* + * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to + * assume that it's not available and its header file is missing and that we + * should use PKCS#7 instead. Switching to the older PKCS#7 format restricts + * the options we have on specifying the X.509 certificate we want. + * + * Further, older versions of OpenSSL don't support manually adding signers to + * the PKCS#7 message so have to accept that we get a certificate included in + * the signature message. Nor do such older versions of OpenSSL support + * signing with anything other than SHA1 - so we're stuck with that if such is + * the case. + */ +#if OPENSSL_VERSION_NUMBER < 0x10000000L +#define USE_PKCS7 +#endif +#ifndef USE_PKCS7 +#include +#else +#include +#endif + struct module_signature { uint8_t algo; /* Public-key crypto algorithm [0] */ uint8_t hash; /* Digest algorithm [0] */ @@ -110,30 +131,42 @@ int main(int argc, char **argv) struct module_signature sig_info = { .id_type = PKEY_ID_PKCS7 }; char *hash_algo = NULL; char *private_key_name, *x509_name, *module_name, *dest_name; - bool save_cms = false, replace_orig; + bool save_sig = false, replace_orig; bool sign_only = false; unsigned char buf[4096]; - unsigned long module_size, cms_size; - unsigned int use_keyid = 0, use_signed_attrs = CMS_NOATTR; + unsigned long module_size, sig_size; + unsigned int use_signed_attrs; const EVP_MD *digest_algo; EVP_PKEY *private_key; +#ifndef USE_PKCS7 CMS_ContentInfo *cms; + unsigned int use_keyid = 0; +#else + PKCS7 *pkcs7; +#endif X509 *x509; BIO *b, *bd = NULL, *bm; int opt, n; - OpenSSL_add_all_algorithms(); ERR_load_crypto_strings(); ERR_clear_error(); key_pass = getenv("KBUILD_SIGN_PIN"); +#ifndef USE_PKCS7 + use_signed_attrs = CMS_NOATTR; +#else + use_signed_attrs = PKCS7_NOATTR; +#endif + do { opt = getopt(argc, argv, "dpk"); switch (opt) { - case 'p': save_cms = true; break; - case 'd': sign_only = true; save_cms = true; break; + case 'p': save_sig = true; break; + case 'd': sign_only = true; save_sig = true; break; +#ifndef USE_PKCS7 case 'k': use_keyid = CMS_USE_KEYID; break; +#endif case -1: break; default: format(); } @@ -157,6 +190,14 @@ int main(int argc, char **argv) replace_orig = true; } +#ifdef USE_PKCS7 + if (strcmp(hash_algo, "sha1") != 0) { + fprintf(stderr, "sign-file: %s only supports SHA1 signing\n", + OPENSSL_VERSION_TEXT); + exit(3); + } +#endif + /* Read the private key and the X.509 cert the PKCS#7 message * will point to. */ @@ -213,7 +254,8 @@ int main(int argc, char **argv) bm = BIO_new_file(module_name, "rb"); ERR(!bm, "%s", module_name); - /* Load the CMS message from the digest buffer. */ +#ifndef USE_PKCS7 + /* Load the signature message from the digest buffer. */ cms = CMS_sign(NULL, NULL, NULL, NULL, CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED | CMS_STREAM); ERR(!cms, "CMS_sign"); @@ -221,17 +263,31 @@ int main(int argc, char **argv) ERR(!CMS_add1_signer(cms, x509, private_key, digest_algo, CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP | use_keyid | use_signed_attrs), - "CMS_sign_add_signer"); + "CMS_add1_signer"); ERR(CMS_final(cms, bm, NULL, CMS_NOCERTS | CMS_BINARY) < 0, "CMS_final"); - if (save_cms) { - char *cms_name; +#else + pkcs7 = PKCS7_sign(x509, private_key, NULL, bm, + PKCS7_NOCERTS | PKCS7_BINARY | + PKCS7_DETACHED | use_signed_attrs); + ERR(!pkcs7, "PKCS7_sign"); +#endif - ERR(asprintf(&cms_name, "%s.p7s", module_name) < 0, "asprintf"); - b = BIO_new_file(cms_name, "wb"); - ERR(!b, "%s", cms_name); - ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0, "%s", cms_name); + if (save_sig) { + char *sig_file_name; + + ERR(asprintf(&sig_file_name, "%s.p7s", module_name) < 0, + "asprintf"); + b = BIO_new_file(sig_file_name, "wb"); + ERR(!b, "%s", sig_file_name); +#ifndef USE_PKCS7 + ERR(i2d_CMS_bio_stream(b, cms, NULL, 0) < 0, + "%s", sig_file_name); +#else + ERR(i2d_PKCS7_bio(b, pkcs7) < 0, + "%s", sig_file_name); +#endif BIO_free(b); } @@ -247,9 +303,13 @@ int main(int argc, char **argv) ERR(n < 0, "%s", module_name); module_size = BIO_number_written(bd); +#ifndef USE_PKCS7 ERR(i2d_CMS_bio_stream(bd, cms, NULL, 0) < 0, "%s", dest_name); - cms_size = BIO_number_written(bd) - module_size; - sig_info.sig_len = htonl(cms_size); +#else + ERR(i2d_PKCS7_bio(bd, pkcs7) < 0, "%s", dest_name); +#endif + sig_size = BIO_number_written(bd) - module_size; + sig_info.sig_len = htonl(sig_size); ERR(BIO_write(bd, &sig_info, sizeof(sig_info)) < 0, "%s", dest_name); ERR(BIO_write(bd, magic_number, sizeof(magic_number) - 1) < 0, "%s", dest_name); From bd99b2e05c4df2a428e5c9dd338289089d0e26df Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 24 Sep 2015 12:00:05 -0500 Subject: [PATCH 070/240] IB/ipoib: Expire sendonly multicast joins On neighbor expiration, check to see if the neighbor was actually a sendonly multicast join, and if so, leave the multicast group as we expire the neighbor. Signed-off-by: Christoph Lameter Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 18 ++++++++++++++++++ drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 4 ++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index ca2873698d75..4ff4e5290354 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -548,6 +548,8 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter, int ipoib_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid, int set_qkey); +int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast); +struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid); int ipoib_init_qp(struct net_device *dev); int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 36536ce5a3e2..f74316e679d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1149,6 +1149,9 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) unsigned long dt; unsigned long flags; int i; + LIST_HEAD(remove_list); + struct ipoib_mcast *mcast, *tmcast; + struct net_device *dev = priv->dev; if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) return; @@ -1176,6 +1179,19 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) lockdep_is_held(&priv->lock))) != NULL) { /* was the neigh idle for two GC periods */ if (time_after(neigh_obsolete, neigh->alive)) { + u8 *mgid = neigh->daddr + 4; + + /* Is this multicast ? */ + if (*mgid == 0xff) { + mcast = __ipoib_mcast_find(dev, mgid); + + if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { + list_del(&mcast->list); + rb_erase(&mcast->rb_node, &priv->multicast_tree); + list_add_tail(&mcast->list, &remove_list); + } + } + rcu_assign_pointer(*np, rcu_dereference_protected(neigh->hnext, lockdep_is_held(&priv->lock))); @@ -1191,6 +1207,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) out_unlock: spin_unlock_irqrestore(&priv->lock, flags); + list_for_each_entry_safe(mcast, tmcast, &remove_list, list) + ipoib_mcast_leave(dev, mcast); } static void ipoib_reap_neigh(struct work_struct *work) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 09a1748f9d13..33131001fa24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, return mcast; } -static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) +struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->multicast_tree.rb_node; @@ -675,7 +675,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev) return 0; } -static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) +int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret = 0; From c3852ab0e606212de523c1fb1e15adbf9f431619 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 25 Sep 2015 14:35:01 -0400 Subject: [PATCH 071/240] IB/ipoib: Make sendonly multicast joins create the mcast group Since IPoIB should, as much as possible, emulate how multicast sends work on Ethernet for regular TCP/IP apps, there should be no requirement to subscribe to a multicast group before your sends are properly sent. However, due to the difference in how multicast is handled on InfiniBand, we must join the appropriate multicast group before we can send to it. Previously we tried not to trigger the auto-create feature of the subnet manager when doing this because we didn't have tracking of these sendonly groups and the auto-creation might never get undone. The previous patch added timing to these sendonly joins and allows us to leave them after a reasonable idle expiration time. So supply all of the information needed to auto-create group. Signed-off-by: Doug Ledford --- .../infiniband/ulp/ipoib/ipoib_multicast.c | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 33131001fa24..136cbefe00f8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -508,17 +508,19 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) rec.hop_limit = priv->broadcast->mcmember.hop_limit; /* - * Historically Linux IPoIB has never properly supported SEND - * ONLY join. It emulated it by not providing all the required - * attributes, which is enough to prevent group creation and - * detect if there are full members or not. A major problem - * with supporting SEND ONLY is detecting when the group is - * auto-destroyed as IPoIB will cache the MLID.. + * Send-only IB Multicast joins do not work at the core + * IB layer yet, so we can't use them here. However, + * we are emulating an Ethernet multicast send, which + * does not require a multicast subscription and will + * still send properly. The most appropriate thing to + * do is to create the group if it doesn't exist as that + * most closely emulates the behavior, from a user space + * application perspecitive, of Ethernet multicast + * operation. For now, we do a full join, maybe later + * when the core IB layers support send only joins we + * will use them. */ -#if 1 - if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) - comp_mask &= ~IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; -#else +#if 0 if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) rec.join_state = 4; #endif From e1a2d49cd5ef551c51be95cc037033e9e582b0cd Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 24 Sep 2015 12:28:44 -0700 Subject: [PATCH 072/240] PM / OPP: Fix typo modifcation -> modification Reported-by: Viresh Kumar Signed-off-by: Stephen Boyd Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 1194669c2bb5..7ae7cd990fbf 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -1070,7 +1070,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add); * share a common logic which is isolated here. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. * * Locking: The internal device_opp and opp structures are RCU protected. @@ -1158,7 +1158,7 @@ static int _opp_set_availability(struct device *dev, unsigned long freq, * mutex locking or synchronize_rcu() blocking calls cannot be used. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. */ int dev_pm_opp_enable(struct device *dev, unsigned long freq) @@ -1184,7 +1184,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable); * mutex locking or synchronize_rcu() blocking calls cannot be used. * * Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the - * copy operation, returns 0 if no modifcation was done OR modification was + * copy operation, returns 0 if no modification was done OR modification was * successful. */ int dev_pm_opp_disable(struct device *dev, unsigned long freq) From 15b94fa32a422f4b97dc34e4b7060ec83d10bee5 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Thu, 24 Sep 2015 14:54:40 +0800 Subject: [PATCH 073/240] ACPI / EC: Fix a memory leak issue in acpi_ec_query() When query handler is not found, "result" is actually stil 0, and "struct acpi_ec_query" is not NULL, so the deletion code of "struct acpi_ec_query" at the end of the function cannot be invoked. As a consequence, memory leak can be observed. The issue is introduced by this commit: Commit: 02b771b64b73226052d6e731a0987db3b47281e9 Subject: ACPI / EC: Fix an issue caused by the serialized _Qxx This patch fixes such memory leakage. Fixes: 02b771b64b73 (ACPI / EC: Fix an issue caused by the serialized _Qxx evaluations) Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 2614a839c60d..42c66b64c12c 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1044,8 +1044,10 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) goto err_exit; mutex_lock(&ec->mutex); + result = -ENODATA; list_for_each_entry(handler, &ec->list, node) { if (value == handler->query_bit) { + result = 0; q->handler = acpi_ec_get_query_handler(handler); ec_dbg_evt("Query(0x%02x) scheduled", q->handler->query_bit); From 5ebc76035303016ec41bb752bec156ea9fde7c34 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 17 Sep 2015 14:02:45 +0800 Subject: [PATCH 074/240] ACPI, PCI, irq: Do not share PCI IRQ with ISA IRQ Avoid IRQs occupied by ISA IRQs when allocating IRQs for PCI link devices, otherwise it may cause interrupt storm due to incompatible pin attributes. This issue was triggered on a KVM virtual machine, which 1) uses IRQ9 for SCI in high level mode. 2) defines an PCI interrupt link device (LNKS) with IRQ9 as the only possible irq. 3) has an PCI device referring to link device LNKS. So it causes interrupt storm when enabling the PCI device because PCI IRQ works in low level mode. Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_irq.c | 1 + drivers/acpi/pci_link.c | 13 +++++++++++++ include/linux/acpi.h | 1 + 3 files changed, 15 insertions(+) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 6da0f9beab19..c9336751e5e3 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -372,6 +372,7 @@ static int acpi_isa_register_gsi(struct pci_dev *dev) /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF) && + acpi_isa_irq_available(dev->irq) && (acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) { dev_warn(&dev->dev, "PCI INT %c: no GSI - using ISA IRQ %d\n", pin_name(dev->pin), dev->irq); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 3b4ea98e3ea0..246e50d22120 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -553,6 +553,13 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) irq = link->irq.possible[i]; } } + if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) { + printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. " + "Try pci=noacpi or acpi=off\n", + acpi_device_name(link->device), + acpi_device_bid(link->device)); + return -ENODEV; + } /* Attempt to enable the link device at this IRQ. */ if (acpi_pci_link_set(link, irq)) { @@ -821,6 +828,12 @@ void acpi_penalize_isa_irq(int irq, int active) } } +bool acpi_isa_irq_available(int irq) +{ + return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) || + acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS); +} + /* * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with * PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be use for diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..43856d19cf4d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -217,6 +217,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); +bool acpi_isa_irq_available(int irq); void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); From d323efc786910bcc0c8f8b9f97780c70544ac4df Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 17 Sep 2015 14:02:46 +0800 Subject: [PATCH 075/240] ACPI / PCI: Remove duplicated penalty on SCI IRQ Now we have dedicated interface acpi_penalize_sci_irq() to penalize ISA IRQ used by ACPI SCI, so remove duplicated code to penalize ACPI SCI in acpi_irq_penalty_init(). Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 246e50d22120..7c8408b946ca 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -498,8 +498,7 @@ int __init acpi_irq_penalty_init(void) PIRQ_PENALTY_PCI_POSSIBLE; } } - /* Add a penalty for the SCI */ - acpi_irq_penalty[acpi_gbl_FADT.sci_interrupt] += PIRQ_PENALTY_PCI_USING; + return 0; } From 2866196f294954ce9fa226825c8c1eaa64c7da8a Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Fri, 25 Sep 2015 22:30:24 -0400 Subject: [PATCH 076/240] IB/ipoib: increase the max mcast backlog queue When performing sendonly joins, we queue the packets that trigger the join until the join completes. This may take on the order of hundreds of milliseconds. It is easy to have many more than three packets come in during that time. Expand the maximum queue depth in order to try and prevent dropped packets during the time it takes to join the multicast group. Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4ff4e5290354..4cd5428a2399 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -80,7 +80,7 @@ enum { IPOIB_NUM_WC = 4, IPOIB_MAX_PATH_REC_QUEUE = 3, - IPOIB_MAX_MCAST_QUEUE = 3, + IPOIB_MAX_MCAST_QUEUE = 64, IPOIB_FLAG_OPER_UP = 0, IPOIB_FLAG_INITIALIZED = 1, From 756357b8e4b072fd5ee86421f794e071a348802b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 25 Sep 2015 21:12:39 -0400 Subject: [PATCH 077/240] tools/power turbostat: IVB Xeon: fix --debug regression Staring in Linux-4.3-rc1, commit 6fb3143b561c ("tools/power turbostat: dump CONFIG_TDP") touches MSR 0x648, which is not supported on IVB-Xeon. This results in "turbostat --debug" exiting on those systems: turbostat: /dev/cpu/2/msr offset 0x648 read failed: Input/output error Remove IVB-Xeon from the list of machines supporting with that MSR. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9655cb49c7cb..e05d30336613 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1926,8 +1926,6 @@ int has_config_tdp(unsigned int family, unsigned int model) switch (model) { case 0x3A: /* IVB */ - case 0x3E: /* IVB Xeon */ - case 0x3C: /* HSW */ case 0x3F: /* HSX */ case 0x45: /* HSW */ From b2b34dfe4d9aa4c468fc363b3b666974783ed1f9 Mon Sep 17 00:00:00 2001 From: Hubert Chrzaniuk Date: Mon, 14 Sep 2015 13:31:00 +0200 Subject: [PATCH 078/240] tools/power turbostat: KNL workaround for %Busy and Avg_MHz KNL increments APERF and MPERF every 1024 clocks. This is compliant with the architecture specification, which requires that only the ratio of APERF/MPERF need be valid. However, turbostat takes advantage of the fact that these two MSRs increment every un-halted clock at the actual and base frequency: AVG_MHz = APERF_delta/measurement_interval %Busy = MPERF_delta/TSC_delta This quirk is needed for these calculations to also work on KNL, which would otherwise show a value 1024x smaller than expected. Signed-off-by: Hubert Chrzaniuk Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e05d30336613..d333c819fa1f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -71,6 +71,7 @@ unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; unsigned int extra_delta_offset64; +unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; unsigned int show_pkg; @@ -984,6 +985,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -3; if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) return -4; + t->aperf = t->aperf * aperf_mperf_multiplier; + t->mperf = t->mperf * aperf_mperf_multiplier; } if (do_smi) { @@ -2541,6 +2544,13 @@ int is_knl(unsigned int family, unsigned int model) return 0; } +unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) +{ + if (is_knl(family, model)) + return 1024; + return 1; +} + #define SLM_BCLK_FREQS 5 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; @@ -2742,6 +2752,9 @@ void process_cpuid() } } + if (has_aperf) + aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); + do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); do_snb_cstates = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); From a2b7b74945dbfe5d734eafe8aa52f9f1f8bc6931 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 26 Sep 2015 00:12:38 -0400 Subject: [PATCH 079/240] tools/power turbostat: SKL: Adjust for TSC difference from base frequency On a Skylake with 1500MHz base frequency, the TSC runs at 1512MHz. This is because the TSC is no longer in the n*100 MHz BCLK domain, but is now in the m*24MHz crystal clock domain. (24 MHz * 63 = 1512 MHz) This adds error to several calculations in turbostat, unless the TSC sample sizes are adjusted for this difference. Note that calculations in the time domain are immune from this issue, as the timing sub-system has already calibrated the TSC against a known wall clock. AVG_MHz = APERF_delta/measurement_interval need no adjustment. APERF_delta is in the BCLK domain, and measurement_interval is in the time domain. TSC_MHz = TSC_delta/measurement_interval needs no adjustment -- as we really do want to report the actual measured TSC delta here, and measurement_interval is in the accurate time domain. %Busy = MPERF_delta/TSC_delta needs adjustment to use TSC_BCLK_DOMAIN_delta. TSC_BCLK_DOMAIN_delta = TSC_delta * base_hz / tsc_hz Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval need adjustment as above. No other metrics in turbostat need to be adjusted. Before: CPU Avg_MHz %Busy Bzy_MHz TSC_MHz - 550 24.84 2216 1512 0 2191 98.73 2219 1514 2 0 0.01 2130 1512 1 9 0.43 2016 1512 3 2 0.08 2016 1512 After: CPU Avg_MHz %Busy Bzy_MHz TSC_MHz - 550 25.05 2198 1512 0 2190 99.62 2199 1512 2 0 0.01 2152 1512 1 9 0.46 2000 1512 3 2 0.10 2000 1512 Note that in this example, the "Before" Bzy_MHz was reported as exceeding the 2200 max turbo rate. Also, even a pinned spin loop would not be reported as over 99% busy. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d333c819fa1f..31d756b4ea78 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -74,6 +74,8 @@ unsigned int extra_delta_offset64; unsigned int aperf_mperf_multiplier = 1; int do_smi; double bclk; +double base_hz; +double tsc_tweak = 1.0; unsigned int show_pkg; unsigned int show_core; unsigned int show_cpu; @@ -503,7 +505,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* %Busy */ if (has_aperf) { if (!skip_c0) - outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); else outp += sprintf(outp, "********"); } @@ -511,7 +513,7 @@ int format_counters(struct thread_data *t, struct core_data *c, /* Bzy_MHz */ if (has_aperf) outp += sprintf(outp, "%8.0f", - 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + 1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float); /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); @@ -1152,6 +1154,19 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; + +static void +calculate_tsc_tweak() +{ + unsigned long long msr; + unsigned int base_ratio; + + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); + base_ratio = (msr >> 8) & 0xFF; + base_hz = base_ratio * bclk * 1000000; + tsc_tweak = base_hz / tsc_hz; +} + static void dump_nhm_platform_info(void) { @@ -2773,6 +2788,9 @@ void process_cpuid() if (debug) dump_cstate_pstate_config_info(); + if (has_skl_msrs(family, model)) + calculate_tsc_tweak(); + return; } From af71b980c0d20586cc331b617c47094a8ec7e1db Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 26 Sep 2015 09:49:55 -0400 Subject: [PATCH 080/240] tools/power turbosat: update version number Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 31d756b4ea78..bde0ef1a63df 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3119,7 +3119,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.7 17-June, 2015" + fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" " - Len Brown \n"); } From f26bf06beae70175eda91e893190784bd1bcc7c0 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 24 Sep 2015 16:00:15 +0200 Subject: [PATCH 081/240] net: hisilicon: fix handling platform_get_irq result The function can return negative value. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hip04_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index cc2d8b4b18e3..253f8ed0537a 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -816,7 +816,7 @@ static int hip04_mac_probe(struct platform_device *pdev) struct net_device *ndev; struct hip04_priv *priv; struct resource *res; - unsigned int irq; + int irq; int ret; ndev = alloc_etherdev(sizeof(struct hip04_priv)); From 72521ea07c0af37b8cb21228368128191c3f1a58 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 24 Sep 2015 16:00:24 +0200 Subject: [PATCH 082/240] r8169: fix handling rtl_readphy result The function can return negative value. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 2b32e0c5a0b4..b4f21232019a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6081,7 +6081,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) { void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; - u16 rg_saw_cnt; + int rg_saw_cnt; u32 data; static const struct ephy_info e_info_8168h_1[] = { { 0x1e, 0x0800, 0x0001 }, From 7573b94e08aeb5b814e2f277210bdcdf21a83869 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 22 Sep 2015 11:29:09 -0700 Subject: [PATCH 083/240] MIPS: CM: Provide a function to map from CPU to VP ID. The VP ID of a given CPU may not match up with the CPU number used by Linux. For example, if the width of the VP part of the VP ID is wider than log2(number of VPs per core) and the system has multiple cores then this will be the case. Alternatively, if a pre-r6 system implements the MT ASE with multiple VPEs per core and Linux is built without support for the MT ASE then the numbers won't match up either. Provide a function to convert from CPU number to VP ID. Signed-off-by: Paul Burton Acked-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: James Hogan Cc: Markos Chandras Patchwork: https://patchwork.linux-mips.org/patch/11211/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mips-cm.h | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index d75b75e78ebb..1f1927ab4269 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -194,6 +194,7 @@ BUILD_CM_RW(reg3_mask, MIPS_CM_GCB_OFS + 0xc8) BUILD_CM_R_(gic_status, MIPS_CM_GCB_OFS + 0xd0) BUILD_CM_R_(cpc_status, MIPS_CM_GCB_OFS + 0xf0) BUILD_CM_RW(l2_config, MIPS_CM_GCB_OFS + 0x130) +BUILD_CM_RW(sys_config2, MIPS_CM_GCB_OFS + 0x150) /* Core Local & Core Other register accessor functions */ BUILD_CM_Cx_RW(reset_release, 0x00) @@ -316,6 +317,10 @@ BUILD_CM_Cx_R_(tcid_8_priority, 0x80) #define CM_GCR_L2_CONFIG_ASSOC_SHF 0 #define CM_GCR_L2_CONFIG_ASSOC_MSK (_ULCAST_(0xff) << 0) +/* GCR_SYS_CONFIG2 register fields */ +#define CM_GCR_SYS_CONFIG2_MAXVPW_SHF 0 +#define CM_GCR_SYS_CONFIG2_MAXVPW_MSK (_ULCAST_(0xf) << 0) + /* GCR_Cx_COHERENCE register fields */ #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_SHF 0 #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK (_ULCAST_(0xff) << 0) @@ -405,4 +410,38 @@ static inline int mips_cm_revision(void) return read_gcr_rev(); } +/** + * mips_cm_max_vp_width() - return the width in bits of VP indices + * + * Return: the width, in bits, of VP indices in fields that combine core & VP + * indices. + */ +static inline unsigned int mips_cm_max_vp_width(void) +{ + extern int smp_num_siblings; + + if (mips_cm_revision() >= CM_REV_CM3) + return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK; + + return smp_num_siblings; +} + +/** + * mips_cm_vp_id() - calculate the hardware VP ID for a CPU + * @cpu: the CPU whose VP ID to calculate + * + * Hardware such as the GIC uses identifiers for VPs which may not match the + * CPU numbers used by Linux. This function calculates the hardware VP + * identifier corresponding to a given CPU. + * + * Return: the VP ID for the CPU. + */ +static inline unsigned int mips_cm_vp_id(unsigned int cpu) +{ + unsigned int core = cpu_data[cpu].core; + unsigned int vp = cpu_vpe_id(&cpu_data[cpu]); + + return (core * mips_cm_max_vp_width()) + vp; +} + #endif /* __MIPS_ASM_MIPS_CM_H__ */ From ab41f6c8620a6e65df4ee19d284c97efdd3d9b63 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 22 Sep 2015 11:29:10 -0700 Subject: [PATCH 084/240] irqchip: mips-gic: Convert CPU numbers to VP IDs. Make use of the mips_cm_vp_id function to convert from Linux CPU numbers to the VP IDs used by hardware, which are not identical in all systems. Without doing so we map interrupts to incorrect VP(E)s. Signed-off-by: Paul Burton Acked-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Paul Burton Cc: Marc Zyngier Cc: Jason Cooper Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11212/ Signed-off-by: Ralf Baechle --- drivers/irqchip/irq-mips-gic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index af2f16bb8a94..842a53d3f4ad 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -426,7 +426,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *cpumask, spin_lock_irqsave(&gic_lock, flags); /* Re-route this IRQ */ - gic_map_to_vpe(irq, cpumask_first(&tmp)); + gic_map_to_vpe(irq, mips_cm_vp_id(cpumask_first(&tmp))); /* Update the pcpu_masks */ for (i = 0; i < NR_CPUS; i++) @@ -599,7 +599,7 @@ static __init void gic_ipi_init_one(unsigned int intr, int cpu, GIC_SHARED_TO_HWIRQ(intr)); int i; - gic_map_to_vpe(intr, cpu); + gic_map_to_vpe(intr, mips_cm_vp_id(cpu)); for (i = 0; i < NR_CPUS; i++) clear_bit(intr, pcpu_masks[i].pcpu_mask); set_bit(intr, pcpu_masks[cpu].pcpu_mask); From d77d5ac9c9b5abf45aeb6e12930fab832e5c81d1 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 22 Sep 2015 11:29:11 -0700 Subject: [PATCH 085/240] irqchip: mips-gic: Fix pending & mask reads for MIPS64 with 32b GIC. gic_handle_shared_int reads the GIC interrupt pending & mask registers directly into a bitmap, which is defined as an array of unsigned longs. The GIC pending registers may be 32 bits wide if the CM is older than CM3, regardless of the bit width of the CPU, but for MIPS64 kernels the unsigned longs in the bitmap will be 64 bits wide. In this case we need to perform 2 x 32 bit reads per 64 bit unsigned long in order to avoid missing interrupts. Signed-off-by: Paul Burton Acked-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Marc Zyngier Cc: Jason Cooper Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11213/ Signed-off-by: Ralf Baechle --- drivers/irqchip/irq-mips-gic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 842a53d3f4ad..aeaa061f0dbf 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -320,6 +320,14 @@ static void gic_handle_shared_int(bool chained) intrmask[i] = gic_read(intrmask_reg); pending_reg += gic_reg_step; intrmask_reg += gic_reg_step; + + if (!config_enabled(CONFIG_64BIT) || mips_cm_is64) + continue; + + pending[i] |= (u64)gic_read(pending_reg) << 32; + intrmask[i] |= (u64)gic_read(intrmask_reg) << 32; + pending_reg += gic_reg_step; + intrmask_reg += gic_reg_step; } bitmap_and(pending, pending, intrmask, gic_shared_intrs); From def3ab5d0a0fe53026c2495b054dcc46cf923dac Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 25 Sep 2015 08:59:36 -0700 Subject: [PATCH 086/240] MIPS: mm: compile maar_init unconditionally maar_init was previously only compiled when CONFIG_NEED_MULTIPLE_NODES was not set, which has been fine since it is only called from the standard implementation of mem_init which has the same condition. In preparation for calling it from the SMP startup code on secondary CPUs, move maar_init outside of the #ifndef such that it is always compiled. Signed-off-by: Paul Burton Cc: Markos Chandras Cc: linux-mips@linux-mips.org Cc: Steven J. Hill Cc: David Hildenbrand Cc: linux-kernel@vger.kernel.org Cc: Ingo Molnar Patchwork: https://patchwork.linux-mips.org/patch/11237/ Signed-off-by: Ralf Baechle --- arch/mips/mm/init.c | 126 ++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 66d0f49c5bec..074ac5459026 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -252,6 +252,69 @@ void __init fixrange_init(unsigned long start, unsigned long end, #endif } +unsigned __weak platform_maar_init(unsigned num_pairs) +{ + struct maar_config cfg[BOOT_MEM_MAP_MAX]; + unsigned i, num_configured, num_cfg = 0; + phys_addr_t skip; + + for (i = 0; i < boot_mem_map.nr_map; i++) { + switch (boot_mem_map.map[i].type) { + case BOOT_MEM_RAM: + case BOOT_MEM_INIT_RAM: + break; + default: + continue; + } + + skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); + + cfg[num_cfg].lower = boot_mem_map.map[i].addr; + cfg[num_cfg].lower += skip; + + cfg[num_cfg].upper = cfg[num_cfg].lower; + cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; + cfg[num_cfg].upper -= skip; + + cfg[num_cfg].attrs = MIPS_MAAR_S; + num_cfg++; + } + + num_configured = maar_config(cfg, num_cfg, num_pairs); + if (num_configured < num_cfg) + pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n", + num_pairs, num_cfg); + + return num_configured; +} + +static void maar_init(void) +{ + unsigned num_maars, used, i; + + if (!cpu_has_maar) + return; + + /* Detect the number of MAARs */ + write_c0_maari(~0); + back_to_back_c0_hazard(); + num_maars = read_c0_maari() + 1; + + /* MAARs should be in pairs */ + WARN_ON(num_maars % 2); + + /* Configure the required MAARs */ + used = platform_maar_init(num_maars / 2); + + /* Disable any further MAARs */ + for (i = (used * 2); i < num_maars; i++) { + write_c0_maari(i); + back_to_back_c0_hazard(); + write_c0_maar(0); + back_to_back_c0_hazard(); + } +} + #ifndef CONFIG_NEED_MULTIPLE_NODES int page_is_ram(unsigned long pagenr) { @@ -334,69 +397,6 @@ static inline void mem_init_free_highmem(void) #endif } -unsigned __weak platform_maar_init(unsigned num_pairs) -{ - struct maar_config cfg[BOOT_MEM_MAP_MAX]; - unsigned i, num_configured, num_cfg = 0; - phys_addr_t skip; - - for (i = 0; i < boot_mem_map.nr_map; i++) { - switch (boot_mem_map.map[i].type) { - case BOOT_MEM_RAM: - case BOOT_MEM_INIT_RAM: - break; - default: - continue; - } - - skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff); - - cfg[num_cfg].lower = boot_mem_map.map[i].addr; - cfg[num_cfg].lower += skip; - - cfg[num_cfg].upper = cfg[num_cfg].lower; - cfg[num_cfg].upper += boot_mem_map.map[i].size - 1; - cfg[num_cfg].upper -= skip; - - cfg[num_cfg].attrs = MIPS_MAAR_S; - num_cfg++; - } - - num_configured = maar_config(cfg, num_cfg, num_pairs); - if (num_configured < num_cfg) - pr_warn("Not enough MAAR pairs (%u) for all bootmem regions (%u)\n", - num_pairs, num_cfg); - - return num_configured; -} - -static void maar_init(void) -{ - unsigned num_maars, used, i; - - if (!cpu_has_maar) - return; - - /* Detect the number of MAARs */ - write_c0_maari(~0); - back_to_back_c0_hazard(); - num_maars = read_c0_maari() + 1; - - /* MAARs should be in pairs */ - WARN_ON(num_maars % 2); - - /* Configure the required MAARs */ - used = platform_maar_init(num_maars / 2); - - /* Disable any further MAARs */ - for (i = (used * 2); i < num_maars; i++) { - write_c0_maari(i); - back_to_back_c0_hazard(); - write_c0_maar(0); - back_to_back_c0_hazard(); - } -} - void __init mem_init(void) { #ifdef CONFIG_HIGHMEM From 651ca7f4dab77f07fdac9cfb68bcab6bd2b7f827 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 25 Sep 2015 08:59:37 -0700 Subject: [PATCH 087/240] MIPS: print MAAR configuration during boot Verifying that the MAAR configuration is as expected is useful when debugging the performance of a system. Print out the memory regions configured via MAAR along with their attributes. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Steven J. Hill Cc: David Hildenbrand Cc: linux-kernel@vger.kernel.org Cc: Peter Zijlstra (Intel) Patchwork: https://patchwork.linux-mips.org/patch/11238/ Signed-off-by: Ralf Baechle --- arch/mips/mm/init.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 074ac5459026..023c164b9eb6 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -291,6 +291,7 @@ unsigned __weak platform_maar_init(unsigned num_pairs) static void maar_init(void) { unsigned num_maars, used, i; + phys_addr_t lower, upper, attr; if (!cpu_has_maar) return; @@ -313,6 +314,34 @@ static void maar_init(void) write_c0_maar(0); back_to_back_c0_hazard(); } + + pr_info("MAAR configuration:\n"); + for (i = 0; i < num_maars; i += 2) { + write_c0_maari(i); + back_to_back_c0_hazard(); + upper = read_c0_maar(); + + write_c0_maari(i + 1); + back_to_back_c0_hazard(); + lower = read_c0_maar(); + + attr = lower & upper; + lower = (lower & MIPS_MAAR_ADDR) << 4; + upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; + + pr_info(" [%d]: ", i / 2); + if (!(attr & MIPS_MAAR_V)) { + pr_cont("disabled\n"); + continue; + } + + pr_cont("%pa-%pa", &lower, &upper); + + if (attr & MIPS_MAAR_S) + pr_cont(" speculate"); + + pr_cont("\n"); + } } #ifndef CONFIG_NEED_MULTIPLE_NODES From e060f6ed281669b6d2f22d8dafd664b532386918 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 25 Sep 2015 08:59:38 -0700 Subject: [PATCH 088/240] MIPS: Initialise MAARs on secondary CPUs MAARs should be initialised on each CPU (or rather, core) in the system in order to achieve consistent behaviour & performance. Previously they have only been initialised on the boot CPU which leads to performance problems if tasks are later scheduled on a secondary CPU, particularly if those tasks make use of unaligned vector accesses where some CPUs don't handle any cases in hardware for non-speculative memory regions. Fix this by recording the MAAR configuration from the boot CPU and applying it to secondary CPUs as part of their bringup. Reported-by: Doug Gilmore Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Rusty Russell Cc: Steven J. Hill Cc: Andrew Bresticker Cc: Bjorn Helgaas Cc: David Hildenbrand Cc: linux-kernel@vger.kernel.org Cc: Aaro Koskinen Cc: James Hogan Cc: Ingo Molnar Cc: Markos Chandras Cc: Hemmo Nieminen Cc: Alex Smith Cc: Peter Zijlstra (Intel) Patchwork: https://patchwork.linux-mips.org/patch/11239/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/maar.h | 9 +++++++++ arch/mips/kernel/smp.c | 2 ++ arch/mips/mm/init.c | 28 +++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h index b02891f9caaf..21d9607c80d7 100644 --- a/arch/mips/include/asm/maar.h +++ b/arch/mips/include/asm/maar.h @@ -65,6 +65,15 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower, back_to_back_c0_hazard(); } +/** + * maar_init() - initialise MAARs + * + * Performs initialisation of MAARs for the current CPU, making use of the + * platforms implementation of platform_maar_init where necessary and + * duplicating the setup it provides on secondary CPUs. + */ +extern void maar_init(void); + /** * struct maar_config - MAAR configuration data * @lower: The lowest address that the MAAR pair will affect. Must be diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index a31896c33716..bd4385a8e6e8 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ @@ -157,6 +158,7 @@ asmlinkage void start_secondary(void) mips_clockevent_init(); mp_ops->init_secondary(); cpu_report(); + maar_init(); /* * XXX parity protection should be folded in here when it's converted diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 023c164b9eb6..8770e619185e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -44,6 +44,7 @@ #include #include #include +#include /* * We have up to 8 empty zeroed pages so we can map one of the right colour @@ -288,10 +289,14 @@ unsigned __weak platform_maar_init(unsigned num_pairs) return num_configured; } -static void maar_init(void) +void maar_init(void) { unsigned num_maars, used, i; phys_addr_t lower, upper, attr; + static struct { + struct maar_config cfgs[3]; + unsigned used; + } recorded = { { { 0 } }, 0 }; if (!cpu_has_maar) return; @@ -304,8 +309,14 @@ static void maar_init(void) /* MAARs should be in pairs */ WARN_ON(num_maars % 2); - /* Configure the required MAARs */ - used = platform_maar_init(num_maars / 2); + /* Set MAARs using values we recorded already */ + if (recorded.used) { + used = maar_config(recorded.cfgs, recorded.used, num_maars / 2); + BUG_ON(used != recorded.used); + } else { + /* Configure the required MAARs */ + used = platform_maar_init(num_maars / 2); + } /* Disable any further MAARs */ for (i = (used * 2); i < num_maars; i++) { @@ -315,6 +326,9 @@ static void maar_init(void) back_to_back_c0_hazard(); } + if (recorded.used) + return; + pr_info("MAAR configuration:\n"); for (i = 0; i < num_maars; i += 2) { write_c0_maari(i); @@ -341,6 +355,14 @@ static void maar_init(void) pr_cont(" speculate"); pr_cont("\n"); + + /* Record the setup for use on secondary CPUs */ + if (used <= ARRAY_SIZE(recorded.cfgs)) { + recorded.cfgs[recorded.used].lower = lower; + recorded.cfgs[recorded.used].upper = upper; + recorded.cfgs[recorded.used].attrs = attr; + recorded.used++; + } } } From 1a5e251996e1b602f2ddc9261ee9de0ca1875bfa Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sun, 27 Sep 2015 17:13:55 -0700 Subject: [PATCH 089/240] Input: serio - fix blocking of parport If parkbd_allocate_serio() fails to allocate memory we are releasing the parport but we missed unregistering the device. As a result this device with exclusive access to that parport remains registered. And no other device will be able to use that parport even though this driver has failed to load. Signed-off-by: Sudip Mukherjee Signed-off-by: Dmitry Torokhov --- drivers/input/serio/parkbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/serio/parkbd.c b/drivers/input/serio/parkbd.c index 26b45936f9fd..1e8cd6f1fe9e 100644 --- a/drivers/input/serio/parkbd.c +++ b/drivers/input/serio/parkbd.c @@ -194,6 +194,7 @@ static int __init parkbd_init(void) parkbd_port = parkbd_allocate_serio(); if (!parkbd_port) { parport_release(parkbd_dev); + parport_unregister_device(parkbd_dev); return -ENOMEM; } From 5b3f33eb408ad06acf59b09fe0550bf3756e320a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 6 Sep 2015 11:59:27 +0200 Subject: [PATCH 090/240] m68k: Wire up direct socket calls Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/include/uapi/asm/unistd.h | 17 +++++++++++++++++ arch/m68k/kernel/syscalltable.S | 18 +++++++++++++++++- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 244e0dbe45db..7599cb94f9a0 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 356 +#define NR_syscalls 373 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 61fb6cb9d2ae..b29ef18adb4a 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -361,5 +361,22 @@ #define __NR_memfd_create 353 #define __NR_bpf 354 #define __NR_execveat 355 +#define __NR_socket 356 +#define __NR_socketpair 357 +#define __NR_bind 358 +#define __NR_connect 359 +#define __NR_listen 360 +#define __NR_accept4 361 +#define __NR_getsockopt 362 +#define __NR_setsockopt 363 +#define __NR_getsockname 364 +#define __NR_getpeername 365 +#define __NR_sendto 366 +#define __NR_sendmsg 367 +#define __NR_recvfrom 368 +#define __NR_recvmsg 369 +#define __NR_shutdown 370 +#define __NR_recvmmsg 371 +#define __NR_sendmmsg 372 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index a0ec4303f2c8..e0fe52b62a3e 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -376,4 +376,20 @@ ENTRY(sys_call_table) .long sys_memfd_create .long sys_bpf .long sys_execveat /* 355 */ - + .long sys_socket + .long sys_socketpair + .long sys_bind + .long sys_connect + .long sys_listen /* 360 */ + .long sys_accept4 + .long sys_getsockopt + .long sys_setsockopt + .long sys_getsockname + .long sys_getpeername /* 365 */ + .long sys_sendto + .long sys_sendmsg + .long sys_recvfrom + .long sys_recvmsg + .long sys_shutdown /* 370 */ + .long sys_recvmmsg + .long sys_sendmmsg From b92858f2be96f49c483436e851a4667543ab5768 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 6 Sep 2015 12:01:40 +0200 Subject: [PATCH 091/240] m68k: Wire up userfaultfd $ ./userfaultfd 10 99 nr_pages: 2560, nr_pages_per_cpu: 2560 bounces: 98, mode: racing, userfaults: 1121 bounces: 97, mode: rnd, userfaults: 977 bounces: 96, mode:, userfaults: 1119 bounces: 95, mode: rnd racing ver poll, userfaults: 1040 bounces: 94, mode: racing ver poll, userfaults: 1022 bounces: 93, mode: rnd ver poll, userfaults: 946 bounces: 92, mode: ver poll, userfaults: 1115 bounces: 91, mode: rnd racing poll, userfaults: 977 bounces: 90, mode: racing poll, userfaults: 899 bounces: 89, mode: rnd poll, userfaults: 881 bounces: 88, mode: poll, userfaults: 1069 bounces: 87, mode: rnd racing ver, userfaults: 1114 bounces: 86, mode: racing ver, userfaults: 1109 bounces: 85, mode: rnd ver, userfaults: 1165 bounces: 84, mode: ver, userfaults: 1107 bounces: 83, mode: rnd racing, userfaults: 1134 bounces: 82, mode: racing, userfaults: 1105 bounces: 81, mode: rnd, userfaults: 1323 bounces: 80, mode:, userfaults: 1103 bounces: 79, mode: rnd racing ver poll, userfaults: 909 bounces: 78, mode: racing ver poll, userfaults: 1095 bounces: 77, mode: rnd ver poll, userfaults: 951 bounces: 76, mode: ver poll, userfaults: 1099 bounces: 75, mode: rnd racing poll, userfaults: 1035 bounces: 74, mode: racing poll, userfaults: 1097 bounces: 73, mode: rnd poll, userfaults: 1159 bounces: 72, mode: poll, userfaults: 1042 bounces: 71, mode: rnd racing ver, userfaults: 848 bounces: 70, mode: racing ver, userfaults: 1093 bounces: 69, mode: rnd ver, userfaults: 892 bounces: 68, mode: ver, userfaults: 1091 bounces: 67, mode: rnd racing, userfaults: 1219 bounces: 66, mode: racing, userfaults: 1089 bounces: 65, mode: rnd, userfaults: 988 bounces: 64, mode:, userfaults: 1087 bounces: 63, mode: rnd racing ver poll, userfaults: 882 bounces: 62, mode: racing ver poll, userfaults: 984 bounces: 61, mode: rnd ver poll, userfaults: 701 bounces: 60, mode: ver poll, userfaults: 1071 bounces: 59, mode: rnd racing poll, userfaults: 1137 bounces: 58, mode: racing poll, userfaults: 1032 bounces: 57, mode: rnd poll, userfaults: 911 bounces: 56, mode: poll, userfaults: 1079 bounces: 55, mode: rnd racing ver, userfaults: 1106 bounces: 54, mode: racing ver, userfaults: 1077 bounces: 53, mode: rnd ver, userfaults: 886 bounces: 52, mode: ver, userfaults: 1075 bounces: 51, mode: rnd racing, userfaults: 1101 bounces: 50, mode: racing, userfaults: 1073 bounces: 49, mode: rnd, userfaults: 1070 bounces: 48, mode:, userfaults: 1071 bounces: 47, mode: rnd racing ver poll, userfaults: 1077 bounces: 46, mode: racing ver poll, userfaults: 910 bounces: 45, mode: rnd ver poll, userfaults: 1063 bounces: 44, mode: ver poll, userfaults: 1028 bounces: 43, mode: rnd racing poll, userfaults: 1043 bounces: 42, mode: racing poll, userfaults: 1065 bounces: 41, mode: rnd poll, userfaults: 912 bounces: 40, mode: poll, userfaults: 1063 bounces: 39, mode: rnd racing ver, userfaults: 880 bounces: 38, mode: racing ver, userfaults: 1061 bounces: 37, mode: rnd ver, userfaults: 1144 bounces: 36, mode: ver, userfaults: 1059 bounces: 35, mode: rnd racing, userfaults: 967 bounces: 34, mode: racing, userfaults: 1057 bounces: 33, mode: rnd, userfaults: 1076 bounces: 32, mode:, userfaults: 1055 bounces: 31, mode: rnd racing ver poll, userfaults: 997 bounces: 30, mode: racing ver poll, userfaults: 1053 bounces: 29, mode: rnd ver poll, userfaults: 968 bounces: 28, mode: ver poll, userfaults: 978 bounces: 27, mode: rnd racing poll, userfaults: 1008 bounces: 26, mode: racing poll, userfaults: 1049 bounces: 25, mode: rnd poll, userfaults: 900 bounces: 24, mode: poll, userfaults: 1047 bounces: 23, mode: rnd racing ver, userfaults: 988 bounces: 22, mode: racing ver, userfaults: 1045 bounces: 21, mode: rnd ver, userfaults: 1027 bounces: 20, mode: ver, userfaults: 1043 bounces: 19, mode: rnd racing, userfaults: 1017 bounces: 18, mode: racing, userfaults: 1041 bounces: 17, mode: rnd, userfaults: 979 bounces: 16, mode:, userfaults: 1039 bounces: 15, mode: rnd racing ver poll, userfaults: 1134 bounces: 14, mode: racing ver poll, userfaults: 1037 bounces: 13, mode: rnd ver poll, userfaults: 1046 bounces: 12, mode: ver poll, userfaults: 1035 bounces: 11, mode: rnd racing poll, userfaults: 1060 bounces: 10, mode: racing poll, userfaults: 1033 bounces: 9, mode: rnd poll, userfaults: 1003 bounces: 8, mode: poll, userfaults: 929 bounces: 7, mode: rnd racing ver, userfaults: 964 bounces: 6, mode: racing ver, userfaults: 1029 bounces: 5, mode: rnd ver, userfaults: 1053 bounces: 4, mode: ver, userfaults: 1027 bounces: 3, mode: rnd racing, userfaults: 863 bounces: 2, mode: racing, userfaults: 1025 bounces: 1, mode: rnd, userfaults: 1043 bounces: 0, mode:, userfaults: 950 Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/include/uapi/asm/unistd.h | 1 + arch/m68k/kernel/syscalltable.S | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 7599cb94f9a0..d25d5a5c83cb 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 373 +#define NR_syscalls 374 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index b29ef18adb4a..0eebb28488ec 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -378,5 +378,6 @@ #define __NR_shutdown 370 #define __NR_recvmmsg 371 #define __NR_sendmmsg 372 +#define __NR_userfaultfd 373 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index e0fe52b62a3e..7b5a315f6df2 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -393,3 +393,4 @@ ENTRY(sys_call_table) .long sys_shutdown /* 370 */ .long sys_recvmmsg .long sys_sendmmsg + .long sys_userfaultfd From 7f843dab134b7ce8804b67cca2271267d3a0213d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 13 Sep 2015 11:49:13 +0200 Subject: [PATCH 092/240] m68k: Wire up membarrier $ ./membarrier_test membarrier MEMBARRIER_CMD_QUERY syscall available. membarrier: MEMBARRIER_CMD_SHARED success. membarrier: tests done! $ Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer --- arch/m68k/include/asm/unistd.h | 2 +- arch/m68k/include/uapi/asm/unistd.h | 1 + arch/m68k/kernel/syscalltable.S | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index d25d5a5c83cb..0793a7f17417 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 374 +#define NR_syscalls 375 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 0eebb28488ec..5e6fae6c275f 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -379,5 +379,6 @@ #define __NR_recvmmsg 371 #define __NR_sendmmsg 372 #define __NR_userfaultfd 373 +#define __NR_membarrier 374 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 7b5a315f6df2..5dd0e80042f5 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -394,3 +394,4 @@ ENTRY(sys_call_table) .long sys_recvmmsg .long sys_sendmmsg .long sys_userfaultfd + .long sys_membarrier From 8474ba74193d302e8340dddd1e16c85cc4b98caf Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 23 Sep 2015 23:12:09 +0200 Subject: [PATCH 093/240] m68k: Define asmlinkage_protect Make sure the compiler does not modify arguments of syscall functions. This can happen if the compiler generates a tailcall to another function. For example, without asmlinkage_protect sys_openat is compiled into this function: sys_openat: clr.l %d0 move.w 18(%sp),%d0 move.l %d0,16(%sp) jbra do_sys_open Note how the fourth argument is modified in place, modifying the register %d4 that gets restored from this stack slot when the function returns to user-space. The caller may expect the register to be unmodified across system calls. Signed-off-by: Andreas Schwab Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org --- arch/m68k/include/asm/linkage.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h index 5a822bb790f7..066e74f666ae 100644 --- a/arch/m68k/include/asm/linkage.h +++ b/arch/m68k/include/asm/linkage.h @@ -4,4 +4,34 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "m" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) + #endif From 95bc06ef049b808a067327bd8490b608b47e3870 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 13 Sep 2015 11:24:02 +0200 Subject: [PATCH 094/240] m68k/defconfig: Update defconfigs for v4.3-rc1 Signed-off-by: Geert Uytterhoeven --- arch/m68k/configs/amiga_defconfig | 9 ++++++++- arch/m68k/configs/apollo_defconfig | 9 ++++++++- arch/m68k/configs/atari_defconfig | 9 ++++++++- arch/m68k/configs/bvme6000_defconfig | 9 ++++++++- arch/m68k/configs/hp300_defconfig | 9 ++++++++- arch/m68k/configs/mac_defconfig | 9 ++++++++- arch/m68k/configs/multi_defconfig | 9 ++++++++- arch/m68k/configs/mvme147_defconfig | 9 ++++++++- arch/m68k/configs/mvme16x_defconfig | 9 ++++++++- arch/m68k/configs/q40_defconfig | 9 ++++++++- arch/m68k/configs/sun3_defconfig | 9 ++++++++- arch/m68k/configs/sun3x_defconfig | 9 ++++++++- 12 files changed, 96 insertions(+), 12 deletions(-) diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 0b6b40d37b95..5b4ec541ba7c 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -57,7 +58,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -67,10 +67,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -179,6 +181,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -206,6 +209,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -271,6 +275,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -370,6 +375,7 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -537,6 +543,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index eeb3a8991fc4..6e5198e2c124 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -344,6 +349,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -495,6 +501,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 3a7006654ce9..f75600b0ca23 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -355,6 +360,7 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SEEQ is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -517,6 +523,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 0586b323a673..a42d91c389a6 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -343,6 +348,7 @@ CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index ad1dbce07aa4..77f4a11083e9 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -55,7 +56,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -65,10 +65,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -177,6 +179,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -204,6 +207,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -269,6 +273,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -345,6 +350,7 @@ CONFIG_HPLANCE=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -497,6 +503,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index b44acacaecf4..5a329f77329b 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -54,7 +55,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -64,10 +64,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -176,6 +178,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -203,6 +206,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -271,6 +275,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -364,6 +369,7 @@ CONFIG_MAC8390=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -519,6 +525,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 8afca3753db1..83c80d2030ec 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -64,7 +65,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -74,10 +74,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -186,6 +188,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -213,6 +216,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -281,6 +285,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -410,6 +415,7 @@ CONFIG_ZORRO8390=y # CONFIG_NET_VENDOR_SEEQ is not set CONFIG_SMC91X=y # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -599,6 +605,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index ef00875994d9..6cb42c3bf5a2 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -52,7 +53,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -62,10 +62,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -174,6 +176,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -201,6 +204,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -266,6 +270,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -343,6 +348,7 @@ CONFIG_MVME147_NET=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 387c2bd90ff1..c7508c30330c 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -343,6 +348,7 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -488,6 +494,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 35355c1bc714..64b71664a303 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -53,7 +54,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -63,10 +63,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -175,6 +177,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -202,6 +205,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -267,6 +271,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -354,6 +359,7 @@ CONFIG_NE2000=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_SMSC is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PLIP=m @@ -510,6 +516,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 8442d267b877..9a4cab78a2ea 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -50,7 +51,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -60,10 +60,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -172,6 +174,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -199,6 +202,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -264,6 +268,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -341,6 +346,7 @@ CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set # CONFIG_NET_VENDOR_SUN is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -489,6 +495,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m CONFIG_CRYPTO_MANAGER=y diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 0e1b542e1555..1a2eaac13dbd 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -10,6 +10,7 @@ CONFIG_LOG_BUF_SHIFT=16 # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_USERFAULTFD=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -50,7 +51,6 @@ CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m CONFIG_NET_IPVTI=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_GENEVE_CORE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m @@ -60,10 +60,12 @@ CONFIG_INET_XFRM_MODE_BEET=m # CONFIG_INET_LRO is not set CONFIG_INET_DIAG=m CONFIG_INET_UDP_DIAG=m +CONFIG_IPV6=m CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_ILA=m CONFIG_IPV6_VTI=m CONFIG_IPV6_GRE=m CONFIG_NETFILTER=y @@ -172,6 +174,7 @@ CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_DUP_IPV4=m CONFIG_NF_TABLES_ARP=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -199,6 +202,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m @@ -264,6 +268,7 @@ CONFIG_NETLINK_DIAG=m CONFIG_MPLS=y CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m # CONFIG_WIRELESS is not set # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y @@ -341,6 +346,7 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_SAMSUNG is not set # CONFIG_NET_VENDOR_SEEQ is not set # CONFIG_NET_VENDOR_STMICRO is not set +# CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set CONFIG_PPP=m @@ -489,6 +495,7 @@ CONFIG_TEST_USER_COPY=m CONFIG_TEST_BPF=m CONFIG_TEST_FIRMWARE=m CONFIG_TEST_UDELAY=m +CONFIG_TEST_STATIC_KEYS=m CONFIG_EARLY_PRINTK=y CONFIG_ENCRYPTED_KEYS=m CONFIG_CRYPTO_RSA=m From bc5f2ab11ca6dda4a4826e7e78d5365d7c3e1569 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 23 Sep 2015 11:32:36 -0700 Subject: [PATCH 095/240] drm/i915/skl: Don't call intel_prepare_ddi when encoder list isn't yet initialized. In case something goes wrong with power well initialization we were calling intel_prepare_ddi during boot while encoder list isnt't initilized. [ 9.618747] i915 0000:00:02.0: Invalid ROM contents [ 9.631446] [drm] failed to find VBIOS tables [ 9.720036] BUG: unable to handle kernel NULL pointer dereference at 00000000 00000058 [ 9.721986] IP: [] ddi_get_encoder_port+0x82/0x190 [i915] [ 9.723736] PGD 0 [ 9.724286] Oops: 0000 [#1] PREEMPT SMP [ 9.725386] Modules linked in: intel_powerclamp snd_hda_intel(+) coretemp crc 32c_intel snd_hda_codec snd_hda_core serio_raw snd_pcm snd_timer i915(+) parport _pc parport pinctrl_sunrisepoint pinctrl_intel nfsd nfs_acl [ 9.730635] CPU: 0 PID: 497 Comm: systemd-udevd Not tainted 4.3.0-rc2-eywa-10 967-g72de2cfd-dirty #2 [ 9.732785] Hardware name: Intel Corporation Cannonlake Client platform/Skyla ke DT DDR4 RVP8, BIOS CNLSE2R1.R00.X021.B00.1508040310 08/04/2015 [ 9.735785] task: ffff88008a704700 ti: ffff88016a1ac000 task.ti: ffff88016a1a c000 [ 9.737584] RIP: 0010:[] [] ddi_get_enco der_port+0x82/0x190 [i915] [ 9.739934] RSP: 0000:ffff88016a1af710 EFLAGS: 00010296 [ 9.741184] RAX: 000000000000004e RBX: ffff88008a9edc98 RCX: 0000000000000001 [ 9.742934] RDX: 000000000000004e RSI: ffffffff81fc1e82 RDI: 00000000ffffffff [ 9.744634] RBP: ffff88016a1af730 R08: 0000000000000000 R09: 0000000000000578 [ 9.746333] R10: 0000000000001065 R11: 0000000000000578 R12: fffffffffffffff8 [ 9.748033] R13: ffff88016a1af7a8 R14: ffff88016a1af794 R15: 0000000000000000 [ 9.749733] FS: 00007eff2e1e07c0(0000) GS:ffff88016fc00000(0000) knlGS:00000 00000000000 [ 9.751683] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 9.753083] CR2: 0000000000000058 CR3: 000000016922b000 CR4: 00000000003406f0 [ 9.754782] Stack: [ 9.755332] ffff88008a9edc98 ffff88008a9ed800 ffffffffa01d07b0 00000000fffb9 09e [ 9.757232] ffff88016a1af7d8 ffffffffa0154ea7 0000000000000246 ffff88016a370 080 [ 9.759182] ffff88016a370080 ffff88008a9ed800 0000000000000246 ffff88008a9ed c98 [ 9.761132] Call Trace: [ 9.761782] [] intel_prepare_ddi+0x67/0x860 [i915] [ 9.763332] [] ? _raw_spin_unlock_irqrestore+0x26/0x40 [ 9.765031] [] ? gen9_read32+0x141/0x360 [i915] [ 9.766531] [] skl_set_power_well+0x431/0xa80 [i915] [ 9.768181] [] skl_power_well_enable+0x13/0x20 [i915] [ 9.769781] [] intel_power_well_enable+0x28/0x50 [i915] [ 9.771481] [] intel_display_power_get+0x92/0xc0 [i915] [ 9.773180] [] intel_display_set_init_power+0x3b/0x40 [i91 5] [ 9.774980] [] intel_power_domains_init_hw+0x120/0x520 [i9 15] [ 9.776780] [] i915_driver_load+0xb21/0xf40 [i915] So let's protect this case. My first attempt was to remove the intel_prepare_ddi, but Daniel had pointed out this is really needed to restore those registers values. And Imre pointed out that this case was without the flag protection and this was actually where things were going bad. So I've just checked and this indeed solves my issue. The regressing intel_prepare_ddi call was added in commit 1d2b9526a790d55b7ae870934a74937081f62de2 Author: Damien Lespiau Date: Fri Mar 6 18:50:53 2015 +0000 drm/i915/skl: Restore the DDI translation tables when enabling PW1 Cc: Imre Deak Cc: Daniel Vetter Signed-off-by: Rodrigo Vivi Reviewed-by: Imre Deak [Jani: regression reference] Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_runtime_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index af7fdb3bd663..7401cf90b0db 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -246,7 +246,8 @@ static void skl_power_well_post_enable(struct drm_i915_private *dev_priv, } if (power_well->data == SKL_DISP_PW_1) { - intel_prepare_ddi(dev); + if (!dev_priv->power_domains.initializing) + intel_prepare_ddi(dev); gen8_irq_power_well_post_enable(dev_priv, 1 << PIPE_A); } } From c73318f43d3967e3ce810665d9c74a7d238d24d1 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 3 Sep 2015 13:06:09 +0200 Subject: [PATCH 096/240] watchdog: Fix module autoload for OF platform driver These platform drivers have a OF device ID table but the OF module alias information is not created so module autoloading won't work. Signed-off-by: Luis de Bethencourt Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/gef_wdt.c | 1 + drivers/watchdog/mena21_wdt.c | 1 + drivers/watchdog/moxart_wdt.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index cc1bdfc2ff71..006e2348022c 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -303,6 +303,7 @@ static const struct of_device_id gef_wdt_ids[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, gef_wdt_ids); static struct platform_driver gef_wdt_driver = { .driver = { diff --git a/drivers/watchdog/mena21_wdt.c b/drivers/watchdog/mena21_wdt.c index 69013007dc47..098fa9c34d6d 100644 --- a/drivers/watchdog/mena21_wdt.c +++ b/drivers/watchdog/mena21_wdt.c @@ -253,6 +253,7 @@ static const struct of_device_id a21_wdt_ids[] = { { .compatible = "men,a021-wdt" }, { }, }; +MODULE_DEVICE_TABLE(of, a21_wdt_ids); static struct platform_driver a21_wdt_driver = { .probe = a21_wdt_probe, diff --git a/drivers/watchdog/moxart_wdt.c b/drivers/watchdog/moxart_wdt.c index 2789da2c0515..60b0605bd7e6 100644 --- a/drivers/watchdog/moxart_wdt.c +++ b/drivers/watchdog/moxart_wdt.c @@ -168,6 +168,7 @@ static const struct of_device_id moxart_watchdog_match[] = { { .compatible = "moxa,moxart-watchdog" }, { }, }; +MODULE_DEVICE_TABLE(of, moxart_watchdog_match); static struct platform_driver moxart_wdt_driver = { .probe = moxart_wdt_probe, From 898e6861ff7cfc9f539b57859a27fbd1fe4298ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Wed, 17 Jun 2015 16:04:04 +0200 Subject: [PATCH 097/240] watchdog: bcm2835: Fix poweroff behaviour MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently poweroff/halt results in a reboot on the Raspberry Pi. The firmware uses the RSTS register to know which partiton to boot from. The partiton value is spread into bits 0, 2, 4, 6, 8, 10. Partiton 63 is a special partition used by the firmware to indicate halt. The firmware made this change in 19 Aug 2013 and was matched by the downstream commit: Changes for new NOOBS multi partition booting from gsh Signed-off-by: Noralf Trønnes Tested-by: Stephen Warren Acked-by: Stephen Warren Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/bcm2835_wdt.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c index 66c3e656a616..8a5ce5b5a0b6 100644 --- a/drivers/watchdog/bcm2835_wdt.c +++ b/drivers/watchdog/bcm2835_wdt.c @@ -36,6 +36,13 @@ #define PM_RSTC_WRCFG_FULL_RESET 0x00000020 #define PM_RSTC_RESET 0x00000102 +/* + * The Raspberry Pi firmware uses the RSTS register to know which partiton + * to boot from. The partiton value is spread into bits 0, 2, 4, 6, 8, 10. + * Partiton 63 is a special partition used by the firmware to indicate halt. + */ +#define PM_RSTS_RASPBERRYPI_HALT 0x555 + #define SECS_TO_WDOG_TICKS(x) ((x) << 16) #define WDOG_TICKS_TO_SECS(x) ((x) >> 16) @@ -151,8 +158,7 @@ static void bcm2835_power_off(void) * hard reset. */ val = readl_relaxed(wdt->base + PM_RSTS); - val &= PM_RSTC_WRCFG_CLR; - val |= PM_PASSWORD | PM_RSTS_HADWRH_SET; + val |= PM_PASSWORD | PM_RSTS_RASPBERRYPI_HALT; writel_relaxed(val, wdt->base + PM_RSTS); /* Continue with normal reset mechanism */ From 3cef072d3bbd4344823545e50d8cb240a6d4635d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 11 Sep 2015 06:28:08 -0700 Subject: [PATCH 098/240] watchdog: iTCO: Fix dependencies on I2C If I2C is built as module, the iTCO watchdog driver must be built as module as well. I2C_I801 must only be selected if I2C is configured. This fixes the following build errors, seen if I2C=m and ITCO_WDT=y. i2c-i801.c:(.text+0x2bf055): undefined reference to `i2c_del_adapter' i2c-i801.c:(.text+0x2c13e0): undefined reference to `i2c_add_adapter' i2c-i801.c:(.text+0x2c17bd): undefined reference to `i2c_new_device' Fixes: 2a7a0e9bf7b3 ("watchdog: iTCO_wdt: Add support for TCO on Intel Sunrisepoint") Reviewed-by: Matt Fleming Cc: Lee Jones Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- drivers/watchdog/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index c68edc16aa54..79e1aa1b0959 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -817,8 +817,9 @@ config ITCO_WDT tristate "Intel TCO Timer/Watchdog" depends on (X86 || IA64) && PCI select WATCHDOG_CORE + depends on I2C || I2C=n select LPC_ICH if !EXPERT - select I2C_I801 if !EXPERT + select I2C_I801 if !EXPERT && I2C ---help--- Hardware driver for the intel TCO timer based watchdog devices. These drivers are included in the Intel 82801 I/O Controller From 9bac175d8ed0b1dd3d3611c0713666b724eeace3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 18 Sep 2015 17:54:30 +0200 Subject: [PATCH 099/240] Revert "KVM: x86: zero kvmclock_offset when vcpu0 initializes kvmclock system MSR" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shifting pvclock_vcpu_time_info.system_time on write to KVM system time MSR is a change of ABI. Probably only 2.6.16 based SLES 10 breaks due to its custom enhancements to kvmclock, but KVM never declared the MSR only for one-shot initialization. (Doc says that only one write is needed.) This reverts commit b7e60c5aedd2b63f16ef06fde4f81ca032211bc5. And adds a note to the definition of PVCLOCK_COUNTS_FROM_ZERO. Signed-off-by: Radim Krčmář Acked-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/pvclock-abi.h | 1 + arch/x86/kvm/x86.c | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h index 655e07a48f6c..67f08230103a 100644 --- a/arch/x86/include/asm/pvclock-abi.h +++ b/arch/x86/include/asm/pvclock-abi.h @@ -41,6 +41,7 @@ struct pvclock_wall_clock { #define PVCLOCK_TSC_STABLE_BIT (1 << 0) #define PVCLOCK_GUEST_STOPPED (1 << 1) +/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */ #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2) #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_PVCLOCK_ABI_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 991466bf8dee..92511d4b7236 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1708,8 +1708,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->pvclock_set_guest_stopped_request = false; } - pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO; - /* If the host uses TSC clocksource, then it is stable */ if (use_master_clock) pvclock_flags |= PVCLOCK_TSC_STABLE_BIT; @@ -2007,8 +2005,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) &vcpu->requests); ka->boot_vcpu_runs_old_kvmclock = tmp; - - ka->kvmclock_offset = -get_kernel_ns(); } vcpu->arch.time = data; From dfc53c5e73f8b73abf920241e45eab87335ae742 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Mon, 28 Sep 2015 13:25:12 +0100 Subject: [PATCH 100/240] drm/i915: Consider HW CSB write pointer before resetting the sw read pointer A previous commit resets the Context Status Buffer (CSB) read pointer in ring init commit c0a03a2e4c4e ("drm/i915: Reset CSB read pointer in ring init") This is generally correct, but this pointer is not reset after suspend/resume in some platforms (cht). In this case, the driver should read the register value instead of resetting the sw read counter to 0. Otherwise we process old events, leading to unwanted pre-emptions or something worse. But in other platforms (bdw) and also during GPU reset or power up, the CSBWP is reset to 0x7 (an invalid number), and in this case the read pointer should be set to 5 (the interrupt code will increment this counter one more time, and will start reading from CSB[0]). v2: When the CSB registers are reset, the read pointer needs to be set to 5, otherwise the first write (CSB[0]) won't be read (Mika). Replace magic numbers with GEN8_CSB_ENTRIES (6) and GEN8_CSB_PTR_MASK (0x07). Cc: Mika Kuoppala Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Lei Shen Signed-off-by: Deepak S Signed-off-by: Michel Thierry Reviewed-by: Mika Kuoppala Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 39 ++++++++++++++++++++++++++------ drivers/gpu/drm/i915/intel_lrc.h | 2 ++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 72e0edd7bbde..7412caedcf7f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -484,18 +484,18 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring) status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring)); read_pointer = ring->next_context_status_buffer; - write_pointer = status_pointer & 0x07; + write_pointer = status_pointer & GEN8_CSB_PTR_MASK; if (read_pointer > write_pointer) - write_pointer += 6; + write_pointer += GEN8_CSB_ENTRIES; spin_lock(&ring->execlist_lock); while (read_pointer < write_pointer) { read_pointer++; status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + - (read_pointer % 6) * 8); + (read_pointer % GEN8_CSB_ENTRIES) * 8); status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + - (read_pointer % 6) * 8 + 4); + (read_pointer % GEN8_CSB_ENTRIES) * 8 + 4); if (status & GEN8_CTX_STATUS_IDLE_ACTIVE) continue; @@ -521,10 +521,12 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring) spin_unlock(&ring->execlist_lock); WARN(submit_contexts > 2, "More than two context complete events?\n"); - ring->next_context_status_buffer = write_pointer % 6; + ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES; I915_WRITE(RING_CONTEXT_STATUS_PTR(ring), - _MASKED_FIELD(0x07 << 8, ((u32)ring->next_context_status_buffer & 0x07) << 8)); + _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8, + ((u32)ring->next_context_status_buffer & + GEN8_CSB_PTR_MASK) << 8)); } static int execlists_context_queue(struct drm_i915_gem_request *request) @@ -1422,6 +1424,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + u8 next_context_status_buffer_hw; I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask)); I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff); @@ -1436,7 +1439,29 @@ static int gen8_init_common_ring(struct intel_engine_cs *ring) _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); POSTING_READ(RING_MODE_GEN7(ring)); - ring->next_context_status_buffer = 0; + + /* + * Instead of resetting the Context Status Buffer (CSB) read pointer to + * zero, we need to read the write pointer from hardware and use its + * value because "this register is power context save restored". + * Effectively, these states have been observed: + * + * | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) | + * BDW | CSB regs not reset | CSB regs reset | + * CHT | CSB regs not reset | CSB regs not reset | + */ + next_context_status_buffer_hw = (I915_READ(RING_CONTEXT_STATUS_PTR(ring)) + & GEN8_CSB_PTR_MASK); + + /* + * When the CSB registers are reset (also after power-up / gpu reset), + * CSB write pointer is set to all 1's, which is not valid, use '5' in + * this special case, so the first element read is CSB[0]. + */ + if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK) + next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1); + + ring->next_context_status_buffer = next_context_status_buffer_hw; DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name); memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 64f89f9982a2..3c63bb32ad81 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -25,6 +25,8 @@ #define _INTEL_LRC_H_ #define GEN8_LR_CONTEXT_ALIGN 4096 +#define GEN8_CSB_ENTRIES 6 +#define GEN8_CSB_PTR_MASK 0x07 /* Execlists regs */ #define RING_ELSP(ring) ((ring)->mmio_base+0x230) From 3a48d13d76c0088a988a2e4f5b4d94872bdf58f3 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 7 Sep 2015 20:06:57 +0530 Subject: [PATCH 101/240] tile: fix build failure When building with allmodconfig the build was failing with the error: arch/tile/kernel/usb.c:70:1: warning: data definition has no type or storage class [enabled by default] arch/tile/kernel/usb.c:70:1: error: type defaults to 'int' in declaration of 'arch_initcall' [-Werror=implicit-int] arch/tile/kernel/usb.c:70:1: warning: parameter names (without types) in function declaration [enabled by default] arch/tile/kernel/usb.c:63:19: warning: 'tilegx_usb_init' defined but not used [-Wunused-function] Include linux/module.h to resolve the build failure. Signed-off-by: Sudip Mukherjee Signed-off-by: Chris Metcalf --- arch/tile/kernel/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/tile/kernel/usb.c b/arch/tile/kernel/usb.c index f0da5a237e94..9f1e05e12255 100644 --- a/arch/tile/kernel/usb.c +++ b/arch/tile/kernel/usb.c @@ -22,6 +22,7 @@ #include #include #include +#include #include static u64 ehci_dmamask = DMA_BIT_MASK(32); From d79bdc7f004404204a6ac07785f8d6717070ecdb Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 28 Sep 2015 15:59:22 -0700 Subject: [PATCH 102/240] Input: omap4-keypad - fix memory leak If omap4_keypad_parse_dt() fails we returned the error code but we missed releasing keypad_data. Signed-off-by: Sudip Mukherjee Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/omap4-keypad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c index b052afec9a11..6639b2b8528a 100644 --- a/drivers/input/keyboard/omap4-keypad.c +++ b/drivers/input/keyboard/omap4-keypad.c @@ -266,7 +266,7 @@ static int omap4_keypad_probe(struct platform_device *pdev) error = omap4_keypad_parse_dt(&pdev->dev, keypad_data); if (error) - return error; + goto err_free_keypad; res = request_mem_region(res->start, resource_size(res), pdev->name); if (!res) { From d3b367bc26ea2e07a83fe73f0ccbddd729cb1f9a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 28 Sep 2015 16:38:07 -0700 Subject: [PATCH 103/240] Input: zhenhua - ensure we have BITREVERSE It uses bitrev8(), so it must ensure that lib/bitrev.o gets included in vmlinux. Signed-off-by: Andrew Morton Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig index 56eb471b5576..4215b5382092 100644 --- a/drivers/input/joystick/Kconfig +++ b/drivers/input/joystick/Kconfig @@ -196,6 +196,7 @@ config JOYSTICK_TWIDJOY config JOYSTICK_ZHENHUA tristate "5-byte Zhenhua RC transmitter" select SERIO + select BITREVERSE help Say Y here if you have a Zhen Hua PPM-4CH transmitter which is supplied with a ready to fly micro electric indoor helicopters From 22ef28b43f2c70edf5618918a49cbda84795c0a5 Mon Sep 17 00:00:00 2001 From: duson Date: Mon, 28 Sep 2015 17:17:01 -0700 Subject: [PATCH 104/240] Input: elan_i2c - add all valid ic type for i2c/smbus Signed-off-by: Duson Lin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index b51062622050..5e1665bbaa0b 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -98,16 +98,25 @@ static int elan_get_fwinfo(u8 iap_version, u16 *validpage_count, u16 *signature_address) { switch (iap_version) { + case 0x00: + case 0x06: case 0x08: *validpage_count = 512; break; - case 0x09: case 0x03: + case 0x07: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: *validpage_count = 768; break; case 0x0D: *validpage_count = 896; break; + case 0x0E: + *validpage_count = 640; + break; default: /* unknown ic type clear value */ *validpage_count = 0; From c4bbac3913c0d649898a0d767728a585869a7d7d Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Mon, 28 Sep 2015 11:21:48 -0700 Subject: [PATCH 105/240] i40e: fix VLAN inside VXLAN Previously to this patch, the hardware was removing VLAN tags from the inner header of VXLAN packets. The hardware configuration can be changed to leave the packet alone since that is what the linux stack expects for this type of VLAN in VXLAN packet. Signed-off-by: Jesse Brandeburg Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 851c1a159be8..2fdf978ae6a5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2672,7 +2672,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) rx_ctx.lrxqthresh = 2; rx_ctx.crcstrip = 1; rx_ctx.l2tsel = 1; - rx_ctx.showiv = 1; + /* this controls whether VLAN is stripped from inner headers */ + rx_ctx.showiv = 0; #ifdef I40E_FCOE rx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE); #endif From 43ae93a93e8c95c5e6389dc8e11704712b1ab2e9 Mon Sep 17 00:00:00 2001 From: Mitch Williams Date: Mon, 28 Sep 2015 17:31:26 -0700 Subject: [PATCH 106/240] i40e/i40evf: check for stopped admin queue It's possible that while we are waiting for the spinlock, another entity (that owns the spinlock) has shut down the admin queue. If we then attempt to use the queue, we will panic. Add a check for this condition on the receive side. This matches an existing check on the send queue side. Signed-off-by: Mitch Williams Acked-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 9 +++++++++ drivers/net/ethernet/intel/i40evf/i40e_adminq.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 3e0d20037675..62488a67149d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -946,6 +946,13 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); + if (hw->aq.arq.count == 0) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + "AQRX: Admin queue not initialized.\n"); + ret_code = I40E_ERR_QUEUE_EMPTY; + goto clean_arq_element_err; + } + /* set next_to_use to head */ ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK); if (ntu == ntc) { @@ -1007,6 +1014,8 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, /* Set pending if needed, unlock and return */ if (pending != NULL) *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); + +clean_arq_element_err: mutex_unlock(&hw->aq.arq_mutex); if (i40e_is_nvm_update_op(&e->desc)) { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c index f08450b90774..929d47152bf2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c @@ -887,6 +887,13 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, /* take the lock before we start messing with the ring */ mutex_lock(&hw->aq.arq_mutex); + if (hw->aq.arq.count == 0) { + i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + "AQRX: Admin queue not initialized.\n"); + ret_code = I40E_ERR_QUEUE_EMPTY; + goto clean_arq_element_err; + } + /* set next_to_use to head */ ntu = (rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK); if (ntu == ntc) { @@ -948,6 +955,8 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw, /* Set pending if needed, unlock and return */ if (pending != NULL) *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc); + +clean_arq_element_err: mutex_unlock(&hw->aq.arq_mutex); return ret_code; From f05940e61845951517eda02a28ccc091888aaab9 Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:06 -0400 Subject: [PATCH 107/240] sctp: Whitespace fix Fix indentation in sctp_generate_heartbeat_event. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 35df1266bf07..f554b9a96e07 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -388,8 +388,8 @@ void sctp_generate_heartbeat_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); - if (error) - asoc->base.sk->sk_err = -error; + if (error) + asoc->base.sk->sk_err = -error; out_unlock: bh_unlock_sock(asoc->base.sk); From 635682a14427d241bab7bbdeebb48a7d7b91638e Mon Sep 17 00:00:00 2001 From: Karl Heiss Date: Thu, 24 Sep 2015 12:15:07 -0400 Subject: [PATCH 108/240] sctp: Prevent soft lockup when sctp_accept() is called during a timeout event A case can occur when sctp_accept() is called by the user during a heartbeat timeout event after the 4-way handshake. Since sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the bh_sock_lock in sctp_generate_heartbeat_event() will be taken with the listening socket but released with the new association socket. The result is a deadlock on any future attempts to take the listening socket lock. Note that this race can occur with other SCTP timeouts that take the bh_lock_sock() in the event sctp_accept() is called. BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0] ... RIP: 0010:[] [] _spin_lock+0x1e/0x30 RSP: 0018:ffff880028323b20 EFLAGS: 00000206 RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48 RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000 R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0 R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225 FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0) Stack: ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00 ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8 Call Trace: [] ? sctp_rcv+0x492/0xa10 [sctp] [] ? nf_iterate+0x69/0xb0 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? nf_hook_slow+0x76/0x120 [] ? ip_local_deliver_finish+0x0/0x2d0 [] ? ip_local_deliver_finish+0xdd/0x2d0 [] ? ip_local_deliver+0x98/0xa0 [] ? ip_rcv_finish+0x12d/0x440 [] ? ip_rcv+0x275/0x350 [] ? __netif_receive_skb+0x4ab/0x750 ... With lockdep debugging: ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- CslRx/12087 is trying to release lock (slock-AF_INET) at: [] sctp_generate_timeout_event+0x40/0xe0 [sctp] but there are no more locks to release! other info that might help us debug this: 2 locks held by CslRx/12087: #0: (&asoc->timers[i]){+.-...}, at: [] run_timer_softirq+0x16f/0x3e0 #1: (slock-AF_INET){+.-...}, at: [] sctp_generate_timeout_event+0x23/0xe0 [sctp] Ensure the socket taken is also the same one that is released by saving a copy of the socket before entering the timeout event critical section. Signed-off-by: Karl Heiss Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 42 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f554b9a96e07..6098d4c42fa9 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) int error; struct sctp_transport *transport = (struct sctp_transport *) peer; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); /* Check whether a task is in the sock. */ - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -285,11 +286,12 @@ void sctp_generate_t3_rtx_event(unsigned long peer) static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_event_timeout_t timeout_type) { - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); int error = 0; - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy: timer %d\n", __func__, timeout_type); @@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, (void *)timeout_type, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } @@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data) int error = 0; struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsigned long data) transport, GFP_ATOMIC); if (error) - asoc->base.sk->sk_err = -error; + sk->sk_err = -error; out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_transport_put(transport); } @@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) { struct sctp_transport *transport = (struct sctp_transport *) data; struct sctp_association *asoc = transport->asoc; - struct net *net = sock_net(asoc->base.sk); + struct sock *sk = asoc->base.sk; + struct net *net = sock_net(sk); - bh_lock_sock(asoc->base.sk); - if (sock_owned_by_user(asoc->base.sk)) { + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ @@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); out_unlock: - bh_unlock_sock(asoc->base.sk); + bh_unlock_sock(sk); sctp_association_put(asoc); } From 661dfc65f7981481ba2e31aaa702371e82336e56 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Fri, 25 Sep 2015 11:52:27 +0400 Subject: [PATCH 109/240] net/ibm/emac: bump version numbers for correct work with ethtool The size of the MAC register dump used to be the size specified by the reg property in the device tree. Userland has no good way of finding out that size, and it was not specified consistently for each MAC type, so ethtool would end up printing junk at the end of the register dump if the device tree didn't match the size it assumed. Using the new version numbers indicates unambiguously that the size of the MAC register dump is dependent only on the MAC type. Fixes: 5369c71f7ca2 ("net/ibm/emac: fix size of emac dump memory areas") Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/core.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h index 28df37420da9..ac02c675c59c 100644 --- a/drivers/net/ethernet/ibm/emac/core.h +++ b/drivers/net/ethernet/ibm/emac/core.h @@ -460,8 +460,8 @@ struct emac_ethtool_regs_subhdr { u32 index; }; -#define EMAC_ETHTOOL_REGS_VER 0 -#define EMAC4_ETHTOOL_REGS_VER 1 -#define EMAC4SYNC_ETHTOOL_REGS_VER 2 +#define EMAC_ETHTOOL_REGS_VER 3 +#define EMAC4_ETHTOOL_REGS_VER 4 +#define EMAC4SYNC_ETHTOOL_REGS_VER 5 #endif /* __IBM_NEWEMAC_CORE_H */ From 06a15f51cf3618e32a73871ee6a547ef7fd902b5 Mon Sep 17 00:00:00 2001 From: Alexander Couzens Date: Mon, 28 Sep 2015 11:32:42 +0200 Subject: [PATCH 110/240] l2tp: protect tunnel->del_work by ref_count There is a small chance that tunnel_free() is called before tunnel->del_work scheduled resulting in a zero pointer dereference. Signed-off-by: Alexander Couzens Acked-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index f6b090df3930..afca2eb4dfa7 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work) tunnel = container_of(work, struct l2tp_tunnel, del_work); sk = l2tp_tunnel_sock_lookup(tunnel); if (!sk) - return; + goto out; sock = sk->sk_socket; @@ -1341,6 +1341,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work) } l2tp_tunnel_sock_put(sk); +out: + l2tp_tunnel_dec_refcount(tunnel); } /* Create a socket for the tunnel, if one isn't set up by @@ -1636,8 +1638,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); */ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { + l2tp_tunnel_inc_refcount(tunnel); l2tp_tunnel_closeall(tunnel); - return (false == queue_work(l2tp_wq, &tunnel->del_work)); + if (false == queue_work(l2tp_wq, &tunnel->del_work)) { + l2tp_tunnel_dec_refcount(tunnel); + return 1; + } + return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); From 2103d6b818fcdae15ffa04cf385f770e6c3892c3 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 28 Sep 2015 14:34:04 +0200 Subject: [PATCH 111/240] net: sctp: Don't use 64 kilobyte lookup table for four elements Seemingly innocuous sctp_trans_state_to_prio_map[] array is way bigger than it looks, since "[SCTP_UNKNOWN] = 2" expands into "[0xffff] = 2" ! This patch replaces it with switch() statement. Signed-off-by: Denys Vlasenko CC: Vlad Yasevich CC: Neil Horman CC: Marcelo Ricardo Leitner CC: linux-sctp@vger.kernel.org CC: netdev@vger.kernel.org CC: linux-kernel@vger.kernel.org Acked-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 197c3f59ecbf..b00f1f9611d6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1208,20 +1208,22 @@ void sctp_assoc_update(struct sctp_association *asoc, * within this document. * * Our basic strategy is to round-robin transports in priorities - * according to sctp_state_prio_map[] e.g., if no such + * according to sctp_trans_score() e.g., if no such * transport with state SCTP_ACTIVE exists, round-robin through * SCTP_UNKNOWN, etc. You get the picture. */ -static const u8 sctp_trans_state_to_prio_map[] = { - [SCTP_ACTIVE] = 3, /* best case */ - [SCTP_UNKNOWN] = 2, - [SCTP_PF] = 1, - [SCTP_INACTIVE] = 0, /* worst case */ -}; - static u8 sctp_trans_score(const struct sctp_transport *trans) { - return sctp_trans_state_to_prio_map[trans->state]; + switch (trans->state) { + case SCTP_ACTIVE: + return 3; /* best case */ + case SCTP_UNKNOWN: + return 2; + case SCTP_PF: + return 1; + default: /* case SCTP_INACTIVE */ + return 0; /* worst case */ + } } static struct sctp_transport *sctp_trans_elect_tie(struct sctp_transport *trans1, From 43934ece2ea72c1dd279c0b0478c1a036d5d77ee Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 14 Sep 2015 12:18:55 +0200 Subject: [PATCH 112/240] mmc: core: Don't return an error for CD/WP GPIOs when GPIOLIB is unset When CONFIG_GPIOLIB is unset, its stubs will return -ENOSYS. That means when the mmc core parses DT for CD/WP GPIOs via mmc_of_parse(), -ENOSYS becomes propagated to the caller. Typically this means that the mmc host driver fails to probe. As the CD/WP GPIOs are already treated as optional, let's extend that to cover the case when CONFIG_GPIOLIB is unset. Reported-by: Michal Simek Fixes: 16b23787fc70 ("mmc: sdhci-of-arasan: Call OF parsing for MMC") Signed-off-by: Ulf Hansson Tested-by: Michal Simek Acked-by: Venu Byravarasu --- drivers/mmc/core/host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index abd933b7029b..5466f25f0281 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -457,7 +457,7 @@ int mmc_of_parse(struct mmc_host *host) 0, &cd_gpio_invert); if (!ret) dev_info(host->parent, "Got CD GPIO\n"); - else if (ret != -ENOENT) + else if (ret != -ENOENT && ret != -ENOSYS) return ret; /* @@ -481,7 +481,7 @@ int mmc_of_parse(struct mmc_host *host) ret = mmc_gpiod_request_ro(host, "wp", 0, false, 0, &ro_gpio_invert); if (!ret) dev_info(host->parent, "Got WP GPIO\n"); - else if (ret != -ENOENT) + else if (ret != -ENOENT && ret != -ENOSYS) return ret; if (of_property_read_bool(np, "disable-wp")) From 51424b2860670ec20e1dd5177fe70ab4b6fd7a5b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 23 Sep 2015 22:06:48 +0200 Subject: [PATCH 113/240] mmc: sunxi: Fix clk-delay settings In recent allwinner kernel sources the mmc clk-delay settings have been slightly tweaked, and for sun9i they are completely different then what we are using. This commit brings us in sync with what allwinner does, fixing problems accessing sdcards on some A33 devices (and likely others). For pre sun9i hardware this makes the following changes: -At 400Khz change the sample delay from 7 to 0 (introduced in A31 sdk) -At 50 Mhz change the sample delay from 5 to 4 (introduced in A23 sdk) This also drops the clk-delay calculation for clocks > 50 MHz, we do not need this as we've: mmc->f_max = 50000000, and the delays in the old code were not correct (at 100 MHz the delay must be a multiple of 60, at 200 MHz a multiple of 120). Signed-off-by: Hans de Goede Signed-off-by: Ulf Hansson --- drivers/mmc/host/sunxi-mmc.c | 53 ++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index a7b7a6771598..b981b8552e43 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -210,6 +210,16 @@ #define SDXC_IDMAC_DES0_CES BIT(30) /* card error summary */ #define SDXC_IDMAC_DES0_OWN BIT(31) /* 1-idma owns it, 0-host owns it */ +#define SDXC_CLK_400K 0 +#define SDXC_CLK_25M 1 +#define SDXC_CLK_50M 2 +#define SDXC_CLK_50M_DDR 3 + +struct sunxi_mmc_clk_delay { + u32 output; + u32 sample; +}; + struct sunxi_idma_des { u32 config; u32 buf_size; @@ -229,6 +239,7 @@ struct sunxi_mmc_host { struct clk *clk_mmc; struct clk *clk_sample; struct clk *clk_output; + const struct sunxi_mmc_clk_delay *clk_delays; /* irq */ spinlock_t lock; @@ -654,25 +665,19 @@ static int sunxi_mmc_clk_set_rate(struct sunxi_mmc_host *host, /* determine delays */ if (rate <= 400000) { - oclk_dly = 180; - sclk_dly = 42; + oclk_dly = host->clk_delays[SDXC_CLK_400K].output; + sclk_dly = host->clk_delays[SDXC_CLK_400K].sample; } else if (rate <= 25000000) { - oclk_dly = 180; - sclk_dly = 75; + oclk_dly = host->clk_delays[SDXC_CLK_25M].output; + sclk_dly = host->clk_delays[SDXC_CLK_25M].sample; } else if (rate <= 50000000) { if (ios->timing == MMC_TIMING_UHS_DDR50) { - oclk_dly = 60; - sclk_dly = 120; + oclk_dly = host->clk_delays[SDXC_CLK_50M_DDR].output; + sclk_dly = host->clk_delays[SDXC_CLK_50M_DDR].sample; } else { - oclk_dly = 90; - sclk_dly = 150; + oclk_dly = host->clk_delays[SDXC_CLK_50M].output; + sclk_dly = host->clk_delays[SDXC_CLK_50M].sample; } - } else if (rate <= 100000000) { - oclk_dly = 6; - sclk_dly = 24; - } else if (rate <= 200000000) { - oclk_dly = 3; - sclk_dly = 12; } else { return -EINVAL; } @@ -871,6 +876,7 @@ static void sunxi_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) static const struct of_device_id sunxi_mmc_of_match[] = { { .compatible = "allwinner,sun4i-a10-mmc", }, { .compatible = "allwinner,sun5i-a13-mmc", }, + { .compatible = "allwinner,sun9i-a80-mmc", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sunxi_mmc_of_match); @@ -884,6 +890,20 @@ static struct mmc_host_ops sunxi_mmc_ops = { .hw_reset = sunxi_mmc_hw_reset, }; +static const struct sunxi_mmc_clk_delay sunxi_mmc_clk_delays[] = { + [SDXC_CLK_400K] = { .output = 180, .sample = 180 }, + [SDXC_CLK_25M] = { .output = 180, .sample = 75 }, + [SDXC_CLK_50M] = { .output = 90, .sample = 120 }, + [SDXC_CLK_50M_DDR] = { .output = 60, .sample = 120 }, +}; + +static const struct sunxi_mmc_clk_delay sun9i_mmc_clk_delays[] = { + [SDXC_CLK_400K] = { .output = 180, .sample = 180 }, + [SDXC_CLK_25M] = { .output = 180, .sample = 75 }, + [SDXC_CLK_50M] = { .output = 150, .sample = 120 }, + [SDXC_CLK_50M_DDR] = { .output = 90, .sample = 120 }, +}; + static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host, struct platform_device *pdev) { @@ -895,6 +915,11 @@ static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host, else host->idma_des_size_bits = 16; + if (of_device_is_compatible(np, "allwinner,sun9i-a80-mmc")) + host->clk_delays = sun9i_mmc_clk_delays; + else + host->clk_delays = sunxi_mmc_clk_delays; + ret = mmc_regulator_get_supply(host->mmc); if (ret) { if (ret != -EPROBE_DEFER) From fd546ee6a7dc4b71ebc6d1205bf72ea3c1c7030a Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sat, 26 Sep 2015 21:41:01 +0200 Subject: [PATCH 114/240] mmc: pxamci: fix card detect with slot-gpio API Move pxamci to mmc slot-gpio API to fix interrupt request. It fixes the case where the card detection is on a gpio expander, on I2C for example on zylonite board. In this case, the card detect netsted interrupt is called from a threaded interrupt. The request_irq() fails, because a hard irq cannot be a nested interrupt from a threaded interrupt (set __setup_irq()). This was tested on zylonite and mioa701 boards. Signed-off-by: Robert Jarzmik Cc: Petr Cvek Signed-off-by: Ulf Hansson --- drivers/mmc/host/pxamci.c | 66 +++++++++++++-------------------------- 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 1420f29628c7..8cadd74e8407 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -454,12 +455,8 @@ static int pxamci_get_ro(struct mmc_host *mmc) { struct pxamci_host *host = mmc_priv(mmc); - if (host->pdata && gpio_is_valid(host->pdata->gpio_card_ro)) { - if (host->pdata->gpio_card_ro_invert) - return !gpio_get_value(host->pdata->gpio_card_ro); - else - return gpio_get_value(host->pdata->gpio_card_ro); - } + if (host->pdata && gpio_is_valid(host->pdata->gpio_card_ro)) + return mmc_gpio_get_ro(mmc); if (host->pdata && host->pdata->get_ro) return !!host->pdata->get_ro(mmc_dev(mmc)); /* @@ -551,6 +548,7 @@ static void pxamci_enable_sdio_irq(struct mmc_host *host, int enable) static const struct mmc_host_ops pxamci_ops = { .request = pxamci_request, + .get_cd = mmc_gpio_get_cd, .get_ro = pxamci_get_ro, .set_ios = pxamci_set_ios, .enable_sdio_irq = pxamci_enable_sdio_irq, @@ -790,37 +788,31 @@ static int pxamci_probe(struct platform_device *pdev) gpio_power = host->pdata->gpio_power; } if (gpio_is_valid(gpio_power)) { - ret = gpio_request(gpio_power, "mmc card power"); + ret = devm_gpio_request(&pdev->dev, gpio_power, + "mmc card power"); if (ret) { - dev_err(&pdev->dev, "Failed requesting gpio_power %d\n", gpio_power); + dev_err(&pdev->dev, "Failed requesting gpio_power %d\n", + gpio_power); goto out; } gpio_direction_output(gpio_power, host->pdata->gpio_power_invert); } - if (gpio_is_valid(gpio_ro)) { - ret = gpio_request(gpio_ro, "mmc card read only"); - if (ret) { - dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro); - goto err_gpio_ro; - } - gpio_direction_input(gpio_ro); + if (gpio_is_valid(gpio_ro)) + ret = mmc_gpio_request_ro(mmc, gpio_ro); + if (ret) { + dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro); + goto out; + } else { + mmc->caps |= host->pdata->gpio_card_ro_invert ? + MMC_CAP2_RO_ACTIVE_HIGH : 0; } - if (gpio_is_valid(gpio_cd)) { - ret = gpio_request(gpio_cd, "mmc card detect"); - if (ret) { - dev_err(&pdev->dev, "Failed requesting gpio_cd %d\n", gpio_cd); - goto err_gpio_cd; - } - gpio_direction_input(gpio_cd); - ret = request_irq(gpio_to_irq(gpio_cd), pxamci_detect_irq, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, - "mmc card detect", mmc); - if (ret) { - dev_err(&pdev->dev, "failed to request card detect IRQ\n"); - goto err_request_irq; - } + if (gpio_is_valid(gpio_cd)) + ret = mmc_gpio_request_cd(mmc, gpio_cd, 0); + if (ret) { + dev_err(&pdev->dev, "Failed requesting gpio_cd %d\n", gpio_cd); + goto out; } if (host->pdata && host->pdata->init) @@ -835,13 +827,7 @@ static int pxamci_probe(struct platform_device *pdev) return 0; -err_request_irq: - gpio_free(gpio_cd); -err_gpio_cd: - gpio_free(gpio_ro); -err_gpio_ro: - gpio_free(gpio_power); - out: +out: if (host) { if (host->dma_chan_rx) dma_release_channel(host->dma_chan_rx); @@ -873,14 +859,6 @@ static int pxamci_remove(struct platform_device *pdev) gpio_ro = host->pdata->gpio_card_ro; gpio_power = host->pdata->gpio_power; } - if (gpio_is_valid(gpio_cd)) { - free_irq(gpio_to_irq(gpio_cd), mmc); - gpio_free(gpio_cd); - } - if (gpio_is_valid(gpio_ro)) - gpio_free(gpio_ro); - if (gpio_is_valid(gpio_power)) - gpio_free(gpio_power); if (host->vcc) regulator_put(host->vcc); From cf6f54e3f133229f02a90c04fe0ff9dd9d3264b4 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 8 Jul 2015 11:46:36 +0200 Subject: [PATCH 115/240] UBIFS: Kill unneeded locking in ubifs_init_security Fixes the following lockdep splat: [ 1.244527] ============================================= [ 1.245193] [ INFO: possible recursive locking detected ] [ 1.245193] 4.2.0-rc1+ #37 Not tainted [ 1.245193] --------------------------------------------- [ 1.245193] cp/742 is trying to acquire lock: [ 1.245193] (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [] ubifs_init_security+0x29/0xb0 [ 1.245193] [ 1.245193] but task is already holding lock: [ 1.245193] (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [] path_openat+0x3af/0x1280 [ 1.245193] [ 1.245193] other info that might help us debug this: [ 1.245193] Possible unsafe locking scenario: [ 1.245193] [ 1.245193] CPU0 [ 1.245193] ---- [ 1.245193] lock(&sb->s_type->i_mutex_key#9); [ 1.245193] lock(&sb->s_type->i_mutex_key#9); [ 1.245193] [ 1.245193] *** DEADLOCK *** [ 1.245193] [ 1.245193] May be due to missing lock nesting notation [ 1.245193] [ 1.245193] 2 locks held by cp/742: [ 1.245193] #0: (sb_writers#5){.+.+.+}, at: [] mnt_want_write+0x1f/0x50 [ 1.245193] #1: (&sb->s_type->i_mutex_key#9){+.+.+.}, at: [] path_openat+0x3af/0x1280 [ 1.245193] [ 1.245193] stack backtrace: [ 1.245193] CPU: 2 PID: 742 Comm: cp Not tainted 4.2.0-rc1+ #37 [ 1.245193] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140816_022509-build35 04/01/2014 [ 1.245193] ffffffff8252d530 ffff88007b023a38 ffffffff814f6f49 ffffffff810b56c5 [ 1.245193] ffff88007c30cc80 ffff88007b023af8 ffffffff810a150d ffff88007b023a68 [ 1.245193] 000000008101302a ffff880000000000 00000008f447e23f ffffffff8252d500 [ 1.245193] Call Trace: [ 1.245193] [] dump_stack+0x4c/0x65 [ 1.245193] [] ? console_unlock+0x1c5/0x510 [ 1.245193] [] __lock_acquire+0x1a6d/0x1ea0 [ 1.245193] [] ? __lock_is_held+0x58/0x80 [ 1.245193] [] lock_acquire+0xd3/0x270 [ 1.245193] [] ? ubifs_init_security+0x29/0xb0 [ 1.245193] [] mutex_lock_nested+0x6b/0x3a0 [ 1.245193] [] ? ubifs_init_security+0x29/0xb0 [ 1.245193] [] ? ubifs_init_security+0x29/0xb0 [ 1.245193] [] ubifs_init_security+0x29/0xb0 [ 1.245193] [] ubifs_create+0xa6/0x1f0 [ 1.245193] [] ? path_openat+0x3af/0x1280 [ 1.245193] [] vfs_create+0x95/0xc0 [ 1.245193] [] path_openat+0x7cc/0x1280 [ 1.245193] [] ? __lock_acquire+0x543/0x1ea0 [ 1.245193] [] ? sched_clock_cpu+0x90/0xc0 [ 1.245193] [] ? calc_global_load_tick+0x60/0x90 [ 1.245193] [] ? sched_clock_cpu+0x90/0xc0 [ 1.245193] [] ? __alloc_fd+0xaf/0x180 [ 1.245193] [] do_filp_open+0x75/0xd0 [ 1.245193] [] ? _raw_spin_unlock+0x26/0x40 [ 1.245193] [] ? __alloc_fd+0xaf/0x180 [ 1.245193] [] do_sys_open+0x129/0x200 [ 1.245193] [] SyS_open+0x19/0x20 [ 1.245193] [] entry_SYSCALL_64_fastpath+0x12/0x6f While the lockdep splat is a false positive, becuase path_openat holds i_mutex of the parent directory and ubifs_init_security() tries to acquire i_mutex of a new inode, it reveals that taking i_mutex in ubifs_init_security() is in vain because it is only being called in the inode allocation path and therefore nobody else can see the inode yet. Cc: stable@vger.kernel.org # 3.20- Reported-and-tested-by: Boris Brezillon Reviewed-and-tested-by: Dongsheng Yang Signed-off-by: Richard Weinberger Signed-off-by: dedekind1@gmail.com --- fs/ubifs/xattr.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 96f3448b6eb4..fd65b3f1923c 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -652,11 +652,8 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode, { int err; - mutex_lock(&inode->i_mutex); err = security_inode_init_security(inode, dentry, qstr, &init_xattrs, 0); - mutex_unlock(&inode->i_mutex); - if (err) { struct ubifs_info *c = dentry->i_sb->s_fs_info; ubifs_err(c, "cannot initialize security for inode %lu, error %d", From 281fda27673f833a01d516658a64d22a32c8e072 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 22 Sep 2015 23:58:07 +0200 Subject: [PATCH 116/240] UBI: Validate data_size Make sure that data_size is less than LEB size. Otherwise a handcrafted UBI image is able to trigger an out of bounds memory access in ubi_compare_lebs(). Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger Reviewed-by: David Gstir --- drivers/mtd/ubi/io.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index 5bbd1f094f4e..1fc23e48fe8e 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -926,6 +926,11 @@ static int validate_vid_hdr(const struct ubi_device *ubi, goto bad; } + if (data_size > ubi->leb_size) { + ubi_err(ubi, "bad data_size"); + goto bad; + } + if (vol_type == UBI_VID_STATIC) { /* * Although from high-level point of view static volumes may From 7c7feb2ebfc9c0552c51f0c050db1d1a004faac5 Mon Sep 17 00:00:00 2001 From: shengyong Date: Mon, 28 Sep 2015 17:57:19 +0000 Subject: [PATCH 117/240] UBI: return ENOSPC if no enough space available UBI: attaching mtd1 to ubi0 UBI: scanning is finished UBI error: init_volumes: not enough PEBs, required 706, available 686 UBI error: ubi_wl_init: no enough physical eraseblocks (-20, need 1) UBI error: ubi_attach_mtd_dev: failed to attach mtd1, error -12 <= NOT ENOMEM UBI error: ubi_init: cannot attach mtd1 If available PEBs are not enough when initializing volumes, return -ENOSPC directly. If available PEBs are not enough when initializing WL, return -ENOSPC instead of -ENOMEM. Cc: stable@vger.kernel.org Signed-off-by: Sheng Yong Signed-off-by: Richard Weinberger Reviewed-by: David Gstir --- drivers/mtd/ubi/vtbl.c | 1 + drivers/mtd/ubi/wl.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c index 80bdd5b88bac..d85c19762160 100644 --- a/drivers/mtd/ubi/vtbl.c +++ b/drivers/mtd/ubi/vtbl.c @@ -649,6 +649,7 @@ static int init_volumes(struct ubi_device *ubi, if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + return -ENOSPC; } ubi->rsvd_pebs += reserved_pebs; ubi->avail_pebs -= reserved_pebs; diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 275d9fb6fe5c..eb4489f9082f 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1601,6 +1601,7 @@ int ubi_wl_init(struct ubi_device *ubi, struct ubi_attach_info *ai) if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); + err = -ENOSPC; goto out_free; } ubi->avail_pebs -= reserved_pebs; From ccc42592d436d021d17f86729d24806f30dbad25 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 20 Sep 2015 07:00:10 -0700 Subject: [PATCH 118/240] clocksource/drivers/rockchip: Fix bad NO_IRQ usage The current code assumes the 'irq_of_parse_and_map' will return NO_IRQ in case of failure. Unfortunately, the NO_IRQ is not consistent across the different architectures and we must not rely on it. NO_IRQ is equal to '-1' on ARM and 'irq_of_parse_and_map' returns '0' in case of an error. Hence, the latter won't be detected and will lead to a crash. Fix this by just checking 'irq' is different from zero. Signed-off-by: Daniel Lezcano --- drivers/clocksource/rockchip_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/rockchip_timer.c b/drivers/clocksource/rockchip_timer.c index bb2c2b050964..d3c1742ded1a 100644 --- a/drivers/clocksource/rockchip_timer.c +++ b/drivers/clocksource/rockchip_timer.c @@ -148,7 +148,7 @@ static void __init rk_timer_init(struct device_node *np) bc_timer.freq = clk_get_rate(timer_clk); irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (!irq) { pr_err("Failed to map interrupts for '%s'\n", TIMER_NAME); return; } From bdf7344e14d826d0df438a55fc51146d179e198d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Sun, 20 Sep 2015 07:00:35 -0700 Subject: [PATCH 119/240] clocksource/drivers/keystone: Fix bad NO_IRQ usage The current code assumes the 'irq_of_parse_and_map' will return NO_IRQ in case of failure. Unfortunately, the NO_IRQ is not consistent across the different architectures and we must not rely on it. NO_IRQ is equal to '-1' on ARM and 'irq_of_parse_and_map' returns '0' in case of an error. Hence, the latter won't be detected and will lead to a crash. Fix this by just checking 'irq' is different from zero. Signed-off-by: Daniel Lezcano --- drivers/clocksource/timer-keystone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c index edacf3902e10..1cea08cf603e 100644 --- a/drivers/clocksource/timer-keystone.c +++ b/drivers/clocksource/timer-keystone.c @@ -152,7 +152,7 @@ static void __init keystone_timer_init(struct device_node *np) int irq, error; irq = irq_of_parse_and_map(np, 0); - if (irq == NO_IRQ) { + if (!irq) { pr_err("%s: failed to map interrupts\n", __func__); return; } From 1356aae08338f1c19ce1c67bf8c543a267688fc3 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:19 +0900 Subject: [PATCH 120/240] blk-mq: avoid setting hctx->tags->cpumask before allocation When unmapped hw queue is remapped after CPU topology is changed, hctx->tags->cpumask has to be set after hctx->tags is setup in blk_mq_map_swqueue(), otherwise it causes null pointer dereference. Fixes: f26cdc8536 ("blk-mq: Shared tag enhancements") Signed-off-by: Akinobu Mita Cc: Keith Busch Cc: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index f2d67b4047a0..2fd7283ec62b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1811,7 +1811,6 @@ static void blk_mq_map_swqueue(struct request_queue *q) hctx = q->mq_ops->map_queue(q, i); cpumask_set_cpu(i, hctx->cpumask); - cpumask_set_cpu(i, hctx->tags->cpumask); ctx->index_hw = hctx->nr_ctx; hctx->ctxs[hctx->nr_ctx++] = ctx; } @@ -1851,6 +1850,14 @@ static void blk_mq_map_swqueue(struct request_queue *q) hctx->next_cpu = cpumask_first(hctx->cpumask); hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; } + + queue_for_each_ctx(q, ctx, i) { + if (!cpu_online(i)) + continue; + + hctx = q->mq_ops->map_queue(q, i); + cpumask_set_cpu(i, hctx->tags->cpumask); + } } static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set) From 4593fdbe7a2f44d5e64c627c715dd0bcec9bdf14 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:20 +0900 Subject: [PATCH 121/240] blk-mq: fix sysfs registration/unregistration race There is a race between cpu hotplug handling and adding/deleting gendisk for blk-mq, where both are trying to register and unregister the same sysfs entries. null_add_dev --> blk_mq_init_queue --> blk_mq_init_allocated_queue --> add to 'all_q_list' (*) --> add_disk --> blk_register_queue --> blk_mq_register_disk (++) null_del_dev --> del_gendisk --> blk_unregister_queue --> blk_mq_unregister_disk (--) --> blk_cleanup_queue --> blk_mq_free_queue --> del from 'all_q_list' (*) blk_mq_queue_reinit --> blk_mq_sysfs_unregister (-) --> blk_mq_sysfs_register (+) While the request queue is added to 'all_q_list' (*), blk_mq_queue_reinit() can be called for the queue anytime by CPU hotplug callback. But blk_mq_sysfs_unregister (-) and blk_mq_sysfs_register (+) in blk_mq_queue_reinit must not be called before blk_mq_register_disk (++) and after blk_mq_unregister_disk (--) is finished. Because '/sys/block/*/mq/' is not exists. There has already been BLK_MQ_F_SYSFS_UP flag in hctx->flags which can be used to track these sysfs stuff, but it is only fixing this issue partially. In order to fix it completely, we just need per-queue flag instead of per-hctx flag with appropriate locking. So this introduces q->mq_sysfs_init_done which is properly protected with all_q_mutex. Also, we need to ensure that blk_mq_map_swqueue() is called with all_q_mutex is held. Since hctx->nr_ctx is reset temporarily and updated in blk_mq_map_swqueue(), so we should avoid blk_mq_register_hctx() seeing the temporary hctx->nr_ctx value in CPU hotplug handling or adding/deleting gendisk . Signed-off-by: Akinobu Mita Reviewed-by: Ming Lei Cc: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 30 ++++++++++++++++++++++-------- block/blk-mq.c | 6 +++--- include/linux/blk-mq.h | 1 - include/linux/blkdev.h | 2 ++ 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 279c5d674edf..189f5ae6522a 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -343,7 +343,7 @@ static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) struct blk_mq_ctx *ctx; int i; - if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) + if (!hctx->nr_ctx) return; hctx_for_each_ctx(hctx, ctx, i) @@ -358,7 +358,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) struct blk_mq_ctx *ctx; int i, ret; - if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) + if (!hctx->nr_ctx) return 0; ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num); @@ -381,6 +381,8 @@ void blk_mq_unregister_disk(struct gendisk *disk) struct blk_mq_ctx *ctx; int i, j; + blk_mq_disable_hotplug(); + queue_for_each_hw_ctx(q, hctx, i) { blk_mq_unregister_hctx(hctx); @@ -395,6 +397,9 @@ void blk_mq_unregister_disk(struct gendisk *disk) kobject_put(&q->mq_kobj); kobject_put(&disk_to_dev(disk)->kobj); + + q->mq_sysfs_init_done = false; + blk_mq_enable_hotplug(); } static void blk_mq_sysfs_init(struct request_queue *q) @@ -425,27 +430,30 @@ int blk_mq_register_disk(struct gendisk *disk) struct blk_mq_hw_ctx *hctx; int ret, i; + blk_mq_disable_hotplug(); + blk_mq_sysfs_init(q); ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); if (ret < 0) - return ret; + goto out; kobject_uevent(&q->mq_kobj, KOBJ_ADD); queue_for_each_hw_ctx(q, hctx, i) { - hctx->flags |= BLK_MQ_F_SYSFS_UP; ret = blk_mq_register_hctx(hctx); if (ret) break; } - if (ret) { + if (ret) blk_mq_unregister_disk(disk); - return ret; - } + else + q->mq_sysfs_init_done = true; +out: + blk_mq_enable_hotplug(); - return 0; + return ret; } EXPORT_SYMBOL_GPL(blk_mq_register_disk); @@ -454,6 +462,9 @@ void blk_mq_sysfs_unregister(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i; + if (!q->mq_sysfs_init_done) + return; + queue_for_each_hw_ctx(q, hctx, i) blk_mq_unregister_hctx(hctx); } @@ -463,6 +474,9 @@ int blk_mq_sysfs_register(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i, ret = 0; + if (!q->mq_sysfs_init_done) + return ret; + queue_for_each_hw_ctx(q, hctx, i) { ret = blk_mq_register_hctx(hctx); if (ret) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2fd7283ec62b..0262131ac5f2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2035,13 +2035,13 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, goto err_hctxs; mutex_lock(&all_q_mutex); + list_add_tail(&q->all_q_node, &all_q_list); - mutex_unlock(&all_q_mutex); - blk_mq_add_queue_tag_set(set, q); - blk_mq_map_swqueue(q); + mutex_unlock(&all_q_mutex); + return q; err_hctxs: diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 37d1602c4f7a..b80ba4572a31 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -145,7 +145,6 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, - BLK_MQ_F_SYSFS_UP = 1 << 3, BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99da9ebc7377..19c2e947d4d1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -456,6 +456,8 @@ struct request_queue { struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; struct bio_set *bio_split; + + bool mq_sysfs_init_done; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ From a723bab3d7529133f71fc8a5e96f86e3639a0d13 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:21 +0900 Subject: [PATCH 122/240] blk-mq: Fix use after of free q->mq_map CPU hotplug handling for blk-mq (blk_mq_queue_reinit) updates q->mq_map by blk_mq_update_queue_map() for all request queues in all_q_list. On the other hand, q->mq_map is released before deleting the queue from all_q_list. So if CPU hotplug event occurs in the window, invalid memory access can happen. Fix it by releasing q->mq_map in blk_mq_release() to make it happen latter than removal from all_q_list. Signed-off-by: Akinobu Mita Suggested-by: Ming Lei Reviewed-by: Ming Lei Cc: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0262131ac5f2..92648d8d6a4a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1925,6 +1925,9 @@ void blk_mq_release(struct request_queue *q) kfree(hctx); } + kfree(q->mq_map); + q->mq_map = NULL; + kfree(q->queue_hw_ctx); /* ctx kobj stays in queue_ctx */ @@ -2070,11 +2073,6 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_free_hw_queues(q, set); percpu_ref_exit(&q->mq_usage_counter); - - kfree(q->mq_map); - - q->mq_map = NULL; - mutex_lock(&all_q_mutex); list_del_init(&q->all_q_node); mutex_unlock(&all_q_mutex); From 0e6263682014d480b8d7b8c10287f4536066b54f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:22 +0900 Subject: [PATCH 123/240] blk-mq: fix q->mq_usage_counter access race CPU hotplug handling for blk-mq (blk_mq_queue_reinit) accesses q->mq_usage_counter while freezing all request queues in all_q_list. On the other hand, q->mq_usage_counter is deinitialized in blk_mq_free_queue() before deleting the queue from all_q_list. So if CPU hotplug event occurs in the window, percpu_ref_kill() is called with q->mq_usage_counter which has already been marked dead, and it triggers warning. Fix it by deleting the queue from all_q_list earlier than destroying q->mq_usage_counter. Signed-off-by: Akinobu Mita Reviewed-by: Ming Lei Cc: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 92648d8d6a4a..3a39184e82e5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2067,15 +2067,16 @@ void blk_mq_free_queue(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; + mutex_lock(&all_q_mutex); + list_del_init(&q->all_q_node); + mutex_unlock(&all_q_mutex); + blk_mq_del_queue_tag_set(q); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_free_hw_queues(q, set); percpu_ref_exit(&q->mq_usage_counter); - mutex_lock(&all_q_mutex); - list_del_init(&q->all_q_node); - mutex_unlock(&all_q_mutex); } /* Basically redo blk_mq_init_queue with queue frozen */ From 5778322e67ed34dc9f391a4a5cbcbb856071ceba Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:23 +0900 Subject: [PATCH 124/240] blk-mq: avoid inserting requests before establishing new mapping Notifier callbacks for CPU_ONLINE action can be run on the other CPU than the CPU which was just onlined. So it is possible for the process running on the just onlined CPU to insert request and run hw queue before establishing new mapping which is done by blk_mq_queue_reinit_notify(). This can cause a problem when the CPU has just been onlined first time since the request queue was initialized. At this time ctx->index_hw for the CPU, which is the index in hctx->ctxs[] for this ctx, is still zero before blk_mq_queue_reinit_notify() is called by notifier callbacks for CPU_ONLINE action. For example, there is a single hw queue (hctx) and two CPU queues (ctx0 for CPU0, and ctx1 for CPU1). Now CPU1 is just onlined and a request is inserted into ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is still zero. And then while running hw queue, flush_busy_ctxs() finds bit0 is set in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is ignored. Fix it by ensuring that new mapping is established before onlined cpu starts running. Signed-off-by: Akinobu Mita Reviewed-by: Ming Lei Cc: Jens Axboe Cc: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-cpumap.c | 9 ++++--- block/blk-mq.c | 59 +++++++++++++++++++++++++++++++++---------- block/blk-mq.h | 3 ++- 3 files changed, 52 insertions(+), 19 deletions(-) diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c index 1e28ddb656b8..8764c241e5bb 100644 --- a/block/blk-mq-cpumap.c +++ b/block/blk-mq-cpumap.c @@ -31,7 +31,8 @@ static int get_first_sibling(unsigned int cpu) return cpu; } -int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues) +int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues, + const struct cpumask *online_mask) { unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling; cpumask_var_t cpus; @@ -41,7 +42,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues) cpumask_clear(cpus); nr_cpus = nr_uniq_cpus = 0; - for_each_online_cpu(i) { + for_each_cpu(i, online_mask) { nr_cpus++; first_sibling = get_first_sibling(i); if (!cpumask_test_cpu(first_sibling, cpus)) @@ -51,7 +52,7 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues) queue = 0; for_each_possible_cpu(i) { - if (!cpu_online(i)) { + if (!cpumask_test_cpu(i, online_mask)) { map[i] = 0; continue; } @@ -95,7 +96,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set) if (!map) return NULL; - if (!blk_mq_update_queue_map(map, set->nr_hw_queues)) + if (!blk_mq_update_queue_map(map, set->nr_hw_queues, cpu_online_mask)) return map; kfree(map); diff --git a/block/blk-mq.c b/block/blk-mq.c index 3a39184e82e5..a5dbd069c9da 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1789,7 +1789,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, } } -static void blk_mq_map_swqueue(struct request_queue *q) +static void blk_mq_map_swqueue(struct request_queue *q, + const struct cpumask *online_mask) { unsigned int i; struct blk_mq_hw_ctx *hctx; @@ -1806,7 +1807,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) */ queue_for_each_ctx(q, ctx, i) { /* If the cpu isn't online, the cpu is mapped to first hctx */ - if (!cpu_online(i)) + if (!cpumask_test_cpu(i, online_mask)) continue; hctx = q->mq_ops->map_queue(q, i); @@ -1852,7 +1853,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) } queue_for_each_ctx(q, ctx, i) { - if (!cpu_online(i)) + if (!cpumask_test_cpu(i, online_mask)) continue; hctx = q->mq_ops->map_queue(q, i); @@ -2037,13 +2038,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (blk_mq_init_hw_queues(q, set)) goto err_hctxs; + get_online_cpus(); mutex_lock(&all_q_mutex); list_add_tail(&q->all_q_node, &all_q_list); blk_mq_add_queue_tag_set(set, q); - blk_mq_map_swqueue(q); + blk_mq_map_swqueue(q, cpu_online_mask); mutex_unlock(&all_q_mutex); + put_online_cpus(); return q; @@ -2080,13 +2083,14 @@ void blk_mq_free_queue(struct request_queue *q) } /* Basically redo blk_mq_init_queue with queue frozen */ -static void blk_mq_queue_reinit(struct request_queue *q) +static void blk_mq_queue_reinit(struct request_queue *q, + const struct cpumask *online_mask) { WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); blk_mq_sysfs_unregister(q); - blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues); + blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask); /* * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe @@ -2094,7 +2098,7 @@ static void blk_mq_queue_reinit(struct request_queue *q) * involves free and re-allocate memory, worthy doing?) */ - blk_mq_map_swqueue(q); + blk_mq_map_swqueue(q, online_mask); blk_mq_sysfs_register(q); } @@ -2103,16 +2107,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, unsigned long action, void *hcpu) { struct request_queue *q; + int cpu = (unsigned long)hcpu; + /* + * New online cpumask which is going to be set in this hotplug event. + * Declare this cpumasks as global as cpu-hotplug operation is invoked + * one-by-one and dynamically allocating this could result in a failure. + */ + static struct cpumask online_new; /* - * Before new mappings are established, hotadded cpu might already - * start handling requests. This doesn't break anything as we map - * offline CPUs to first hardware queue. We will re-init the queue - * below to get optimal settings. + * Before hotadded cpu starts handling requests, new mappings must + * be established. Otherwise, these requests in hw queue might + * never be dispatched. + * + * For example, there is a single hw queue (hctx) and two CPU queues + * (ctx0 for CPU0, and ctx1 for CPU1). + * + * Now CPU1 is just onlined and a request is inserted into + * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is + * still zero. + * + * And then while running hw queue, flush_busy_ctxs() finds bit0 is + * set in pending bitmap and tries to retrieve requests in + * hctx->ctxs[0]->rq_list. But htx->ctxs[0] is a pointer to ctx0, + * so the request in ctx1->rq_list is ignored. */ - if (action != CPU_DEAD && action != CPU_DEAD_FROZEN && - action != CPU_ONLINE && action != CPU_ONLINE_FROZEN) + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DEAD: + case CPU_UP_CANCELED: + cpumask_copy(&online_new, cpu_online_mask); + break; + case CPU_UP_PREPARE: + cpumask_copy(&online_new, cpu_online_mask); + cpumask_set_cpu(cpu, &online_new); + break; + default: return NOTIFY_OK; + } mutex_lock(&all_q_mutex); @@ -2136,7 +2167,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, } list_for_each_entry(q, &all_q_list, all_q_node) - blk_mq_queue_reinit(q); + blk_mq_queue_reinit(q, &online_new); list_for_each_entry(q, &all_q_list, all_q_node) blk_mq_unfreeze_queue(q); diff --git a/block/blk-mq.h b/block/blk-mq.h index 6a48c4c0d8a2..f4fea7964910 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -51,7 +51,8 @@ void blk_mq_disable_hotplug(void); * CPU -> queue mappings */ extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set); -extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); +extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues, + const struct cpumask *online_mask); extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); /* From 60de074ba1e8f327db19bc33d8530131ac01695d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:25 +0900 Subject: [PATCH 125/240] blk-mq: fix deadlock when reading cpu_list CPU hotplug handling for blk-mq (blk_mq_queue_reinit) acquires all_q_mutex in blk_mq_queue_reinit_notify() and then removes sysfs entries by blk_mq_sysfs_unregister(). Removing sysfs entry needs to be blocked until the active reference of the kernfs_node to be zero. On the other hand, reading blk_mq_hw_sysfs_cpu sysfs entry (e.g. /sys/block/nullb0/mq/0/cpu_list) acquires all_q_mutex in blk_mq_hw_sysfs_cpus_show(). If these happen at the same time, a deadlock can happen. Because one can wait for the active reference to be zero with holding all_q_mutex, and the other tries to acquire all_q_mutex with holding the active reference. The reason that all_q_mutex is acquired in blk_mq_hw_sysfs_cpus_show() is to avoid reading an imcomplete hctx->cpumask. Since reading sysfs entry for blk-mq needs to acquire q->sysfs_lock, we can avoid deadlock and reading an imcomplete hctx->cpumask by protecting q->sysfs_lock while hctx->cpumask is being updated. Signed-off-by: Akinobu Mita Reviewed-by: Ming Lei Cc: Ming Lei Cc: Wanpeng Li Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 4 ---- block/blk-mq.c | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 189f5ae6522a..788fffd9b409 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -229,8 +229,6 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) unsigned int i, first = 1; ssize_t ret = 0; - blk_mq_disable_hotplug(); - for_each_cpu(i, hctx->cpumask) { if (first) ret += sprintf(ret + page, "%u", i); @@ -240,8 +238,6 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) first = 0; } - blk_mq_enable_hotplug(); - ret += sprintf(ret + page, "\n"); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index a5dbd069c9da..31c0c6259c4c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1797,6 +1797,11 @@ static void blk_mq_map_swqueue(struct request_queue *q, struct blk_mq_ctx *ctx; struct blk_mq_tag_set *set = q->tag_set; + /* + * Avoid others reading imcomplete hctx->cpumask through sysfs + */ + mutex_lock(&q->sysfs_lock); + queue_for_each_hw_ctx(q, hctx, i) { cpumask_clear(hctx->cpumask); hctx->nr_ctx = 0; @@ -1816,6 +1821,8 @@ static void blk_mq_map_swqueue(struct request_queue *q, hctx->ctxs[hctx->nr_ctx++] = ctx; } + mutex_unlock(&q->sysfs_lock); + queue_for_each_hw_ctx(q, hctx, i) { struct blk_mq_ctxmap *map = &hctx->ctx_map; From 54313503f9a3c34a9e45aad7654976b7f50b9ae7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 25 Sep 2015 14:36:55 +0300 Subject: [PATCH 126/240] drm/amdgpu: signedness bug in amdgpu_cs_parser_init() The "i" variable should be signed or it leads to a crash in the error handling code. Fixes: 1d263474c441 ('drm/amdgpu: unwind properly in amdgpu_cs_parser_init()') Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 749420f1ea6f..cb3c274edb0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -156,7 +156,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) uint64_t *chunk_array_user; uint64_t *chunk_array; struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned size, i; + unsigned size; + int i; int ret; if (cs->in.num_chunks == 0) From 4281f46ef839050d2ef60348f661eb463c21cc2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 28 Sep 2015 18:16:31 +0900 Subject: [PATCH 127/240] drm/radeon: Restore LCD backlight level on resume (>= R5xx) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of only enabling the backlight (which seems to set it to max brightness), just re-set the current backlight level, which also takes care of enabling the backlight if necessary. Only the radeon_atom_encoder_dpms_dig part tested on a Kaveri laptop, the radeon_atom_encoder_dpms_avivo part is only compile tested. Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/atombios_encoders.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index c3872598b85a..65adb9c72377 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1624,8 +1624,9 @@ radeon_atom_encoder_dpms_avivo(struct drm_encoder *encoder, int mode) } else atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) { - args.ucAction = ATOM_LCD_BLON; - atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args); + struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); } break; case DRM_MODE_DPMS_STANDBY: @@ -1706,8 +1707,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0); } if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - atombios_dig_transmitter_setup(encoder, - ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + atombios_set_backlight_level(radeon_encoder, dig->backlight_level); if (ext_encoder) atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE); break; From 74b3112e95073b351e3b0b9799795bc76f8415fa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 29 Sep 2015 13:53:30 -0400 Subject: [PATCH 128/240] drm/amdgpu: Restore LCD backlight level on resume Instead of only enabling the backlight (which seems to set it to max brightness), just re-set the current backlight level, which also takes care of enabling the backlight if necessary. Port of radeon commit: drm/radeon: Restore LCD backlight level on resume (>= R5xx) Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index cd6edc40c9cd..1e0bba29e167 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -1279,8 +1279,7 @@ amdgpu_atombios_encoder_setup_dig(struct drm_encoder *encoder, int action) amdgpu_atombios_encoder_setup_dig_encoder(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0); } if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) - amdgpu_atombios_encoder_setup_dig_transmitter(encoder, - ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0); + amdgpu_atombios_encoder_set_backlight_level(amdgpu_encoder, dig->backlight_level); if (ext_encoder) amdgpu_atombios_encoder_setup_external_encoder(encoder, ext_encoder, ATOM_ENABLE); } else { From 4613012db1d911f80897f9446a49de817b2c4c47 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Sat, 26 Sep 2015 18:50:42 -0400 Subject: [PATCH 129/240] af_unix: Convert the unix_sk macro to an inline function for type safety As suggested by Eric Dumazet this change replaces the #define with a static inline function to enjoy complaints by the compiler when misusing the API. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- include/net/af_unix.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 4a167b30a12f..cb1b9bbda332 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -63,7 +63,11 @@ struct unix_sock { #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; }; -#define unix_sk(__sk) ((struct unix_sock *)__sk) + +static inline struct unix_sock *unix_sk(struct sock *sk) +{ + return (struct unix_sock *)sk; +} #define peer_wait peer_wq.wait From 9f389e35674f5b086edd70ed524ca0f287259725 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Sat, 26 Sep 2015 18:50:43 -0400 Subject: [PATCH 130/240] af_unix: return data from multiple SKBs on recv() with MSG_PEEK flag AF_UNIX sockets now return multiple skbs from recv() when MSG_PEEK flag is set. This is referenced in kernel bugzilla #12323 @ https://bugzilla.kernel.org/show_bug.cgi?id=12323 As described both in the BZ and lkml thread @ http://lkml.org/lkml/2008/1/8/444 calling recv() with MSG_PEEK on an AF_UNIX socket only reads a single skb, where the desired effect is to return as much skb data has been queued, until hitting the recv buffer size (whichever comes first). The modified MSG_PEEK path will now move to the next skb in the tree and jump to the again: label, rather than following the natural loop structure. This requires duplicating some of the loop head actions. This was tested using the python socketpair python code attached to the bugzilla issue. Signed-off-by: Aaron Conole Signed-off-by: David S. Miller --- net/unix/af_unix.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 03ee4d359f6a..ef31b40ad550 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2179,8 +2179,21 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) if (UNIXCB(skb).fp) scm.fp = scm_fp_dup(UNIXCB(skb).fp); - sk_peek_offset_fwd(sk, chunk); + if (skip) { + sk_peek_offset_fwd(sk, chunk); + skip -= chunk; + } + if (UNIXCB(skb).fp) + break; + + last = skb; + last_len = skb->len; + unix_state_lock(sk); + skb = skb_peek_next(skb, &sk->sk_receive_queue); + if (skb) + goto again; + unix_state_unlock(sk); break; } } while (size); From 4c52b1da538800a30f030eeb697366c23daf2ef3 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 28 Sep 2015 10:49:48 +0200 Subject: [PATCH 131/240] bna: fix error handling Several functions can return negative value in case of error, so their return type should be fixed as well as type of variables to which this value is assigned. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bfa_ioc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index b7a0f7879de2..9e59663a6ead 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1543,7 +1543,7 @@ bfa_flash_cmd_act_check(void __iomem *pci_bar) } /* Flush FLI data fifo. */ -static u32 +static int bfa_flash_fifo_flush(void __iomem *pci_bar) { u32 i; @@ -1573,11 +1573,11 @@ bfa_flash_fifo_flush(void __iomem *pci_bar) } /* Read flash status. */ -static u32 +static int bfa_flash_status_read(void __iomem *pci_bar) { union bfa_flash_dev_status_reg dev_status; - u32 status; + int status; u32 ret_status; int i; @@ -1611,11 +1611,11 @@ bfa_flash_status_read(void __iomem *pci_bar) } /* Start flash read operation. */ -static u32 +static int bfa_flash_read_start(void __iomem *pci_bar, u32 offset, u32 len, char *buf) { - u32 status; + int status; /* len must be mutiple of 4 and not exceeding fifo size */ if (len == 0 || len > BFA_FLASH_FIFO_SIZE || (len & 0x03) != 0) @@ -1703,7 +1703,8 @@ static enum bfa_status bfa_flash_raw_read(void __iomem *pci_bar, u32 offset, char *buf, u32 len) { - u32 n, status; + u32 n; + int status; u32 off, l, s, residue, fifo_sz; residue = len; From 75c261b51ba19f0791de608f0acfb94956f78c76 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 28 Sep 2015 15:05:33 +0200 Subject: [PATCH 132/240] net sysfs: Print link speed as signed integer Otherwise 4294967295 (MBit/s) (-1) will be printed when there is no link. Documentation/ABI/testing/sysfs-class-net does not state if this shall be signed or unsigned. Also remove the now unused variable fmt_udec. Signed-off-by: Alexander Stein Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 805a95a48107..830f8a7c1cb1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -31,7 +31,6 @@ static const char fmt_hex[] = "%#x\n"; static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; -static const char fmt_udec[] = "%u\n"; static const char fmt_ulong[] = "%lu\n"; static const char fmt_u64[] = "%llu\n"; @@ -202,7 +201,7 @@ static ssize_t speed_show(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; if (!__ethtool_get_settings(netdev, &cmd)) - ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); + ret = sprintf(buf, fmt_dec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); return ret; From 741a11d9e4103a8e1c590ef1280143fe654e4e33 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 28 Sep 2015 10:12:13 -0700 Subject: [PATCH 133/240] net: ipv6: Add RT6_LOOKUP_F_IFACE flag if oif is set Wolfgang reported that IPv6 stack is ignoring oif in output route lookups: With ipv6, ip -6 route get always returns the specific route. $ ip -6 r 2001:db8:e2::1 dev enp2s0 proto kernel metric 256 2001:db8:e2::/64 dev enp2s0 metric 1024 2001:db8:e3::1 dev enp3s0 proto kernel metric 256 2001:db8:e3::/64 dev enp3s0 metric 1024 fe80::/64 dev enp3s0 proto kernel metric 256 default via 2001:db8:e3::255 dev enp3s0 metric 1024 $ ip -6 r get 2001:db8:e2::100 2001:db8:e2::100 from :: dev enp2s0 src 2001:db8:e3::1 metric 0 cache $ ip -6 r get 2001:db8:e2::100 oif enp3s0 2001:db8:e2::100 from :: dev enp2s0 src 2001:db8:e3::1 metric 0 cache The stack does consider the oif but a mismatch in rt6_device_match is not considered fatal because RT6_LOOKUP_F_IFACE is not set in the flags. Cc: Wolfgang Nothdurft Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f204089e854c..cb32ce250db0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1193,7 +1193,8 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, fl6->flowi6_iif = LOOPBACK_IFINDEX; - if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) + if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || + fl6->flowi6_oif) flags |= RT6_LOOKUP_F_IFACE; if (!ipv6_addr_any(&fl6->saddr)) From 218c1f76b8b25d6dc9d01443f071cb618e206b0c Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 29 Sep 2015 15:54:58 -0700 Subject: [PATCH 134/240] Input: psmouse - fix data race in __ps2_command The data race happens on ps2dev->cmdcnt and ps2dev->cmdbuf contents. __ps2_command reads that data concurrently with the interrupt handler. As the result, for example, if a response arrives just after the timeout, __ps2_command can copy out garbage from ps2dev->cmdbuf but then see that ps2dev->cmdcnt is 0 and return success. Stop the interrupt handler with serio_pause_rx() before reading the results. The data race was found with KernelThreadSanitizer (KTSAN). Signed-off-by: Dmitry Vyukov Signed-off-by: Dmitry Torokhov --- drivers/input/serio/libps2.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index 75516996db20..316f2c897101 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -212,12 +212,17 @@ int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) * time before the ACK arrives. */ if (ps2_sendbyte(ps2dev, command & 0xff, - command == PS2_CMD_RESET_BAT ? 1000 : 200)) - goto out; + command == PS2_CMD_RESET_BAT ? 1000 : 200)) { + serio_pause_rx(ps2dev->serio); + goto out_reset_flags; + } - for (i = 0; i < send; i++) - if (ps2_sendbyte(ps2dev, param[i], 200)) - goto out; + for (i = 0; i < send; i++) { + if (ps2_sendbyte(ps2dev, param[i], 200)) { + serio_pause_rx(ps2dev->serio); + goto out_reset_flags; + } + } /* * The reset command takes a long time to execute. @@ -234,17 +239,18 @@ int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) !(ps2dev->flags & PS2_FLAG_CMD), timeout); } + serio_pause_rx(ps2dev->serio); + if (param) for (i = 0; i < receive; i++) param[i] = ps2dev->cmdbuf[(receive - 1) - i]; if (ps2dev->cmdcnt && (command != PS2_CMD_RESET_BAT || ps2dev->cmdcnt != 1)) - goto out; + goto out_reset_flags; rc = 0; - out: - serio_pause_rx(ps2dev->serio); + out_reset_flags: ps2dev->flags = 0; serio_continue_rx(ps2dev->serio); From 31b33dfb0a144469dd805514c9e63f4993729a48 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 28 Sep 2015 17:24:25 -0700 Subject: [PATCH 135/240] skbuff: Fix skb checksum partial check. Earlier patch 6ae459bda tried to detect void ckecksum partial skb by comparing pull length to checksum offset. But it does not work for all cases since checksum-offset depends on updates to skb->data. Following patch fixes it by validating checksum start offset after skb-data pointer is updated. Negative value of checksum offset start means there is no need to checksum. Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") Reported-by: Andrew Vagin Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- net/core/skbuff.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b0a30a6e31c..4398411236f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2708,7 +2708,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) <= len) + skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dad4dd37e2aa..fab4599ba8b2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2958,11 +2958,12 @@ EXPORT_SYMBOL_GPL(skb_append_pagefrags); */ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { + unsigned char *data = skb->data; + BUG_ON(len > skb->len); - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - skb_postpull_rcsum(skb, skb->data, len); - return skb->data += len; + __skb_pull(skb, len); + skb_postpull_rcsum(skb, data, len); + return skb->data; } EXPORT_SYMBOL_GPL(skb_pull_rcsum); From c047a1f918af75e572a19ba0581c3e3e202ed698 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 29 Sep 2015 01:50:56 +0200 Subject: [PATCH 136/240] dsa: mv88e6xxx: Enable forwarding for unknown to the CPU port Frames destined to an unknown address must be forwarded to the CPU port. Otherwise incoming ARP, dhcp leases, etc, do not work. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index f8baa897d1a0..1f7dd927cc5e 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2051,6 +2051,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA; else reg |= PORT_CONTROL_FRAME_MODE_DSA; + reg |= PORT_CONTROL_FORWARD_UNKNOWN | + PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) || From 23860f103b53e764a3cbbf615d08f88362a28295 Mon Sep 17 00:00:00 2001 From: Robb Manes Date: Tue, 29 Sep 2015 11:03:37 -0400 Subject: [PATCH 137/240] net/mlx4: Handle return codes in mlx4_qp_attach_common Both new_steering_entry() and existing_steering_entry() return values based on their success or failure, but currently they fall through silently. This can make troubleshooting difficult, as we were unable to tell which one of these two functions returned errors or specifically what code was returned. This patch remedies that situation by passing the return codes to err, which is returned by mlx4_qp_attach_common() itself. This also addresses a leak in the call to mlx4_bitmap_free() as well. Signed-off-by: Robb Manes Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index bd9ea0d01aae..1d4e2e054647 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1184,10 +1184,11 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], if (prot == MLX4_PROT_ETH) { /* manage the steering entry for promisc mode */ if (new_entry) - new_steering_entry(dev, port, steer, index, qp->qpn); + err = new_steering_entry(dev, port, steer, + index, qp->qpn); else - existing_steering_entry(dev, port, steer, - index, qp->qpn); + err = existing_steering_entry(dev, port, steer, + index, qp->qpn); } if (err && link && index != -1) { if (index < dev->caps.num_mgms) From 9ae6d4935e3df35a23bbbe531c6b9ff314e7fd0f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 29 Sep 2015 17:45:28 +0200 Subject: [PATCH 138/240] testptp: Silence compiler warnings on ppc64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling Documentation/ptp/testptp.c the following compiler warnings are printed out: Documentation/ptp/testptp.c: In function ‘main’: Documentation/ptp/testptp.c:367:11: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 3 has type ‘__s64’ [-Wformat=] event.t.sec, event.t.nsec); ^ Documentation/ptp/testptp.c:505:5: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 2 has type ‘__s64’ [-Wformat=] (pct+2*i)->sec, (pct+2*i)->nsec); ^ Documentation/ptp/testptp.c:507:5: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 2 has type ‘__s64’ [-Wformat=] (pct+2*i+1)->sec, (pct+2*i+1)->nsec); ^ Documentation/ptp/testptp.c:509:5: warning: format ‘%lld’ expects argument of type ‘long long int’, but argument 2 has type ‘__s64’ [-Wformat=] (pct+2*i+2)->sec, (pct+2*i+2)->nsec); This happens because __s64 is by default defined as "long" on ppc64, not as "long long". However, to fix these warnings, it's possible to define the __SANE_USERSPACE_TYPES__ so that __s64 gets defined to "long long" on ppc64, too. Signed-off-by: Thomas Huth Acked-by: Richard Cochran Signed-off-by: David S. Miller --- Documentation/ptp/testptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c index 2bc8abc57fa0..6c6247aaa7b9 100644 --- a/Documentation/ptp/testptp.c +++ b/Documentation/ptp/testptp.c @@ -18,6 +18,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ #include #include #include From 57a47532c4312159935c98b7f1cf0e62296b9171 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 29 Sep 2015 14:17:54 -0400 Subject: [PATCH 139/240] net: dsa: fix preparation of a port STP update Because of the default 0 value of ret in dsa_slave_port_attr_set, a driver may return -EOPNOTSUPP from the commit phase of a STP state, which triggers a WARN() from switchdev. This happened on a 6185 switch which does not support hardware bridging. Fixes: 3563606258cf ("switchdev: convert STP update to switchdev attr set") Reported-by: Andrew Lunn Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/slave.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index cce97385f743..7d91f4612ac0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -458,12 +458,17 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) static int dsa_slave_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) { - int ret = 0; + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + int ret; switch (attr->id) { case SWITCHDEV_ATTR_PORT_STP_STATE: - if (attr->trans == SWITCHDEV_TRANS_COMMIT) - ret = dsa_slave_stp_update(dev, attr->u.stp_state); + if (attr->trans == SWITCHDEV_TRANS_PREPARE) + ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP; + else + ret = ds->drv->port_stp_update(ds, p->port, + attr->u.stp_state); break; default: ret = -EOPNOTSUPP; From b84f78782052ee4516903e5d0566a5eee365b771 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 19:07:07 -0700 Subject: [PATCH 140/240] net: Initialize flow flags in input path The fib_table_lookup tracepoint found 2 places where the flowi4_flags is not initialized. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 1 + net/ipv4/route.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 6fcbd215cdbc..690bcbc59f26 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -340,6 +340,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_tun_key.tun_id = 0; + fl4.flowi4_flags = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c6ad99ad0ffb..c81deb85acb4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1737,6 +1737,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; + fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; err = fib_lookup(net, &fl4, &res, 0); From 1e034743e918d195d339af340ae933727c072bce Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 23 Sep 2015 12:02:57 +0200 Subject: [PATCH 141/240] x86/hyperv: Fix the build in the !CONFIG_KEXEC_CORE case Recent changes in the Hyper-V driver: b4370df2b1f5 ("Drivers: hv: vmbus: add special crash handler") broke the build when CONFIG_KEXEC_CORE is not set: arch/x86/built-in.o: In function `hv_machine_crash_shutdown': arch/x86/kernel/cpu/mshyperv.c:112: undefined reference to `native_machine_crash_shutdown' Decorate all kexec related code with #ifdef CONFIG_KEXEC_CORE. Reported-by: Jim Davis Reported-by: Stephen Hemminger Signed-off-by: Vitaly Kuznetsov Signed-off-by: Thomas Gleixner Cc: devel@linuxdriverproject.org Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Greg Kroah-Hartman Cc: Peter Zijlstra Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1443002577-25370-1-git-send-email-vkuznets@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mshyperv.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 381c8b9b3a33..20e242ea1bc4 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -34,11 +34,10 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); -static void (*hv_kexec_handler)(void); -static void (*hv_crash_handler)(struct pt_regs *regs); - #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); +static void (*hv_kexec_handler)(void); +static void (*hv_crash_handler)(struct pt_regs *regs); void hyperv_vector_handler(struct pt_regs *regs) { @@ -96,8 +95,8 @@ void hv_remove_crash_handler(void) hv_crash_handler = NULL; } EXPORT_SYMBOL_GPL(hv_remove_crash_handler); -#endif +#ifdef CONFIG_KEXEC_CORE static void hv_machine_shutdown(void) { if (kexec_in_progress && hv_kexec_handler) @@ -111,7 +110,8 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs) hv_crash_handler(regs); native_machine_crash_shutdown(regs); } - +#endif /* CONFIG_KEXEC_CORE */ +#endif /* CONFIG_HYPERV */ static uint32_t __init ms_hyperv_platform(void) { @@ -186,8 +186,10 @@ static void __init ms_hyperv_init_platform(void) no_timer_check = 1; #endif +#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE) machine_ops.shutdown = hv_machine_shutdown; machine_ops.crash_shutdown = hv_machine_crash_shutdown; +#endif mark_tsc_unstable("running on Hyper-V"); } From 40482e64b0b84388561b00b880eeca7000f72d38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20L=C3=B3pez?= Date: Sun, 13 Sep 2015 17:15:53 -0300 Subject: [PATCH 142/240] dmaengine: sun4i: fix unsafe list iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, sun4i_dma_free_contract iterates over lists and frees memory as it goes through them, causing reads to recently freed memory to be performed. Fix this by using the safe version of the iterator, so freed memory is not referenced at all. Reported-by: Dan Carpenter Signed-off-by: Emilio López Acked-by: Maxime Ripard Signed-off-by: Vinod Koul --- drivers/dma/sun4i-dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c index a1a500d96ff2..1661d518224a 100644 --- a/drivers/dma/sun4i-dma.c +++ b/drivers/dma/sun4i-dma.c @@ -599,13 +599,13 @@ get_next_cyclic_promise(struct sun4i_dma_contract *contract) static void sun4i_dma_free_contract(struct virt_dma_desc *vd) { struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd); - struct sun4i_dma_promise *promise; + struct sun4i_dma_promise *promise, *tmp; /* Free all the demands and completed demands */ - list_for_each_entry(promise, &contract->demands, list) + list_for_each_entry_safe(promise, tmp, &contract->demands, list) kfree(promise); - list_for_each_entry(promise, &contract->completed_demands, list) + list_for_each_entry_safe(promise, tmp, &contract->completed_demands, list) kfree(promise); kfree(contract); From 4ac86a6dcec1c3878de9747bf5a2aa4455be69e3 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 29 Sep 2015 19:40:14 +0300 Subject: [PATCH 143/240] x86, efi, kasan: Fix build failure on !KASAN && KMEMCHECK=y kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With KMEMCHECK=y, KASAN=n we get this build failure: arch/x86/platform/efi/efi.c:673:3: error: implicit declaration of function ‘memcpy’ [-Werror=implicit-function-declaration] arch/x86/platform/efi/efi_64.c:139:2: error: implicit declaration of function ‘memcpy’ [-Werror=implicit-function-declaration] arch/x86/include/asm/desc.h:121:2: error: implicit declaration of function ‘memcpy’ [-Werror=implicit-function-declaration] Don't #undef memcpy if KASAN=n. Reported-by: Ingo Molnar Reported-by: Sedat Dilek Signed-off-by: Andrey Ryabinin Cc: Linus Torvalds Cc: Matt Fleming Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 769a8089c1fd ("x86, efi, kasan: #undef memset/memcpy/memmove per arch") Link: http://lkml.kernel.org/r/1443544814-20122-1-git-send-email-ryabinin.a.a@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ab5f1d447ef9..ae68be92f755 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present * only in kernel binary. Since the EFI stub linked into a separate binary it @@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #undef memcpy #undef memset #undef memmove +#endif #endif /* CONFIG_X86_32 */ From 5e0fb1f9eb754eed8432392bfdc100ef295676cd Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 16 Sep 2015 14:16:45 +0900 Subject: [PATCH 144/240] drm/exynos: fix layering violation of address There is no guarantee that DMA addresses are the same as physical addresses, but dma_to_pfn() knows how to convert a dma_addr_t to a PFN which can then be converted to a struct page. Suggested-by: Russell King Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index f12fbc36b120..a4fb78e43034 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -77,7 +77,8 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem_obj *obj) start_addr = obj->dma_addr; while (i < nr_pages) { - obj->pages[i] = phys_to_page(start_addr); + obj->pages[i] = pfn_to_page(dma_to_pfn(dev->dev, + start_addr)); start_addr += PAGE_SIZE; i++; } From 7c93537a47e15e04064158ccdbe28f88ee61170c Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 16 Sep 2015 14:14:54 +0900 Subject: [PATCH 145/240] drm/exynos: fix missed calling of drm_prime_gem_destroy() When obj->import_attach is existed, code calling drm_prime_gem_destroy() was removed from commit 67e93c808b48 ("drm/exynos: stop copying sg table"), and it's a fault. The drm_prime_gem_destroy() is cleanup function which GEM drivers need to call when they use drm_gem_prime_import() to import dma-bufs, so exynos-drm driver using drm_gem_prime_import() needs calling drm_prime_gem_destroy(). Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 9 +++++---- drivers/gpu/drm/exynos/exynos_drm_gem.h | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index a4fb78e43034..ce05f1365712 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -157,11 +157,10 @@ void exynos_drm_gem_destroy(struct exynos_drm_gem_obj *exynos_gem_obj) * once dmabuf's refcount becomes 0. */ if (obj->import_attach) - goto out; + drm_prime_gem_destroy(obj, exynos_gem_obj->sgt); + else + exynos_drm_free_buf(exynos_gem_obj); - exynos_drm_free_buf(exynos_gem_obj); - -out: drm_gem_free_mmap_offset(obj); /* release file pointer to gem object. */ @@ -589,6 +588,8 @@ exynos_drm_gem_prime_import_sg_table(struct drm_device *dev, if (ret < 0) goto err_free_large; + exynos_gem_obj->sgt = sgt; + if (sgt->nents == 1) { /* always physically continuous memory if sgt->nents is 1. */ exynos_gem_obj->flags |= EXYNOS_BO_CONTIG; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index cd62f8410d1e..97bb48b6e862 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -39,6 +39,7 @@ * - this address could be physical address without IOMMU and * device address with IOMMU. * @pages: Array of backing pages. + * @sgt: Imported sg_table. * * P.S. this object would be transferred to user as kms_bo.handle so * user can access the buffer through kms_bo.handle. @@ -52,6 +53,7 @@ struct exynos_drm_gem_obj { dma_addr_t dma_addr; struct dma_attrs dma_attrs; struct page **pages; + struct sg_table *sgt; }; struct page **exynos_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask); From 0269b1a17f8f887c02ae37affef884e1ffda3df6 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 16 Sep 2015 14:29:32 +0900 Subject: [PATCH 146/240] drm/exynos: remove unnecessary NULL assignment They will be freed right or was freed already, so NULL assignment is unnecessary. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index ce05f1365712..85aeae91b50c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -118,8 +118,6 @@ static void exynos_drm_free_buf(struct exynos_drm_gem_obj *obj) } else dma_free_attrs(dev->dev, obj->size, obj->pages, (dma_addr_t)obj->dma_addr, &obj->dma_attrs); - - obj->dma_addr = (dma_addr_t)NULL; } static int exynos_drm_gem_handle_create(struct drm_gem_object *obj, @@ -167,7 +165,6 @@ void exynos_drm_gem_destroy(struct exynos_drm_gem_obj *exynos_gem_obj) drm_gem_object_release(obj); kfree(exynos_gem_obj); - exynos_gem_obj = NULL; } unsigned long exynos_drm_gem_get_size(struct drm_device *dev, From b319dc6a61518cc02ae21afb383901236009aab1 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 16 Sep 2015 14:29:33 +0900 Subject: [PATCH 147/240] drm/exynos: staticize exynos_drm_gem_init() The exynos_drm_gem_init() is used only in exynos_drm_gem.c file. Make it static and don't export it. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 3 +-- drivers/gpu/drm/exynos/exynos_drm_gem.h | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 85aeae91b50c..5638a7a88c7b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -187,8 +187,7 @@ unsigned long exynos_drm_gem_get_size(struct drm_device *dev, return exynos_gem_obj->size; } - -struct exynos_drm_gem_obj *exynos_drm_gem_init(struct drm_device *dev, +static struct exynos_drm_gem_obj *exynos_drm_gem_init(struct drm_device *dev, unsigned long size) { struct exynos_drm_gem_obj *exynos_gem_obj; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 97bb48b6e862..b62d1007c0e0 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -61,10 +61,6 @@ struct page **exynos_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask); /* destroy a buffer with gem object */ void exynos_drm_gem_destroy(struct exynos_drm_gem_obj *exynos_gem_obj); -/* create a private gem object and initialize it. */ -struct exynos_drm_gem_obj *exynos_drm_gem_init(struct drm_device *dev, - unsigned long size); - /* create a new buffer with gem object */ struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_device *dev, unsigned int flags, From 333e8e58b0bed137e940ac92c2af3f3aa4603504 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 16 Sep 2015 14:29:34 +0900 Subject: [PATCH 148/240] drm/exynos: cleanup function calling written twice By if statment, some function callings are written twice. It needs several line feed by indentation in if statment. Make to one function calling from outside if statment. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 56 ++++++++++--------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 5638a7a88c7b..10fc61408f22 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -56,24 +56,25 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem_obj *obj) nr_pages = obj->size >> PAGE_SHIFT; if (!is_drm_iommu_supported(dev)) { - dma_addr_t start_addr; - unsigned int i = 0; - obj->pages = drm_calloc_large(nr_pages, sizeof(struct page *)); if (!obj->pages) { DRM_ERROR("failed to allocate pages.\n"); return -ENOMEM; } + } - obj->cookie = dma_alloc_attrs(dev->dev, - obj->size, - &obj->dma_addr, GFP_KERNEL, - &obj->dma_attrs); - if (!obj->cookie) { - DRM_ERROR("failed to allocate buffer.\n"); + obj->cookie = dma_alloc_attrs(dev->dev, obj->size, &obj->dma_addr, + GFP_KERNEL, &obj->dma_attrs); + if (!obj->cookie) { + DRM_ERROR("failed to allocate buffer.\n"); + if (obj->pages) drm_free_large(obj->pages); - return -ENOMEM; - } + return -ENOMEM; + } + + if (obj->pages) { + dma_addr_t start_addr; + unsigned int i = 0; start_addr = obj->dma_addr; while (i < nr_pages) { @@ -83,13 +84,7 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem_obj *obj) i++; } } else { - obj->pages = dma_alloc_attrs(dev->dev, obj->size, - &obj->dma_addr, GFP_KERNEL, - &obj->dma_attrs); - if (!obj->pages) { - DRM_ERROR("failed to allocate buffer.\n"); - return -ENOMEM; - } + obj->pages = obj->cookie; } DRM_DEBUG_KMS("dma_addr(0x%lx), size(0x%lx)\n", @@ -111,13 +106,11 @@ static void exynos_drm_free_buf(struct exynos_drm_gem_obj *obj) DRM_DEBUG_KMS("dma_addr(0x%lx), size(0x%lx)\n", (unsigned long)obj->dma_addr, obj->size); - if (!is_drm_iommu_supported(dev)) { - dma_free_attrs(dev->dev, obj->size, obj->cookie, - (dma_addr_t)obj->dma_addr, &obj->dma_attrs); + dma_free_attrs(dev->dev, obj->size, obj->cookie, + (dma_addr_t)obj->dma_addr, &obj->dma_attrs); + + if (!is_drm_iommu_supported(dev)) drm_free_large(obj->pages); - } else - dma_free_attrs(dev->dev, obj->size, obj->pages, - (dma_addr_t)obj->dma_addr, &obj->dma_attrs); } static int exynos_drm_gem_handle_create(struct drm_gem_object *obj, @@ -398,6 +391,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args) { struct exynos_drm_gem_obj *exynos_gem_obj; + unsigned int flags; int ret; /* @@ -409,16 +403,12 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, args->pitch = args->width * ((args->bpp + 7) / 8); args->size = args->pitch * args->height; - if (is_drm_iommu_supported(dev)) { - exynos_gem_obj = exynos_drm_gem_create(dev, - EXYNOS_BO_NONCONTIG | EXYNOS_BO_WC, - args->size); - } else { - exynos_gem_obj = exynos_drm_gem_create(dev, - EXYNOS_BO_CONTIG | EXYNOS_BO_WC, - args->size); - } + if (is_drm_iommu_supported(dev)) + flags = EXYNOS_BO_NONCONTIG | EXYNOS_BO_WC; + else + flags = EXYNOS_BO_CONTIG | EXYNOS_BO_WC; + exynos_gem_obj = exynos_drm_gem_create(dev, flags, args->size); if (IS_ERR(exynos_gem_obj)) { dev_warn(dev->dev, "FB allocation failed.\n"); return PTR_ERR(exynos_gem_obj); From b4cfd4ddfb555c8cda7b0aa55c0e37522f8e01e4 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 16 Sep 2015 14:29:35 +0900 Subject: [PATCH 149/240] drm/exynos: cleanup line feed in exynos_drm_gem_get_ioctl The beginning of statement in function is next line of a brace. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 10fc61408f22..3df1e0e81014 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -331,7 +331,8 @@ int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem_obj *exynos_gem_obj, int exynos_drm_gem_get_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) -{ struct exynos_drm_gem_obj *exynos_gem_obj; +{ + struct exynos_drm_gem_obj *exynos_gem_obj; struct drm_exynos_gem_info *args = data; struct drm_gem_object *obj; From b9e71911f34616276399da3dc1abc47797be2396 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Wed, 9 Sep 2015 12:55:25 -0300 Subject: [PATCH 150/240] drm/exynos: Remove useless EXPORT_SYMBOL_GPLs All the user of these functions are inside exynos-drm driver and you don't need to export the symbols for that case. Signed-off-by: Daniel Kurtz Signed-off-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_core.c | 6 ------ drivers/gpu/drm/exynos/exynos_drm_g2d.c | 3 --- 2 files changed, 9 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c index c68a6a2a9b57..7f55ba6771c6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_core.c +++ b/drivers/gpu/drm/exynos/exynos_drm_core.c @@ -28,7 +28,6 @@ int exynos_drm_subdrv_register(struct exynos_drm_subdrv *subdrv) return 0; } -EXPORT_SYMBOL_GPL(exynos_drm_subdrv_register); int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *subdrv) { @@ -39,7 +38,6 @@ int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *subdrv) return 0; } -EXPORT_SYMBOL_GPL(exynos_drm_subdrv_unregister); int exynos_drm_device_subdrv_probe(struct drm_device *dev) { @@ -69,7 +67,6 @@ int exynos_drm_device_subdrv_probe(struct drm_device *dev) return 0; } -EXPORT_SYMBOL_GPL(exynos_drm_device_subdrv_probe); int exynos_drm_device_subdrv_remove(struct drm_device *dev) { @@ -87,7 +84,6 @@ int exynos_drm_device_subdrv_remove(struct drm_device *dev) return 0; } -EXPORT_SYMBOL_GPL(exynos_drm_device_subdrv_remove); int exynos_drm_subdrv_open(struct drm_device *dev, struct drm_file *file) { @@ -111,7 +107,6 @@ int exynos_drm_subdrv_open(struct drm_device *dev, struct drm_file *file) } return ret; } -EXPORT_SYMBOL_GPL(exynos_drm_subdrv_open); void exynos_drm_subdrv_close(struct drm_device *dev, struct drm_file *file) { @@ -122,4 +117,3 @@ void exynos_drm_subdrv_close(struct drm_device *dev, struct drm_file *file) subdrv->close(dev, subdrv->dev, file); } } -EXPORT_SYMBOL_GPL(exynos_drm_subdrv_close); diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 3734c34aed16..c17efdb238a6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1059,7 +1059,6 @@ int exynos_g2d_get_ver_ioctl(struct drm_device *drm_dev, void *data, return 0; } -EXPORT_SYMBOL_GPL(exynos_g2d_get_ver_ioctl); int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data, struct drm_file *file) @@ -1230,7 +1229,6 @@ int exynos_g2d_set_cmdlist_ioctl(struct drm_device *drm_dev, void *data, g2d_put_cmdlist(g2d, node); return ret; } -EXPORT_SYMBOL_GPL(exynos_g2d_set_cmdlist_ioctl); int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data, struct drm_file *file) @@ -1293,7 +1291,6 @@ int exynos_g2d_exec_ioctl(struct drm_device *drm_dev, void *data, out: return 0; } -EXPORT_SYMBOL_GPL(exynos_g2d_exec_ioctl); static int g2d_subdrv_probe(struct drm_device *drm_dev, struct device *dev) { From 8755556fbbdd1a6af21a3c4d9ce8f451e999e457 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Fri, 25 Sep 2015 18:10:32 +0900 Subject: [PATCH 151/240] drm/exynos: remove call to drm_gem_free_mmap_offset() The drm_gem_object_release() function already performs this cleanup, so there is no reason to do it explicitly. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 3df1e0e81014..5c94863a4ca6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -152,8 +152,6 @@ void exynos_drm_gem_destroy(struct exynos_drm_gem_obj *exynos_gem_obj) else exynos_drm_free_buf(exynos_gem_obj); - drm_gem_free_mmap_offset(obj); - /* release file pointer to gem object. */ drm_gem_object_release(obj); @@ -530,7 +528,6 @@ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) err_close_vm: drm_gem_vm_close(vma); - drm_gem_free_mmap_offset(obj); return ret; } From 48cf53f4343ae12ddc1c60dbe116161ecf7a2885 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 28 Jul 2015 17:53:23 +0900 Subject: [PATCH 152/240] drm/exynos: create a fake mmap offset with gem creation Don't create a fake mmap offset in exynos_drm_gem_dumb_map_offset. If not, it will call drm_gem_create_mmap_offset whenever user requests DRM_IOCTL_MODE_MAP_DUMB ioctl. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 5c94863a4ca6..048ad13e5d82 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -199,6 +199,13 @@ static struct exynos_drm_gem_obj *exynos_drm_gem_init(struct drm_device *dev, return ERR_PTR(ret); } + ret = drm_gem_create_mmap_offset(obj); + if (ret < 0) { + drm_gem_object_release(obj); + kfree(exynos_gem_obj); + return ERR_PTR(ret); + } + DRM_DEBUG_KMS("created file object = 0x%x\n", (unsigned int)obj->filp); return exynos_gem_obj; @@ -445,14 +452,9 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv, goto unlock; } - ret = drm_gem_create_mmap_offset(obj); - if (ret) - goto out; - *offset = drm_vma_node_offset_addr(&obj->vma_node); DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset); -out: drm_gem_object_unreference(obj); unlock: mutex_unlock(&dev->struct_mutex); From 7082947eff3b1ddbb5d320331c846a5a2fc88261 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 24 Sep 2015 12:14:13 +0200 Subject: [PATCH 153/240] drm/exynos: Suspend/resume is unused if !PM Protect the suspend and resume callbacks with an #ifdef CONFIG_PM_SLEEP guard to avoid "defined but not used" warnings. Signed-off-by: Thierry Reding Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 831d2e4cacf9..ae9e6b2d3758 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -304,6 +304,7 @@ int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, return 0; } +#ifdef CONFIG_PM_SLEEP static int exynos_drm_suspend(struct drm_device *dev, pm_message_t state) { struct drm_connector *connector; @@ -340,6 +341,7 @@ static int exynos_drm_resume(struct drm_device *dev) return 0; } +#endif static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) { From 641a2fef39369c27df887e82cd63aee3de93f511 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 24 Sep 2015 12:14:14 +0200 Subject: [PATCH 154/240] drm/exynos: fimc: Clock control is unused if !PM Protect the fimc_clk_ctrl() function with an #ifdef CONFIG_PM guard to avoid "defined but not used" warnings. Signed-off-by: Thierry Reding Reviewed-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimc.c | 36 ++++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 2a652359af64..dd3a5e6d58c8 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1206,23 +1206,6 @@ static struct exynos_drm_ipp_ops fimc_dst_ops = { .set_addr = fimc_dst_set_addr, }; -static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); - clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); - clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = true; - } - - return 0; -} - static irqreturn_t fimc_irq_handler(int irq, void *dev_id) { struct fimc_context *ctx = dev_id; @@ -1780,6 +1763,24 @@ static int fimc_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM +static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) +{ + DRM_DEBUG_KMS("enable[%d]\n", enable); + + if (enable) { + clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); + clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]); + ctx->suspended = false; + } else { + clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); + clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]); + ctx->suspended = true; + } + + return 0; +} + #ifdef CONFIG_PM_SLEEP static int fimc_suspend(struct device *dev) { @@ -1806,7 +1807,6 @@ static int fimc_resume(struct device *dev) } #endif -#ifdef CONFIG_PM static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); From 85a82038b2744f87cb297f93ddecd04a8c2979bd Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Thu, 24 Sep 2015 12:14:15 +0200 Subject: [PATCH 155/240] drm/exynos: rotator: Clock control is unused if !PM Protect the rotator_clk_crtl() function with an #ifdef CONFIG_PM guard to avoid "defined but not used" warnings. Signed-off-by: Thierry Reding Reviewed-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_rotator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 425e70625388..2f5c118f4c8e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -786,6 +786,7 @@ static int rotator_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM static int rotator_clk_crtl(struct rot_context *rot, bool enable) { if (enable) { @@ -822,7 +823,6 @@ static int rotator_resume(struct device *dev) } #endif -#ifdef CONFIG_PM static int rotator_runtime_suspend(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); From 214fc4e423ff38b41b60db2209cf49b4e9a7209b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 24 Sep 2015 12:03:35 +0300 Subject: [PATCH 156/240] dmaengine: fix balance of privatecnt dma_release_channel() decrements privatecnt counter and almost all dma_get* function increments it with the exception of dma_get_slave_channel(). In most cases this does not cause issue since normally the channel is not requested and released, but if a driver requests DMA channel via dma_get_slave_channel() and releases the channel the privatecnt will be unbalanced and this will prevent for example getting channel for memcpy. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 3ff284c8e3d5..09479d4be4db 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -554,10 +554,18 @@ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) mutex_lock(&dma_list_mutex); if (chan->client_count == 0) { + struct dma_device *device = chan->device; + + dma_cap_set(DMA_PRIVATE, device->cap_mask); + device->privatecnt++; err = dma_chan_get(chan); - if (err) + if (err) { pr_debug("%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); + chan = NULL; + if (--device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, device->cap_mask); + } } else chan = NULL; From ee08b59d47d859ed0a11ab331a3fbc5ab3b56100 Mon Sep 17 00:00:00 2001 From: Rameshwar Prasad Sahu Date: Wed, 16 Sep 2015 13:33:23 +0530 Subject: [PATCH 157/240] dmaengine: xgene-dma: Fix overwritting DMA tx ring This patch fixes an over flow issue with the TX ring descriptor. Each descriptor is 32B in size and an operation requires 2 of these descriptors. Signed-off-by: Rameshwar Prasad Sahu Signed-off-by: Vinod Koul --- drivers/dma/xgene-dma.c | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index 4c5dddaad2d5..8d57b1b12e41 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -59,7 +59,6 @@ #define XGENE_DMA_RING_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_RING_BLK_MEM_RDY 0xD074 #define XGENE_DMA_RING_BLK_MEM_RDY_VAL 0xFFFFFFFF -#define XGENE_DMA_RING_DESC_CNT(v) (((v) & 0x0001FFFE) >> 1) #define XGENE_DMA_RING_ID_GET(owner, num) (((owner) << 6) | (num)) #define XGENE_DMA_RING_DST_ID(v) ((1 << 10) | (v)) #define XGENE_DMA_RING_CMD_OFFSET 0x2C @@ -379,14 +378,6 @@ static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) return flyby_type[src_cnt]; } -static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring) -{ - u32 __iomem *cmd_base = ring->cmd_base; - u32 ring_state = ioread32(&cmd_base[1]); - - return XGENE_DMA_RING_DESC_CNT(ring_state); -} - static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, dma_addr_t *paddr) { @@ -659,15 +650,12 @@ static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan, dma_pool_free(chan->desc_pool, desc, desc->tx.phys); } -static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, - struct xgene_dma_desc_sw *desc_sw) +static void xgene_chan_xfer_request(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc_sw) { + struct xgene_dma_ring *ring = &chan->tx_ring; struct xgene_dma_desc_hw *desc_hw; - /* Check if can push more descriptor to hw for execution */ - if (xgene_dma_ring_desc_cnt(ring) > (ring->slots - 2)) - return -EBUSY; - /* Get hw descriptor from DMA tx ring */ desc_hw = &ring->desc_hw[ring->head]; @@ -694,11 +682,13 @@ static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw)); } + /* Increment the pending transaction count */ + chan->pending += ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); + /* Notify the hw that we have descriptor ready for execution */ iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ? 2 : 1, ring->cmd); - - return 0; } /** @@ -710,7 +700,6 @@ static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) { struct xgene_dma_desc_sw *desc_sw, *_desc_sw; - int ret; /* * If the list of pending descriptors is empty, then we @@ -735,18 +724,13 @@ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) if (chan->pending >= chan->max_outstanding) return; - ret = xgene_chan_xfer_request(&chan->tx_ring, desc_sw); - if (ret) - return; + xgene_chan_xfer_request(chan, desc_sw); /* * Delete this element from ld pending queue and append it to * ld running queue */ list_move_tail(&desc_sw->node, &chan->ld_running); - - /* Increment the pending transaction count */ - chan->pending++; } } @@ -821,7 +805,8 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) * Decrement the pending transaction count * as we have processed one */ - chan->pending--; + chan->pending -= ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); /* * Delete this node from ld running queue and append it to @@ -1485,7 +1470,7 @@ static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan) tx_ring->id, tx_ring->num, tx_ring->desc_vaddr); /* Set the max outstanding request possible to this channel */ - chan->max_outstanding = rx_ring->slots; + chan->max_outstanding = tx_ring->slots; return ret; } From 50bbfbffa5c894def440ce8157dfe53e60960d35 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 29 Sep 2015 17:59:17 -0300 Subject: [PATCH 158/240] drm/exynos: remove fimd_mode_fixup() The only thing mode_fixup was doing was set the adjusted_mode->vrefresh to 60, but it already has the value of 60 when the fimd_mode_fixup() is called. That means this call is actually pointless and can be removed. Signed-off-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 750a9e6b9e8d..03ad549e7698 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -41,7 +41,6 @@ * CPU Interface. */ -#define FIMD_DEFAULT_FRAMERATE 60 #define MIN_FB_WIDTH_FOR_16WORD_BURST 128 /* position control register for hardware window 0, 2 ~ 4.*/ @@ -377,16 +376,6 @@ static u32 fimd_calc_clkdiv(struct fimd_context *ctx, return (clkdiv < 0x100) ? clkdiv : 0xff; } -static bool fimd_mode_fixup(struct exynos_drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - if (adjusted_mode->vrefresh == 0) - adjusted_mode->vrefresh = FIMD_DEFAULT_FRAMERATE; - - return true; -} - static void fimd_commit(struct exynos_drm_crtc *crtc) { struct fimd_context *ctx = crtc->ctx; @@ -888,7 +877,6 @@ static void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable) static const struct exynos_drm_crtc_ops fimd_crtc_ops = { .enable = fimd_enable, .disable = fimd_disable, - .mode_fixup = fimd_mode_fixup, .commit = fimd_commit, .enable_vblank = fimd_enable_vblank, .disable_vblank = fimd_disable_vblank, From 208d7ee3c89e5e94627aa33331b15dd4df814707 Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 29 Sep 2015 17:59:18 -0300 Subject: [PATCH 159/240] drm/exynos: remove decon_mode_fixup() The only thing mode_fixup was doing was set the adjusted_mode->vrefresh to 60, but it already has the value of 60 when the decon_mode_fixup() is called. That means this call is actually pointless and can be removed. Signed-off-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos7_drm_decon.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index cbdb78ef3bac..e6cbaca821a4 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -37,7 +37,6 @@ * DECON stands for Display and Enhancement controller. */ -#define DECON_DEFAULT_FRAMERATE 60 #define MIN_FB_WIDTH_FOR_16WORD_BURST 128 #define WINDOWS_NR 2 @@ -165,16 +164,6 @@ static u32 decon_calc_clkdiv(struct decon_context *ctx, return (clkdiv < 0x100) ? clkdiv : 0xff; } -static bool decon_mode_fixup(struct exynos_drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - if (adjusted_mode->vrefresh == 0) - adjusted_mode->vrefresh = DECON_DEFAULT_FRAMERATE; - - return true; -} - static void decon_commit(struct exynos_drm_crtc *crtc) { struct decon_context *ctx = crtc->ctx; @@ -637,7 +626,6 @@ static void decon_disable(struct exynos_drm_crtc *crtc) static const struct exynos_drm_crtc_ops decon_crtc_ops = { .enable = decon_enable, .disable = decon_disable, - .mode_fixup = decon_mode_fixup, .commit = decon_commit, .enable_vblank = decon_enable_vblank, .disable_vblank = decon_disable_vblank, From 10d97d5869f9a50eb3142c7ee562ecc5b5b33f5b Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Tue, 29 Sep 2015 17:59:19 -0300 Subject: [PATCH 160/240] drm/exynos: remove unused mode_fixup() code CRTC's mode_fixup() isn't used anymore in exynos, remove it. Signed-off-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_crtc.c | 15 --------------- drivers/gpu/drm/exynos/exynos_drm_drv.h | 4 ---- 2 files changed, 19 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c index 0872aa2f450f..ed28823d3b35 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c @@ -41,20 +41,6 @@ static void exynos_drm_crtc_disable(struct drm_crtc *crtc) exynos_crtc->ops->disable(exynos_crtc); } -static bool -exynos_drm_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc); - - if (exynos_crtc->ops->mode_fixup) - return exynos_crtc->ops->mode_fixup(exynos_crtc, mode, - adjusted_mode); - - return true; -} - static void exynos_drm_crtc_mode_set_nofb(struct drm_crtc *crtc) { @@ -99,7 +85,6 @@ static void exynos_crtc_atomic_flush(struct drm_crtc *crtc, static struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = { .enable = exynos_drm_crtc_enable, .disable = exynos_drm_crtc_disable, - .mode_fixup = exynos_drm_crtc_mode_fixup, .mode_set_nofb = exynos_drm_crtc_mode_set_nofb, .atomic_begin = exynos_crtc_atomic_begin, .atomic_flush = exynos_crtc_atomic_flush, diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index b7ba21dfb696..6c717ba672db 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -82,7 +82,6 @@ struct exynos_drm_plane { * * @enable: enable the device * @disable: disable the device - * @mode_fixup: fix mode data before applying it * @commit: set current hw specific display mode to hw. * @enable_vblank: specific driver callback for enabling vblank interrupt. * @disable_vblank: specific driver callback for disabling vblank interrupt. @@ -103,9 +102,6 @@ struct exynos_drm_crtc; struct exynos_drm_crtc_ops { void (*enable)(struct exynos_drm_crtc *crtc); void (*disable)(struct exynos_drm_crtc *crtc); - bool (*mode_fixup)(struct exynos_drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); void (*commit)(struct exynos_drm_crtc *crtc); int (*enable_vblank)(struct exynos_drm_crtc *crtc); void (*disable_vblank)(struct exynos_drm_crtc *crtc); From 6bea0f6d1c47b07be88dfd93f013ae05fcb3d8bf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 28 Sep 2015 18:57:03 +0300 Subject: [PATCH 161/240] dmaengine: dw: properly read DWC_PARAMS register In case we have less than maximum allowed channels (8) and autoconfiguration is enabled the DWC_PARAMS read is wrong because it uses different arithmetic to what is needed for channel priority setup. Re-do the caclulations properly. This now works on AVR32 board well. Fixes: fed2574b3c9f (dw_dmac: introduce software emulation of LLP transfers) Cc: yitian.bu@tangramtek.com Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- drivers/dma/dw/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index cf1c87fa1edd..bedce038c6e2 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1591,7 +1591,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) INIT_LIST_HEAD(&dw->dma.channels); for (i = 0; i < nr_channels; i++) { struct dw_dma_chan *dwc = &dw->chan[i]; - int r = nr_channels - i - 1; dwc->chan.device = &dw->dma; dma_cookie_init(&dwc->chan); @@ -1603,7 +1602,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* 7 is highest priority & 0 is lowest. */ if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) - dwc->priority = r; + dwc->priority = nr_channels - i - 1; else dwc->priority = i; @@ -1622,6 +1621,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { unsigned int dwc_params; + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; void __iomem *addr = chip->regs + r * sizeof(u32); dwc_params = dma_read_byaddr(addr, DWC_PARAMS); From 96fc7a9cee671f10e14aaca44833696a71f1ebdb Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 30 Sep 2015 14:24:31 +0200 Subject: [PATCH 162/240] MIPS: Wire up userfaultfd and membarrier syscalls. Signed-off-by: Ralf Baechle --- arch/mips/include/uapi/asm/unistd.h | 18 ++++++++++++------ arch/mips/kernel/scall32-o32.S | 2 ++ arch/mips/kernel/scall64-64.S | 2 ++ arch/mips/kernel/scall64-n32.S | 2 ++ arch/mips/kernel/scall64-o32.S | 2 ++ 5 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index c03088f9f514..cfabadb135d9 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -377,16 +377,18 @@ #define __NR_memfd_create (__NR_Linux + 354) #define __NR_bpf (__NR_Linux + 355) #define __NR_execveat (__NR_Linux + 356) +#define __NR_userfaultfd (__NR_Linux + 357) +#define __NR_membarrier (__NR_Linux + 358) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 356 +#define __NR_Linux_syscalls 358 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 356 +#define __NR_O32_Linux_syscalls 358 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -711,16 +713,18 @@ #define __NR_memfd_create (__NR_Linux + 314) #define __NR_bpf (__NR_Linux + 315) #define __NR_execveat (__NR_Linux + 316) +#define __NR_userfaultfd (__NR_Linux + 317) +#define __NR_membarrier (__NR_Linux + 318) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 316 +#define __NR_Linux_syscalls 318 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 316 +#define __NR_64_Linux_syscalls 318 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1049,15 +1053,17 @@ #define __NR_memfd_create (__NR_Linux + 318) #define __NR_bpf (__NR_Linux + 319) #define __NR_execveat (__NR_Linux + 320) +#define __NR_userfaultfd (__NR_Linux + 321) +#define __NR_membarrier (__NR_Linux + 322) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 320 +#define __NR_Linux_syscalls 322 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 320 +#define __NR_N32_Linux_syscalls 322 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 4cc13508d967..d06e30c1e4cd 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -599,3 +599,5 @@ EXPORT(sys_call_table) PTR sys_memfd_create PTR sys_bpf /* 4355 */ PTR sys_execveat + PTR sys_userfaultfd + PTR sys_membarrier diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index a6f6b762c47a..428c2cd2326c 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -436,4 +436,6 @@ EXPORT(sys_call_table) PTR sys_memfd_create PTR sys_bpf /* 5315 */ PTR sys_execveat + PTR sys_userfaultfd + PTR sys_membarrier .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 4b2010654c46..3868cf4c83df 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -429,4 +429,6 @@ EXPORT(sysn32_call_table) PTR sys_memfd_create PTR sys_bpf PTR compat_sys_execveat /* 6320 */ + PTR sys_userfaultfd + PTR sys_membarrier .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f543ff4feef9..6622980d82ea 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -584,4 +584,6 @@ EXPORT(sys32_call_table) PTR sys_memfd_create PTR sys_bpf /* 4355 */ PTR compat_sys_execveat + PTR sys_userfaultfd + PTR sys_membarrier .size sys32_call_table,.-sys32_call_table From 575f9c8604e0b4c7b36fb41fc5fd280a3c336906 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 29 Sep 2015 07:49:56 -0700 Subject: [PATCH 163/240] drm/vmwgfx: Fix a command submission hang regression When we're out of command buffer space, we turn on the command buffer processed irq without re-checking for finished command buffers afterwards. This might lead to a missed irq and the command submission process waiting forever for space. Fix this by rerunning the command buffer submission handler whenever we're out of command space. This ensures both that we don't needlessly turn on the irq, and that if we decide to turn on the irq, we recheck for finished command buffers before going to sleep. Reported-and-tested-by: Bryan Li Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 5ae8f921da2a..8a76821177a6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -681,6 +681,14 @@ static bool vmw_cmdbuf_try_alloc(struct vmw_cmdbuf_man *man, 0, 0, DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT); + if (ret) { + (void) vmw_cmdbuf_man_process(man); + ret = drm_mm_insert_node_generic(&man->mm, info->node, + info->page_size, 0, 0, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); + } + spin_unlock_bh(&man->lock); info->done = !ret; From 031277d4d33d33f0174fbb569ca8f68238175617 Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Wed, 30 Sep 2015 17:37:18 +0800 Subject: [PATCH 164/240] mmc: core: fix dead loop of mmc_retune When get a CRC error, start the mmc_retune, it will issue CMD19/CMD21 to do tune, assume there were 10 clock phase need to try, phase 0 to phase 6 is ok, phase 7 to phase 9 is NG, we try it from 0 to 9, so the last CMD19/CMD21 will get CRC error, host->need_retune was set and cause mmc_retune was called, then dead loop of mmc_retune Signed-off-by: Chaotian Jing Acked-by: Adrian Hunter Fixes: bd11e8bd03ca ("mmc: core: Flag re-tuning is needed on CRC errors") Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 0520064dc33b..a3eb20bdcd97 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -134,9 +134,11 @@ void mmc_request_done(struct mmc_host *host, struct mmc_request *mrq) int err = cmd->error; /* Flag re-tuning needed on CRC errors */ - if (err == -EILSEQ || (mrq->sbc && mrq->sbc->error == -EILSEQ) || + if ((cmd->opcode != MMC_SEND_TUNING_BLOCK && + cmd->opcode != MMC_SEND_TUNING_BLOCK_HS200) && + (err == -EILSEQ || (mrq->sbc && mrq->sbc->error == -EILSEQ) || (mrq->data && mrq->data->error == -EILSEQ) || - (mrq->stop && mrq->stop->error == -EILSEQ)) + (mrq->stop && mrq->stop->error == -EILSEQ))) mmc_retune_needed(host); if (err && cmd->retries && mmc_host_is_spi(host)) { From 4ad640e99e5e5514d623210bc937e665ffd8f43f Mon Sep 17 00:00:00 2001 From: Egbert Eich Date: Wed, 23 Sep 2015 16:13:00 +0200 Subject: [PATCH 165/240] drm: Add a non-locking version of drm_kms_helper_poll_enable(), v2 drm_kms_helper_poll_enable() was converted to lock the mode_config mutex in commit 8c4ccc4ab6f64e859d4ff8d7c02c2ed2e956e07f ("drm/probe-helper: Grab mode_config.mutex in poll_init/enable"). This disregarded the cases where this function is called from a context where this mutex is already locked. Add a non-locking version as well. Changes since v1: - use function name suffix '_locked' for the function that is to be called from a locked context. Signed-off-by: Egbert Eich Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_probe_helper.c | 19 ++++++++++++++++--- include/drm/drm_crtc_helper.h | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index d734780b31c0..a18164f2f6d2 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -94,7 +94,18 @@ static int drm_helper_probe_add_cmdline_mode(struct drm_connector *connector) } #define DRM_OUTPUT_POLL_PERIOD (10*HZ) -static void __drm_kms_helper_poll_enable(struct drm_device *dev) +/** + * drm_kms_helper_poll_enable_locked - re-enable output polling. + * @dev: drm_device + * + * This function re-enables the output polling work without + * locking the mode_config mutex. + * + * This is like drm_kms_helper_poll_enable() however it is to be + * called from a context where the mode_config mutex is locked + * already. + */ +void drm_kms_helper_poll_enable_locked(struct drm_device *dev) { bool poll = false; struct drm_connector *connector; @@ -113,6 +124,8 @@ static void __drm_kms_helper_poll_enable(struct drm_device *dev) if (poll) schedule_delayed_work(&dev->mode_config.output_poll_work, DRM_OUTPUT_POLL_PERIOD); } +EXPORT_SYMBOL(drm_kms_helper_poll_enable_locked); + static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector, uint32_t maxX, uint32_t maxY, bool merge_type_bits) @@ -174,7 +187,7 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect /* Re-enable polling in case the global poll config changed. */ if (drm_kms_helper_poll != dev->mode_config.poll_running) - __drm_kms_helper_poll_enable(dev); + drm_kms_helper_poll_enable_locked(dev); dev->mode_config.poll_running = drm_kms_helper_poll; @@ -428,7 +441,7 @@ EXPORT_SYMBOL(drm_kms_helper_poll_disable); void drm_kms_helper_poll_enable(struct drm_device *dev) { mutex_lock(&dev->mode_config.mutex); - __drm_kms_helper_poll_enable(dev); + drm_kms_helper_poll_enable_locked(dev); mutex_unlock(&dev->mode_config.mutex); } EXPORT_SYMBOL(drm_kms_helper_poll_enable); diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 2a747a91fded..3febb4b9fce9 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -240,5 +240,6 @@ extern void drm_kms_helper_hotplug_event(struct drm_device *dev); extern void drm_kms_helper_poll_disable(struct drm_device *dev); extern void drm_kms_helper_poll_enable(struct drm_device *dev); +extern void drm_kms_helper_poll_enable_locked(struct drm_device *dev); #endif From b94be972538f4523f09243af7c726158f0bfae33 Mon Sep 17 00:00:00 2001 From: Egbert Eich Date: Wed, 23 Sep 2015 16:13:01 +0200 Subject: [PATCH 166/240] drm/i915: Call non-locking version of drm_kms_helper_poll_enable(), v2 drm_kms_helper_poll_enable() is called from a context in intel_hpd_irq_storm_disable() where the the mode_config mutex is already locked. When this function was converted to lock this mutex in commit 8c4ccc4ab6f6 ("drm/probe-helper: Grab mode_config.mutex in poll_init/enable") a deadlock occurred. Call the newly implemented non-locking version of this function. Changes since v1: - use function name suffix '_locked' for the function that is to be called from a locked context. Signed-off-by: Egbert Eich Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 53c0173a39fe..b17785719598 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -180,7 +180,7 @@ static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) /* Enable polling and queue hotplug re-enabling. */ if (hpd_disabled) { - drm_kms_helper_poll_enable(dev); + drm_kms_helper_poll_enable_locked(dev); mod_delayed_work(system_wq, &dev_priv->hotplug.reenable_work, msecs_to_jiffies(HPD_STORM_REENABLE_DELAY)); } From 53960059d56ecef67d4ddd546731623641a3d2d1 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 27 Mar 2015 08:33:43 +0000 Subject: [PATCH 167/240] MIPS: dma-default: Fix 32-bit fall back to GFP_DMA If there is a DMA zone (usually 24bit = 16MB I believe), but no DMA32 zone, as is the case for some 32-bit kernels, then massage_gfp_flags() will cause DMA memory allocated for devices with a 32..63-bit coherent_dma_mask to fall back to using __GFP_DMA, even though there may only be 32-bits of physical address available anyway. Correct that case to compare against a mask the size of phys_addr_t instead of always using a 64-bit mask. Signed-off-by: James Hogan Fixes: a2e715a86c6d ("MIPS: DMA: Fix computation of DMA flags from device's coherent_dma_mask.") Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: # 2.6.36+ Patchwork: https://patchwork.linux-mips.org/patch/9610/ Signed-off-by: Ralf Baechle --- arch/mips/mm/dma-default.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index a914dc1cb6d1..d8117be729a2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -100,7 +100,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) else #endif #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8)) dma_flag = __GFP_DMA; else #endif From 1e5fb282f8eda889776ee83f9214d5df9edaa26d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 5 Aug 2015 15:42:36 -0700 Subject: [PATCH 168/240] MIPS: CPS: Stop dangling delay slot from has_mt. The has_mt macro ended with a branch, leaving its callers with a delay slot that would be executed if Config3.MT is not set. However it would not be executed if Config3 (or earlier Config registers) don't exist which makes it somewhat inconsistent at best. Fill the delay slot in the macro & fix the mips_cps_boot_vpes caller appropriately. Signed-off-by: Paul Burton Cc: Markos Chandras Cc: James Hogan Cc: # 3.16+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10865/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 9f71c06aebf6..fa159aab252c 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -39,6 +39,7 @@ mfc0 \dest, CP0_CONFIG, 3 andi \dest, \dest, MIPS_CONF3_MT beqz \dest, \nomt + nop .endm .section .text.cps-vec @@ -226,7 +227,6 @@ LEAF(mips_cps_core_init) #ifdef CONFIG_MIPS_MT /* Check that the core implements the MT ASE */ has_mt t0, 3f - nop .set push .set mips64r2 @@ -310,8 +310,8 @@ LEAF(mips_cps_boot_vpes) PTR_ADDU t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ + li t9, 0 has_mt ta2, 1f - li t9, 0 /* Find the number of VPEs present in the core */ mfc0 t1, CP0_MVPCONF0 From a5b0f6db0e6cf6224e50f6585e9c8f0c2d38a8f8 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 5 Aug 2015 15:42:37 -0700 Subject: [PATCH 169/240] MIPS: CPS: Don't include MT code in non-MT kernels. The MT-specific code in mips_cps_boot_vpes can safely be omitted from kernels which don't support MT, with the default VPE==0 case being used as it would be after the has_mt (Config3.MT) check failed at runtime. Discarding the code entirely will save us a few bytes & allow cleaner handling of MT ASE instructions by later patches. Signed-off-by: Paul Burton Cc: Markos Chandras Cc: James Hogan Cc: # 3.16+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10866/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index fa159aab252c..57642f56dffb 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -311,6 +311,7 @@ LEAF(mips_cps_boot_vpes) /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ li t9, 0 +#ifdef CONFIG_MIPS_MT has_mt ta2, 1f /* Find the number of VPEs present in the core */ @@ -330,6 +331,7 @@ LEAF(mips_cps_boot_vpes) /* Retrieve the VPE ID from EBase.CPUNum */ mfc0 t9, $15, 1 and t9, t9, t1 +#endif 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE From 7a63076d9a31a6c2073da45021eeb4f89d2a8b56 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 5 Aug 2015 15:42:38 -0700 Subject: [PATCH 170/240] MIPS: CPS: #ifdef on CONFIG_MIPS_MT_SMP rather than CONFIG_MIPS_MT The CONFIG_MIPS_MT symbol can be selected by CONFIG_MIPS_VPE_LOADER in addition to CONFIG_MIPS_MT_SMP. We only want MT code in the CPS SMP boot vector if we're using MT for SMP. Thus switch the config symbol we ifdef against to CONFIG_MIPS_MT_SMP. Signed-off-by: Paul Burton Cc: Markos Chandras Cc: James Hogan Cc: # 3.16+ Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10867/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 57642f56dffb..209ded16806b 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -224,7 +224,7 @@ LEAF(excep_ejtag) END(excep_ejtag) LEAF(mips_cps_core_init) -#ifdef CONFIG_MIPS_MT +#ifdef CONFIG_MIPS_MT_SMP /* Check that the core implements the MT ASE */ has_mt t0, 3f @@ -311,7 +311,7 @@ LEAF(mips_cps_boot_vpes) /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ li t9, 0 -#ifdef CONFIG_MIPS_MT +#ifdef CONFIG_MIPS_MT_SMP has_mt ta2, 1f /* Find the number of VPEs present in the core */ @@ -339,7 +339,7 @@ LEAF(mips_cps_boot_vpes) PTR_L ta3, COREBOOTCFG_VPECONFIG(t0) PTR_ADDU v0, v0, ta3 -#ifdef CONFIG_MIPS_MT +#ifdef CONFIG_MIPS_MT_SMP /* If the core doesn't support MT then return */ bnez ta2, 1f @@ -453,7 +453,7 @@ LEAF(mips_cps_boot_vpes) 2: .set pop -#endif /* CONFIG_MIPS_MT */ +#endif /* CONFIG_MIPS_MT_SMP */ /* Return */ jr ra From 2392eec65c493c3f49a1f23f4af713e3c68cf6f5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 29 Sep 2015 10:35:45 -0400 Subject: [PATCH 171/240] drm/amdgpu/cgs: remove import_gpu_mem It was added for completeness, but we don't have any users for it yet. Daniel noted that it may be racy. Remove it. Change-Id: I5f5546f8911a4f294008a62dc86a73f3face38d1 Reviewed-by: Daniel Vetter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 39 ------------------------- drivers/gpu/drm/amd/include/cgs_linux.h | 17 ----------- 2 files changed, 56 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 1c3fc99c5465..8e995148f56e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -208,44 +208,6 @@ static int amdgpu_cgs_alloc_gpu_mem(void *cgs_device, return ret; } -static int amdgpu_cgs_import_gpu_mem(void *cgs_device, int dmabuf_fd, - cgs_handle_t *handle) -{ - CGS_FUNC_ADEV; - int r; - uint32_t dma_handle; - struct drm_gem_object *obj; - struct amdgpu_bo *bo; - struct drm_device *dev = adev->ddev; - struct drm_file *file_priv = NULL, *priv; - - mutex_lock(&dev->struct_mutex); - list_for_each_entry(priv, &dev->filelist, lhead) { - rcu_read_lock(); - if (priv->pid == get_pid(task_pid(current))) - file_priv = priv; - rcu_read_unlock(); - if (file_priv) - break; - } - mutex_unlock(&dev->struct_mutex); - r = dev->driver->prime_fd_to_handle(dev, - file_priv, dmabuf_fd, - &dma_handle); - spin_lock(&file_priv->table_lock); - - /* Check if we currently have a reference on the object */ - obj = idr_find(&file_priv->object_idr, dma_handle); - if (obj == NULL) { - spin_unlock(&file_priv->table_lock); - return -EINVAL; - } - spin_unlock(&file_priv->table_lock); - bo = gem_to_amdgpu_bo(obj); - *handle = (cgs_handle_t)bo; - return 0; -} - static int amdgpu_cgs_free_gpu_mem(void *cgs_device, cgs_handle_t handle) { struct amdgpu_bo *obj = (struct amdgpu_bo *)handle; @@ -810,7 +772,6 @@ static const struct cgs_ops amdgpu_cgs_ops = { }; static const struct cgs_os_ops amdgpu_cgs_os_ops = { - amdgpu_cgs_import_gpu_mem, amdgpu_cgs_add_irq_source, amdgpu_cgs_irq_get, amdgpu_cgs_irq_put diff --git a/drivers/gpu/drm/amd/include/cgs_linux.h b/drivers/gpu/drm/amd/include/cgs_linux.h index 488642f08267..3b47ae313e36 100644 --- a/drivers/gpu/drm/amd/include/cgs_linux.h +++ b/drivers/gpu/drm/amd/include/cgs_linux.h @@ -26,19 +26,6 @@ #include "cgs_common.h" -/** - * cgs_import_gpu_mem() - Import dmabuf handle - * @cgs_device: opaque device handle - * @dmabuf_fd: DMABuf file descriptor - * @handle: memory handle (output) - * - * Must be called in the process context that dmabuf_fd belongs to. - * - * Return: 0 on success, -errno otherwise - */ -typedef int (*cgs_import_gpu_mem_t)(void *cgs_device, int dmabuf_fd, - cgs_handle_t *handle); - /** * cgs_irq_source_set_func() - Callback for enabling/disabling interrupt sources * @private_data: private data provided to cgs_add_irq_source @@ -114,16 +101,12 @@ typedef int (*cgs_irq_get_t)(void *cgs_device, unsigned src_id, unsigned type); typedef int (*cgs_irq_put_t)(void *cgs_device, unsigned src_id, unsigned type); struct cgs_os_ops { - cgs_import_gpu_mem_t import_gpu_mem; - /* IRQ handling */ cgs_add_irq_source_t add_irq_source; cgs_irq_get_t irq_get; cgs_irq_put_t irq_put; }; -#define cgs_import_gpu_mem(dev,dmabuf_fd,handle) \ - CGS_OS_CALL(import_gpu_mem,dev,dmabuf_fd,handle) #define cgs_add_irq_source(dev,src_id,num_types,set,handler,private_data) \ CGS_OS_CALL(add_irq_source,dev,src_id,num_types,set,handler, \ private_data) From 3e4e3805643445b71b7ee1b84892e43d004e24e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 10 Sep 2015 15:00:39 +0200 Subject: [PATCH 172/240] drm/amdgpu: only print meaningful VM faults MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port of radeon commit 9b7d786b900baf7c0d1a7e211570aef1cb27590f. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 774528ab8704..fab5471d25d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -1262,6 +1262,12 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); + /* reset addr and status */ + WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1); + + if (!addr && !status) + return 0; + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", entry->src_id, entry->src_data); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", @@ -1269,8 +1275,6 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client); - /* reset addr and status */ - WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9a07742620d0..7bc9e9fcf3d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1262,6 +1262,12 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); mc_client = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_MCCLIENT); + /* reset addr and status */ + WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1); + + if (!addr && !status) + return 0; + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", entry->src_id, entry->src_data); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", @@ -1269,8 +1275,6 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client); - /* reset addr and status */ - WREG32_P(mmVM_CONTEXT1_CNTL2, 1, ~1); return 0; } From b9a532277938798b53178d5a66af6e2915cb27cf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Sep 2015 12:48:40 -0400 Subject: [PATCH 173/240] Initialize msg/shm IPC objects before doing ipc_addid() As reported by Dmitry Vyukov, we really shouldn't do ipc_addid() before having initialized the IPC object state. Yes, we initialize the IPC object in a locked state, but with all the lockless RCU lookup work, that IPC object lock no longer means that the state cannot be seen. We already did this for the IPC semaphore code (see commit e8577d1f0329: "ipc/sem.c: fully initialize sem_array before making it visible") but we clearly forgot about msg and shm. Reported-by: Dmitry Vyukov Cc: Manfred Spraul Cc: Davidlohr Bueso Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- ipc/msg.c | 14 +++++++------- ipc/shm.c | 13 +++++++------ ipc/util.c | 8 ++++---- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index 66c4f567eb73..1471db9a7e61 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -137,13 +137,6 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) return retval; } - /* ipc_addid() locks msq upon success. */ - id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); - if (id < 0) { - ipc_rcu_putref(msq, msg_rcu_free); - return id; - } - msq->q_stime = msq->q_rtime = 0; msq->q_ctime = get_seconds(); msq->q_cbytes = msq->q_qnum = 0; @@ -153,6 +146,13 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); + /* ipc_addid() locks msq upon success. */ + id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); + if (id < 0) { + ipc_rcu_putref(msq, msg_rcu_free); + return id; + } + ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); diff --git a/ipc/shm.c b/ipc/shm.c index 222131e8e38f..41787276e141 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -551,12 +551,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); - if (id < 0) { - error = id; - goto no_id; - } - shp->shm_cprid = task_tgid_vnr(current); shp->shm_lprid = 0; shp->shm_atim = shp->shm_dtim = 0; @@ -565,6 +559,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_nattch = 0; shp->shm_file = file; shp->shm_creator = current; + + id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); + if (id < 0) { + error = id; + goto no_id; + } + list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); /* diff --git a/ipc/util.c b/ipc/util.c index be4230020a1f..0f401d94b7c6 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -237,6 +237,10 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) rcu_read_lock(); spin_lock(&new->lock); + current_euid_egid(&euid, &egid); + new->cuid = new->uid = euid; + new->gid = new->cgid = egid; + id = idr_alloc(&ids->ipcs_idr, new, (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0, GFP_NOWAIT); @@ -249,10 +253,6 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) ids->in_use++; - current_euid_egid(&euid, &egid); - new->cuid = new->uid = euid; - new->gid = new->cgid = egid; - if (next_id < 0) { new->seq = ids->seq++; if (ids->seq > IPCID_SEQ_MAX) From eddd3826a1a0190e5235703d1e666affa4d13b96 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Sep 2015 08:38:22 +0000 Subject: [PATCH 174/240] x86/process: Add proper bound checks in 64bit get_wchan() Dmitry Vyukov reported the following using trinity and the memory error detector AddressSanitizer (https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel). [ 124.575597] ERROR: AddressSanitizer: heap-buffer-overflow on address ffff88002e280000 [ 124.576801] ffff88002e280000 is located 131938492886538 bytes to the left of 28857600-byte region [ffffffff81282e0a, ffffffff82e0830a) [ 124.578633] Accessed by thread T10915: [ 124.579295] inlined in describe_heap_address ./arch/x86/mm/asan/report.c:164 [ 124.579295] #0 ffffffff810dd277 in asan_report_error ./arch/x86/mm/asan/report.c:278 [ 124.580137] #1 ffffffff810dc6a0 in asan_check_region ./arch/x86/mm/asan/asan.c:37 [ 124.581050] #2 ffffffff810dd423 in __tsan_read8 ??:0 [ 124.581893] #3 ffffffff8107c093 in get_wchan ./arch/x86/kernel/process_64.c:444 The address checks in the 64bit implementation of get_wchan() are wrong in several ways: - The lower bound of the stack is not the start of the stack page. It's the start of the stack page plus sizeof (struct thread_info) - The upper bound must be: top_of_stack - TOP_OF_KERNEL_STACK_PADDING - 2 * sizeof(unsigned long). The 2 * sizeof(unsigned long) is required because the stack pointer points at the frame pointer. The layout on the stack is: ... IP FP ... IP FP. So we need to make sure that both IP and FP are in the bounds. Fix the bound checks and get rid of the mix of numeric constants, u64 and unsigned long. Making all unsigned long allows us to use the same function for 32bit as well. Use READ_ONCE() when accessing the stack. This does not prevent a concurrent wakeup of the task and the stack changing, but at least it avoids TOCTOU. Also check task state at the end of the loop. Again that does not prevent concurrent changes, but it avoids walking for nothing. Add proper comments while at it. Reported-by: Dmitry Vyukov Reported-by: Sasha Levin Based-on-patch-from: Wolfram Gloger Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andrey Konovalov Cc: Kostya Serebryany Cc: Alexander Potapenko Cc: kasan-dev Cc: Denys Vlasenko Cc: Andi Kleen Cc: Wolfram Gloger Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20150930083302.694788319@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process_64.c | 52 +++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3c1bbcf12924..f1fd0889e8af 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -499,27 +499,59 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ unsigned long get_wchan(struct task_struct *p) { - unsigned long stack; - u64 fp, ip; + unsigned long start, bottom, top, sp, fp, ip; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + + start = (unsigned long)task_stack_page(p); + if (!start) return 0; - fp = *(u64 *)(p->thread.sp); + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) + return 0; + + fp = READ_ONCE(*(unsigned long *)sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < bottom || fp > top) return 0; - ip = *(u64 *)(fp+8); + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } From 7ba78053aacb89998a052843e3c56983c31d57f0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Sep 2015 08:38:23 +0000 Subject: [PATCH 175/240] x86/process: Unify 32bit and 64bit implementations of get_wchan() The stack layout and the functionality is identical. Use the 64bit version for all of x86. Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andrey Konovalov Cc: Kostya Serebryany Cc: Alexander Potapenko Cc: kasan-dev Cc: Denys Vlasenko Cc: Andi Kleen Cc: Sasha Levin Cc: Wolfram Gloger Link: http://lkml.kernel.org/r/20150930083302.779694618@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 55 +++++++++++++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 28 ------------------ arch/x86/kernel/process_64.c | 56 ------------------------------------ 3 files changed, 55 insertions(+), 84 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6d0e62ae8516..39e585a554b7 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -506,3 +506,58 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long start, bottom, top, sp, fp, ip; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) + return 0; + + fp = READ_ONCE(*(unsigned long *)sp); + do { + if (fp < bottom || fp > top) + return 0; + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); + if (!in_sched_functions(ip)) + return ip; + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); + return 0; +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c13df2c735f8..737527b40e5b 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -324,31 +324,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) return prev_p; } - -#define top_esp (THREAD_SIZE - sizeof(unsigned long)) -#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) - -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long bp, sp, ip; - unsigned long stack_page; - int count = 0; - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - stack_page = (unsigned long)task_stack_page(p); - sp = p->thread.sp; - if (!stack_page || sp < stack_page || sp > top_esp+stack_page) - return 0; - /* include/asm-i386/system.h:switch_to() pushes bp last. */ - bp = *(unsigned long *) sp; - do { - if (bp < stack_page || bp > top_ebp+stack_page) - return 0; - ip = *(unsigned long *) (bp+4); - if (!in_sched_functions(ip)) - return ip; - bp = *(unsigned long *) bp; - } while (count++ < 16); - return 0; -} - diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index f1fd0889e8af..b35921a670b2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -499,62 +499,6 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); -/* - * Called from fs/proc with a reference on @p to find the function - * which called into schedule(). This needs to be done carefully - * because the task might wake up and we might look at a stack - * changing under us. - */ -unsigned long get_wchan(struct task_struct *p) -{ - unsigned long start, bottom, top, sp, fp, ip; - int count = 0; - - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - - start = (unsigned long)task_stack_page(p); - if (!start) - return 0; - - /* - * Layout of the stack page: - * - * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) - * PADDING - * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING - * stack - * ----------- bottom = start + sizeof(thread_info) - * thread_info - * ----------- start - * - * The tasks stack pointer points at the location where the - * framepointer is stored. The data on the stack is: - * ... IP FP ... IP FP - * - * We need to read FP and IP, so we need to adjust the upper - * bound by another unsigned long. - */ - top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; - top -= 2 * sizeof(unsigned long); - bottom = start + sizeof(struct thread_info); - - sp = READ_ONCE(p->thread.sp); - if (sp < bottom || sp > top) - return 0; - - fp = READ_ONCE(*(unsigned long *)sp); - do { - if (fp < bottom || fp > top) - return 0; - ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); - if (!in_sched_functions(ip)) - return ip; - fp = READ_ONCE(*(unsigned long *)fp); - } while (count++ < 16 && p->state != TASK_RUNNING); - return 0; -} - long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { int ret = 0; From a1cf09031e641d3cceaca4a4dd20ef6a785bc9b3 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 15 Sep 2015 15:36:00 +0200 Subject: [PATCH 176/240] dmaengine: at_xdmac: change block increment addressing mode The addressing mode we were using was not only incrementing the address at each microblock, but also at each data boundary, which was severely slowing the transfer, without any benefit since we were not using the data stride. Switch to the micro block increment only in order to get back to an acceptable performance level. Signed-off-by: Maxime Ripard Signed-off-by: Ludovic Desroches Fixes: 6007ccb57744 ("dmaengine: xdmac: Add interleaved transfer support") Cc: stable@vger.kernel.org #4.2 Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index a165b4bfd330..ffea6027b631 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -875,14 +875,14 @@ at_xdmac_interleaved_queue_desc(struct dma_chan *chan, if (xt->src_inc) { if (xt->src_sgl) - chan_cc |= AT_XDMAC_CC_SAM_UBS_DS_AM; + chan_cc |= AT_XDMAC_CC_SAM_UBS_AM; else chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; } if (xt->dst_inc) { if (xt->dst_sgl) - chan_cc |= AT_XDMAC_CC_DAM_UBS_DS_AM; + chan_cc |= AT_XDMAC_CC_DAM_UBS_AM; else chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; } From 0be2136b67067617b36c70e525d7534108361e36 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Tue, 15 Sep 2015 15:39:11 +0200 Subject: [PATCH 177/240] dmaengine: at_xdmac: clean used descriptor When putting back a descriptor to the free descs list, some fields are not set to 0, it can cause bugs if someone uses it without having this in mind. Descriptor are not put back one by one so it is easier to clean descriptors when we request them. Signed-off-by: Ludovic Desroches Cc: stable@vger.kernel.org #4.2 Signed-off-by: Vinod Koul --- drivers/dma/at_xdmac.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index ffea6027b631..dd24375b76dd 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -455,6 +455,15 @@ static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan, return desc; } +void at_xdmac_init_used_desc(struct at_xdmac_desc *desc) +{ + memset(&desc->lld, 0, sizeof(desc->lld)); + INIT_LIST_HEAD(&desc->descs_list); + desc->direction = DMA_TRANS_NONE; + desc->xfer_size = 0; + desc->active_xfer = false; +} + /* Call must be protected by lock. */ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) { @@ -466,7 +475,7 @@ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) desc = list_first_entry(&atchan->free_descs_list, struct at_xdmac_desc, desc_node); list_del(&desc->desc_node); - desc->active_xfer = false; + at_xdmac_init_used_desc(desc); } return desc; From aa3ee5f569fda51e54c224c0df60e187e9c5e582 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 26 Sep 2015 17:15:47 +0800 Subject: [PATCH 178/240] dmaengine: zxdma: Fix off-by-one for testing valid pchan request The valid pchan range is 0 ~ d->dma_requests - 1. Signed-off-by: Axel Lin Reviewed-by: Jun Nie Signed-off-by: Vinod Koul --- drivers/dma/zx296702_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/zx296702_dma.c b/drivers/dma/zx296702_dma.c index 39915a6b7986..c017fcd8e07c 100644 --- a/drivers/dma/zx296702_dma.c +++ b/drivers/dma/zx296702_dma.c @@ -739,7 +739,7 @@ static struct dma_chan *zx_of_dma_simple_xlate(struct of_phandle_args *dma_spec, struct dma_chan *chan; struct zx_dma_chan *c; - if (request > d->dma_requests) + if (request >= d->dma_requests) return NULL; chan = dma_get_any_slave_channel(&d->slave); From e87ffbdf06971a80ad2a11217200bdd936195af1 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Wed, 30 Sep 2015 19:42:14 +0200 Subject: [PATCH 179/240] dmaengine: pxa_dma: fix the no-requestor case A very small number of devices don't use the flow control offered by requestor lines. In these specific cases, the pxa dma driver should be aware of that and not try to use a requestor line. Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul --- drivers/dma/pxa_dma.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index c6723ecd5848..064a8266a3b7 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -473,8 +473,10 @@ static void pxad_free_phy(struct pxad_chan *chan) return; /* clear the channel mapping in DRCMR */ - reg = pxad_drcmr(chan->drcmr); - writel_relaxed(0, chan->phy->base + reg); + if (chan->drcmr <= DRCMR_CHLNUM) { + reg = pxad_drcmr(chan->drcmr); + writel_relaxed(0, chan->phy->base + reg); + } spin_lock_irqsave(&pdev->phy_lock, flags); for (i = 0; i < 32; i++) @@ -516,8 +518,10 @@ static void phy_enable(struct pxad_phy *phy, bool misaligned) "%s(); phy=%p(%d) misaligned=%d\n", __func__, phy, phy->idx, misaligned); - reg = pxad_drcmr(phy->vchan->drcmr); - writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + if (phy->vchan->drcmr <= DRCMR_CHLNUM) { + reg = pxad_drcmr(phy->vchan->drcmr); + writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + } dalgn = phy_readl_relaxed(phy, DALGN); if (misaligned) @@ -911,14 +915,18 @@ static void pxad_get_config(struct pxad_chan *chan, width = chan->cfg.src_addr_width; dev_addr = chan->cfg.src_addr; *dev_src = dev_addr; - *dcmd |= PXA_DCMD_INCTRGADDR | PXA_DCMD_FLOWSRC; + *dcmd |= PXA_DCMD_INCTRGADDR; + if (chan->drcmr <= DRCMR_CHLNUM) + *dcmd |= PXA_DCMD_FLOWSRC; } if (dir == DMA_MEM_TO_DEV) { maxburst = chan->cfg.dst_maxburst; width = chan->cfg.dst_addr_width; dev_addr = chan->cfg.dst_addr; *dev_dst = dev_addr; - *dcmd |= PXA_DCMD_INCSRCADDR | PXA_DCMD_FLOWTRG; + *dcmd |= PXA_DCMD_INCSRCADDR; + if (chan->drcmr <= DRCMR_CHLNUM) + *dcmd |= PXA_DCMD_FLOWTRG; } if (dir == DMA_MEM_TO_MEM) *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | From 7b09a1bba4091a9d208481d7831682a1f3061ab9 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Wed, 30 Sep 2015 19:42:15 +0200 Subject: [PATCH 180/240] dmaengine: pxa_dma: fix residue corner case A very tiny temporal window exists in the residue calculation where : - upon entering residue calculation, the transfer is ongoing - when reading the current transfer pointer, it just changed to the "finisher/linker" descriptor In this case, the residue returned is the whole transfer length instead of 0. Fix it. This appears almost in one extreme case, where the driver is used by older clients which inquire for residue in interrupt context, such as the smsc91x ethernet driver, in a tight loop : interrupt_handler() dmaengine_submit() do { dmaengine_tx_status() } while (residue > 0 || status != DMA_ERROR) Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul --- drivers/dma/pxa_dma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 064a8266a3b7..fc4156afa070 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -1186,6 +1186,16 @@ static unsigned int pxad_residue(struct pxad_chan *chan, else curr = phy_readl_relaxed(chan->phy, DTADR); + /* + * curr has to be actually read before checking descriptor + * completion, so that a curr inside a status updater + * descriptor implies the following test returns true, and + * preventing reordering of curr load and the test. + */ + rmb(); + if (is_desc_completed(vd)) + goto out; + for (i = 0; i < sw_desc->nb_desc - 1; i++) { hw_desc = sw_desc->hw_desc[i]; if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) From 8d0d94015e96b8853c4f7f06eac3f269e1b3d866 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabiano=20Fid=C3=AAncio?= Date: Thu, 24 Sep 2015 15:18:34 +0200 Subject: [PATCH 181/240] drm/qxl: recreate the primary surface when the bo is not primary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When disabling/enabling a crtc the primary area must be updated independently of which crtc has been disabled/enabled. Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1264735 Signed-off-by: Fabiano Fidêncio Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/qxl/qxl_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index dd845f82cc24..4649bd2ed340 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -618,7 +618,7 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc, adjusted_mode->hdisplay, adjusted_mode->vdisplay); - if (qcrtc->index == 0) + if (bo->is_primary == false) recreate_primary = true; if (bo->surf.stride * bo->surf.height > qdev->vram_size) { From 148ba09c465cc54d8e68f041bf9a30332b315c39 Mon Sep 17 00:00:00 2001 From: Inki Dae Date: Wed, 30 Sep 2015 20:12:29 +0900 Subject: [PATCH 182/240] drm/exynos: dp: remove suspend/resume functions This patch removes unnecessary pm suspend/resume functions. All kms sub drivers will be controlled by top of Exynos drm driver and connector dpms so these sub drivers shouldn't have their own pm interfaces. Signed-off-by: Inki Dae Reviewed-by: Gustavo Padovan --- drivers/gpu/drm/exynos/exynos_dp_core.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_dp_core.c b/drivers/gpu/drm/exynos/exynos_dp_core.c index d66ade0efac8..124fb9a56f02 100644 --- a/drivers/gpu/drm/exynos/exynos_dp_core.c +++ b/drivers/gpu/drm/exynos/exynos_dp_core.c @@ -1383,28 +1383,6 @@ static int exynos_dp_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int exynos_dp_suspend(struct device *dev) -{ - struct exynos_dp_device *dp = dev_get_drvdata(dev); - - exynos_dp_disable(&dp->encoder); - return 0; -} - -static int exynos_dp_resume(struct device *dev) -{ - struct exynos_dp_device *dp = dev_get_drvdata(dev); - - exynos_dp_enable(&dp->encoder); - return 0; -} -#endif - -static const struct dev_pm_ops exynos_dp_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(exynos_dp_suspend, exynos_dp_resume) -}; - static const struct of_device_id exynos_dp_match[] = { { .compatible = "samsung,exynos5-dp" }, {}, @@ -1417,7 +1395,6 @@ struct platform_driver dp_driver = { .driver = { .name = "exynos-dp", .owner = THIS_MODULE, - .pm = &exynos_dp_pm_ops, .of_match_table = exynos_dp_match, }, }; From 3c79fb8c9424a24bf812b9a8cb4f19b781052b0b Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Wed, 30 Sep 2015 18:40:54 -0300 Subject: [PATCH 183/240] drm/exynos: fimd: actually disable dp clock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fimd_dp_clock_enable() was setting the always to enabled, this patch fix this to actually use the value that is set to 'val'. Reported-by: Emilio López Signed-off-by: Gustavo Padovan Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index 03ad549e7698..3d1aba67758b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -871,7 +871,7 @@ static void fimd_dp_clock_enable(struct exynos_drm_crtc *crtc, bool enable) return; val = enable ? DP_MIE_CLK_DP_ENABLE : DP_MIE_CLK_DISABLE; - writel(DP_MIE_CLK_DP_ENABLE, ctx->regs + DP_MIE_CLKCON); + writel(val, ctx->regs + DP_MIE_CLKCON); } static const struct exynos_drm_crtc_ops fimd_crtc_ops = { From cda374253f862bd0f43edda6935a48294fe8ed3e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 1 Oct 2015 14:21:40 +0900 Subject: [PATCH 184/240] drm/exynos: Staticize local function in exynos_drm_gem.c The exynos_drm_gem_mmap_buffer() is not used outside so make it static. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 048ad13e5d82..407afedb6003 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -307,7 +307,7 @@ void exynos_drm_gem_put_dma_addr(struct drm_device *dev, drm_gem_object_unreference_unlocked(obj); } -int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem_obj *exynos_gem_obj, +static int exynos_drm_gem_mmap_buffer(struct exynos_drm_gem_obj *exynos_gem_obj, struct vm_area_struct *vma) { struct drm_device *drm_dev = exynos_gem_obj->base.dev; From 5b235dc2647e4977b17b5c41d959d0f455831c3f Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Fri, 4 Sep 2015 14:29:16 +0200 Subject: [PATCH 185/240] MIPS: Fix the build on jz4740 after removing the custom gpio.h Somehow the wrong version of the patch to remove the use of custom gpio.h on mips has been merged. This patch add the missing fixes for a build error on jz4740 because linux/gpio.h doesn't provide any machine specfics definitions anymore. Signed-off-by: Alban Bedel Cc: Paul Burton Cc: Lars-Peter Clausen Cc: Brian Norris Cc: Thomas Gleixner Cc: Linus Walleij Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11089/ Signed-off-by: Ralf Baechle --- arch/mips/jz4740/board-qi_lb60.c | 1 + arch/mips/jz4740/gpio.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c index 4e62bf85d0b0..459cb017306c 100644 --- a/arch/mips/jz4740/board-qi_lb60.c +++ b/arch/mips/jz4740/board-qi_lb60.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c index a74e181058b0..8c6d76c9b2d6 100644 --- a/arch/mips/jz4740/gpio.c +++ b/arch/mips/jz4740/gpio.c @@ -28,6 +28,7 @@ #include #include +#include #define JZ4740_GPIO_BASE_A (32*0) #define JZ4740_GPIO_BASE_B (32*1) From f4829a9b7a61e159367350008a608b062c4f6840 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Sep 2015 21:01:50 +0200 Subject: [PATCH 186/240] blk-mq: fix racy updates of rq->errors blk_mq_complete_request may be a no-op if the request has already been completed by others means (e.g. a timeout or cancellation), but currently drivers have to set rq->errors before calling blk_mq_complete_request, which might leave us with the wrong error value. Add an error parameter to blk_mq_complete_request so that we can defer setting rq->errors until we known we won the race to complete the request. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- block/blk-mq.c | 12 ++++++------ drivers/block/loop.c | 11 +++++------ drivers/block/null_blk.c | 2 +- drivers/block/nvme-core.c | 16 +++++++--------- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 19 ++++++++++--------- drivers/scsi/scsi_lib.c | 2 +- include/linux/blk-mq.h | 2 +- 8 files changed, 32 insertions(+), 34 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 31c0c6259c4c..2306330530e8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -393,14 +393,16 @@ void __blk_mq_complete_request(struct request *rq) * Ends all I/O on a request. It does not handle partial completions. * The actual completion happens out-of-order, through a IPI handler. **/ -void blk_mq_complete_request(struct request *rq) +void blk_mq_complete_request(struct request *rq, int error) { struct request_queue *q = rq->q; if (unlikely(blk_should_fake_timeout(q))) return; - if (!blk_mark_rq_complete(rq)) + if (!blk_mark_rq_complete(rq)) { + rq->errors = error; __blk_mq_complete_request(rq); + } } EXPORT_SYMBOL(blk_mq_complete_request); @@ -616,10 +618,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, * If a request wasn't started before the queue was * marked dying, kill it here or it'll go unnoticed. */ - if (unlikely(blk_queue_dying(rq->q))) { - rq->errors = -EIO; - blk_mq_complete_request(rq); - } + if (unlikely(blk_queue_dying(rq->q))) + blk_mq_complete_request(rq, -EIO); return; } if (rq->cmd_flags & REQ_NO_TIMEOUT) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f9889b6bc02c..674f800a3b57 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1486,17 +1486,16 @@ static void loop_handle_cmd(struct loop_cmd *cmd) { const bool write = cmd->rq->cmd_flags & REQ_WRITE; struct loop_device *lo = cmd->rq->q->queuedata; - int ret = -EIO; + int ret = 0; - if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) + if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { + ret = -EIO; goto failed; + } ret = do_req_filebacked(lo, cmd->rq); - failed: - if (ret) - cmd->rq->errors = -EIO; - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(cmd->rq, ret ? -EIO : 0); } static void loop_queue_write_work(struct work_struct *work) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index a295b98c6bae..1c9e4fe5aa44 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -289,7 +289,7 @@ static inline void null_handle_cmd(struct nullb_cmd *cmd) case NULL_IRQ_SOFTIRQ: switch (queue_mode) { case NULL_Q_MQ: - blk_mq_complete_request(cmd->rq); + blk_mq_complete_request(cmd->rq, cmd->rq->errors); break; case NULL_Q_RQ: blk_complete_request(cmd->rq); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 30758bdf69ea..6f04771f1019 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -618,16 +618,15 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, spin_unlock_irqrestore(req->q->queue_lock, flags); return; } + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { if (cmd_rq->ctx == CMD_CTX_CANCELLED) - req->errors = -EINTR; - else - req->errors = status; + status = -EINTR; } else { - req->errors = nvme_error_status(status); + status = nvme_error_status(status); } - } else - req->errors = 0; + } + if (req->cmd_type == REQ_TYPE_DRV_PRIV) { u32 result = le32_to_cpup(&cqe->result); req->special = (void *)(uintptr_t)result; @@ -650,7 +649,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req); + blk_mq_complete_request(req, status); } /* length is in bytes. gfp flags indicates whether we may sleep. */ @@ -863,8 +862,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (ns && ns->ms && !blk_integrity_rq(req)) { if (!(ns->pi_type && ns->ms == 8) && req->cmd_type != REQ_TYPE_DRV_PRIV) { - req->errors = -EFAULT; - blk_mq_complete_request(req); + blk_mq_complete_request(req, -EFAULT); return BLK_MQ_RQ_QUEUE_OK; } } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index e93899cc6f60..6ca35495a5be 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -144,7 +144,7 @@ static void virtblk_done(struct virtqueue *vq) do { virtqueue_disable_cb(vq); while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { - blk_mq_complete_request(vbr->req); + blk_mq_complete_request(vbr->req, vbr->req->errors); req_done = true; } if (unlikely(virtqueue_is_broken(vq))) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 0823a96902f8..611170896b8c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1142,6 +1142,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) RING_IDX i, rp; unsigned long flags; struct blkfront_info *info = (struct blkfront_info *)dev_id; + int error; spin_lock_irqsave(&info->io_lock, flags); @@ -1182,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) continue; } - req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; + error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; switch (bret->operation) { case BLKIF_OP_DISCARD: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; queue_flag_clear(QUEUE_FLAG_DISCARD, rq); queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq); } - blk_mq_complete_request(req); + blk_mq_complete_request(req, error); break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { printk(KERN_WARNING "blkfront: %s: %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; } if (unlikely(bret->status == BLKIF_RSP_ERROR && info->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", info->gd->disk_name, op_name(bret->operation)); - req->errors = -EOPNOTSUPP; + error = -EOPNOTSUPP; } - if (unlikely(req->errors)) { - if (req->errors == -EOPNOTSUPP) - req->errors = 0; + if (unlikely(error)) { + if (error == -EOPNOTSUPP) + error = 0; info->feature_flush = 0; xlvbd_flush(info); } @@ -1223,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - blk_mq_complete_request(req); + blk_mq_complete_request(req, error); break; default: BUG(); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cbfc5990052b..126a48c6431e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1957,7 +1957,7 @@ static int scsi_mq_prep_fn(struct request *req) static void scsi_mq_done(struct scsi_cmnd *cmd) { trace_scsi_dispatch_cmd_done(cmd); - blk_mq_complete_request(cmd->request); + blk_mq_complete_request(cmd->request, cmd->request->errors); } static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b80ba4572a31..c1b5c867ff07 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -214,7 +214,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_abort_requeue_list(struct request_queue *q); -void blk_mq_complete_request(struct request *rq); +void blk_mq_complete_request(struct request *rq, int error); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); From 0bf6cd5b9531bcc29c0a5e504b6ce2984c6fd8d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Sep 2015 21:01:51 +0200 Subject: [PATCH 187/240] blk-mq: factor out a helper to iterate all tags for a request_queue And replace the blk_mq_tag_busy_iter with it - the driver use has been replaced with a new helper a while ago, and internal to the block we only need the new version. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 27 ++++++++++++++++++++------- block/blk-mq-tag.h | 2 ++ block/blk-mq.c | 14 +++----------- include/linux/blk-mq.h | 2 -- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 9115c6d59948..ed96474d75cb 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -471,17 +471,30 @@ void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, } EXPORT_SYMBOL(blk_mq_all_tag_busy_iter); -void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, void *priv) { - struct blk_mq_tags *tags = hctx->tags; + struct blk_mq_hw_ctx *hctx; + int i; + + + queue_for_each_hw_ctx(q, hctx, i) { + struct blk_mq_tags *tags = hctx->tags; + + /* + * If not software queues are currently mapped to this + * hardware queue, there's nothing to check + */ + if (!blk_mq_hw_queue_mapped(hctx)) + continue; + + if (tags->nr_reserved_tags) + bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true); + bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, + false); + } - if (tags->nr_reserved_tags) - bt_for_each(hctx, &tags->breserved_tags, 0, fn, priv, true); - bt_for_each(hctx, &tags->bitmap_tags, tags->nr_reserved_tags, fn, priv, - false); } -EXPORT_SYMBOL(blk_mq_tag_busy_iter); static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt) { diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 9eb2cf4f01cb..d468a79f2c4a 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -58,6 +58,8 @@ extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag); extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool); +void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, + void *priv); enum { BLK_MQ_TAG_CACHE_MIN = 1, diff --git a/block/blk-mq.c b/block/blk-mq.c index 2306330530e8..7785ae96267a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -641,24 +641,16 @@ static void blk_mq_rq_timer(unsigned long priv) .next = 0, .next_set = 0, }; - struct blk_mq_hw_ctx *hctx; int i; - queue_for_each_hw_ctx(q, hctx, i) { - /* - * If not software queues are currently mapped to this - * hardware queue, there's nothing to check - */ - if (!blk_mq_hw_queue_mapped(hctx)) - continue; - - blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data); - } + blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data); if (data.next_set) { data.next = blk_rq_timeout(round_jiffies_up(data.next)); mod_timer(&q->timeout, data.next); } else { + struct blk_mq_hw_ctx *hctx; + queue_for_each_hw_ctx(q, hctx, i) { /* the hctx may be unmapped, so check it here */ if (blk_mq_hw_queue_mapped(hctx)) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c1b5c867ff07..5e7d43ab61c0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -223,8 +223,6 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, - void *priv); void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); From 95c2b17534654829db428f11bcf4297c059a2a7e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 26 Sep 2015 12:23:56 +0100 Subject: [PATCH 188/240] genirq: Fix race in register_irq_proc() Per-IRQ directories in procfs are created only when a handler is first added to the irqdesc, not when the irqdesc is created. In the case of a shared IRQ, multiple tasks can race to create a directory. This race condition seems to have been present forever, but is easier to hit with async probing. Signed-off-by: Ben Hutchings Link: http://lkml.kernel.org/r/1443266636.2004.2.camel@decadent.org.uk Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- kernel/irq/proc.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index e3a8c9577ba6..a50ddc9417ff 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "internals.h" @@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_MUTEX(register_lock); char name [MAX_NAMELEN]; - if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) return; + /* + * irq directories are registered only when a handler is + * added, not when the descriptor is created, so multiple + * tasks might try to register at the same time. + */ + mutex_lock(®ister_lock); + + if (desc->dir) + goto out_unlock; + memset(name, 0, MAX_NAMELEN); sprintf(name, "%d", irq); /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - return; + goto out_unlock; #ifdef CONFIG_SMP /* create /proc/irq//smp_affinity */ @@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("spurious", 0444, desc->dir, &irq_spurious_proc_fops, (void *)(long)irq); + +out_unlock: + mutex_unlock(®ister_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) From a5caa209ba9c29c6421292e7879d2387a2ef39c9 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 25 Sep 2015 23:02:18 +0100 Subject: [PATCH 189/240] x86/efi: Fix boot crash by mapping EFI memmap entries bottom-up at runtime, instead of top-down Beginning with UEFI v2.5 EFI_PROPERTIES_TABLE was introduced that signals that the firmware PE/COFF loader supports splitting code and data sections of PE/COFF images into separate EFI memory map entries. This allows the kernel to map those regions with strict memory protections, e.g. EFI_MEMORY_RO for code, EFI_MEMORY_XP for data, etc. Unfortunately, an unwritten requirement of this new feature is that the regions need to be mapped with the same offsets relative to each other as observed in the EFI memory map. If this is not done crashes like this may occur, BUG: unable to handle kernel paging request at fffffffefe6086dd IP: [] 0xfffffffefe6086dd Call Trace: [] efi_call+0x7e/0x100 [] ? virt_efi_set_variable+0x61/0x90 [] efi_delete_dummy_variable+0x63/0x70 [] efi_enter_virtual_mode+0x383/0x392 [] start_kernel+0x38a/0x417 [] x86_64_start_reservations+0x2a/0x2c [] x86_64_start_kernel+0xeb/0xef Here 0xfffffffefe6086dd refers to an address the firmware expects to be mapped but which the OS never claimed was mapped. The issue is that included in these regions are relative addresses to other regions which were emitted by the firmware toolchain before the "splitting" of sections occurred at runtime. Needless to say, we don't satisfy this unwritten requirement on x86_64 and instead map the EFI memory map entries in reverse order. The above crash is almost certainly triggerable with any kernel newer than v3.13 because that's when we rewrote the EFI runtime region mapping code, in commit d2f7cbe7b26a ("x86/efi: Runtime services virtual mapping"). For kernel versions before v3.13 things may work by pure luck depending on the fragmentation of the kernel virtual address space at the time we map the EFI regions. Instead of mapping the EFI memory map entries in reverse order, where entry N has a higher virtual address than entry N+1, map them in the same order as they appear in the EFI memory map to preserve this relative offset between regions. This patch has been kept as small as possible with the intention that it should be applied aggressively to stable and distribution kernels. It is very much a bugfix rather than support for a new feature, since when EFI_PROPERTIES_TABLE is enabled we must map things as outlined above to even boot - we have no way of asking the firmware not to split the code/data regions. In fact, this patch doesn't even make use of the more strict memory protections available in UEFI v2.5. That will come later. Suggested-by: Ard Biesheuvel Reported-by: Ard Biesheuvel Signed-off-by: Matt Fleming Cc: Cc: Borislav Petkov Cc: Chun-Yi Cc: Dave Young Cc: H. Peter Anvin Cc: James Bottomley Cc: Lee, Chun-Yi Cc: Leif Lindholm Cc: Linus Torvalds Cc: Matthew Garrett Cc: Mike Galbraith Cc: Peter Jones Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443218539-7610-2-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 67 ++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1db84c0758b7..6a28ded74211 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -704,6 +704,70 @@ static void *realloc_pages(void *old_memmap, int old_shift) return ret; } +/* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return memmap.map_end - memmap.desc_size; + + entry -= memmap.desc_size; + if (entry < memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return memmap.map; + + entry += memmap.desc_size; + if (entry >= memmap.map_end) + return NULL; + + return entry; +} + /* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. @@ -714,7 +778,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift) unsigned long left = 0; efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + p = NULL; + while ((p = efi_map_next_entry(p))) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) { #ifdef CONFIG_X86_64 From 0ce3cc008ec04258b6a6314b09f1a6012810881a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 25 Sep 2015 23:02:19 +0100 Subject: [PATCH 190/240] arm64/efi: Fix boot crash by not padding between EFI_MEMORY_RUNTIME regions The new Properties Table feature introduced in UEFIv2.5 may split memory regions that cover PE/COFF memory images into separate code and data regions. Since these regions only differ in the type (runtime code vs runtime data) and the permission bits, but not in the memory type attributes (UC/WC/WT/WB), the spec does not require them to be aligned to 64 KB. Since the relative offset of PE/COFF .text and .data segments cannot be changed on the fly, this means that we can no longer pad out those regions to be mappable using 64 KB pages. Unfortunately, there is no annotation in the UEFI memory map that identifies data regions that were split off from a code region, so we must apply this logic to all adjacent runtime regions whose attributes only differ in the permission bits. So instead of rounding each memory region to 64 KB alignment at both ends, only round down regions that are not directly preceded by another runtime region with the same type attributes. Since the UEFI spec does not mandate that the memory map be sorted, this means we also need to sort it first. Note that this change will result in all EFI_MEMORY_RUNTIME regions whose start addresses are not aligned to the OS page size to be mapped with executable permissions (i.e., on kernels compiled with 64 KB pages). However, since these mappings are only active during the time that UEFI Runtime Services are being invoked, the window for abuse is rather small. Tested-by: Mark Salter Tested-by: Mark Rutland [UEFI 2.4 only] Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Reviewed-by: Mark Salter Reviewed-by: Mark Rutland Cc: # v4.0+ Cc: Catalin Marinas Cc: Leif Lindholm Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443218539-7610-3-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- arch/arm64/kernel/efi.c | 3 +- drivers/firmware/efi/libstub/arm-stub.c | 86 +++++++++++++++++++++---- 2 files changed, 74 insertions(+), 15 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index e8ca6eaedd02..13671a9cf016 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -258,7 +258,8 @@ static bool __init efi_virtmap_init(void) */ if (!is_normal_ram(md)) prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE) + else if (md->type == EFI_RUNTIME_SERVICES_CODE || + !PAGE_ALIGNED(md->phys_addr)) prot = PAGE_KERNEL_EXEC; else prot = PAGE_KERNEL; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index e29560e6b40b..950c87f5d279 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -13,6 +13,7 @@ */ #include +#include #include #include "efistub.h" @@ -305,6 +306,44 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, */ #define EFI_RT_VIRTUAL_BASE 0x40000000 +static int cmp_mem_desc(const void *l, const void *r) +{ + const efi_memory_desc_t *left = l, *right = r; + + return (left->phys_addr > right->phys_addr) ? 1 : -1; +} + +/* + * Returns whether region @left ends exactly where region @right starts, + * or false if either argument is NULL. + */ +static bool regions_are_adjacent(efi_memory_desc_t *left, + efi_memory_desc_t *right) +{ + u64 left_end; + + if (left == NULL || right == NULL) + return false; + + left_end = left->phys_addr + left->num_pages * EFI_PAGE_SIZE; + + return left_end == right->phys_addr; +} + +/* + * Returns whether region @left and region @right have compatible memory type + * mapping attributes, and are both EFI_MEMORY_RUNTIME regions. + */ +static bool regions_have_compatible_memory_type_attrs(efi_memory_desc_t *left, + efi_memory_desc_t *right) +{ + static const u64 mem_type_mask = EFI_MEMORY_WB | EFI_MEMORY_WT | + EFI_MEMORY_WC | EFI_MEMORY_UC | + EFI_MEMORY_RUNTIME; + + return ((left->attribute ^ right->attribute) & mem_type_mask) == 0; +} + /* * efi_get_virtmap() - create a virtual mapping for the EFI memory map * @@ -317,33 +356,52 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, int *count) { u64 efi_virt_base = EFI_RT_VIRTUAL_BASE; - efi_memory_desc_t *out = runtime_map; + efi_memory_desc_t *in, *prev = NULL, *out = runtime_map; int l; - for (l = 0; l < map_size; l += desc_size) { - efi_memory_desc_t *in = (void *)memory_map + l; + /* + * To work around potential issues with the Properties Table feature + * introduced in UEFI 2.5, which may split PE/COFF executable images + * in memory into several RuntimeServicesCode and RuntimeServicesData + * regions, we need to preserve the relative offsets between adjacent + * EFI_MEMORY_RUNTIME regions with the same memory type attributes. + * The easiest way to find adjacent regions is to sort the memory map + * before traversing it. + */ + sort(memory_map, map_size / desc_size, desc_size, cmp_mem_desc, NULL); + + for (l = 0; l < map_size; l += desc_size, prev = in) { u64 paddr, size; + in = (void *)memory_map + l; if (!(in->attribute & EFI_MEMORY_RUNTIME)) continue; + paddr = in->phys_addr; + size = in->num_pages * EFI_PAGE_SIZE; + /* * Make the mapping compatible with 64k pages: this allows * a 4k page size kernel to kexec a 64k page size kernel and * vice versa. */ - paddr = round_down(in->phys_addr, SZ_64K); - size = round_up(in->num_pages * EFI_PAGE_SIZE + - in->phys_addr - paddr, SZ_64K); + if (!regions_are_adjacent(prev, in) || + !regions_have_compatible_memory_type_attrs(prev, in)) { - /* - * Avoid wasting memory on PTEs by choosing a virtual base that - * is compatible with section mappings if this region has the - * appropriate size and physical alignment. (Sections are 2 MB - * on 4k granule kernels) - */ - if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) - efi_virt_base = round_up(efi_virt_base, SZ_2M); + paddr = round_down(in->phys_addr, SZ_64K); + size += in->phys_addr - paddr; + + /* + * Avoid wasting memory on PTEs by choosing a virtual + * base that is compatible with section mappings if this + * region has the appropriate size and physical + * alignment. (Sections are 2 MB on 4k granule kernels) + */ + if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) + efi_virt_base = round_up(efi_virt_base, SZ_2M); + else + efi_virt_base = round_up(efi_virt_base, SZ_64K); + } in->virt_addr = efi_virt_base + in->phys_addr - paddr; efi_virt_base += size; From 606decd67049217684e3cb5a54104d51ddd4ef35 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Oct 2015 13:12:47 +0200 Subject: [PATCH 191/240] Revert "KVM: x86: apply guest MTRR virtualization on host reserved pages" This reverts commit fd717f11015f673487ffc826e59b2bad69d20fe5. It was reported to cause Machine Check Exceptions (bug 104091). Reported-by: harn-solo@gmx.de Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 7 +++++-- arch/x86/kvm/vmx.c | 11 ++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 94b7d15db3fc..9e88078f4146 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1167,11 +1167,14 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u8 mtrr; /* - * 1. MMIO: trust guest MTRR, so same as item 3. + * 1. MMIO: always map as UC * 2. No passthrough: always map as WB, and force guest PAT to WB as well * 3. Passthrough: can't guarantee the result, try to trust guest. */ - if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm)) + if (is_mmio) + return _PAGE_NOCACHE; + + if (!kvm_arch_has_assigned_device(vcpu->kvm)) return 0; if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) && diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 64076740251e..06ef4908ba61 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8617,17 +8617,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) u64 ipat = 0; /* For VT-d and EPT combination - * 1. MMIO: guest may want to apply WC, trust it. + * 1. MMIO: always map as UC * 2. EPT with VT-d: * a. VT-d without snooping control feature: can't guarantee the - * result, try to trust guest. So the same as item 1. + * result, try to trust guest. * b. VT-d with snooping control feature: snooping control feature of * VT-d engine can guarantee the cache correctness. Just set it * to WB to keep consistent with host. So the same as item 3. * 3. EPT without VT-d: always map as WB and set IPAT=1 to keep * consistent with host MTRR */ - if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) { + if (is_mmio) { + cache = MTRR_TYPE_UNCACHABLE; + goto exit; + } + + if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) { ipat = VMX_EPT_IPAT_BIT; cache = MTRR_TYPE_WRBACK; goto exit; From 625422f60c55bbc368b8568ff925770b36bfc189 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Oct 2015 13:28:15 +0200 Subject: [PATCH 192/240] Revert "KVM: SVM: Sync g_pat with guest-written PAT value" This reverts commit e098223b789b4a618dacd79e5e0dad4a9d5018d1, which has a dependency on other commits being reverted. Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9e88078f4146..22601206ffd7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3354,16 +3354,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_IA32_CR_PAT: - if (npt_enabled) { - if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) - return 1; - vcpu->arch.pat = data; - svm_set_guest_pat(svm, &svm->vmcb->save.g_pat); - mark_dirty(svm->vmcb, VMCB_NPT); - break; - } - /* fall through */ default: return kvm_set_msr_common(vcpu, msr); } From bcf166a9942c3aabd8b752a7c38a49f57c54cfb8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Oct 2015 13:19:55 +0200 Subject: [PATCH 193/240] Revert "KVM: svm: handle KVM_X86_QUIRK_CD_NW_CLEARED in svm_get_mt_mask" This reverts commit 5492830370171b6a4ede8a3bfba687a8d0f25fa5. It builds on the commit that is being reverted next. Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 22601206ffd7..7c242b7007ac 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1177,10 +1177,6 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) if (!kvm_arch_has_assigned_device(vcpu->kvm)) return 0; - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) && - kvm_read_cr0(vcpu) & X86_CR0_CD) - return _PAGE_NOCACHE; - mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); return mtrr2protval[mtrr]; } @@ -1676,10 +1672,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!vcpu->fpu_active) cr0 |= X86_CR0_TS; - - /* These are emulated via page tables. */ - cr0 &= ~(X86_CR0_CD | X86_CR0_NW); - + /* + * re-enable caching here because the QEMU bios + * does not do it - this results in some delay at + * reboot + */ + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) + cr0 &= ~(X86_CR0_CD | X86_CR0_NW); svm->vmcb->save.cr0 = cr0; mark_dirty(svm->vmcb, VMCB_CR); update_cr0_intercept(svm); From fc07e76ac7ffa3afd621a1c3858a503386a14281 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Oct 2015 13:20:22 +0200 Subject: [PATCH 194/240] Revert "KVM: SVM: use NPT page attributes" This reverts commit 3c2e7f7de3240216042b61073803b61b9b3cfb22. Initializing the mapping from MTRR to PAT values was reported to fail nondeterministically, and it also caused extremely slow boot (due to caching getting disabled---bug 103321) with assigned devices. Reported-by: Markus Trippelsdorf Reported-by: Sebastian Schuette Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 101 +++------------------------------------------ 1 file changed, 5 insertions(+), 96 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7c242b7007ac..99b3c5f40bf7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -866,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0); } -#define MTRR_TYPE_UC_MINUS 7 -#define MTRR2PROTVAL_INVALID 0xff - -static u8 mtrr2protval[8]; - -static u8 fallback_mtrr_type(int mtrr) -{ - /* - * WT and WP aren't always available in the host PAT. Treat - * them as UC and UC- respectively. Everything else should be - * there. - */ - switch (mtrr) - { - case MTRR_TYPE_WRTHROUGH: - return MTRR_TYPE_UNCACHABLE; - case MTRR_TYPE_WRPROT: - return MTRR_TYPE_UC_MINUS; - default: - BUG(); - } -} - -static void build_mtrr2protval(void) -{ - int i; - u64 pat; - - for (i = 0; i < 8; i++) - mtrr2protval[i] = MTRR2PROTVAL_INVALID; - - /* Ignore the invalid MTRR types. */ - mtrr2protval[2] = 0; - mtrr2protval[3] = 0; - - /* - * Use host PAT value to figure out the mapping from guest MTRR - * values to nested page table PAT/PCD/PWT values. We do not - * want to change the host PAT value every time we enter the - * guest. - */ - rdmsrl(MSR_IA32_CR_PAT, pat); - for (i = 0; i < 8; i++) { - u8 mtrr = pat >> (8 * i); - - if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID) - mtrr2protval[mtrr] = __cm_idx2pte(i); - } - - for (i = 0; i < 8; i++) { - if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) { - u8 fallback = fallback_mtrr_type(i); - mtrr2protval[i] = mtrr2protval[fallback]; - BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID); - } - } -} - static __init int svm_hardware_setup(void) { int cpu; @@ -990,7 +932,6 @@ static __init int svm_hardware_setup(void) } else kvm_disable_tdp(); - build_mtrr2protval(); return 0; err: @@ -1145,42 +1086,6 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } -static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat) -{ - struct kvm_vcpu *vcpu = &svm->vcpu; - - /* Unlike Intel, AMD takes the guest's CR0.CD into account. - * - * AMD does not have IPAT. To emulate it for the case of guests - * with no assigned devices, just set everything to WB. If guests - * have assigned devices, however, we cannot force WB for RAM - * pages only, so use the guest PAT directly. - */ - if (!kvm_arch_has_assigned_device(vcpu->kvm)) - *g_pat = 0x0606060606060606; - else - *g_pat = vcpu->arch.pat; -} - -static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) -{ - u8 mtrr; - - /* - * 1. MMIO: always map as UC - * 2. No passthrough: always map as WB, and force guest PAT to WB as well - * 3. Passthrough: can't guarantee the result, try to trust guest. - */ - if (is_mmio) - return _PAGE_NOCACHE; - - if (!kvm_arch_has_assigned_device(vcpu->kvm)) - return 0; - - mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn); - return mtrr2protval[mtrr]; -} - static void init_vmcb(struct vcpu_svm *svm, bool init_event) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -1277,7 +1182,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) clr_cr_intercept(svm, INTERCEPT_CR3_READ); clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); save->g_pat = svm->vcpu.arch.pat; - svm_set_guest_pat(svm, &save->g_pat); save->cr3 = 0; save->cr4 = 0; } @@ -4187,6 +4091,11 @@ static bool svm_has_high_real_mode_segbase(void) return true; } +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) +{ + return 0; +} + static void svm_cpuid_update(struct kvm_vcpu *vcpu) { } From 038161dea1eaeee80341134e2675b24656a71b59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20M=C3=BCller?= Date: Thu, 1 Oct 2015 13:46:01 +0200 Subject: [PATCH 195/240] Update KVM homepage Url The old one appears to be a generic catch all page, which is unhelpful. Signed-off-by: Dirk Mueller Signed-off-by: Paolo Bonzini --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9f6685f6c5a9..797236befd27 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5957,7 +5957,7 @@ F: virt/kvm/ KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V M: Joerg Roedel L: kvm@vger.kernel.org -W: http://kvm.qumranet.com +W: http://www.linux-kvm.org/ S: Maintained F: arch/x86/include/asm/svm.h F: arch/x86/kvm/svm.c @@ -5965,7 +5965,7 @@ F: arch/x86/kvm/svm.c KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC M: Alexander Graf L: kvm-ppc@vger.kernel.org -W: http://kvm.qumranet.com +W: http://www.linux-kvm.org/ T: git git://github.com/agraf/linux-2.6.git S: Supported F: arch/powerpc/include/asm/kvm* From d2922422c48df93f3edff7d872ee4f3191fefb08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20M=C3=BCller?= Date: Thu, 1 Oct 2015 13:43:42 +0200 Subject: [PATCH 196/240] Use WARN_ON_ONCE for missing X86_FEATURE_NRIPS The cpu feature flags are not ever going to change, so warning everytime can cause a lot of kernel log spam (in our case more than 10GB/hour). The warning seems to only occur when nested virtualization is enabled, so it's probably triggered by a KVM bug. This is a sensible and safe change anyway, and the KVM bug fix might not be suitable for stable releases anyway. Cc: stable@vger.kernel.org Signed-off-by: Dirk Mueller Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 99b3c5f40bf7..2f9ed1ff0632 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -514,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } From 1e16a8f11669c98a0adf5fb5f8522aebc1f69f71 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 1 Oct 2015 15:45:44 +0200 Subject: [PATCH 197/240] MIPS: BPF: Do all exports of symbols with FEXPORT(). FEXPORT also marks the symbol as code using .type symbol, @function. Without objdump -d will output only a hexdump for code following the affected symbols. Signed-off-by: Ralf Baechle --- arch/mips/net/bpf_jit_asm.S | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S index dabf4179cd7e..eabb5e0889ee 100644 --- a/arch/mips/net/bpf_jit_asm.S +++ b/arch/mips/net/bpf_jit_asm.S @@ -57,8 +57,7 @@ LEAF(sk_load_word) is_offset_negative(word) - .globl sk_load_word_positive -sk_load_word_positive: +FEXPORT(sk_load_word_positive) is_offset_in_header(4, word) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset @@ -85,8 +84,7 @@ sk_load_word_positive: LEAF(sk_load_half) is_offset_negative(half) - .globl sk_load_half_positive -sk_load_half_positive: +FEXPORT(sk_load_half_positive) is_offset_in_header(2, half) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset @@ -109,8 +107,7 @@ sk_load_half_positive: LEAF(sk_load_byte) is_offset_negative(byte) - .globl sk_load_byte_positive -sk_load_byte_positive: +FEXPORT(sk_load_byte_positive) is_offset_in_header(1, byte) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset From 1a541b4e3cd6f5795022514114854b3e1345f24e Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 1 Oct 2015 13:06:07 +0100 Subject: [PATCH 198/240] arm64: Fix THP protection change logic 6910fa1 ("arm64: enable PTE type bit in the mask for pte_modify") fixes a problem whereby a large block of PROT_NONE mapped memory is incorrectly mapped as block descriptors when mprotect is called. Unfortunately, a subtle bug was introduced by this fix to the THP logic. If one mmaps a large block of memory, then faults it such that it is collapsed into THPs; resulting calls to mprotect on this area of memory will lead to incorrect table descriptors being written instead of block descriptors. This is because pmd_modify calls pte_modify which is now allowed to modify the type of the page table entry. This patch reverts commit 6910fa16dbe142f6a0fd0fd7c249f9883ff7fc8a, and fixes the problem it was trying to address by adjusting PAGE_NONE to represent a table entry. Thus no change in pte type is required when moving from PROT_NONE to a different protection. Fixes: 6910fa16dbe1 ("arm64: enable PTE type bit in the mask for pte_modify") Cc: # 4.0+ Cc: Feng Kan Reported-by: Ganapatrao Kulkarni Tested-by: Ganapatrao Kulkarni Reviewed-by: Catalin Marinas Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b0329be95cb1..26b066690593 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -79,7 +79,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) @@ -496,7 +496,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); From a523841ee4e506fa1f05ff3a85b1e6d8176a3d4d Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 1 Oct 2015 15:36:48 -0700 Subject: [PATCH 199/240] arch/x86/include/asm/efi.h: fix build failure With KMEMCHECK=y, KASAN=n: arch/x86/platform/efi/efi.c:673:3: error: implicit declaration of function `memcpy' [-Werror=implicit-function-declaration] arch/x86/platform/efi/efi_64.c:139:2: error: implicit declaration of function `memcpy' [-Werror=implicit-function-declaration] arch/x86/include/asm/desc.h:121:2: error: implicit declaration of function `memcpy' [-Werror=implicit-function-declaration] Don't #undef memcpy if KASAN=n. Fixes: 769a8089c1fd ("x86, efi, kasan: #undef memset/memcpy/memmove per arch") Signed-off-by: Andrey Ryabinin Reported-by: Ingo Molnar Reported-by: Sedat Dilek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/efi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ab5f1d447ef9..ae68be92f755 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...); extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, u32 type, u64 attribute); +#ifdef CONFIG_KASAN /* * CONFIG_KASAN may redefine memset to __memset. __memset function is present * only in kernel binary. Since the EFI stub linked into a separate binary it @@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size, #undef memcpy #undef memset #undef memmove +#endif #endif /* CONFIG_X86_32 */ From 9ff42d10c3b3e26d9555878f31b9a2e5c24efa57 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 1 Oct 2015 15:36:51 -0700 Subject: [PATCH 200/240] userfaultfd: remove kernel header include from uapi header As include/uapi/linux/userfaultfd.h is a user visible header file, it should not include kernel-exclusive header files. So trying to build the userfaultfd test program from the selftests directory fails, since it contains a reference to linux/compiler.h. As it turns out, that header is not really needed there, so we can simply remove it to fix that issue. Signed-off-by: Andre Przywara Cc: Andrea Arcangeli Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/userfaultfd.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index df0e09bb7dd5..9057d7af3ae1 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -11,8 +11,6 @@ #include -#include - #define UFFD_API ((__u64)0xAA) /* * After implementing the respective features it will become: From 03a2d2a3eafe4015412cf4e9675ca0e2d9204074 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 1 Oct 2015 15:36:54 -0700 Subject: [PATCH 201/240] mm/slab: fix unexpected index mapping result of kmalloc_size(INDEX_NODE+1) Commit description is copied from the original post of this bug: http://comments.gmane.org/gmane.linux.kernel.mm/135349 Kernels after v3.9 use kmalloc_size(INDEX_NODE + 1) to get the next larger cache size than the size index INDEX_NODE mapping. In kernels 3.9 and earlier we used malloc_sizes[INDEX_L3 + 1].cs_size. However, sometimes we can't get the right output we expected via kmalloc_size(INDEX_NODE + 1), causing a BUG(). The mapping table in the latest kernel is like: index = {0, 1, 2 , 3, 4, 5, 6, n} size = {0, 96, 192, 8, 16, 32, 64, 2^n} The mapping table before 3.10 is like this: index = {0 , 1 , 2, 3, 4 , 5 , 6, n} size = {32, 64, 96, 128, 192, 256, 512, 2^(n+3)} The problem on my mips64 machine is as follows: (1) When configured DEBUG_SLAB && DEBUG_PAGEALLOC && DEBUG_LOCK_ALLOC && DEBUG_SPINLOCK, the sizeof(struct kmem_cache_node) will be "150", and the macro INDEX_NODE turns out to be "2": #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node)) (2) Then the result of kmalloc_size(INDEX_NODE + 1) is 8. (3) Then "if(size >= kmalloc_size(INDEX_NODE + 1)" will lead to "size = PAGE_SIZE". (4) Then "if ((size >= (PAGE_SIZE >> 3))" test will be satisfied and "flags |= CFLGS_OFF_SLAB" will be covered. (5) if (flags & CFLGS_OFF_SLAB)" test will be satisfied and will go to "cachep->slabp_cache = kmalloc_slab(slab_size, 0u)", and the result here may be NULL while kernel bootup. (6) Finally,"BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));" causes the BUG info as the following shows (may be only mips64 has this problem): This patch fixes the problem of kmalloc_size(INDEX_NODE + 1) and removes the BUG by adding 'size >= 256' check to guarantee that all necessary small sized slabs are initialized regardless sequence of slab size in mapping table. Fixes: e33660165c90 ("slab: Use common kmalloc_index/kmalloc_size...") Signed-off-by: Joonsoo Kim Reported-by: Liuhailong Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index c77ebe6cc87c..4fcc5dd8d5a6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) size += BYTES_PER_WORD; } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) - if (size >= kmalloc_size(INDEX_NODE + 1) - && cachep->object_size > cache_line_size() - && ALIGN(size, cachep->align) < PAGE_SIZE) { + /* + * To activate debug pagealloc, off-slab management is necessary + * requirement. In early phase of initialization, small sized slab + * doesn't get initialized so it would not be possible. So, we need + * to check size >= 256. It guarantees that all necessary small + * sized slab is initialized in current slab initialization sequence. + */ + if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) && + size >= 256 && cachep->object_size > cache_line_size() && + ALIGN(size, cachep->align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); size = PAGE_SIZE; } From 2f84a8990ebbe235c59716896e017c6b2ca1200f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 1 Oct 2015 15:36:57 -0700 Subject: [PATCH 202/240] mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault SunDong reported the following on https://bugzilla.kernel.org/show_bug.cgi?id=103841 I think I find a linux bug, I have the test cases is constructed. I can stable recurring problems in fedora22(4.0.4) kernel version, arch for x86_64. I construct transparent huge page, when the parent and child process with MAP_SHARE, MAP_PRIVATE way to access the same huge page area, it has the opportunity to lead to huge page copy on write failure, and then it will munmap the child corresponding mmap area, but then the child mmap area with VM_MAYSHARE attributes, child process munmap this area can trigger VM_BUG_ON in set_vma_resv_flags functions (vma - > vm_flags & VM_MAYSHARE). There were a number of problems with the report (e.g. it's hugetlbfs that triggers this, not transparent huge pages) but it was fundamentally correct in that a VM_BUG_ON in set_vma_resv_flags() can be triggered that looks like this vma ffff8804651fd0d0 start 00007fc474e00000 end 00007fc475e00000 next ffff8804651fd018 prev ffff8804651fd188 mm ffff88046b1b1800 prot 8000000000000027 anon_vma (null) vm_ops ffffffff8182a7a0 pgoff 0 file ffff88106bdb9800 private_data (null) flags: 0x84400fb(read|write|shared|mayread|maywrite|mayexec|mayshare|dontexpand|hugetlb) ------------ kernel BUG at mm/hugetlb.c:462! SMP Modules linked in: xt_pkttype xt_LOG xt_limit [..] CPU: 38 PID: 26839 Comm: map Not tainted 4.0.4-default #1 Hardware name: Dell Inc. PowerEdge R810/0TT6JF, BIOS 2.7.4 04/26/2012 set_vma_resv_flags+0x2d/0x30 The VM_BUG_ON is correct because private and shared mappings have different reservation accounting but the warning clearly shows that the VMA is shared. When a private COW fails to allocate a new page then only the process that created the VMA gets the page -- all the children unmap the page. If the children access that data in the future then they get killed. The problem is that the same file is mapped shared and private. During the COW, the allocation fails, the VMAs are traversed to unmap the other private pages but a shared VMA is found and the bug is triggered. This patch identifies such VMAs and skips them. Signed-off-by: Mel Gorman Reported-by: SunDong Reviewed-by: Michal Hocko Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Naoya Horiguchi Cc: David Rientjes Reviewed-by: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 999fb0aef8f1..9cc773483624 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3201,6 +3201,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, if (iter_vma == vma) continue; + /* + * Shared VMAs have their own reserves and do not affect + * MAP_PRIVATE accounting but it is possible that a shared + * VMA is using the same page so check and skip such VMAs. + */ + if (iter_vma->vm_flags & VM_MAYSHARE) + continue; + /* * Unmap the page from other VMAs without their own reserves. * They get marked to be SIGKILLed if they fault in these From 8346c416d17bf5b4ea1508662959bb62e73fd6a5 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 1 Oct 2015 15:36:59 -0700 Subject: [PATCH 203/240] dax: fix NULL pointer in __dax_pmd_fault() Commit 46c043ede471 ("mm: take i_mmap_lock in unmap_mapping_range() for DAX") moved some code in __dax_pmd_fault() that was responsible for zeroing newly allocated PMD pages. The new location didn't properly set up 'kaddr', so when run this code resulted in a NULL pointer BUG. Fix this by getting the correct 'kaddr' via bdev_direct_access(). Signed-off-by: Ross Zwisler Reported-by: Dan Williams Reviewed-by: Dan Williams Cc: Alexander Viro Cc: Matthew Wilcox Cc: "Kirill A. Shutemov" Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 7ae6df7ea1d2..bcfb14bfc1e4 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) goto fallback; + sector = bh.b_blocknr << (blkbits - 9); + if (buffer_unwritten(&bh) || buffer_new(&bh)) { int i; + + length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, + bh.b_size); + if (length < 0) { + result = VM_FAULT_SIGBUS; + goto out; + } + if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR)) + goto fallback; + for (i = 0; i < PTRS_PER_PMD; i++) clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE); wmb_pmem(); @@ -623,7 +635,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, result = VM_FAULT_NOPAGE; spin_unlock(ptl); } else { - sector = bh.b_blocknr << (blkbits - 9); length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn, bh.b_size); if (length < 0) { From 0610c25daa3e76e38ad5a8fae683a89ff9f71798 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:02 -0700 Subject: [PATCH 204/240] memcg: fix dirty page migration The problem starts with a file backed dirty page which is charged to a memcg. Then page migration is used to move oldpage to newpage. Migration: - copies the oldpage's data to newpage - clears oldpage.PG_dirty - sets newpage.PG_dirty - uncharges oldpage from memcg - charges newpage to memcg Clearing oldpage.PG_dirty decrements the charged memcg's dirty page count. However, because newpage is not yet charged, setting newpage.PG_dirty does not increment the memcg's dirty page count. After migration completes newpage.PG_dirty is eventually cleared, often in account_page_cleaned(). At this time newpage is charged to a memcg so the memcg's dirty page count is decremented which causes underflow because the count was not previously incremented by migration. This underflow causes balance_dirty_pages() to see a very large unsigned number of dirty memcg pages which leads to aggressive throttling of buffered writes by processes in non root memcg. This issue: - can harm performance of non root memcg buffered writes. - can report too small (even negative) values in memory.stat[(total_)dirty] counters of all memcg, including the root. To avoid polluting migrate.c with #ifdef CONFIG_MEMCG checks, introduce page_memcg() and set_page_memcg() helpers. Test: 0) setup and enter limited memcg mkdir /sys/fs/cgroup/test echo 1G > /sys/fs/cgroup/test/memory.limit_in_bytes echo $$ > /sys/fs/cgroup/test/cgroup.procs 1) buffered writes baseline dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k sync grep ^dirty /sys/fs/cgroup/test/memory.stat 2) buffered writes with compaction antagonist to induce migration yes 1 > /proc/sys/vm/compact_memory & rm -rf /data/tmp/foo dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k kill % sync grep ^dirty /sys/fs/cgroup/test/memory.stat 3) buffered writes without antagonist, should match baseline rm -rf /data/tmp/foo dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k sync grep ^dirty /sys/fs/cgroup/test/memory.stat (speed, dirty residue) unpatched patched 1) 841 MB/s 0 dirty pages 886 MB/s 0 dirty pages 2) 611 MB/s -33427456 dirty pages 793 MB/s 0 dirty pages 3) 114 MB/s -33427456 dirty pages 891 MB/s 0 dirty pages Notice that unpatched baseline performance (1) fell after migration (3): 841 -> 114 MB/s. In the patched kernel, post migration performance matches baseline. Fixes: c4843a7593a9 ("memcg: add per cgroup dirty page accounting") Signed-off-by: Greg Thelen Reported-by: Dave Hansen Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ mm/migrate.c | 12 +++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 91c08f6f0dc9..80001de019ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return page->mem_cgroup; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ + page->mem_cgroup = memcg; +} +#else +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/mm/migrate.c b/mm/migrate.c index 7452a00bbb50..842ecd7aaf7f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page, if (PageSwapBacked(page)) SetPageSwapBacked(newpage); + /* + * Indirectly called below, migrate_page_copy() copies PG_dirty and thus + * needs newpage's memcg set to transfer memcg dirty page accounting. + * So perform memcg migration in two steps: + * 1. set newpage->mem_cgroup (here) + * 2. clear page->mem_cgroup (below) + */ + set_page_memcg(newpage, page_memcg(page)); + mapping = page_mapping(page); if (!mapping) rc = migrate_page(mapping, newpage, page, mode); @@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page, rc = fallback_migrate_page(mapping, newpage, page, mode); if (rc != MIGRATEPAGE_SUCCESS) { + set_page_memcg(newpage, NULL); newpage->mapping = NULL; } else { - mem_cgroup_migrate(page, newpage, false); + set_page_memcg(page, NULL); if (page_was_mapped) remove_migration_ptes(page, newpage); page->mapping = NULL; From 484ebb3b8c8b27dd2171696462a3116edb9ff801 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:05 -0700 Subject: [PATCH 205/240] memcg: make mem_cgroup_read_stat() unsigned mem_cgroup_read_stat() returns a page count by summing per cpu page counters. The summing is racy wrt. updates, so a transient negative sum is possible. Callers don't want negative values: - mem_cgroup_wb_stats() doesn't want negative nr_dirty or nr_writeback. This could confuse dirty throttling. - oom reports and memory.stat shouldn't show confusing negative usage. - tree_usage() already avoids negatives. Avoid returning negative page counts from mem_cgroup_read_stat() and convert it to unsigned. [akpm@linux-foundation.org: fix old typo while we're in there] Signed-off-by: Greg Thelen Cc: Johannes Weiner Acked-by: Michal Hocko Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6ddaeba34e09..03cc0a742ff1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) } /* + * Return page count for single (non recursive) @memcg. + * * Implementation Note: reading percpu statistics for memcg. * * Both of vmstat[] and percpu_counter has threshold and do periodic * synchronization to implement "quick" read. There are trade-off between * reading cost and precision of value. Then, we may have a chance to implement - * a periodic synchronizion of counter in memcg's counter. + * a periodic synchronization of counter in memcg's counter. * * But this _read() function is used for user interface now. The user accounts * memory usage by memory cgroup and he _always_ requires exact value because @@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) * * If there are kernel internal actions which can make use of some not-exact * value, and reading all cpu value can be performance bottleneck in some - * common workload, threashold and synchonization as vmstat[] should be + * common workload, threshold and synchronization as vmstat[] should be * implemented. */ -static long mem_cgroup_read_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) +static unsigned long +mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { long val = 0; int cpu; + /* Per-cpu values can be negative, use a signed accumulator */ for_each_possible_cpu(cpu) val += per_cpu(memcg->stat->count[idx], cpu); + /* + * Summing races with updates, so val may be negative. Avoid exposing + * transient negative values. + */ + if (val < 0) + val = 0; return val; } @@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i], + pr_cont(" %s:%luKB", mem_cgroup_stat_names[i], K(mem_cgroup_read_stat(iter, i))); } @@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) { struct mem_cgroup *iter; - long val = 0; + unsigned long val = 0; - /* Per-cpu values can be negative, use a signed accumulator */ for_each_mem_cgroup_tree(iter, memcg) val += mem_cgroup_read_stat(iter, idx); - if (val < 0) /* race ? */ - val = 0; return val; } @@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v) for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; - seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i], + seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i], mem_cgroup_read_stat(memcg, i) * PAGE_SIZE); } @@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v) (u64)memsw * PAGE_SIZE); for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { - long long val = 0; + unsigned long long val = 0; if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account) continue; for_each_mem_cgroup_tree(mi, memcg) val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE; - seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val); + seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val); } for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { From 09a59a9d57a9d6f49510c93304d6e105deb83b93 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 1 Oct 2015 15:37:08 -0700 Subject: [PATCH 206/240] drivers/input/joystick/Kconfig: zhenhua.c needs BITREVERSE It uses bitrev8(), so it must ensure that lib/bitrev.o gets included in vmlinux. Cc: Fengguang Wu Cc: yalin wang Cc: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/input/joystick/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig index 56eb471b5576..4215b5382092 100644 --- a/drivers/input/joystick/Kconfig +++ b/drivers/input/joystick/Kconfig @@ -196,6 +196,7 @@ config JOYSTICK_TWIDJOY config JOYSTICK_ZHENHUA tristate "5-byte Zhenhua RC transmitter" select SERIO + select BITREVERSE help Say Y here if you have a Zhen Hua PPM-4CH transmitter which is supplied with a ready to fly micro electric indoor helicopters From 54aea4542980a3ed580426a81c5af799df4d610d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Thu, 1 Oct 2015 15:37:11 -0700 Subject: [PATCH 207/240] kprobes: use _do_fork() in samples to make them work again Commit 3033f14ab78c ("clone: support passing tls argument via C rather than pt_regs magic") introduced _do_fork() that allowed to pass @tls parameter. The old do_fork() is defined only for architectures that are not ready to use this way and do not define HAVE_COPY_THREAD_TLS. Let's use _do_fork() in the kprobe examples to make them work again on all architectures. Signed-off-by: Petr Mladek Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Thiago Macieira Cc: Jiri Kosina Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/kprobes/jprobe_example.c | 14 +++++++------- samples/kprobes/kprobe_example.c | 6 +++--- samples/kprobes/kretprobe_example.c | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/kprobes/jprobe_example.c b/samples/kprobes/jprobe_example.c index 9119ac6a8270..c285a3b8a9f1 100644 --- a/samples/kprobes/jprobe_example.c +++ b/samples/kprobes/jprobe_example.c @@ -1,13 +1,13 @@ /* * Here's a sample kernel module showing the use of jprobes to dump - * the arguments of do_fork(). + * the arguments of _do_fork(). * * For more information on theory of operation of jprobes, see * Documentation/kprobes.txt * * Build and insert the kernel module as done in the kprobe example. * You will see the trace data in /var/log/messages and on the - * console whenever do_fork() is invoked to create a new process. + * console whenever _do_fork() is invoked to create a new process. * (Some messages may be suppressed if syslogd is configured to * eliminate duplicate messages.) */ @@ -17,13 +17,13 @@ #include /* - * Jumper probe for do_fork. + * Jumper probe for _do_fork. * Mirror principle enables access to arguments of the probed routine * from the probe handler. */ -/* Proxy routine having the same arguments as actual do_fork() routine */ -static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, +/* Proxy routine having the same arguments as actual _do_fork() routine */ +static long j_do_fork(unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *parent_tidptr, int __user *child_tidptr) { @@ -36,9 +36,9 @@ static long jdo_fork(unsigned long clone_flags, unsigned long stack_start, } static struct jprobe my_jprobe = { - .entry = jdo_fork, + .entry = j_do_fork, .kp = { - .symbol_name = "do_fork", + .symbol_name = "_do_fork", }, }; diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index 366db1a9fb65..727eb21c9c56 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -1,13 +1,13 @@ /* * NOTE: This example is works on x86 and powerpc. * Here's a sample kernel module showing the use of kprobes to dump a - * stack trace and selected registers when do_fork() is called. + * stack trace and selected registers when _do_fork() is called. * * For more information on theory of operation of kprobes, see * Documentation/kprobes.txt * * You will see the trace data in /var/log/messages and on the console - * whenever do_fork() is invoked to create a new process. + * whenever _do_fork() is invoked to create a new process. */ #include @@ -16,7 +16,7 @@ /* For each probe you need to allocate a kprobe structure */ static struct kprobe kp = { - .symbol_name = "do_fork", + .symbol_name = "_do_fork", }; /* kprobe pre_handler: called just before the probed instruction is executed */ diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 1041b6731598..ebb1d1aed547 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -7,7 +7,7 @@ * * usage: insmod kretprobe_example.ko func= * - * If no func_name is specified, do_fork is instrumented + * If no func_name is specified, _do_fork is instrumented * * For more information on theory of operation of kretprobes, see * Documentation/kprobes.txt @@ -25,7 +25,7 @@ #include #include -static char func_name[NAME_MAX] = "do_fork"; +static char func_name[NAME_MAX] = "_do_fork"; module_param_string(func, func_name, NAME_MAX, S_IRUGO); MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the" " function's execution time"); From ef510194cefe0cd369ef73419cd65b0a5bb4fb5b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:13 -0700 Subject: [PATCH 208/240] memcg: remove pcp_counter_lock Commit 733a572e66d2 ("memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online") removed the last use of the per memcg pcp_counter_lock but forgot to remove the variable. Kill the vestigial variable. Signed-off-by: Greg Thelen Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - mm/memcontrol.c | 1 - 2 files changed, 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..6452ff4c463f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -242,7 +242,6 @@ struct mem_cgroup { * percpu counter. */ struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 03cc0a742ff1..1fedbde68f59 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4185,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) if (memcg_wb_domain_init(memcg, GFP_KERNEL)) goto out_free_stat; - spin_lock_init(&memcg->pcp_counter_lock); return memcg; out_free_stat: From 44241628bb207ec211bebd156aaf69470d90c209 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 1 Oct 2015 15:37:16 -0700 Subject: [PATCH 209/240] thermal: avoid division by zero in power allocator During boot I get a div by zero Oops regression starting in v4.3-rc3. Signed-off-by: Andrea Arcangeli Reviewed-by: Javi Merino Cc: Zhang Rui Cc: Eduardo Valentin Cc: Daniel Kurtz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/thermal/power_allocator.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 7ff96270c933..e570ff084add 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -144,6 +144,16 @@ static void estimate_pid_constants(struct thermal_zone_device *tz, switch_on_temp = 0; temperature_threshold = control_temp - switch_on_temp; + /* + * estimate_pid_constants() tries to find appropriate default + * values for thermal zones that don't provide them. If a + * system integrator has configured a thermal zone with two + * passive trip points at the same temperature, that person + * hasn't put any effort to set up the thermal zone properly + * so just give up. + */ + if (!temperature_threshold) + return; if (!tz->tzp->k_po || force) tz->tzp->k_po = int_to_frac(sustainable_power) / From 676bd99178cd962ed24ffdad222b7069d330a969 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Oct 2015 15:37:19 -0700 Subject: [PATCH 210/240] dmapool: fix overflow condition in pool_find_page() If a DMA pool lies at the very top of the dma_addr_t range (as may happen with an IOMMU involved), the calculated end address of the pool wraps around to zero, and page lookup always fails. Tweak the relevant calculation to be overflow-proof. Signed-off-by: Robin Murphy Cc: Arnd Bergmann Cc: Marek Szyprowski Cc: Sumit Semwal Cc: Sakari Ailus Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/dmapool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/dmapool.c b/mm/dmapool.c index 71a8998cd03a..312a716fa14c 100644 --- a/mm/dmapool.c +++ b/mm/dmapool.c @@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma) list_for_each_entry(page, &pool->page_list, page_list) { if (dma < page->dma) continue; - if (dma < (page->dma + pool->allocation)) + if ((dma - page->dma) < pool->allocation) return page; } return NULL; From 03f9abb28c3e10a1d0e08df8bca69e0606b23ea6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Sep 2015 14:47:37 -0400 Subject: [PATCH 211/240] drm: handle cursor_set2 in restore_fbdev_mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a driver uses the cursor_set2 crtc callback rather than cursor_set, use that. This fixes the fbdev helper for drivers that use cursor_set2. Signed-off-by: Alex Deucher Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_fb_helper.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 418d299f3b12..ca08c472311b 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -345,7 +345,11 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper) struct drm_crtc *crtc = mode_set->crtc; int ret; - if (crtc->funcs->cursor_set) { + if (crtc->funcs->cursor_set2) { + ret = crtc->funcs->cursor_set2(crtc, NULL, 0, 0, 0, 0, 0); + if (ret) + error = true; + } else if (crtc->funcs->cursor_set) { ret = crtc->funcs->cursor_set(crtc, NULL, 0, 0, 0); if (ret) error = true; From 0c6dadbe79c54818ed4c268297b663f0c4d1ef98 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Sep 2015 14:47:38 -0400 Subject: [PATCH 212/240] drm/radeon: drop radeon_fb_helper_set_par MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was just a wrapper around drm_fb_helper_set_par that called cursor_set2 in addition. Now that the core handles this, drop this radeon specific version. Signed-off-by: Alex Deucher Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_fb.c | 32 +----------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 7214858ffcea..1aa657fe31cb 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -48,40 +48,10 @@ struct radeon_fbdev { struct radeon_device *rdev; }; -/** - * radeon_fb_helper_set_par - Hide cursor on CRTCs used by fbdev. - * - * @info: fbdev info - * - * This function hides the cursor on all CRTCs used by fbdev. - */ -static int radeon_fb_helper_set_par(struct fb_info *info) -{ - int ret; - - ret = drm_fb_helper_set_par(info); - - /* XXX: with universal plane support fbdev will automatically disable - * all non-primary planes (including the cursor) - */ - if (ret == 0) { - struct drm_fb_helper *fb_helper = info->par; - int i; - - for (i = 0; i < fb_helper->crtc_count; i++) { - struct drm_crtc *crtc = fb_helper->crtc_info[i].mode_set.crtc; - - radeon_crtc_cursor_set2(crtc, NULL, 0, 0, 0, 0, 0); - } - } - - return ret; -} - static struct fb_ops radeonfb_ops = { .owner = THIS_MODULE, .fb_check_var = drm_fb_helper_check_var, - .fb_set_par = radeon_fb_helper_set_par, + .fb_set_par = drm_fb_helper_set_par, .fb_fillrect = drm_fb_helper_cfb_fillrect, .fb_copyarea = drm_fb_helper_cfb_copyarea, .fb_imageblit = drm_fb_helper_cfb_imageblit, From 1c960876be7cffd2798a9e2be090e0a5afaee895 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Sep 2015 11:04:49 +1000 Subject: [PATCH 213/240] drm/dp/mst: don't pass port into the path builder function This is unnecessary and it makes it easier to see what is needed from port. also add blank line to make things nicer. Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_dp_mst_topology.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index e23df5fd3836..87112d389fd4 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1027,8 +1027,8 @@ static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb, } } -static void build_mst_prop_path(struct drm_dp_mst_port *port, - struct drm_dp_mst_branch *mstb, +static void build_mst_prop_path(const struct drm_dp_mst_branch *mstb, + int pnum, char *proppath, size_t proppath_size) { @@ -1041,7 +1041,7 @@ static void build_mst_prop_path(struct drm_dp_mst_port *port, snprintf(temp, sizeof(temp), "-%d", port_num); strlcat(proppath, temp, proppath_size); } - snprintf(temp, sizeof(temp), "-%d", port->port_num); + snprintf(temp, sizeof(temp), "-%d", pnum); strlcat(proppath, temp, proppath_size); } @@ -1113,7 +1113,8 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, if (created && !port->input) { char proppath[255]; - build_mst_prop_path(port, mstb, proppath, sizeof(proppath)); + + build_mst_prop_path(mstb, port->port_num, proppath, sizeof(proppath)); port->connector = (*mstb->mgr->cbs->add_connector)(mstb->mgr, port, proppath); if (port->port_num >= 8) { From df4839fdc9b3c922586b945f062f38cbbda022bb Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Sep 2015 10:37:28 +1000 Subject: [PATCH 214/240] drm/dp/mst: fixup handling hotplug on port removal. output ports should always have a connector, unless in the rare case connector allocation fails in the driver. In this case we only need to teardown the pdt, and free the struct, and there is no need to send a hotplug msg. In the case were we add the port to the destroy list we need to send a hotplug if we destroy any connectors, so userspace knows to reprobe stuff. this patch also handles port->connector allocation failing which should be a rare event, but makes the code consistent. Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_dp_mst_topology.c | 36 +++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 87112d389fd4..d11052c2f84a 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -863,29 +863,33 @@ static void drm_dp_destroy_port(struct kref *kref) { struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref); struct drm_dp_mst_topology_mgr *mgr = port->mgr; + if (!port->input) { port->vcpi.num_slots = 0; kfree(port->cached_edid); - /* we can't destroy the connector here, as - we might be holding the mode_config.mutex - from an EDID retrieval */ + /* + * The only time we don't have a connector + * on an output port is if the connector init + * fails. + */ if (port->connector) { + /* we can't destroy the connector here, as + * we might be holding the mode_config.mutex + * from an EDID retrieval */ + mutex_lock(&mgr->destroy_connector_lock); list_add(&port->next, &mgr->destroy_connector_list); mutex_unlock(&mgr->destroy_connector_lock); schedule_work(&mgr->destroy_connector_work); return; } + /* no need to clean up vcpi + * as if we have no connector we never setup a vcpi */ drm_dp_port_teardown_pdt(port, port->pdt); - - if (!port->input && port->vcpi.vcpi > 0) - drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); } kfree(port); - - (*mgr->cbs->hotplug)(mgr); } static void drm_dp_put_port(struct drm_dp_mst_port *port) @@ -1116,12 +1120,21 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, build_mst_prop_path(mstb, port->port_num, proppath, sizeof(proppath)); port->connector = (*mstb->mgr->cbs->add_connector)(mstb->mgr, port, proppath); - + if (!port->connector) { + /* remove it from the port list */ + mutex_lock(&mstb->mgr->lock); + list_del(&port->next); + mutex_unlock(&mstb->mgr->lock); + /* drop port list reference */ + drm_dp_put_port(port); + goto out; + } if (port->port_num >= 8) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); } } +out: /* put reference to this port */ drm_dp_put_port(port); } @@ -2672,7 +2685,7 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) { struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work); struct drm_dp_mst_port *port; - + bool send_hotplug = false; /* * Not a regular list traverse as we have to drop the destroy * connector lock before destroying the connector, to avoid AB->BA @@ -2695,7 +2708,10 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) if (!port->input && port->vcpi.vcpi > 0) drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); kfree(port); + send_hotplug = true; } + if (send_hotplug) + (*mgr->cbs->hotplug)(mgr); } /** From 68d8c9fc91a0f63a2a10ccf7adf56f69125c11c1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 6 Sep 2015 18:53:00 +1000 Subject: [PATCH 215/240] drm/dp/mst: update the link_address_sent before sending the link address (v3) Update the state before sending the msg to close it. v2: reset value if return indicates we haven't send the msg. v3: just clean the code up. Pointed out by Adam J Richter on Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91481 Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_dp_mst_topology.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index d11052c2f84a..a2f5caaee242 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -53,8 +53,8 @@ static int drm_dp_send_dpcd_write(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, int offset, int size, u8 *bytes); -static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_branch *mstb); +static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_branch *mstb); static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb, struct drm_dp_mst_port *port); @@ -1109,10 +1109,8 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, drm_dp_port_teardown_pdt(port, old_pdt); ret = drm_dp_port_setup_pdt(port); - if (ret == true) { + if (ret == true) drm_dp_send_link_address(mstb->mgr, port->mstb); - port->mstb->link_address_sent = true; - } } if (created && !port->input) { @@ -1216,10 +1214,9 @@ static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *m { struct drm_dp_mst_port *port; struct drm_dp_mst_branch *mstb_child; - if (!mstb->link_address_sent) { + if (!mstb->link_address_sent) drm_dp_send_link_address(mgr, mstb); - mstb->link_address_sent = true; - } + list_for_each_entry(port, &mstb->ports, next) { if (port->input) continue; @@ -1472,8 +1469,8 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr, mutex_unlock(&mgr->qlock); } -static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, - struct drm_dp_mst_branch *mstb) +static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_branch *mstb) { int len; struct drm_dp_sideband_msg_tx *txmsg; @@ -1481,11 +1478,12 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); if (!txmsg) - return -ENOMEM; + return; txmsg->dst = mstb; len = build_link_address(txmsg); + mstb->link_address_sent = true; drm_dp_queue_down_tx(mgr, txmsg); ret = drm_dp_mst_wait_tx_reply(mstb, txmsg); @@ -1513,11 +1511,12 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, } (*mgr->cbs->hotplug)(mgr); } - } else + } else { + mstb->link_address_sent = false; DRM_DEBUG_KMS("link address failed %d\n", ret); + } kfree(txmsg); - return 0; } static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, From d9515c5ec1a20c77d83471e634ad9bb12deb0eac Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 16 Sep 2015 17:55:23 +1000 Subject: [PATCH 216/240] drm/dp/mst: split connector registration into two parts (v2) In order to cache the EDID properly for tiled displays, we need to retrieve it before we register the connector with userspace, otherwise userspace can call get resources and try and get the edid before we've even cached it. This fixes some problems when hotplugging mst monitors, with X/mutter running. As mutter seems to get 0 modes for one of the monitors in the tile. v2: fix warning in radeon handle tile setting in cached path rather than get edid path. Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_dp_mst_topology.c | 8 +++++--- drivers/gpu/drm/i915/intel_dp_mst.c | 9 ++++++++- drivers/gpu/drm/radeon/radeon_dp_mst.c | 11 +++++++++-- include/drm/drm_dp_mst_helper.h | 1 + 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index a2f5caaee242..36454a146ae9 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1129,7 +1129,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, } if (port->port_num >= 8) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); + drm_mode_connector_set_tile_property(port->connector); } + (*mstb->mgr->cbs->register_connector)(port->connector); } out: @@ -2276,10 +2278,10 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_ if (port->cached_edid) edid = drm_edid_duplicate(port->cached_edid); - else + else { edid = drm_get_edid(connector, &port->aux.ddc); - - drm_mode_connector_set_tile_property(connector); + drm_mode_connector_set_tile_property(connector); + } drm_dp_put_port(port); return edid; } diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 3e4be5a3becd..6ade06888432 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -462,11 +462,17 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_object_attach_property(&connector->base, dev->mode_config.tile_property, 0); drm_mode_connector_set_path_property(connector, pathprop); + return connector; +} + +static void intel_dp_register_mst_connector(struct drm_connector *connector) +{ + struct intel_connector *intel_connector = to_intel_connector(connector); + struct drm_device *dev = connector->dev; drm_modeset_lock_all(dev); intel_connector_add_to_fbdev(intel_connector); drm_modeset_unlock_all(dev); drm_connector_register(&intel_connector->base); - return connector; } static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, @@ -512,6 +518,7 @@ static void intel_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) static struct drm_dp_mst_topology_cbs mst_cbs = { .add_connector = intel_dp_add_mst_connector, + .register_connector = intel_dp_register_mst_connector, .destroy_connector = intel_dp_destroy_mst_connector, .hotplug = intel_dp_mst_hotplug, }; diff --git a/drivers/gpu/drm/radeon/radeon_dp_mst.c b/drivers/gpu/drm/radeon/radeon_dp_mst.c index 5e09c061847f..6cddae44fa6e 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_mst.c +++ b/drivers/gpu/drm/radeon/radeon_dp_mst.c @@ -265,7 +265,6 @@ static struct drm_connector *radeon_dp_add_mst_connector(struct drm_dp_mst_topol { struct radeon_connector *master = container_of(mgr, struct radeon_connector, mst_mgr); struct drm_device *dev = master->base.dev; - struct radeon_device *rdev = dev->dev_private; struct radeon_connector *radeon_connector; struct drm_connector *connector; @@ -286,12 +285,19 @@ static struct drm_connector *radeon_dp_add_mst_connector(struct drm_dp_mst_topol drm_object_attach_property(&connector->base, dev->mode_config.path_property, 0); drm_mode_connector_set_path_property(connector, pathprop); + return connector; +} + +static void radeon_dp_register_mst_connector(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct radeon_device *rdev = dev->dev_private; + drm_modeset_lock_all(dev); radeon_fb_add_connector(rdev, connector); drm_modeset_unlock_all(dev); drm_connector_register(connector); - return connector; } static void radeon_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr, @@ -324,6 +330,7 @@ static void radeon_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr) struct drm_dp_mst_topology_cbs mst_cbs = { .add_connector = radeon_dp_add_mst_connector, + .register_connector = radeon_dp_register_mst_connector, .destroy_connector = radeon_dp_destroy_mst_connector, .hotplug = radeon_dp_mst_hotplug, }; diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index 86d0b25ed054..0f408b002d98 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -374,6 +374,7 @@ struct drm_dp_mst_topology_mgr; struct drm_dp_mst_topology_cbs { /* create a connector for a port */ struct drm_connector *(*add_connector)(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, const char *path); + void (*register_connector)(struct drm_connector *connector); void (*destroy_connector)(struct drm_dp_mst_topology_mgr *mgr, struct drm_connector *connector); void (*hotplug)(struct drm_dp_mst_topology_mgr *mgr); From 274d83524895fe41ca8debae4eec60ede7252bb5 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 30 Sep 2015 10:39:42 +1000 Subject: [PATCH 217/240] drm/dp/mst: drop cancel work sync in the mstb destroy path (v2) Since 9eb1e57f564d4e6e10991402726cc83fe0b9172f drm/dp/mst: make sure mst_primary mstb is valid in work function we validate the mstb structs in the work function, and doing that takes a reference. So we should never get here with the work function running using the mstb device, only if the work function hasn't run yet or is running for another mstb. So we don't need to sync the work here, this was causing lockdep spew as below. [ +0.000160] ============================================= [ +0.000001] [ INFO: possible recursive locking detected ] [ +0.000002] 3.10.0-320.el7.rhel72.stable.backport.3.x86_64.debug #1 Tainted: G W ------------ [ +0.000001] --------------------------------------------- [ +0.000001] kworker/4:2/1262 is trying to acquire lock: [ +0.000001] ((&mgr->work)){+.+.+.}, at: [] flush_work+0x5/0x2e0 [ +0.000007] but task is already holding lock: [ +0.000001] ((&mgr->work)){+.+.+.}, at: [] process_one_work+0x1b4/0x710 [ +0.000004] other info that might help us debug this: [ +0.000001] Possible unsafe locking scenario: [ +0.000002] CPU0 [ +0.000000] ---- [ +0.000001] lock((&mgr->work)); [ +0.000002] lock((&mgr->work)); [ +0.000001] *** DEADLOCK *** [ +0.000001] May be due to missing lock nesting notation [ +0.000002] 2 locks held by kworker/4:2/1262: [ +0.000001] #0: (events_long){.+.+.+}, at: [] process_one_work+0x1b4/0x710 [ +0.000004] #1: ((&mgr->work)){+.+.+.}, at: [] process_one_work+0x1b4/0x710 [ +0.000003] stack backtrace: [ +0.000003] CPU: 4 PID: 1262 Comm: kworker/4:2 Tainted: G W ------------ 3.10.0-320.el7.rhel72.stable.backport.3.x86_64.debug #1 [ +0.000001] Hardware name: LENOVO 20EGS0R600/20EGS0R600, BIOS GNET71WW (2.19 ) 02/05/2015 [ +0.000008] Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper] [ +0.000001] ffffffff82c26c90 00000000a527b914 ffff88046399bae8 ffffffff816fe04d [ +0.000004] ffff88046399bb58 ffffffff8110f47f ffff880461438000 0001009b840fc003 [ +0.000002] ffff880461438a98 0000000000000000 0000000804dc26e1 ffffffff824a2c00 [ +0.000003] Call Trace: [ +0.000004] [] dump_stack+0x19/0x1b [ +0.000004] [] __lock_acquire+0x115f/0x1250 [ +0.000002] [] lock_acquire+0x99/0x1e0 [ +0.000002] [] ? flush_work+0x5/0x2e0 [ +0.000002] [] flush_work+0x4e/0x2e0 [ +0.000002] [] ? flush_work+0x5/0x2e0 [ +0.000004] [] ? native_sched_clock+0x35/0x80 [ +0.000002] [] ? sched_clock+0x9/0x10 [ +0.000002] [] ? local_clock+0x25/0x30 [ +0.000002] [] ? mark_held_locks+0xb9/0x140 [ +0.000003] [] ? __cancel_work_timer+0x95/0x160 [ +0.000002] [] __cancel_work_timer+0xa8/0x160 [ +0.000002] [] cancel_work_sync+0x10/0x20 [ +0.000007] [] drm_dp_destroy_mst_branch_device+0x27/0x120 [drm_kms_helper] [ +0.000006] [] drm_dp_mst_link_probe_work+0x78/0xa0 [drm_kms_helper] [ +0.000002] [] process_one_work+0x220/0x710 [ +0.000002] [] ? process_one_work+0x1b4/0x710 [ +0.000005] [] worker_thread+0x11b/0x3a0 [ +0.000003] [] ? process_one_work+0x710/0x710 [ +0.000002] [] kthread+0xed/0x100 [ +0.000003] [] ? insert_kthread_work+0x80/0x80 [ +0.000003] [] ret_from_fork+0x58/0x90 v2: add flush_work. Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_dp_mst_topology.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 36454a146ae9..3084b3a7aeb3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -804,8 +804,6 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref) struct drm_dp_mst_port *port, *tmp; bool wake_tx = false; - cancel_work_sync(&mstb->mgr->work); - /* * destroy all ports - don't need lock * as there are no more references to the mst branch @@ -1993,6 +1991,8 @@ void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr) drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, DP_MST_EN | DP_UPSTREAM_IS_SRC); mutex_unlock(&mgr->lock); + flush_work(&mgr->work); + flush_work(&mgr->destroy_connector_work); } EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend); @@ -2765,6 +2765,7 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_init); */ void drm_dp_mst_topology_mgr_destroy(struct drm_dp_mst_topology_mgr *mgr) { + flush_work(&mgr->work); flush_work(&mgr->destroy_connector_work); mutex_lock(&mgr->payload_lock); kfree(mgr->payloads); From ccf03d6995fa4b784f5b987726ba98f4859bf326 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 1 Oct 2015 16:28:25 +1000 Subject: [PATCH 218/240] drm/dp/mst: add some defines for logical/physical ports This just removes the magic number. Acked-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 +- include/drm/drm_dp_helper.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 3084b3a7aeb3..bf27a07dbce3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1125,7 +1125,7 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, drm_dp_put_port(port); goto out; } - if (port->port_num >= 8) { + if (port->port_num >= DP_MST_LOGICAL_PORT_0) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); drm_mode_connector_set_tile_property(port->connector); } diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 499e9f625aef..0212d139a480 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -568,6 +568,10 @@ #define MODE_I2C_READ 4 #define MODE_I2C_STOP 8 +/* DP 1.2 MST PORTs - Section 2.5.1 v1.2a spec */ +#define DP_MST_PHYSICAL_PORT_0 0 +#define DP_MST_LOGICAL_PORT_0 8 + #define DP_LINK_STATUS_SIZE 6 bool drm_dp_channel_eq_ok(const u8 link_status[DP_LINK_STATUS_SIZE], int lane_count); From e3c41e37b0f4b18cbd4dac76cbeece5a7558b909 Mon Sep 17 00:00:00 2001 From: "Lee, Chun-Yi" Date: Tue, 29 Sep 2015 20:58:57 +0800 Subject: [PATCH 219/240] x86/kexec: Fix kexec crash in syscall kexec_file_load() The original bug is a page fault crash that sometimes happens on big machines when preparing ELF headers: BUG: unable to handle kernel paging request at ffffc90613fc9000 IP: [] prepare_elf64_ram_headers_callback+0x165/0x260 The bug is caused by us under-counting the number of memory ranges and subsequently not allocating enough ELF header space for them. The bug is typically masked on smaller systems, because the ELF header allocation is rounded up to the next page. This patch modifies the code in fill_up_crash_elf_data() by using walk_system_ram_res() instead of walk_system_ram_range() to correctly count the max number of crash memory ranges. That's because the walk_system_ram_range() filters out small memory regions that reside in the same page, but walk_system_ram_res() does not. Here's how I found the bug: After tracing prepare_elf64_headers() and prepare_elf64_ram_headers_callback(), the code uses walk_system_ram_res() to fill-in crash memory regions information to the program header, so it counts those small memory regions that reside in a page area. But, when the kernel was using walk_system_ram_range() in fill_up_crash_elf_data() to count the number of crash memory regions, it filters out small regions. I printed those small memory regions, for example: kexec: Get nr_ram ranges. vaddr=0xffff880077592258 paddr=0x77592258, sz=0xdc0 Based on the code in walk_system_ram_range(), this memory region will be filtered out: pfn = (0x77592258 + 0x1000 - 1) >> 12 = 0x77593 end_pfn = (0x77592258 + 0xfc0 -1 + 1) >> 12 = 0x77593 end_pfn - pfn = 0x77593 - 0x77593 = 0 <=== if (end_pfn > pfn) is FALSE So, the max_nr_ranges that's counted by the kernel doesn't include small memory regions - causing us to under-allocate the required space. That causes the page fault crash that happens in a later code path when preparing ELF headers. This bug is not easy to reproduce on small machines that have few CPUs, because the allocated page aligned ELF buffer has more free space to cover those small memory regions' PT_LOAD headers. Signed-off-by: Lee, Chun-Yi Cc: Andy Lutomirski Cc: Baoquan He Cc: Jiang Liu Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Takashi Iwai Cc: Thomas Gleixner Cc: Viresh Kumar Cc: Vivek Goyal Cc: kexec@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: Link: http://lkml.kernel.org/r/1443531537-29436-1-git-send-email-jlee@suse.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index e068d6683dba..74ca2fe7a0b3 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -185,10 +185,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(unsigned long start_pfn, - unsigned long nr_pfn, void *arg) +static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) { - int *nr_ranges = arg; + unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; @@ -214,7 +213,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->image = image; - walk_system_ram_range(0, -1, &nr_ranges, + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); ced->max_nr_ranges = nr_ranges; From ab76f7b4ab2397ffdd2f1eb07c55697d19991d10 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Thu, 1 Oct 2015 09:04:22 -0400 Subject: [PATCH 220/240] x86/mm: Set NX on gap between __ex_table and rodata Unused space between the end of __ex_table and the start of rodata can be left W+x in the kernel page tables. Extend the setting of the NX bit to cover this gap by starting from text_end rather than rodata_start. Before: ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff81600000 6M ro PSE GLB x pmd 0xffffffff81600000-0xffffffff81754000 1360K ro GLB x pte 0xffffffff81754000-0xffffffff81800000 688K RW GLB x pte 0xffffffff81800000-0xffffffff81a00000 2M ro PSE GLB NX pmd 0xffffffff81a00000-0xffffffff81b3b000 1260K ro GLB NX pte 0xffffffff81b3b000-0xffffffff82000000 4884K RW GLB NX pte 0xffffffff82000000-0xffffffff82200000 2M RW PSE GLB NX pmd 0xffffffff82200000-0xffffffffa0000000 478M pmd After: ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff81600000 6M ro PSE GLB x pmd 0xffffffff81600000-0xffffffff81754000 1360K ro GLB x pte 0xffffffff81754000-0xffffffff81800000 688K RW GLB NX pte 0xffffffff81800000-0xffffffff81a00000 2M ro PSE GLB NX pmd 0xffffffff81a00000-0xffffffff81b3b000 1260K ro GLB NX pte 0xffffffff81b3b000-0xffffffff82000000 4884K RW GLB NX pte 0xffffffff82000000-0xffffffff82200000 2M RW PSE GLB NX pmd 0xffffffff82200000-0xffffffffa0000000 478M pmd Signed-off-by: Stephen Smalley Acked-by: Kees Cook Cc: Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443704662-3138-1-git-send-email-sds@tycho.nsa.gov Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 30564e2752d3..df48430c279b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,7 +1132,7 @@ void mark_rodata_ro(void) * has been zapped already via cleanup_highmem(). */ all_end = roundup((unsigned long)_brk_end, PMD_SIZE); - set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); rodata_test(); From 88724bfa68be792c1487d759e87568c36ac1a1cc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 24 Sep 2015 14:00:51 +1000 Subject: [PATCH 221/240] md: wait for pending superblock updates before switching to read-only If a superblock update is pending, wait for it to complete before letting md_set_readonly() switch to readonly. Otherwise we might lose important information about a device having failed. For external arrays, waiting for superblock updates can wait on user-space, so in that case, just return an error. Reported-and-tested-by: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/md.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 4f5ecbe94ccb..b8247bc32cf6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5409,9 +5409,13 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) * which will now never happen */ wake_up_process(mddev->sync_thread->tsk); + if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags)) + return -EBUSY; mddev_unlock(mddev); wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); + wait_event(mddev->sb_wait, + !test_bit(MD_CHANGE_PENDING, &mddev->flags)); mddev_lock_nointr(mddev); mutex_lock(&mddev->open_mutex); From ebda780bce8d58ec0abab157397c9e099c41a05f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 18 Sep 2015 10:20:13 -0700 Subject: [PATCH 222/240] raid5: update analysis state for failed stripe handle_failed_stripe() makes the stripe fail, eg, all IO will return with a failure, but it doesn't update stripe_head_state. Later handle_stripe() has special handling for raid6 for handle_stripe_fill(). That check before handle_stripe_fill() doesn't skip the failed stripe and we get a kernel crash in need_this_block. This patch clear the analysis state to make sure no functions wrongly called after handle_failed_stripe() Signed-off-by: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/raid5.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 15ef2c641b2b..2105e5f1fd31 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3150,6 +3150,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, spin_unlock_irq(&sh->stripe_lock); if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) wake_up(&conf->wait_for_overlap); + if (bi) + s->to_read--; while (bi && bi->bi_iter.bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = @@ -3169,6 +3171,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, */ clear_bit(R5_LOCKED, &sh->dev[i].flags); } + s->to_write = 0; + s->written = 0; if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) if (atomic_dec_and_test(&conf->pending_full_writes)) From 36707bb2e7c6730d79d6cdc6d1475d3d7e94c518 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 24 Sep 2015 15:25:36 +1000 Subject: [PATCH 223/240] md/raid5: don't index beyond end of array in need_this_block(). When need_this_block probably shouldn't be called when there are more than 2 failed devices, we really don't want it to try indexing beyond the end of the failed_num[] of fdev[] arrays. So limit the loops to at most 2 iterations. Reported-by: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/raid5.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2105e5f1fd31..d66ff723df45 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3304,7 +3304,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, */ return 0; - for (i = 0; i < s->failed; i++) { + for (i = 0; i < s->failed && i < 2; i++) { if (fdev[i]->towrite && !test_bit(R5_UPTODATE, &fdev[i]->flags) && !test_bit(R5_OVERWRITE, &fdev[i]->flags)) @@ -3328,7 +3328,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s, sh->sector < sh->raid_conf->mddev->recovery_cp) /* reconstruct-write isn't being forced */ return 0; - for (i = 0; i < s->failed; i++) { + for (i = 0; i < s->failed && i < 2; i++) { if (s->failed_num[i] != sh->pd_idx && s->failed_num[i] != sh->qd_idx && !test_bit(R5_UPTODATE, &fdev[i]->flags) && From 66eefe5de11db1e0d8f2edc3880d50e7c36a9d43 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 24 Sep 2015 15:47:47 +1000 Subject: [PATCH 224/240] md/raid0: apply base queue limits *before* disk_stack_limits Calling e.g. blk_queue_max_hw_sectors() after calls to disk_stack_limits() discards the settings determined by disk_stack_limits(). So we need to make those calls first. Fixes: 199dc6ed5179 ("md/raid0: update queue parameter in a safer location.") Cc: stable@vger.kernel.org (v2.6.35+ - please apply with 199dc6ed5179). Reported-by: Jes Sorensen Signed-off-by: NeilBrown --- drivers/md/raid0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 63e619b2f44e..f8e5db0cb5aa 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -376,12 +376,6 @@ static int raid0_run(struct mddev *mddev) struct md_rdev *rdev; bool discard_supported = false; - rdev_for_each(rdev, mddev) { - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - if (blk_queue_discard(bdev_get_queue(rdev->bdev))) - discard_supported = true; - } blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); @@ -390,6 +384,12 @@ static int raid0_run(struct mddev *mddev) blk_queue_io_opt(mddev->queue, (mddev->chunk_sectors << 9) * mddev->raid_disks); + rdev_for_each(rdev, mddev) { + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + if (blk_queue_discard(bdev_get_queue(rdev->bdev))) + discard_supported = true; + } if (!discard_supported) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); else From d4929add83ad4660b1824a9282ab5dd4d60140fa Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 18 Sep 2015 10:20:12 -0700 Subject: [PATCH 225/240] md: clear CHANGE_PENDING in readonly array If faulty disks of an array are more than allowed degraded number, the array enters error handling. It will be marked as read-only with MD_CHANGE_PENDING/RECOVERY_NEEDED set. But currently recovery doesn't clear CHANGE_PENDING bit for read-only array. If MD_CHANGE_PENDING is set for a raid5 array, all returned IO will be hold on a list till the bit is clear. But recovery nevery clears this bit, the IO is always in pending state and nevery finish. This has bad effects like upper layer can't get an IO error and the array can't be stopped. Fixes: c3cce6cda162 ("md/raid5: ensure device failure recorded before write request returns.") Signed-off-by: Shaohua Li Signed-off-by: NeilBrown --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index b8247bc32cf6..c702de18207a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8164,6 +8164,7 @@ void md_check_recovery(struct mddev *mddev) md_reap_sync_thread(mddev); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + clear_bit(MD_CHANGE_PENDING, &mddev->flags); goto unlock; } From 644df1a85fc4b0c7a16800f55717261546f4e651 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Sep 2015 14:15:10 +0200 Subject: [PATCH 226/240] md: drop null test before destroy functions Remove unneeded NULL test. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); // Signed-off-by: Julia Lawall Signed-off-by: NeilBrown --- drivers/md/multipath.c | 3 +-- drivers/md/raid1.c | 6 ++---- drivers/md/raid10.c | 9 +++------ drivers/md/raid5.c | 3 +-- 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index d222522c52e0..d132f06afdd1 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -470,8 +470,7 @@ static int multipath_run (struct mddev *mddev) return 0; out_free_conf: - if (conf->pool) - mempool_destroy(conf->pool); + mempool_destroy(conf->pool); kfree(conf->multipaths); kfree(conf); mddev->private = NULL; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 4517f06c41ba..5f4f5536f66f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2843,8 +2843,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) abort: if (conf) { - if (conf->r1bio_pool) - mempool_destroy(conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); @@ -2946,8 +2945,7 @@ static void raid1_free(struct mddev *mddev, void *priv) { struct r1conf *conf = priv; - if (conf->r1bio_pool) - mempool_destroy(conf->r1bio_pool); + mempool_destroy(conf->r1bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf->poolinfo); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0fc33eb88855..7c99a4037715 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3486,8 +3486,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n", mdname(mddev)); if (conf) { - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); + mempool_destroy(conf->r10bio_pool); kfree(conf->mirrors); safe_put_page(conf->tmppage); kfree(conf); @@ -3682,8 +3681,7 @@ static int run(struct mddev *mddev) out_free_conf: md_unregister_thread(&mddev->thread); - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); + mempool_destroy(conf->r10bio_pool); safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf); @@ -3696,8 +3694,7 @@ static void raid10_free(struct mddev *mddev, void *priv) { struct r10conf *conf = priv; - if (conf->r10bio_pool) - mempool_destroy(conf->r10bio_pool); + mempool_destroy(conf->r10bio_pool); safe_put_page(conf->tmppage); kfree(conf->mirrors); kfree(conf->mirrors_old); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index d66ff723df45..49bb8d3ff9be 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2271,8 +2271,7 @@ static void shrink_stripes(struct r5conf *conf) drop_one_stripe(conf)) ; - if (conf->slab_cache) - kmem_cache_destroy(conf->slab_cache); + kmem_cache_destroy(conf->slab_cache); conf->slab_cache = NULL; } From e8ff8bf09ff49733534ff3cee91bde030186055f Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Wed, 16 Sep 2015 10:20:05 -0400 Subject: [PATCH 227/240] md/raid1: Avoid raid1 resync getting stuck close_sync() needs to set conf->next_resync to a large, but safe value below MaxSector and use it to determine whether or not to set start_next_window in wait_barrier() Solution suggested by Neil Brown. Reported-by: Nate Dailey Tested-by: Xiao Ni Signed-off-by: Jes Sorensen Signed-off-by: NeilBrown --- drivers/md/raid1.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5f4f5536f66f..049df6c4a8cc 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -881,8 +881,7 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) } if (bio && bio_data_dir(bio) == WRITE) { - if (bio->bi_iter.bi_sector >= - conf->mddev->curr_resync_completed) { + if (bio->bi_iter.bi_sector >= conf->next_resync) { if (conf->start_next_window == MaxSector) conf->start_next_window = conf->next_resync + @@ -1516,7 +1515,7 @@ static void close_sync(struct r1conf *conf) conf->r1buf_pool = NULL; spin_lock_irq(&conf->resync_lock); - conf->next_resync = 0; + conf->next_resync = MaxSector - 2 * NEXT_NORMALIO_DISTANCE; conf->start_next_window = MaxSector; conf->current_window_requests += conf->next_window_requests; From da6fb7a9e5bd6f04f7e15070f630bdf1ea502841 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 1 Oct 2015 16:03:38 +1000 Subject: [PATCH 228/240] md/bitmap: don't pass -1 to bitmap_storage_alloc. Passing -1 to bitmap_storage_alloc() causes page->index to be set to -1, which is quite problematic. So only pass ->cluster_slot if mddev_is_clustered(). Fixes: b97e92574c0b ("Use separate bitmaps for each nodes in the cluster") Cc: stable@vger.kernel.org (v4.1+) Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index e51de52eeb94..48b5890c28e3 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1997,7 +1997,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) ret = bitmap_storage_alloc(&store, chunks, !bitmap->mddev->bitmap_info.external, - bitmap->cluster_slot); + mddev_is_clustered(bitmap->mddev) + ? bitmap->cluster_slot : 0); if (ret) goto err; From f4b4aae1828855db761bf998ce37d3062b1d6446 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 1 Oct 2015 01:40:43 +0100 Subject: [PATCH 229/240] x86/headers/uapi: Fix __BITS_PER_LONG value for x32 builds On x32, gcc predefines __x86_64__ but long is only 32-bit. Use __ILP32__ to distinguish x32. Fixes this compiler error in perf: tools/include/asm-generic/bitops/__ffs.h: In function '__ffs': tools/include/asm-generic/bitops/__ffs.h:19:8: error: right shift count >= width of type [-Werror=shift-count-overflow] word >>= 32; ^ This isn't sufficient to build perf for x32, though. Signed-off-by: Ben Hutchings Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443660043.2730.15.camel@decadent.org.uk Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/bitsperlong.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h index b0ae1c4dc791..217909b4d6f5 100644 --- a/arch/x86/include/uapi/asm/bitsperlong.h +++ b/arch/x86/include/uapi/asm/bitsperlong.h @@ -1,7 +1,7 @@ #ifndef __ASM_X86_BITSPERLONG_H #define __ASM_X86_BITSPERLONG_H -#ifdef __x86_64__ +#if defined(__x86_64__) && !defined(__ILP32__) # define __BITS_PER_LONG 64 #else # define __BITS_PER_LONG 32 From 0c5d187828588dd1b36cb93b4481a8db467ef3e8 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Fri, 2 Oct 2015 09:48:57 +0200 Subject: [PATCH 230/240] MIPS: BPF: Fix load delay slots. The entire bpf_jit_asm.S is written in noreorder mode because "we know better" according to a comment. This also prevented the assembler from throwing in the required NOPs for MIPS I processors which have no load-use interlock, thus the load's consumer might end up using the old value of the register from prior to the load. Fixed by putting the assembler in reorder mode for just the affected load instructions. This is not enough for gas to actually try to be clever by looking at the next instruction and inserting a nop only when needed but as the comment said "we know better", so getting gas to unconditionally emit a NOP is just right in this case and prevents adding further ifdefery. Signed-off-by: Ralf Baechle --- arch/mips/net/bpf_jit_asm.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S index eabb5e0889ee..5d2e0c8d29c0 100644 --- a/arch/mips/net/bpf_jit_asm.S +++ b/arch/mips/net/bpf_jit_asm.S @@ -61,7 +61,9 @@ FEXPORT(sk_load_word_positive) is_offset_in_header(4, word) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset + .set reorder lw $r_A, 0(t1) + .set noreorder #ifdef CONFIG_CPU_LITTLE_ENDIAN # if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A @@ -88,7 +90,9 @@ FEXPORT(sk_load_half_positive) is_offset_in_header(2, half) /* Offset within header boundaries */ PTR_ADDU t1, $r_skb_data, offset + .set reorder lh $r_A, 0(t1) + .set noreorder #ifdef CONFIG_CPU_LITTLE_ENDIAN # if defined(__mips_isa_rev) && (__mips_isa_rev >= 2) wsbh t0, $r_A From ee556d00cf20012e889344a0adbbf809ab5015a3 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Wed, 30 Sep 2015 10:49:55 +0800 Subject: [PATCH 231/240] arm64: ftrace: fix function_graph tracer panic When function graph tracer is enabled, the following operation will trigger panic: mount -t debugfs nodev /sys/kernel echo next_tgid > /sys/kernel/tracing/set_ftrace_filter echo function_graph > /sys/kernel/tracing/current_tracer ls /proc/ ------------[ cut here ]------------ [ 198.501417] Unable to handle kernel paging request at virtual address cb88537fdc8ba316 [ 198.506126] pgd = ffffffc008f79000 [ 198.509363] [cb88537fdc8ba316] *pgd=00000000488c6003, *pud=00000000488c6003, *pmd=0000000000000000 [ 198.517726] Internal error: Oops: 94000005 [#1] SMP [ 198.518798] Modules linked in: [ 198.520582] CPU: 1 PID: 1388 Comm: ls Tainted: G [ 198.521800] Hardware name: linux,dummy-virt (DT) [ 198.522852] task: ffffffc0fa9e8000 ti: ffffffc0f9ab0000 task.ti: ffffffc0f9ab0000 [ 198.524306] PC is at next_tgid+0x30/0x100 [ 198.525205] LR is at return_to_handler+0x0/0x20 [ 198.526090] pc : [] lr : [] pstate: 60000145 [ 198.527392] sp : ffffffc0f9ab3d40 [ 198.528084] x29: ffffffc0f9ab3d40 x28: ffffffc0f9ab0000 [ 198.529406] x27: ffffffc000d6a000 x26: ffffffc000b786e8 [ 198.530659] x25: ffffffc0002a1900 x24: ffffffc0faf16c00 [ 198.531942] x23: ffffffc0f9ab3ea0 x22: 0000000000000002 [ 198.533202] x21: ffffffc000d85050 x20: 0000000000000002 [ 198.534446] x19: 0000000000000002 x18: 0000000000000000 [ 198.535719] x17: 000000000049fa08 x16: ffffffc000242efc [ 198.537030] x15: 0000007fa472b54c x14: ffffffffff000000 [ 198.538347] x13: ffffffc0fada84a0 x12: 0000000000000001 [ 198.539634] x11: ffffffc0f9ab3d70 x10: ffffffc0f9ab3d70 [ 198.540915] x9 : ffffffc0000907c0 x8 : ffffffc0f9ab3d40 [ 198.542215] x7 : 0000002e330f08f0 x6 : 0000000000000015 [ 198.543508] x5 : 0000000000000f08 x4 : ffffffc0f9835ec0 [ 198.544792] x3 : cb88537fdc8ba316 x2 : cb88537fdc8ba306 [ 198.546108] x1 : 0000000000000002 x0 : ffffffc000d85050 [ 198.547432] [ 198.547920] Process ls (pid: 1388, stack limit = 0xffffffc0f9ab0020) [ 198.549170] Stack: (0xffffffc0f9ab3d40 to 0xffffffc0f9ab4000) [ 198.582568] Call trace: [ 198.583313] [] next_tgid+0x30/0x100 [ 198.584359] [] ftrace_graph_caller+0x6c/0x70 [ 198.585503] [] ftrace_graph_caller+0x6c/0x70 [ 198.586574] [] ftrace_graph_caller+0x6c/0x70 [ 198.587660] [] ftrace_graph_caller+0x6c/0x70 [ 198.588896] Code: aa0003f5 2a0103f4 b4000102 91004043 (885f7c60) [ 198.591092] ---[ end trace 6a346f8f20949ac8 ]--- This is because when using function graph tracer, if the traced function return value is in multi regs ([x0-x7]), return_to_handler may corrupt them. So in return_to_handler, the parameter regs should be protected properly. Cc: # 3.18+ Signed-off-by: Li Bin Acked-by: AKASHI Takahiro Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-ftrace.S | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 08cafc518b9a..0f03a8fe2314 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -178,6 +178,24 @@ ENTRY(ftrace_stub) ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* save return value regs*/ + .macro save_return_regs + sub sp, sp, #64 + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + add sp, sp, #64 + .endm + /* * void ftrace_graph_caller(void) * @@ -204,11 +222,11 @@ ENDPROC(ftrace_graph_caller) * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) - str x0, [sp, #-16]! + save_return_regs mov x0, x29 // parent's fp bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); mov x30, x0 // restore the original return address - ldr x0, [sp], #16 + restore_return_regs ret END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From 0fa24340f7c88d2814547d8d24d5e3a1803009cc Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 21 Sep 2015 10:07:41 -0700 Subject: [PATCH 232/240] MIPS: Fix octeon FP context switch handling Commit 1a3d59579b9f ("MIPS: Tidy up FPU context switching") removed FP context saving from the asm-written resume function in favour of reusing existing code to perform the same task. However it only removed the FP context saving code from the r4k_switch.S implementation of resume. Octeon uses its own implementation in octeon_switch.S, so remove FP context saving there too in order to prevent attempting to save context twice. That formerly led to an exception from the second save as follows because the FPU had already been disabled by the first save: do_cpu invoked from kernel context![#1]: CPU: 0 PID: 2 Comm: kthreadd Not tainted 4.3.0-rc2-dirty #2 task: 800000041f84a008 ti: 800000041f864000 task.ti: 800000041f864000 $ 0 : 0000000000000000 0000000010008ce1 0000000000100000 ffffffffbfffffff $ 4 : 800000041f84a008 800000041f84ac08 800000041f84c000 0000000000000004 $ 8 : 0000000000000001 0000000000000000 0000000000000000 0000000000000001 $12 : 0000000010008ce3 0000000000119c60 0000000000000036 800000041f864000 $16 : 800000041f84ac08 800000000792ce80 800000041f84a008 ffffffff81758b00 $20 : 0000000000000000 ffffffff8175ae50 0000000000000000 ffffffff8176c740 $24 : 0000000000000006 ffffffff81170300 $28 : 800000041f864000 800000041f867d90 0000000000000000 ffffffff815f3fa0 Hi : 0000000000fa8257 Lo : ffffffffe15cfc00 epc : ffffffff8112821c resume+0x9c/0x200 ra : ffffffff815f3fa0 __schedule+0x3f0/0x7d8 Status: 10008ce2 KX SX UX KERNEL EXL Cause : 1080002c (ExcCode 0b) PrId : 000d0601 (Cavium Octeon+) Modules linked in: Process kthreadd (pid: 2, threadinfo=800000041f864000, task=800000041f84a008, tls=0000000000000000) Stack : ffffffff81604218 ffffffff815f7e08 800000041f84a008 ffffffff811681b0 800000041f84a008 ffffffff817e9878 0000000000000000 ffffffff81770000 ffffffff81768340 ffffffff81161398 0000000000000001 0000000000000000 0000000000000000 ffffffff815f4424 0000000000000000 ffffffff81161d68 ffffffff81161be8 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ffffffff8111e16c 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 ... Call Trace: [] resume+0x9c/0x200 [] __schedule+0x3f0/0x7d8 [] schedule+0x34/0x98 [] kthreadd+0x180/0x198 [] ret_from_kernel_thread+0x14/0x1c Tested using cavium_octeon_defconfig on an EdgeRouter Lite. Fixes: 1a3d59579b9f ("MIPS: Tidy up FPU context switching") Reported-by: Aaro Koskinen Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Aleksey Makarov Cc: linux-kernel@vger.kernel.org Cc: Chandrakala Chavva Cc: David Daney Cc: Leonid Rosenboim Patchwork: https://patchwork.linux-mips.org/patch/11166/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/octeon_switch.S | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/arch/mips/kernel/octeon_switch.S b/arch/mips/kernel/octeon_switch.S index 423ae83af1fb..3375745b9198 100644 --- a/arch/mips/kernel/octeon_switch.S +++ b/arch/mips/kernel/octeon_switch.S @@ -18,7 +18,7 @@ .set pop /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti, int usedfpu) + * struct thread_info *next_ti) */ .align 7 LEAF(resume) @@ -28,30 +28,6 @@ cpu_save_nonscratch a0 LONG_S ra, THREAD_REG31(a0) - /* - * check if we need to save FPU registers - */ - .set push - .set noreorder - beqz a3, 1f - PTR_L t3, TASK_THREAD_INFO(a0) - .set pop - - /* - * clear saved user stack CU1 bit - */ - LONG_L t0, ST_OFF(t3) - li t1, ~ST0_CU1 - and t0, t0, t1 - LONG_S t0, ST_OFF(t3) - - .set push - .set arch=mips64r2 - fpu_save_double a0 t0 t1 # c0_status passed in t0 - # clobbers t1 - .set pop -1: - #if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 /* Check if we need to store CVMSEG state */ dmfc0 t0, $11,7 /* CvmMemCtl */ From 085c2f25d36ef4a69bb1dab933daee0692426f15 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 21 Sep 2015 10:07:42 -0700 Subject: [PATCH 233/240] MIPS: Fix R2300 FP context switch handling Commit 1a3d59579b9f ("MIPS: Tidy up FPU context switching") removed FP context saving from the asm-written resume function in favour of reusing existing code to perform the same task. However it only removed the FP context saving code from the r4k_switch.S implementation of resume. Remove it from the r2300_switch.S implementation too in order to prevent attempting to save the FP context twice, which would likely lead to an exception from the second save because the FPU had already been disabled by the first save. This patch has only been build tested, using rbtx49xx_defconfig. Fixes: 1a3d59579b9f ("MIPS: Tidy up FPU context switching") Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Maciej W. Rozycki Cc: linux-kernel@vger.kernel.org Cc: Manuel Lauss Patchwork: https://patchwork.linux-mips.org/patch/11167/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_switch.S | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S index 5087a4b72e6b..ac27ef7d4d0e 100644 --- a/arch/mips/kernel/r2300_switch.S +++ b/arch/mips/kernel/r2300_switch.S @@ -30,19 +30,9 @@ */ #define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS) -/* - * FPU context is saved iff the process has used it's FPU in the current - * time slice as indicated by TIF_USEDFPU. In any case, the CU1 bit for user - * space STATUS register should be 0, so that a process *always* starts its - * userland with FPU disabled after each context switch. - * - * FPU will be enabled as soon as the process accesses FPU again, through - * do_cpu() trap. - */ - /* * task_struct *resume(task_struct *prev, task_struct *next, - * struct thread_info *next_ti, int usedfpu) + * struct thread_info *next_ti) */ LEAF(resume) mfc0 t1, CP0_STATUS @@ -50,22 +40,6 @@ LEAF(resume) cpu_save_nonscratch a0 sw ra, THREAD_REG31(a0) - beqz a3, 1f - - PTR_L t3, TASK_THREAD_INFO(a0) - - /* - * clear saved user stack CU1 bit - */ - lw t0, ST_OFF(t3) - li t1, ~ST0_CU1 - and t0, t0, t1 - sw t0, ST_OFF(t3) - - fpu_save_single a0, t0 # clobbers t0 - -1: - #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) PTR_LA t8, __stack_chk_guard LONG_L t9, TASK_STACK_CANARY(a1) From 66803dd9198cb57a4b7ed4a6846a63ab1d59a2e0 Mon Sep 17 00:00:00 2001 From: Matt Bennett Date: Wed, 30 Sep 2015 17:40:42 +1300 Subject: [PATCH 234/240] MIPS: Octeon: Fix kernel panic on startup from memory corruption During development it was found that a number of builds would panic during the kernel init process, more specifically in 'delayed_fput()'. The panic showed the kernel trying to access a memory address of '0xb7fdc00' while traversing the 'delayed_fput_list' structure. Comparing this memory address to the value of the pointer used on builds that did not panic confirmed that the pointer on crashing builds must have been corrupted at some stage earlier in the init process. By traversing the list earlier and earlier in the code it was found that 'plat_mem_setup()' was responsible for corrupting the list. Specifically the line: memory = cvmx_bootmem_phy_alloc(mem_alloc_size, __pa_symbol(&__init_end), -1, 0x100000, CVMX_BOOTMEM_FLAG_NO_LOCKING); Which would eventually call: cvmx_bootmem_phy_set_size(new_ent_addr, cvmx_bootmem_phy_get_size (ent_addr) - (desired_min_addr - ent_addr)); Where 'new_ent_addr'=0x4800000 (the address of 'delayed_fput_list') and the second argument (size)=0xb7fdc00 (the address causing the kernel panic). The job of this part of 'plat_mem_setup()' is to allocate chunks of memory for the kernel to use. At the start of each chunk of memory the size of the chunk is written, hence the value 0xb7fdc00 is written onto memory at 0x4800000, therefore the kernel panics when it goes back to access 'delayed_fput_list' later on in the initialisation process. On builds that were not crashing it was found that the compiler had placed 'delayed_fput_list' at 0x4800008, meaning it wasn't corrupted (but something else in memory was overwritten). As can be seen in the first function call above the code begins to allocate chunks of memory beginning from the symbol '__init_end'. The MIPS linker script (vmlinux.lds.S) however defines the .bss section to begin after '__init_end'. Therefore memory within the .bss section is allocated to the kernel to use (System.map shows 'delayed_fput_list' and other kernel structures to be in .bss). To stop the kernel panic (and the .bss section being corrupted) memory should begin being allocated from the symbol '_end'. Signed-off-by: Matt Bennett Acked-by: David Daney Cc: linux-mips@linux-mips.org Cc: aleksey.makarov@auriga.com Patchwork: https://patchwork.linux-mips.org/patch/11251/ Signed-off-by: Ralf Baechle --- arch/mips/cavium-octeon/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 89a628455bc2..bd634259eab9 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -933,7 +933,7 @@ void __init plat_mem_setup(void) while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX) && (total < MAX_MEMORY)) { memory = cvmx_bootmem_phy_alloc(mem_alloc_size, - __pa_symbol(&__init_end), -1, + __pa_symbol(&_end), -1, 0x100000, CVMX_BOOTMEM_FLAG_NO_LOCKING); if (memory >= 0) { From 62d78461447198b49383f20301aaa15fe97dfa4f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 2 Oct 2015 10:31:32 -0700 Subject: [PATCH 235/240] Revert "Input: synaptics - fix handling of disabling gesture mode" This reverts commit e51e38494a8ecc18650efb0c840600637891de2c: we actually do want the device to work in extended W mode, as this is the mode that allows us receiving multiple contact information. Cc: stable@vger.kernel.org --- drivers/input/mouse/synaptics.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 994ae7886156..6025eb430c0a 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -519,18 +519,14 @@ static int synaptics_set_mode(struct psmouse *psmouse) struct synaptics_data *priv = psmouse->private; priv->mode = 0; - - if (priv->absolute_mode) { + if (priv->absolute_mode) priv->mode |= SYN_BIT_ABSOLUTE_MODE; - if (SYN_CAP_EXTENDED(priv->capabilities)) - priv->mode |= SYN_BIT_W_MODE; - } - - if (!SYN_MODE_WMODE(priv->mode) && priv->disable_gesture) + if (priv->disable_gesture) priv->mode |= SYN_BIT_DISABLE_GESTURE; - if (psmouse->rate >= 80) priv->mode |= SYN_BIT_HIGH_RATE; + if (SYN_CAP_EXTENDED(priv->capabilities)) + priv->mode |= SYN_BIT_W_MODE; if (synaptics_mode_cmd(psmouse, priv->mode)) return -1; From c8415b9470727f70afce8607d4fe521789aa6c1c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Oct 2015 16:44:05 +0100 Subject: [PATCH 236/240] irqchip/gic-v3-its: Silence warning when its_lpi_alloc_chunks gets inlined More agressive inlining in recent versions of GCC have uncovered a new set of warnings: drivers/irqchip/irq-gic-v3-its.c: In function its_msi_prepare: drivers/irqchip/irq-gic-v3-its.c:1148:26: warning: lpi_base may be used uninitialized in this function [-Wmaybe-uninitialized] dev->event_map.lpi_base = lpi_base; ^ drivers/irqchip/irq-gic-v3-its.c:1116:6: note: lpi_base was declared here int lpi_base; ^ drivers/irqchip/irq-gic-v3-its.c:1149:25: warning: nr_lpis may be used uninitialized in this function [-Wmaybe-uninitialized] dev->event_map.nr_lpis = nr_lpis; ^ drivers/irqchip/irq-gic-v3-its.c:1117:6: note: nr_lpis was declared here int nr_lpis; ^ The warning is fairly benign (there is no code path that could actually use uninitialized variables), but let's silence it anyway by zeroing the variables on the error path. Reported-by: Alex Shi Tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: David Daney Cc: Jason Cooper Link: http://lkml.kernel.org/r/1443800646-8074-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3-its.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index ac7ae2b3cb83..25ceae9f7348 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -719,6 +719,9 @@ static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) out: spin_unlock(&lpi_lock); + if (!bitmap) + *base = *nr_ids = 0; + return bitmap; } From 791c76d58465a248cbd1ee422c8075cb90fa615f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Oct 2015 16:44:06 +0100 Subject: [PATCH 237/240] irqchip/gic-v3-its: Count additional LPIs for the aliased devices When configuring the interrupt mapping for a new device, we iterate over all the possible aliases to account for their maximum MSI allocation. This was introduced by e8137f4f5088 ("irqchip: gicv3-its: Iterate over PCI aliases to generate ITS configuration"). Turns out that the code doing that is a bit braindead, and repeatedly accounts for the same device over and over. Fix this by counting the actual alias that is passed to us by the core code. Signed-off-by: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: Alex Shi Cc: Ard Biesheuvel Cc: David Daney Cc: Jason Cooper Link: http://lkml.kernel.org/r/1443800646-8074-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index cf351c637464..a7c8c9ffbafd 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -62,7 +62,7 @@ static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data) dev_alias->dev_id = alias; if (pdev != dev_alias->pdev) - dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev); + dev_alias->count += its_pci_msi_vec_count(pdev); return 0; } From 67dfae0cd72fec5cd158b6e5fb1647b7dbe0834c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Mon, 14 Sep 2015 18:05:20 -0700 Subject: [PATCH 238/240] clocksource: Fix abs() usage w/ 64bit values This patch fixes one cases where abs() was being used with 64-bit nanosecond values, where the result may be capped at 32-bits. This potentially could cause watchdog false negatives on 32-bit systems, so this patch addresses the issue by using abs64(). Signed-off-by: John Stultz Cc: Prarit Bhargava Cc: Richard Cochran Cc: Ingo Molnar Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1442279124-7309-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/clocksource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 841b72f720e8..3a38775b50c2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data) continue; /* Check the deviation from the watchdog clocksource. */ - if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { + if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", cs->name); pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", From d218af78492a36a4ae607c08fedfb59258440314 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Fri, 25 Sep 2015 08:17:42 +0100 Subject: [PATCH 239/240] MIPS: scall: Always run the seccomp syscall filters The MIPS syscall handler code used to return -ENOSYS on invalid syscalls. Whilst this is expected, it caused problems for seccomp filters because the said filters never had the change to run since the code returned -ENOSYS before triggering them. This caused problems on the chromium testsuite for filters looking for invalid syscalls. This has now changed and the seccomp filters are always run even if the syscall is invalid. We return -ENOSYS once we return from the seccomp filters. Moreover, similar codepaths have been merged in the process which simplifies somewhat the overall syscall code. Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11236/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/scall32-o32.S | 39 ++++++++++++++-------------------- arch/mips/kernel/scall64-64.S | 38 ++++++++++++++------------------- arch/mips/kernel/scall64-n32.S | 19 +++++------------ arch/mips/kernel/scall64-o32.S | 19 +++++------------ 4 files changed, 42 insertions(+), 73 deletions(-) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index d06e30c1e4cd..65a74e4f0f45 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -36,16 +36,8 @@ NESTED(handle_sys, PT_SIZE, sp) lw t1, PT_EPC(sp) # skip syscall on return subu v0, v0, __NR_O32_Linux # check syscall number - sltiu t0, v0, __NR_O32_Linux_syscalls + 1 addiu t1, 4 # skip to next instruction sw t1, PT_EPC(sp) - beqz t0, illegal_syscall - - sll t0, v0, 2 - la t1, sys_call_table - addu t1, t0 - lw t2, (t1) # syscall routine - beqz t2, illegal_syscall sw a3, PT_R26(sp) # save a3 for syscall restarting @@ -96,6 +88,16 @@ loads_done: li t1, _TIF_WORK_SYSCALL_ENTRY and t0, t1 bnez t0, syscall_trace_entry # -> yes +syscall_common: + sltiu t0, v0, __NR_O32_Linux_syscalls + 1 + beqz t0, illegal_syscall + + sll t0, v0, 2 + la t1, sys_call_table + addu t1, t0 + lw t2, (t1) # syscall routine + + beqz t2, illegal_syscall jalr t2 # Do The Real Thing (TM) @@ -116,7 +118,7 @@ o32_syscall_exit: syscall_trace_entry: SAVE_STATIC - move s0, t2 + move s0, v0 move a0, sp /* @@ -129,27 +131,18 @@ syscall_trace_entry: 1: jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall + + move v0, s0 # restore syscall - move t0, s0 RESTORE_STATIC lw a0, PT_R4(sp) # Restore argument registers lw a1, PT_R5(sp) lw a2, PT_R6(sp) lw a3, PT_R7(sp) - jalr t0 + j syscall_common - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sw t0, PT_R7(sp) # set error flag - beqz t0, 1f - - lw t1, PT_R2(sp) # syscall number - negu v0 # error - sw t1, PT_R0(sp) # save it for syscall restarting -1: sw v0, PT_R2(sp) # result - -2: j syscall_exit +1: j syscall_exit /* ------------------------------------------------------------------------ */ diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 428c2cd2326c..e732981cf99f 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -39,18 +39,11 @@ NESTED(handle_sys64, PT_SIZE, sp) .set at #endif - dsubu t0, v0, __NR_64_Linux # check syscall number - sltiu t0, t0, __NR_64_Linux_syscalls + 1 #if !defined(CONFIG_MIPS32_O32) && !defined(CONFIG_MIPS32_N32) ld t1, PT_EPC(sp) # skip syscall on return daddiu t1, 4 # skip to next instruction sd t1, PT_EPC(sp) #endif - beqz t0, illegal_syscall - - dsll t0, v0, 3 # offset into table - ld t2, (sys_call_table - (__NR_64_Linux * 8))(t0) - # syscall routine sd a3, PT_R26(sp) # save a3 for syscall restarting @@ -59,6 +52,17 @@ NESTED(handle_sys64, PT_SIZE, sp) and t0, t1, t0 bnez t0, syscall_trace_entry +syscall_common: + dsubu t2, v0, __NR_64_Linux + sltiu t0, t2, __NR_64_Linux_syscalls + 1 + beqz t0, illegal_syscall + + dsll t0, t2, 3 # offset into table + dla t2, sys_call_table + daddu t0, t2, t0 + ld t2, (t0) # syscall routine + beqz t2, illegal_syscall + jalr t2 # Do The Real Thing (TM) li t0, -EMAXERRNO - 1 # error? @@ -78,14 +82,14 @@ n64_syscall_exit: syscall_trace_entry: SAVE_STATIC - move s0, t2 + move s0, v0 move a0, sp move a1, v0 jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall - move t0, s0 + move v0, s0 RESTORE_STATIC ld a0, PT_R4(sp) # Restore argument registers ld a1, PT_R5(sp) @@ -93,19 +97,9 @@ syscall_trace_entry: ld a3, PT_R7(sp) ld a4, PT_R8(sp) ld a5, PT_R9(sp) - jalr t0 + j syscall_common - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sd t0, PT_R7(sp) # set error flag - beqz t0, 1f - - ld t1, PT_R2(sp) # syscall number - dnegu v0 # error - sd t1, PT_R0(sp) # save it for syscall restarting -1: sd v0, PT_R2(sp) # result - -2: j syscall_exit +1: j syscall_exit illegal_syscall: /* This also isn't a 64-bit syscall, throw an error. */ diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 3868cf4c83df..c79484397584 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -52,6 +52,7 @@ NESTED(handle_sysn32, PT_SIZE, sp) and t0, t1, t0 bnez t0, n32_syscall_trace_entry +syscall_common: jalr t2 # Do The Real Thing (TM) li t0, -EMAXERRNO - 1 # error? @@ -75,9 +76,9 @@ n32_syscall_trace_entry: move a1, v0 jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall - move t0, s0 + move t2, s0 RESTORE_STATIC ld a0, PT_R4(sp) # Restore argument registers ld a1, PT_R5(sp) @@ -85,19 +86,9 @@ n32_syscall_trace_entry: ld a3, PT_R7(sp) ld a4, PT_R8(sp) ld a5, PT_R9(sp) - jalr t0 + j syscall_common - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sd t0, PT_R7(sp) # set error flag - beqz t0, 1f - - ld t1, PT_R2(sp) # syscall number - dnegu v0 # error - sd t1, PT_R0(sp) # save it for syscall restarting -1: sd v0, PT_R2(sp) # result - -2: j syscall_exit +1: j syscall_exit not_n32_scall: /* This is not an n32 compatibility syscall, pass it on to diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 6622980d82ea..6369cfd390c6 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -87,6 +87,7 @@ loads_done: and t0, t1, t0 bnez t0, trace_a_syscall +syscall_common: jalr t2 # Do The Real Thing (TM) li t0, -EMAXERRNO - 1 # error? @@ -130,9 +131,9 @@ trace_a_syscall: 1: jal syscall_trace_enter - bltz v0, 2f # seccomp failed? Skip syscall + bltz v0, 1f # seccomp failed? Skip syscall - move t0, s0 + move t2, s0 RESTORE_STATIC ld a0, PT_R4(sp) # Restore argument registers ld a1, PT_R5(sp) @@ -142,19 +143,9 @@ trace_a_syscall: ld a5, PT_R9(sp) ld a6, PT_R10(sp) ld a7, PT_R11(sp) # For indirect syscalls - jalr t0 + j syscall_common - li t0, -EMAXERRNO - 1 # error? - sltu t0, t0, v0 - sd t0, PT_R7(sp) # set error flag - beqz t0, 1f - - ld t1, PT_R2(sp) # syscall number - dnegu v0 # error - sd t1, PT_R0(sp) # save it for syscall restarting -1: sd v0, PT_R2(sp) # result - -2: j syscall_exit +1: j syscall_exit /* ------------------------------------------------------------------------ */ From 049e6dde7e57f0054fdc49102e7ef4830c698b46 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Oct 2015 16:57:17 +0100 Subject: [PATCH 240/240] Linux 4.3-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1d341eba143d..fd46821e428d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 3 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Hurr durr I'ma sheep # *DOCUMENTATION*