Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "18 patches.

  Subsystems affected by this patch series: mm (hugetlb, compaction,
  vmalloc, shmem, memblock, pagecache, kasan, and hugetlb), mailmap,
  gcov, ubsan, and MAINTAINERS"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  MAINTAINERS/.mailmap: use my @kernel.org address
  mm: hugetlb: fix missing put_page in gather_surplus_pages()
  ubsan: implement __ubsan_handle_alignment_assumption
  kasan: make addr_has_metadata() return true for valid addresses
  kasan: add explicit preconditions to kasan_report()
  mm/filemap: add missing mem_cgroup_uncharge() to __add_to_page_cache_locked()
  mailmap: add entries for Manivannan Sadhasivam
  mailmap: fix name/email for Viresh Kumar
  memblock: do not start bottom-up allocations with kernel_end
  mm: thp: fix MADV_REMOVE deadlock on shmem THP
  init/gcov: allow CONFIG_CONSTRUCTORS on UML to fix module gcov
  mm/vmalloc: separate put pages and flush VM flags
  mm, compaction: move high_pfn to the for loop scope
  mm: migrate: do not migrate HugeTLB page whose refcount is one
  mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active
  mm: hugetlb: fix a race between isolating and freeing page
  mm: hugetlb: fix a race between freeing and dissolving the page
  mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page
This commit is contained in:
Linus Torvalds 2021-02-05 13:07:27 -08:00
commit 1e0d27fce0
18 changed files with 153 additions and 77 deletions

View File

@ -199,6 +199,8 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com>
Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org>
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
Mark Brown <broonie@sirena.org.uk>
@ -244,6 +246,7 @@ Morten Welinder <welinder@anemone.rentec.com>
Morten Welinder <welinder@darter.rentec.com>
Morten Welinder <welinder@troll.com>
Mythri P K <mythripk@ti.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
Nguyen Anh Quynh <aquynh@gmail.com>
Nicolas Ferre <nicolas.ferre@microchip.com> <nicolas.ferre@atmel.com>
Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org>
@ -334,6 +337,8 @@ Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com>
Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com>
Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>

View File

@ -4304,7 +4304,7 @@ S: Maintained
F: .clang-format
CLANG/LLVM BUILD SUPPORT
M: Nathan Chancellor <natechancellor@gmail.com>
M: Nathan Chancellor <nathan@kernel.org>
M: Nick Desaulniers <ndesaulniers@google.com>
L: clang-built-linux@googlegroups.com
S: Supported

View File

@ -735,9 +735,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
set_page_huge_active(page);
/*
* unlock_page because locked by add_to_page_cache()
* page_put due to reference from alloc_huge_page()
* put_page() due to reference from alloc_huge_page()
*/
unlock_page(page);
put_page(page);

View File

@ -770,6 +770,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
}
#endif
void set_page_huge_active(struct page *page);
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};

View File

@ -333,6 +333,13 @@ static inline void *kasan_reset_tag(const void *addr)
return (void *)arch_kasan_reset_tag(addr);
}
/**
* kasan_report - print a report about a bad memory access detected by KASAN
* @addr: address of the bad access
* @size: size of the bad access
* @is_write: whether the bad access is a write or a read
* @ip: instruction pointer for the accessibility check or the bad access itself
*/
bool kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);

View File

@ -24,7 +24,8 @@ struct notifier_block; /* in notifier.h */
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
#define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */
#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
/*
* VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
@ -37,12 +38,6 @@ struct notifier_block; /* in notifier.h */
* determine which allocations need the module shadow freed.
*/
/*
* Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with
* vfree_atomic().
*/
#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */
/* bits [20..32] reserved for arch specific ioremap internals */
/*

View File

@ -76,7 +76,6 @@ config CC_HAS_ASM_INLINE
config CONSTRUCTORS
bool
depends on !UML
config IRQ_WORK
bool

View File

@ -1066,7 +1066,13 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
/* Call all constructor functions linked into the kernel. */
static void __init do_ctors(void)
{
#ifdef CONFIG_CONSTRUCTORS
/*
* For UML, the constructors have already been called by the
* normal setup code as it's just a normal ELF binary, so we
* cannot do it again - but we do need CONFIG_CONSTRUCTORS
* even on UML for modules.
*/
#if defined(CONFIG_CONSTRUCTORS) && !defined(CONFIG_UML)
ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
for (; fn < (ctor_fn_t *) __ctors_end; fn++)

View File

@ -4,7 +4,7 @@ menu "GCOV-based kernel profiling"
config GCOV_KERNEL
bool "Enable gcov-based kernel profiling"
depends on DEBUG_FS
select CONSTRUCTORS if !UML
select CONSTRUCTORS
default n
help
This option enables gcov-based code profiling (e.g. for code coverage

View File

@ -427,3 +427,34 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val)
ubsan_epilogue();
}
EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
unsigned long align,
unsigned long offset);
void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
unsigned long align,
unsigned long offset)
{
struct alignment_assumption_data *data = _data;
unsigned long real_ptr;
if (suppress_report(&data->location))
return;
ubsan_prologue(&data->location, "alignment-assumption");
if (offset)
pr_err("assumption of %lu byte alignment (with offset of %lu byte) for pointer of type %s failed",
align, offset, data->type->type_name);
else
pr_err("assumption of %lu byte alignment for pointer of type %s failed",
align, data->type->type_name);
real_ptr = ptr - offset;
pr_err("%saddress is %lu aligned, misalignment offset is %lu bytes",
offset ? "offset " : "", BIT(real_ptr ? __ffs(real_ptr) : 0),
real_ptr & (align - 1));
ubsan_epilogue();
}
EXPORT_SYMBOL(__ubsan_handle_alignment_assumption);

View File

@ -78,6 +78,12 @@ struct invalid_value_data {
struct type_descriptor *type;
};
struct alignment_assumption_data {
struct source_location location;
struct source_location assumption_location;
struct type_descriptor *type;
};
#if defined(CONFIG_ARCH_SUPPORTS_INT128)
typedef __int128 s_max;
typedef unsigned __int128 u_max;

View File

@ -1342,7 +1342,7 @@ fast_isolate_freepages(struct compact_control *cc)
{
unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
unsigned int nr_scanned = 0;
unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0;
unsigned long low_pfn, min_pfn, highest = 0;
unsigned long nr_isolated = 0;
unsigned long distance;
struct page *page = NULL;
@ -1387,6 +1387,7 @@ fast_isolate_freepages(struct compact_control *cc)
struct page *freepage;
unsigned long flags;
unsigned int order_scanned = 0;
unsigned long high_pfn = 0;
if (!area->nr_free)
continue;

View File

@ -835,6 +835,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
XA_STATE(xas, &mapping->i_pages, offset);
int huge = PageHuge(page);
int error;
bool charged = false;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page);
@ -848,6 +849,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
error = mem_cgroup_charge(page, current->mm, gfp);
if (error)
goto error;
charged = true;
}
gfp &= GFP_RECLAIM_MASK;
@ -896,6 +898,8 @@ noinline int __add_to_page_cache_locked(struct page *page,
if (xas_error(&xas)) {
error = xas_error(&xas);
if (charged)
mem_cgroup_uncharge(page);
goto error;
}

View File

@ -2202,7 +2202,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{
spinlock_t *ptl;
struct mmu_notifier_range range;
bool was_locked = false;
bool do_unlock_page = false;
pmd_t _pmd;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
@ -2218,7 +2218,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
VM_BUG_ON(freeze && !page);
if (page) {
VM_WARN_ON_ONCE(!PageLocked(page));
was_locked = true;
if (page != pmd_page(*pmd))
goto out;
}
@ -2227,19 +2226,29 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (pmd_trans_huge(*pmd)) {
if (!page) {
page = pmd_page(*pmd);
if (unlikely(!trylock_page(page))) {
get_page(page);
_pmd = *pmd;
spin_unlock(ptl);
lock_page(page);
spin_lock(ptl);
if (unlikely(!pmd_same(*pmd, _pmd))) {
unlock_page(page);
/*
* An anonymous page must be locked, to ensure that a
* concurrent reuse_swap_page() sees stable mapcount;
* but reuse_swap_page() is not used on shmem or file,
* and page lock must not be taken when zap_pmd_range()
* calls __split_huge_pmd() while i_mmap_lock is held.
*/
if (PageAnon(page)) {
if (unlikely(!trylock_page(page))) {
get_page(page);
_pmd = *pmd;
spin_unlock(ptl);
lock_page(page);
spin_lock(ptl);
if (unlikely(!pmd_same(*pmd, _pmd))) {
unlock_page(page);
put_page(page);
page = NULL;
goto repeat;
}
put_page(page);
page = NULL;
goto repeat;
}
put_page(page);
do_unlock_page = true;
}
}
if (PageMlocked(page))
@ -2249,7 +2258,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
__split_huge_pmd_locked(vma, pmd, range.start, freeze);
out:
spin_unlock(ptl);
if (!was_locked && page)
if (do_unlock_page)
unlock_page(page);
/*
* No need to double call mmu_notifier->invalidate_range() callback.

View File

@ -79,6 +79,21 @@ DEFINE_SPINLOCK(hugetlb_lock);
static int num_fault_mutexes;
struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
static inline bool PageHugeFreed(struct page *head)
{
return page_private(head + 4) == -1UL;
}
static inline void SetPageHugeFreed(struct page *head)
{
set_page_private(head + 4, -1UL);
}
static inline void ClearPageHugeFreed(struct page *head)
{
set_page_private(head + 4, 0);
}
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
@ -1028,6 +1043,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
list_move(&page->lru, &h->hugepage_freelists[nid]);
h->free_huge_pages++;
h->free_huge_pages_node[nid]++;
SetPageHugeFreed(page);
}
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@ -1044,6 +1060,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
list_move(&page->lru, &h->hugepage_activelist);
set_page_refcounted(page);
ClearPageHugeFreed(page);
h->free_huge_pages--;
h->free_huge_pages_node[nid]--;
return page;
@ -1344,12 +1361,11 @@ struct hstate *size_to_hstate(unsigned long size)
*/
bool page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);
return PageHead(page) && PagePrivate(&page[1]);
return PageHeadHuge(page) && PagePrivate(&page[1]);
}
/* never called for tail page */
static void set_page_huge_active(struct page *page)
void set_page_huge_active(struct page *page)
{
VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
SetPagePrivate(&page[1]);
@ -1505,6 +1521,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
spin_lock(&hugetlb_lock);
h->nr_huge_pages++;
h->nr_huge_pages_node[nid]++;
ClearPageHugeFreed(page);
spin_unlock(&hugetlb_lock);
}
@ -1755,6 +1772,7 @@ int dissolve_free_huge_page(struct page *page)
{
int rc = -EBUSY;
retry:
/* Not to disrupt normal path by vainly holding hugetlb_lock */
if (!PageHuge(page))
return 0;
@ -1771,6 +1789,26 @@ int dissolve_free_huge_page(struct page *page)
int nid = page_to_nid(head);
if (h->free_huge_pages - h->resv_huge_pages == 0)
goto out;
/*
* We should make sure that the page is already on the free list
* when it is dissolved.
*/
if (unlikely(!PageHugeFreed(head))) {
spin_unlock(&hugetlb_lock);
cond_resched();
/*
* Theoretically, we should return -EBUSY when we
* encounter this race. In fact, we have a chance
* to successfully dissolve the page if we do a
* retry. Because the race window is quite small.
* If we seize this opportunity, it is an optimization
* for increasing the success rate of dissolving page.
*/
goto retry;
}
/*
* Move PageHWPoison flag from head page to the raw error page,
* which makes any subpages rather than the error page reusable.
@ -2009,13 +2047,16 @@ static int gather_surplus_pages(struct hstate *h, long delta)
/* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
int zeroed;
if ((--needed) < 0)
break;
/*
* This page is now managed by the hugetlb allocator and has
* no users -- drop the buddy allocator's reference.
*/
VM_BUG_ON_PAGE(!put_page_testzero(page), page);
zeroed = put_page_testzero(page);
VM_BUG_ON_PAGE(!zeroed, page);
enqueue_huge_page(h, page);
}
free:
@ -5555,9 +5596,9 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
{
bool ret = true;
VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock);
if (!page_huge_active(page) || !get_page_unless_zero(page)) {
if (!PageHeadHuge(page) || !page_huge_active(page) ||
!get_page_unless_zero(page)) {
ret = false;
goto unlock;
}

View File

@ -209,7 +209,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write,
static inline bool addr_has_metadata(const void *addr)
{
return true;
return (is_vmalloc_addr(addr) || virt_addr_valid(addr));
}
#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */

View File

@ -275,14 +275,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
*
* Find @size free area aligned to @align in the specified range and node.
*
* When allocation direction is bottom-up, the @start should be greater
* than the end of the kernel image. Otherwise, it will be trimmed. The
* reason is that we want the bottom-up allocation just near the kernel
* image so it is highly likely that the allocated memory and the kernel
* will reside in the same node.
*
* If bottom-up allocation failed, will try to allocate memory top-down.
*
* Return:
* Found address on success, 0 on failure.
*/
@ -291,8 +283,6 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t end, int nid,
enum memblock_flags flags)
{
phys_addr_t kernel_end, ret;
/* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
end == MEMBLOCK_ALLOC_KASAN)
@ -301,40 +291,13 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
/* avoid allocating the first page */
start = max_t(phys_addr_t, start, PAGE_SIZE);
end = max(start, end);
kernel_end = __pa_symbol(_end);
/*
* try bottom-up allocation only when bottom-up mode
* is set and @end is above the kernel image.
*/
if (memblock_bottom_up() && end > kernel_end) {
phys_addr_t bottom_up_start;
/* make sure we will allocate above the kernel */
bottom_up_start = max(start, kernel_end);
/* ok, try bottom-up allocation first */
ret = __memblock_find_range_bottom_up(bottom_up_start, end,
size, align, nid, flags);
if (ret)
return ret;
/*
* we always limit bottom-up allocation above the kernel,
* but top-down allocation doesn't have the limit, so
* retrying top-down allocation may succeed when bottom-up
* allocation failed.
*
* bottom-up allocation is expected to be fail very rarely,
* so we use WARN_ONCE() here to see the stack trace if
* fail happens.
*/
WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
"memblock: bottom-up allocation failed, memory hotremove may be affected\n");
}
return __memblock_find_range_top_down(start, end, size, align, nid,
flags);
if (memblock_bottom_up())
return __memblock_find_range_bottom_up(start, end, size, align,
nid, flags);
else
return __memblock_find_range_top_down(start, end, size, align,
nid, flags);
}
/**

View File

@ -1280,6 +1280,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return -ENOSYS;
}
if (page_count(hpage) == 1) {
/* page was freed from under us. So we are done. */
putback_active_hugepage(hpage);
return MIGRATEPAGE_SUCCESS;
}
new_hpage = get_new_page(hpage, private);
if (!new_hpage)
return -ENOMEM;