Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:

 - various misc bits

 - most of MM (quite a lot of MM material is awaiting the merge of
   linux-next dependencies)

 - kasan

 - printk updates

 - procfs updates

 - MAINTAINERS

 - /lib updates

 - checkpatch updates

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (123 commits)
  init: reduce rootwait polling interval time to 5ms
  binfmt_elf: use vmalloc() for allocation of vma_filesz
  checkpatch: don't emit unified-diff error for rename-only patches
  checkpatch: don't check c99 types like uint8_t under tools
  checkpatch: avoid multiple line dereferences
  checkpatch: don't check .pl files, improve absolute path commit log test
  scripts/checkpatch.pl: fix spelling
  checkpatch: don't try to get maintained status when --no-tree is given
  lib/ida: document locking requirements a bit better
  lib/rbtree.c: fix typo in comment of ____rb_erase_color
  lib/Kconfig.debug: make CONFIG_STRICT_DEVMEM depend on CONFIG_DEVMEM
  MAINTAINERS: add drm and drm/i915 irc channels
  MAINTAINERS: add "C:" for URI for chat where developers hang out
  MAINTAINERS: add drm and drm/i915 bug filing info
  MAINTAINERS: add "B:" for URI where to file bugs
  get_maintainer: look for arbitrary letter prefixes in sections
  printk: add Kconfig option to set default console loglevel
  printk/sound: handle more message headers
  printk/btrfs: handle more message headers
  printk/kdb: handle more message headers
  ...
commit e34bac726d
@@ -974,6 +974,13 @@ compatibility.
  4Gb. Some vendors prefer splitting those ranges into smaller
  segments, but the kernel doesn't care.

  Additional properties:

  - hotpluggable : The presence of this property provides an explicit
    hint to the operating system that this memory may potentially be
    removed later. The kernel can take this into consideration when
    doing nonmovable allocations and when laying out memory zones.

e) The /chosen node

  This node is a bit "special". Normally, that's where Open Firmware

@@ -191,6 +191,7 @@ read the file /proc/PID/status:
  CapPrm: 0000000000000000
  CapEff: 0000000000000000
  CapBnd: ffffffffffffffff
  NoNewPrivs: 0
  Seccomp: 0
  voluntary_ctxt_switches: 0
  nonvoluntary_ctxt_switches: 1

@@ -262,6 +263,7 @@ Table 1-2: Contents of the status files (as of 4.1)
 CapPrm                      bitmap of permitted capabilities
 CapEff                      bitmap of effective capabilities
 CapBnd                      bitmap of capabilities bounding set
 NoNewPrivs                  no_new_privs, like prctl(PR_GET_NO_NEW_PRIV, ...)
 Seccomp                     seccomp mode, like prctl(PR_GET_SECCOMP, ...)
 Cpus_allowed                mask of CPUs on which this process may run
 Cpus_allowed_list           Same as previous, but in "list format"

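As a side note (a minimal userspace sketch for illustration, not part of the patches above): the documented NoNewPrivs and Seccomp fields can be read by scanning /proc/self/status, e.g.:

	#include <stdio.h>
	#include <string.h>

	/* Sketch: print the NoNewPrivs and Seccomp lines documented in Table 1-2. */
	int main(void)
	{
		FILE *f = fopen("/proc/self/status", "r");
		char line[256];

		if (!f) {
			perror("/proc/self/status");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "NoNewPrivs:", 11) ||
			    !strncmp(line, "Seccomp:", 8))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}
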
@@ -2397,7 +2397,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
			that the amount of memory usable for all allocations
			is not too small.

	movable_node	[KNL,X86] Boot-time switch to enable the effects
	movable_node	[KNL] Boot-time switch to enable the effects
			of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details.

	MTD_Partition=	[MTD]

@@ -136,6 +136,11 @@ or enable it back by writing 1:

echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page

Some userspace (such as a test program, or an optimized memory allocation
library) may want to know the size (in bytes) of a transparent hugepage:

cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size

khugepaged will be automatically started when
transparent_hugepage/enabled is set to "always" or "madvise", and it'll
be automatically shutdown if it's set to "never".

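For illustration only (a minimal userspace sketch, not part of this series): an allocator could use the hpage_pmd_size file documented above to request a PMD-aligned, hugepage-sized buffer and hint the kernel with madvise():

	#define _DEFAULT_SOURCE
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/mman.h>

	/* Sketch: size/align an allocation to the THP PMD size exported above. */
	int main(void)
	{
		FILE *f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
		unsigned long hpage_size;
		void *buf;

		if (!f || fscanf(f, "%lu", &hpage_size) != 1) {
			fprintf(stderr, "cannot read hpage_pmd_size\n");
			return 1;
		}
		fclose(f);

		if (posix_memalign(&buf, hpage_size, hpage_size))
			return 1;
		madvise(buf, hpage_size, MADV_HUGEPAGE);	/* advisory; may be a no-op */
		printf("got %lu bytes aligned to the PMD hugepage size\n", hpage_size);
		free(buf);
		return 0;
	}
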
@@ -74,6 +74,10 @@ Descriptions of section entries:
	   These reviewers should be CCed on patches.
	L: Mailing list that is relevant to this area
	W: Web-page with status/info
	B: URI for where to file bugs. A web-page with detailed bug
	   filing info, a direct bug tracker link, or a mailto: URI.
	C: URI for chat protocol, server and channel where developers
	   usually hang out, for example irc://server/channel.
	Q: Patchwork web based patch tracking system site
	T: SCM tree type and location.
	   Type is one of: git, hg, quilt, stgit, topgit

@@ -4024,6 +4028,8 @@ DRM DRIVERS
M:	David Airlie <airlied@linux.ie>
L:	dri-devel@lists.freedesktop.org
T:	git git://people.freedesktop.org/~airlied/linux
B:	https://bugs.freedesktop.org/
C:	irc://chat.freenode.net/dri-devel
S:	Maintained
F:	drivers/gpu/drm/
F:	drivers/gpu/vga/

@@ -4076,6 +4082,8 @@ M: Jani Nikula <jani.nikula@linux.intel.com>
L:	intel-gfx@lists.freedesktop.org
L:	dri-devel@lists.freedesktop.org
W:	https://01.org/linuxgraphics/
B:	https://01.org/linuxgraphics/documentation/how-report-bugs
C:	irc://chat.freenode.net/intel-gfx
Q:	http://patchwork.freedesktop.org/project/intel-gfx/
T:	git git://anongit.freedesktop.org/drm-intel
S:	Supported

@ -186,6 +186,8 @@ tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
|
|||
tlb_add_flush(tlb, addr);
|
||||
}
|
||||
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
tlb_remove_tlb_entry(tlb, ptep, address)
|
||||
/*
|
||||
* In the case of tlb vma handling, we can optimise these away in the
|
||||
* case where we're doing a full MM flush. When we're doing a munmap,
|
||||
|
@ -211,18 +213,17 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
|
|||
|
||||
static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
|
||||
{
|
||||
tlb->pages[tlb->nr++] = page;
|
||||
VM_WARN_ON(tlb->nr > tlb->max);
|
||||
if (tlb->nr == tlb->max)
|
||||
return true;
|
||||
tlb->pages[tlb->nr++] = page;
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
|
||||
{
|
||||
if (__tlb_remove_page(tlb, page)) {
|
||||
if (__tlb_remove_page(tlb, page))
|
||||
tlb_flush_mmu(tlb);
|
||||
__tlb_remove_page(tlb, page);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
|
@ -231,12 +232,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
|
|||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
|
||||
struct page *page)
|
||||
{
|
||||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
struct page *page, int page_size)
|
||||
{
|
||||
|
@ -284,5 +279,11 @@ tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr
|
|||
|
||||
#define tlb_migrate_finish(mm) do { } while (0)
|
||||
|
||||
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
||||
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
unsigned int page_size)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_MMU */
|
||||
#endif
|
||||
|
|
|
@ -207,15 +207,15 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
|
|||
*/
|
||||
static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
|
||||
{
|
||||
if (tlb->nr == tlb->max)
|
||||
return true;
|
||||
|
||||
tlb->need_flush = 1;
|
||||
|
||||
if (!tlb->nr && tlb->pages == tlb->local)
|
||||
__tlb_alloc_page(tlb);
|
||||
|
||||
tlb->pages[tlb->nr++] = page;
|
||||
VM_WARN_ON(tlb->nr > tlb->max);
|
||||
if (tlb->nr == tlb->max)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -236,10 +236,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
|
|||
|
||||
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
|
||||
{
|
||||
if (__tlb_remove_page(tlb, page)) {
|
||||
if (__tlb_remove_page(tlb, page))
|
||||
tlb_flush_mmu(tlb);
|
||||
__tlb_remove_page(tlb, page);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
|
@ -248,12 +246,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
|
|||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
|
||||
struct page *page)
|
||||
{
|
||||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
struct page *page, int page_size)
|
||||
{
|
||||
|
@ -283,6 +275,15 @@ do { \
|
|||
__tlb_remove_tlb_entry(tlb, ptep, addr); \
|
||||
} while (0)
|
||||
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
tlb_remove_tlb_entry(tlb, ptep, address)
|
||||
|
||||
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
||||
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
unsigned int page_size)
|
||||
{
|
||||
}
|
||||
|
||||
#define pte_free_tlb(tlb, ptep, address) \
|
||||
do { \
|
||||
tlb->need_flush = 1; \
|
||||
|
|
|
@ -34,7 +34,7 @@ config NO_IOPORT_MAP
|
|||
def_bool y
|
||||
|
||||
config NO_DMA
|
||||
def_bool y
|
||||
def_bool n
|
||||
|
||||
config HZ
|
||||
int
|
||||
|
|
|
@ -3,5 +3,9 @@
|
|||
*
|
||||
* This file is released under the GPLv2
|
||||
*/
|
||||
#include <asm-generic/device.h>
|
||||
struct dev_archdata {
|
||||
struct dma_map_ops *dma_ops;
|
||||
};
|
||||
|
||||
struct pdev_archdata {
|
||||
};
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
#ifndef _ASM_M32R_DMA_MAPPING_H
|
||||
#define _ASM_M32R_DMA_MAPPING_H
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/dma-debug.h>
|
||||
#include <linux/io.h>
|
||||
|
||||
#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
|
||||
|
||||
static inline struct dma_map_ops *get_dma_ops(struct device *dev)
|
||||
{
|
||||
if (dev && dev->archdata.dma_ops)
|
||||
return dev->archdata.dma_ops;
|
||||
return &dma_noop_ops;
|
||||
}
|
||||
|
||||
static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
|
||||
enum dma_data_direction direction)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
|
||||
{
|
||||
if (!dev->dma_mask)
|
||||
return false;
|
||||
return addr + size - 1 <= *dev->dma_mask;
|
||||
}
|
||||
|
||||
#endif /* _ASM_M32R_DMA_MAPPING_H */
|
|
@ -201,6 +201,7 @@ static struct irq_chip m32700ut_lanpld_irq_type =
|
|||
#define lcdpldirq2port(x) (unsigned long)((int)M32700UT_LCD_ICUCR1 + \
|
||||
(((x) - 1) * sizeof(unsigned short)))
|
||||
|
||||
#ifdef CONFIG_USB
|
||||
static pld_icu_data_t lcdpld_icu_data[M32700UT_NUM_LCD_PLD_IRQ];
|
||||
|
||||
static void disable_m32700ut_lcdpld_irq(unsigned int irq)
|
||||
|
@ -253,6 +254,7 @@ static struct irq_chip m32700ut_lcdpld_irq_type =
|
|||
.irq_mask = mask_m32700ut_lcdpld,
|
||||
.irq_unmask = unmask_m32700ut_lcdpld,
|
||||
};
|
||||
#endif
|
||||
|
||||
void __init init_IRQ(void)
|
||||
{
|
||||
|
|
|
@ -1009,7 +1009,8 @@ static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
|
|||
#define pmd_move_must_withdraw pmd_move_must_withdraw
|
||||
struct spinlock;
|
||||
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
|
||||
struct spinlock *old_pmd_ptl)
|
||||
struct spinlock *old_pmd_ptl,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
if (radix_enabled())
|
||||
return false;
|
||||
|
@ -1020,6 +1021,16 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
|
|||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
|
||||
static inline bool arch_needs_pgtable_deposit(void)
|
||||
{
|
||||
if (radix_enabled())
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#define tlb_start_vma(tlb, vma) do { } while (0)
|
||||
#define tlb_end_vma(tlb, vma) do { } while (0)
|
||||
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
|
||||
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
||||
|
||||
extern void tlb_flush(struct mmu_gather *tlb);
|
||||
|
||||
|
@ -46,6 +47,21 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
|
|||
#endif
|
||||
}
|
||||
|
||||
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
unsigned int page_size)
|
||||
{
|
||||
if (!tlb->page_size)
|
||||
tlb->page_size = page_size;
|
||||
else if (tlb->page_size != page_size) {
|
||||
tlb_flush_mmu(tlb);
|
||||
/*
|
||||
* update the page size after flush for the new
|
||||
* mmu_gather.
|
||||
*/
|
||||
tlb->page_size = page_size;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static inline int mm_is_core_local(struct mm_struct *mm)
|
||||
{
|
||||
|
|
|
@ -1085,7 +1085,7 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr)
|
|||
int hot_add_scn_to_nid(unsigned long scn_addr)
|
||||
{
|
||||
struct device_node *memory = NULL;
|
||||
int nid, found = 0;
|
||||
int nid;
|
||||
|
||||
if (!numa_enabled || (min_common_depth < 0))
|
||||
return first_online_node;
|
||||
|
@ -1101,17 +1101,6 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
|
|||
if (nid < 0 || !node_online(nid))
|
||||
nid = first_online_node;
|
||||
|
||||
if (NODE_DATA(nid)->node_spanned_pages)
|
||||
return nid;
|
||||
|
||||
for_each_online_node(nid) {
|
||||
if (NODE_DATA(nid)->node_spanned_pages) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
BUG_ON(!found);
|
||||
return nid;
|
||||
}
|
||||
|
||||
|
|
|
@ -104,12 +104,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
|
|||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
|
||||
struct page *page)
|
||||
{
|
||||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
struct page *page, int page_size)
|
||||
{
|
||||
|
@ -162,5 +156,13 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
|
|||
#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
|
||||
#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
|
||||
#define tlb_migrate_finish(mm) do { } while (0)
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
tlb_remove_tlb_entry(tlb, ptep, address)
|
||||
|
||||
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
||||
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
unsigned int page_size)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* _S390_TLB_H */
|
||||
|
|
|
@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
|
|||
if (slot) {
|
||||
rmap->next = radix_tree_deref_slot_protected(slot,
|
||||
&sg->guest_table_lock);
|
||||
radix_tree_replace_slot(slot, rmap);
|
||||
radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
|
||||
} else {
|
||||
rmap->next = NULL;
|
||||
radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
|
||||
|
|
|
@ -65,6 +65,9 @@ tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
|
|||
tlb->end = address + PAGE_SIZE;
|
||||
}
|
||||
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
tlb_remove_tlb_entry(tlb, ptep, address)
|
||||
|
||||
/*
|
||||
* In the case of tlb vma handling, we can optimise these away in the
|
||||
* case where we're doing a full MM flush. When we're doing a munmap,
|
||||
|
@ -115,18 +118,18 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
|
|||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
|
||||
struct page *page)
|
||||
{
|
||||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
struct page *page, int page_size)
|
||||
{
|
||||
return tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
||||
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
unsigned int page_size)
|
||||
{
|
||||
}
|
||||
|
||||
#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep)
|
||||
#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp)
|
||||
#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
|
||||
|
|
|
@ -116,12 +116,6 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
|
|||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb,
|
||||
struct page *page)
|
||||
{
|
||||
return __tlb_remove_page(tlb, page);
|
||||
}
|
||||
|
||||
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
struct page *page, int page_size)
|
||||
{
|
||||
|
@ -141,6 +135,15 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
|||
__tlb_remove_tlb_entry(tlb, ptep, address); \
|
||||
} while (0)
|
||||
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
tlb_remove_tlb_entry(tlb, ptep, address)
|
||||
|
||||
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
||||
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
unsigned int page_size)
|
||||
{
|
||||
}
|
||||
|
||||
#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
|
||||
|
||||
#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr)
|
||||
|
|
|
@ -93,7 +93,7 @@ static void free_ldt_struct(struct ldt_struct *ldt)
|
|||
|
||||
paravirt_free_ldt(ldt->entries, ldt->size);
|
||||
if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
|
||||
vfree(ldt->entries);
|
||||
vfree_atomic(ldt->entries);
|
||||
else
|
||||
free_page((unsigned long)ldt->entries);
|
||||
kfree(ldt);
|
||||
|
|
|
@ -985,6 +985,30 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
parse_early_param();
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Memory used by the kernel cannot be hot-removed because Linux
|
||||
* cannot migrate the kernel pages. When memory hotplug is
|
||||
* enabled, we should prevent memblock from allocating memory
|
||||
* for the kernel.
|
||||
*
|
||||
* ACPI SRAT records all hotpluggable memory ranges. But before
|
||||
* SRAT is parsed, we don't know about it.
|
||||
*
|
||||
* The kernel image is loaded into memory at very early time. We
|
||||
* cannot prevent this anyway. So on NUMA system, we set any
|
||||
* node the kernel resides in as un-hotpluggable.
|
||||
*
|
||||
* Since on modern servers, one node could have double-digit
|
||||
* gigabytes memory, we can assume the memory around the kernel
|
||||
* image is also un-hotpluggable. So before SRAT is parsed, just
|
||||
* allocate memory near the kernel image to try the best to keep
|
||||
* the kernel away from hotpluggable memory.
|
||||
*/
|
||||
if (movable_node_is_enabled())
|
||||
memblock_set_bottom_up(true);
|
||||
#endif
|
||||
|
||||
x86_report_nx();
|
||||
|
||||
/* after early param, so could get panic from serial */
|
||||
|
|
|
@ -249,6 +249,7 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
|
|||
max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
|
||||
max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
|
||||
limits->max_sectors = max_sectors;
|
||||
q->backing_dev_info.io_pages = max_sectors >> (PAGE_SHIFT - 9);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
|
||||
|
||||
|
|
|
@ -212,6 +212,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
|
|||
|
||||
spin_lock_irq(q->queue_lock);
|
||||
q->limits.max_sectors = max_sectors_kb << 1;
|
||||
q->backing_dev_info.io_pages = max_sectors_kb >> (PAGE_SHIFT - 10);
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
|
||||
return ret;
|
||||
|
|
|
@ -1015,6 +1015,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
|
|||
const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
|
||||
const __be32 *reg, *endp;
|
||||
int l;
|
||||
bool hotpluggable;
|
||||
|
||||
/* We are scanning "memory" nodes only */
|
||||
if (type == NULL) {
|
||||
|
@ -1034,6 +1035,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
|
|||
return 0;
|
||||
|
||||
endp = reg + (l / sizeof(__be32));
|
||||
hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
|
||||
|
||||
pr_debug("memory scan node %s, reg size %d,\n", uname, l);
|
||||
|
||||
|
@ -1049,6 +1051,13 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
|
|||
(unsigned long long)size);
|
||||
|
||||
early_init_dt_add_memory_arch(base, size);
|
||||
|
||||
if (!hotpluggable)
|
||||
continue;
|
||||
|
||||
if (early_init_dt_mark_hotplug_memory_arch(base, size))
|
||||
pr_warn("failed to mark hotplug range 0x%llx - 0x%llx\n",
|
||||
base, base + size);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1146,6 +1155,11 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
|
|||
memblock_add(base, size);
|
||||
}
|
||||
|
||||
int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
|
||||
{
|
||||
return memblock_mark_hotplug(base, size);
|
||||
}
|
||||
|
||||
int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
|
||||
phys_addr_t size, bool nomap)
|
||||
{
|
||||
|
@ -1168,6 +1182,11 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
|
|||
WARN_ON(1);
|
||||
}
|
||||
|
||||
int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
|
||||
phys_addr_t size, bool nomap)
|
||||
{
|
||||
|
|
|
@ -296,10 +296,11 @@ static int __init is_alive(u_short sock)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void add_pcc_socket(ulong base, int irq, ulong mapaddr,
|
||||
unsigned int ioaddr)
|
||||
static int add_pcc_socket(ulong base, int irq, ulong mapaddr,
|
||||
unsigned int ioaddr)
|
||||
{
|
||||
pcc_socket_t *t = &socket[pcc_sockets];
|
||||
int err;
|
||||
|
||||
/* add sockets */
|
||||
t->ioaddr = ioaddr;
|
||||
|
@ -328,11 +329,16 @@ static void add_pcc_socket(ulong base, int irq, ulong mapaddr,
|
|||
t->socket.irq_mask = 0;
|
||||
t->socket.pci_irq = 2 + pcc_sockets; /* XXX */
|
||||
|
||||
request_irq(irq, pcc_interrupt, 0, "m32r-pcc", pcc_interrupt);
|
||||
err = request_irq(irq, pcc_interrupt, 0, "m32r-pcc", pcc_interrupt);
|
||||
if (err) {
|
||||
if (t->base > 0)
|
||||
release_region(t->base, 0x20);
|
||||
return err;
|
||||
}
|
||||
|
||||
pcc_sockets++;
|
||||
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -683,26 +689,29 @@ static int __init init_m32r_pcc(void)
|
|||
return ret;
|
||||
|
||||
ret = platform_device_register(&pcc_device);
|
||||
if (ret){
|
||||
platform_driver_unregister(&pcc_driver);
|
||||
return ret;
|
||||
}
|
||||
if (ret)
|
||||
goto unreg_driv;
|
||||
|
||||
printk(KERN_INFO "m32r PCC probe:\n");
|
||||
|
||||
pcc_sockets = 0;
|
||||
|
||||
add_pcc_socket(M32R_PCC0_BASE, PCC0_IRQ, M32R_PCC0_MAPBASE, 0x1000);
|
||||
ret = add_pcc_socket(M32R_PCC0_BASE, PCC0_IRQ, M32R_PCC0_MAPBASE,
|
||||
0x1000);
|
||||
if (ret)
|
||||
goto unreg_dev;
|
||||
|
||||
#ifdef CONFIG_M32RPCC_SLOT2
|
||||
add_pcc_socket(M32R_PCC1_BASE, PCC1_IRQ, M32R_PCC1_MAPBASE, 0x2000);
|
||||
ret = add_pcc_socket(M32R_PCC1_BASE, PCC1_IRQ, M32R_PCC1_MAPBASE,
|
||||
0x2000);
|
||||
if (ret)
|
||||
goto unreg_dev;
|
||||
#endif
|
||||
|
||||
if (pcc_sockets == 0) {
|
||||
printk("socket is not found.\n");
|
||||
platform_device_unregister(&pcc_device);
|
||||
platform_driver_unregister(&pcc_driver);
|
||||
return -ENODEV;
|
||||
ret = -ENODEV;
|
||||
goto unreg_dev;
|
||||
}
|
||||
|
||||
/* Set up interrupt handler(s) */
|
||||
|
@ -728,6 +737,12 @@ static int __init init_m32r_pcc(void)
|
|||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unreg_dev:
|
||||
platform_device_unregister(&pcc_device);
|
||||
unreg_driv:
|
||||
platform_driver_unregister(&pcc_driver);
|
||||
return ret;
|
||||
} /* init_m32r_pcc */
|
||||
|
||||
static void __exit exit_m32r_pcc(void)
|
||||
|
|
|
@ -254,7 +254,7 @@ static void __init intc_subgroup_map(struct intc_desc_int *d)
|
|||
|
||||
radix_tree_tag_clear(&d->tree, entry->enum_id,
|
||||
INTC_TAG_VIRQ_NEEDS_ALLOC);
|
||||
radix_tree_replace_slot((void **)entries[i],
|
||||
radix_tree_replace_slot(&d->tree, (void **)entries[i],
|
||||
&intc_irq_xlate[irq]);
|
||||
}
|
||||
|
||||
|
|
|
@ -2204,7 +2204,9 @@ static int elf_core_dump(struct coredump_params *cprm)
|
|||
|
||||
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
|
||||
|
||||
vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
|
||||
if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
|
||||
goto end_coredump;
|
||||
vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
|
||||
if (!vma_filesz)
|
||||
goto end_coredump;
|
||||
|
||||
|
@ -2311,7 +2313,7 @@ static int elf_core_dump(struct coredump_params *cprm)
|
|||
cleanup:
|
||||
free_note_info(&info);
|
||||
kfree(shdr4extnum);
|
||||
kfree(vma_filesz);
|
||||
vfree(vma_filesz);
|
||||
kfree(phdr4note);
|
||||
kfree(elf);
|
||||
out:
|
||||
|
|
|
@ -202,27 +202,31 @@ static struct ratelimit_state printk_limits[] = {
|
|||
void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
||||
{
|
||||
struct super_block *sb = fs_info->sb;
|
||||
char lvl[4];
|
||||
char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1];
|
||||
struct va_format vaf;
|
||||
va_list args;
|
||||
const char *type = logtypes[4];
|
||||
const char *type = NULL;
|
||||
int kern_level;
|
||||
struct ratelimit_state *ratelimit;
|
||||
|
||||
va_start(args, fmt);
|
||||
|
||||
kern_level = printk_get_level(fmt);
|
||||
if (kern_level) {
|
||||
while ((kern_level = printk_get_level(fmt)) != 0) {
|
||||
size_t size = printk_skip_level(fmt) - fmt;
|
||||
memcpy(lvl, fmt, size);
|
||||
lvl[size] = '\0';
|
||||
|
||||
if (kern_level >= '0' && kern_level <= '7') {
|
||||
memcpy(lvl, fmt, size);
|
||||
lvl[size] = '\0';
|
||||
type = logtypes[kern_level - '0'];
|
||||
ratelimit = &printk_limits[kern_level - '0'];
|
||||
}
|
||||
fmt += size;
|
||||
type = logtypes[kern_level - '0'];
|
||||
ratelimit = &printk_limits[kern_level - '0'];
|
||||
} else {
|
||||
}
|
||||
|
||||
if (!type) {
|
||||
*lvl = '\0';
|
||||
/* Default to debug output */
|
||||
ratelimit = &printk_limits[7];
|
||||
type = logtypes[4];
|
||||
ratelimit = &printk_limits[4];
|
||||
}
|
||||
|
||||
vaf.fmt = fmt;
|
||||
|
|
fs/dax.c
@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
|
|||
radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
|
||||
|
||||
entry |= RADIX_DAX_ENTRY_LOCK;
|
||||
radix_tree_replace_slot(slot, (void *)entry);
|
||||
radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
|
||||
return (void *)entry;
|
||||
}
|
||||
|
||||
|
@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
|
|||
radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
|
||||
|
||||
entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
|
||||
radix_tree_replace_slot(slot, (void *)entry);
|
||||
radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
|
||||
return (void *)entry;
|
||||
}
|
||||
|
||||
|
@ -643,12 +643,14 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
|
|||
}
|
||||
mapping->nrexceptional++;
|
||||
} else {
|
||||
struct radix_tree_node *node;
|
||||
void **slot;
|
||||
void *ret;
|
||||
|
||||
ret = __radix_tree_lookup(page_tree, index, NULL, &slot);
|
||||
ret = __radix_tree_lookup(page_tree, index, &node, &slot);
|
||||
WARN_ON_ONCE(ret != entry);
|
||||
radix_tree_replace_slot(slot, new_entry);
|
||||
__radix_tree_replace(page_tree, node, slot,
|
||||
new_entry, NULL, NULL);
|
||||
}
|
||||
if (vmf->flags & FAULT_FLAG_WRITE)
|
||||
radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
|
||||
|
|
|
@ -1769,15 +1769,13 @@ static long wb_writeback(struct bdi_writeback *wb,
|
|||
* become available for writeback. Otherwise
|
||||
* we'll just busyloop.
|
||||
*/
|
||||
if (!list_empty(&wb->b_more_io)) {
|
||||
trace_writeback_wait(wb, work);
|
||||
inode = wb_inode(wb->b_more_io.prev);
|
||||
spin_lock(&inode->i_lock);
|
||||
spin_unlock(&wb->list_lock);
|
||||
/* This function drops i_lock... */
|
||||
inode_sleep_on_writeback(inode);
|
||||
spin_lock(&wb->list_lock);
|
||||
}
|
||||
trace_writeback_wait(wb, work);
|
||||
inode = wb_inode(wb->b_more_io.prev);
|
||||
spin_lock(&inode->i_lock);
|
||||
spin_unlock(&wb->list_lock);
|
||||
/* This function drops i_lock... */
|
||||
inode_sleep_on_writeback(inode);
|
||||
spin_lock(&wb->list_lock);
|
||||
}
|
||||
spin_unlock(&wb->list_lock);
|
||||
blk_finish_plug(&plug);
|
||||
|
|
|
@ -1950,8 +1950,7 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
|
|||
}
|
||||
|
||||
int ocfs2_write_end_nolock(struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
loff_t pos, unsigned len, unsigned copied, void *fsdata)
|
||||
{
|
||||
int i, ret;
|
||||
unsigned from, to, start = pos & (PAGE_SIZE - 1);
|
||||
|
@ -2064,7 +2063,7 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
|
|||
int ret;
|
||||
struct inode *inode = mapping->host;
|
||||
|
||||
ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
|
||||
ret = ocfs2_write_end_nolock(mapping, pos, len, copied, fsdata);
|
||||
|
||||
up_write(&OCFS2_I(inode)->ip_alloc_sem);
|
||||
ocfs2_inode_unlock(inode, 1);
|
||||
|
@ -2241,7 +2240,7 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
|
|||
dwc->dw_zero_count++;
|
||||
}
|
||||
|
||||
ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
|
||||
ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, wc);
|
||||
BUG_ON(ret != len);
|
||||
ret = 0;
|
||||
unlock:
|
||||
|
|
|
@ -44,8 +44,7 @@ int walk_page_buffers( handle_t *handle,
|
|||
struct buffer_head *bh));
|
||||
|
||||
int ocfs2_write_end_nolock(struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata);
|
||||
loff_t pos, unsigned len, unsigned copied, void *fsdata);
|
||||
|
||||
typedef enum {
|
||||
OCFS2_WRITE_BUFFER = 0,
|
||||
|
|
|
@ -741,7 +741,7 @@ static inline void o2hb_prepare_block(struct o2hb_region *reg,
|
|||
hb_block = (struct o2hb_disk_heartbeat_block *)slot->ds_raw_block;
|
||||
memset(hb_block, 0, reg->hr_block_bytes);
|
||||
/* TODO: time stuff */
|
||||
cputime = CURRENT_TIME.tv_sec;
|
||||
cputime = ktime_get_real_seconds();
|
||||
if (!cputime)
|
||||
cputime = 1;
|
||||
|
||||
|
|
|
@ -1609,8 +1609,6 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
__dlm_insert_mle(dlm, mle);
|
||||
response = DLM_MASTER_RESP_NO;
|
||||
} else {
|
||||
// mlog(0, "mle was found\n");
|
||||
set_maybe = 1;
|
||||
spin_lock(&tmpmle->spinlock);
|
||||
if (tmpmle->master == dlm->node_num) {
|
||||
mlog(ML_ERROR, "no lockres, but an mle with this node as master!\n");
|
||||
|
@ -1625,8 +1623,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
response = DLM_MASTER_RESP_NO;
|
||||
} else
|
||||
response = DLM_MASTER_RESP_MAYBE;
|
||||
if (set_maybe)
|
||||
set_bit(request->node_idx, tmpmle->maybe_map);
|
||||
set_bit(request->node_idx, tmpmle->maybe_map);
|
||||
spin_unlock(&tmpmle->spinlock);
|
||||
}
|
||||
spin_unlock(&dlm->master_lock);
|
||||
|
@ -1644,12 +1641,6 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
* dlm_assert_master_worker() isn't called, we drop it here.
|
||||
*/
|
||||
if (dispatch_assert) {
|
||||
if (response != DLM_MASTER_RESP_YES)
|
||||
mlog(ML_ERROR, "invalid response %d\n", response);
|
||||
if (!res) {
|
||||
mlog(ML_ERROR, "bad lockres while trying to assert!\n");
|
||||
BUG();
|
||||
}
|
||||
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
|
||||
dlm->node_num, res->lockname.len, res->lockname.name);
|
||||
spin_lock(&res->spinlock);
|
||||
|
|
|
@ -2966,8 +2966,6 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
|
|||
spin_unlock(&dlm->spinlock);
|
||||
dlm_kick_recovery_thread(dlm);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
mlog(0, "%s: recovery done, reco master was %u, dead now %u, master now %u\n",
|
||||
|
|
|
@ -703,7 +703,7 @@ static int ocfs2_remove_inode(struct inode *inode,
|
|||
goto bail_commit;
|
||||
}
|
||||
|
||||
di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
|
||||
di->i_dtime = cpu_to_le64(ktime_get_real_seconds());
|
||||
di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
|
||||
ocfs2_journal_dirty(handle, di_bh);
|
||||
|
||||
|
|
|
@ -1947,7 +1947,7 @@ static void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
|
|||
*/
|
||||
seqno++;
|
||||
os->os_count++;
|
||||
os->os_scantime = CURRENT_TIME;
|
||||
os->os_scantime = ktime_get_seconds();
|
||||
unlock:
|
||||
ocfs2_orphan_scan_unlock(osb, seqno);
|
||||
out:
|
||||
|
@ -2004,7 +2004,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
|
|||
struct ocfs2_orphan_scan *os;
|
||||
|
||||
os = &osb->osb_orphan_scan;
|
||||
os->os_scantime = CURRENT_TIME;
|
||||
os->os_scantime = ktime_get_seconds();
|
||||
if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
|
||||
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
|
||||
else {
|
||||
|
|
|
@ -120,8 +120,7 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
|
|||
ret = VM_FAULT_NOPAGE;
|
||||
goto out;
|
||||
}
|
||||
ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
|
||||
fsdata);
|
||||
ret = ocfs2_write_end_nolock(mapping, pos, len, len, fsdata);
|
||||
BUG_ON(ret != len);
|
||||
ret = VM_FAULT_LOCKED;
|
||||
out:
|
||||
|
|
|
@ -516,6 +516,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
|
|||
struct ocfs2_extent_list *fel;
|
||||
u16 feat;
|
||||
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
||||
struct timespec64 ts;
|
||||
|
||||
*new_fe_bh = NULL;
|
||||
|
||||
|
@ -564,10 +565,11 @@ static int __ocfs2_mknod_locked(struct inode *dir,
|
|||
fe->i_last_eb_blk = 0;
|
||||
strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
|
||||
fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
|
||||
ktime_get_real_ts64(&ts);
|
||||
fe->i_atime = fe->i_ctime = fe->i_mtime =
|
||||
cpu_to_le64(CURRENT_TIME.tv_sec);
|
||||
cpu_to_le64(ts.tv_sec);
|
||||
fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
|
||||
cpu_to_le32(CURRENT_TIME.tv_nsec);
|
||||
cpu_to_le32(ts.tv_nsec);
|
||||
fe->i_dtime = 0;
|
||||
|
||||
/*
|
||||
|
|
|
@ -224,7 +224,7 @@ struct ocfs2_orphan_scan {
|
|||
struct ocfs2_super *os_osb;
|
||||
struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */
|
||||
struct delayed_work os_orphan_scan_work;
|
||||
struct timespec os_scantime; /* time this node ran the scan */
|
||||
time64_t os_scantime; /* time this node ran the scan */
|
||||
u32 os_count; /* tracks node specific scans */
|
||||
u32 os_seqno; /* tracks cluster wide scans */
|
||||
atomic_t os_state; /* ACTIVE or INACTIVE */
|
||||
|
|
|
@ -478,7 +478,6 @@ int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
|
|||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
ocfs2_unlock_refcount_tree(osb, tree, rw);
|
||||
ocfs2_refcount_tree_put(tree);
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
|
|
@ -337,7 +337,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
|
|||
out += snprintf(buf + out, len - out, "Disabled\n");
|
||||
else
|
||||
out += snprintf(buf + out, len - out, "%lu seconds ago\n",
|
||||
(get_seconds() - os->os_scantime.tv_sec));
|
||||
(unsigned long)(ktime_get_seconds() - os->os_scantime));
|
||||
|
||||
out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
|
||||
"Slots", "Num", "RecoGen");
|
||||
|
|
|
@ -245,7 +245,7 @@ void render_sigset_t(struct seq_file *m, const char *header,
|
|||
if (sigismember(set, i+2)) x |= 2;
|
||||
if (sigismember(set, i+3)) x |= 4;
|
||||
if (sigismember(set, i+4)) x |= 8;
|
||||
seq_printf(m, "%x", x);
|
||||
seq_putc(m, hex_asc[x]);
|
||||
} while (i >= 4);
|
||||
|
||||
seq_putc(m, '\n');
|
||||
|
@ -342,10 +342,11 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
|
|||
|
||||
static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
|
||||
{
|
||||
seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
|
||||
#ifdef CONFIG_SECCOMP
|
||||
seq_put_decimal_ull(m, "Seccomp:\t", p->seccomp.mode);
|
||||
seq_putc(m, '\n');
|
||||
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
|
||||
#endif
|
||||
seq_putc(m, '\n');
|
||||
}
|
||||
|
||||
static inline void task_context_switch_counts(struct seq_file *m,
|
||||
|
|
|
@ -104,9 +104,12 @@
|
|||
* in /proc for a task before it execs a suid executable.
|
||||
*/
|
||||
|
||||
static u8 nlink_tid;
|
||||
static u8 nlink_tgid;
|
||||
|
||||
struct pid_entry {
|
||||
const char *name;
|
||||
int len;
|
||||
unsigned int len;
|
||||
umode_t mode;
|
||||
const struct inode_operations *iop;
|
||||
const struct file_operations *fop;
|
||||
|
@ -139,13 +142,13 @@ struct pid_entry {
|
|||
* Count the number of hardlinks for the pid_entry table, excluding the .
|
||||
* and .. links.
|
||||
*/
|
||||
static unsigned int pid_entry_count_dirs(const struct pid_entry *entries,
|
||||
static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
|
||||
unsigned int n)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int count;
|
||||
|
||||
count = 0;
|
||||
count = 2;
|
||||
for (i = 0; i < n; ++i) {
|
||||
if (S_ISDIR(entries[i].mode))
|
||||
++count;
|
||||
|
@ -1967,7 +1970,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
|
|||
|
||||
struct map_files_info {
|
||||
fmode_t mode;
|
||||
unsigned long len;
|
||||
unsigned int len;
|
||||
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
|
||||
};
|
||||
|
||||
|
@ -2412,14 +2415,14 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
|
|||
* Yes, it does not scale. And it should not. Don't add
|
||||
* new entries into /proc/<tgid>/ without very good reasons.
|
||||
*/
|
||||
last = &ents[nents - 1];
|
||||
for (p = ents; p <= last; p++) {
|
||||
last = &ents[nents];
|
||||
for (p = ents; p < last; p++) {
|
||||
if (p->len != dentry->d_name.len)
|
||||
continue;
|
||||
if (!memcmp(dentry->d_name.name, p->name, p->len))
|
||||
break;
|
||||
}
|
||||
if (p > last)
|
||||
if (p >= last)
|
||||
goto out;
|
||||
|
||||
error = proc_pident_instantiate(dir, dentry, task, p);
|
||||
|
@ -2444,7 +2447,7 @@ static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
|
|||
if (ctx->pos >= nents + 2)
|
||||
goto out;
|
||||
|
||||
for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) {
|
||||
for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
|
||||
if (!proc_fill_cache(file, ctx, p->name, p->len,
|
||||
proc_pident_instantiate, task, p))
|
||||
break;
|
||||
|
@ -3068,8 +3071,7 @@ static int proc_pid_instantiate(struct inode *dir,
|
|||
inode->i_fop = &proc_tgid_base_operations;
|
||||
inode->i_flags|=S_IMMUTABLE;
|
||||
|
||||
set_nlink(inode, 2 + pid_entry_count_dirs(tgid_base_stuff,
|
||||
ARRAY_SIZE(tgid_base_stuff)));
|
||||
set_nlink(inode, nlink_tgid);
|
||||
|
||||
d_set_d_op(dentry, &pid_dentry_operations);
|
||||
|
||||
|
@ -3361,8 +3363,7 @@ static int proc_task_instantiate(struct inode *dir,
|
|||
inode->i_fop = &proc_tid_base_operations;
|
||||
inode->i_flags|=S_IMMUTABLE;
|
||||
|
||||
set_nlink(inode, 2 + pid_entry_count_dirs(tid_base_stuff,
|
||||
ARRAY_SIZE(tid_base_stuff)));
|
||||
set_nlink(inode, nlink_tid);
|
||||
|
||||
d_set_d_op(dentry, &pid_dentry_operations);
|
||||
|
||||
|
@ -3552,3 +3553,9 @@ static const struct file_operations proc_task_operations = {
|
|||
.iterate_shared = proc_task_readdir,
|
||||
.llseek = generic_file_llseek,
|
||||
};
|
||||
|
||||
void __init set_proc_pid_nlink(void)
|
||||
{
|
||||
nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
|
||||
nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
|
||||
}
|
||||
|
|
|
@ -138,6 +138,16 @@ static void unuse_pde(struct proc_dir_entry *pde)
|
|||
/* pde is locked */
|
||||
static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
|
||||
{
|
||||
/*
|
||||
* close() (proc_reg_release()) can't delete an entry and proceed:
|
||||
* ->release hook needs to be available at the right moment.
|
||||
*
|
||||
* rmmod (remove_proc_entry() et al) can't delete an entry and proceed:
|
||||
* "struct file" needs to be available at the right moment.
|
||||
*
|
||||
* Therefore, first process to enter this function does ->release() and
|
||||
* signals its completion to the other process which does nothing.
|
||||
*/
|
||||
if (pdeo->closing) {
|
||||
/* somebody else is doing that, just wait */
|
||||
DECLARE_COMPLETION_ONSTACK(c);
|
||||
|
@ -147,12 +157,13 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
|
|||
spin_lock(&pde->pde_unload_lock);
|
||||
} else {
|
||||
struct file *file;
|
||||
pdeo->closing = 1;
|
||||
pdeo->closing = true;
|
||||
spin_unlock(&pde->pde_unload_lock);
|
||||
file = pdeo->file;
|
||||
pde->proc_fops->release(file_inode(file), file);
|
||||
spin_lock(&pde->pde_unload_lock);
|
||||
list_del_init(&pdeo->lh);
|
||||
/* After ->release. */
|
||||
list_del(&pdeo->lh);
|
||||
if (pdeo->c)
|
||||
complete(pdeo->c);
|
||||
kfree(pdeo);
|
||||
|
@ -167,6 +178,8 @@ void proc_entry_rundown(struct proc_dir_entry *de)
|
|||
if (atomic_add_return(BIAS, &de->in_use) != BIAS)
|
||||
wait_for_completion(&c);
|
||||
|
||||
/* ->pde_openers list can't grow from now on. */
|
||||
|
||||
spin_lock(&de->pde_unload_lock);
|
||||
while (!list_empty(&de->pde_openers)) {
|
||||
struct pde_opener *pdeo;
|
||||
|
@ -312,16 +325,17 @@ static int proc_reg_open(struct inode *inode, struct file *file)
|
|||
struct pde_opener *pdeo;
|
||||
|
||||
/*
|
||||
* What for, you ask? Well, we can have open, rmmod, remove_proc_entry
|
||||
* sequence. ->release won't be called because ->proc_fops will be
|
||||
* cleared. Depending on complexity of ->release, consequences vary.
|
||||
* Ensure that
|
||||
* 1) PDE's ->release hook will be called no matter what
|
||||
* either normally by close()/->release, or forcefully by
|
||||
* rmmod/remove_proc_entry.
|
||||
*
|
||||
* We can't wait for mercy when close will be done for real, it's
|
||||
* deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
|
||||
* by hand in remove_proc_entry(). For this, save opener's credentials
|
||||
* for later.
|
||||
* 2) rmmod isn't blocked by opening file in /proc and sitting on
|
||||
* the descriptor (including "rmmod foo </proc/foo" scenario).
|
||||
*
|
||||
* Save every "struct file" with custom ->release hook.
|
||||
*/
|
||||
pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
|
||||
pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
|
||||
if (!pdeo)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -338,7 +352,8 @@ static int proc_reg_open(struct inode *inode, struct file *file)
|
|||
if (rv == 0 && release) {
|
||||
/* To know what to release. */
|
||||
pdeo->file = file;
|
||||
/* Strictly for "too late" ->release in proc_reg_release(). */
|
||||
pdeo->closing = false;
|
||||
pdeo->c = NULL;
|
||||
spin_lock(&pde->pde_unload_lock);
|
||||
list_add(&pdeo->lh, &pde->pde_openers);
|
||||
spin_unlock(&pde->pde_unload_lock);
|
||||
|
|
|
@ -203,7 +203,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name);
|
|||
struct pde_opener {
|
||||
struct file *file;
|
||||
struct list_head lh;
|
||||
int closing;
|
||||
bool closing;
|
||||
struct completion *c;
|
||||
};
|
||||
extern const struct inode_operations proc_link_inode_operations;
|
||||
|
@ -211,6 +211,7 @@ extern const struct inode_operations proc_link_inode_operations;
|
|||
extern const struct inode_operations proc_pid_link_inode_operations;
|
||||
|
||||
extern void proc_init_inodecache(void);
|
||||
void set_proc_pid_nlink(void);
|
||||
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
|
||||
extern int proc_fill_super(struct super_block *, void *data, int flags);
|
||||
extern void proc_entry_rundown(struct proc_dir_entry *);
|
||||
|
|
|
@ -122,6 +122,7 @@ void __init proc_root_init(void)
|
|||
int err;
|
||||
|
||||
proc_init_inodecache();
|
||||
set_proc_pid_nlink();
|
||||
err = register_filesystem(&proc_fs_type);
|
||||
if (err)
|
||||
return;
|
||||
|
|
|
@ -1588,6 +1588,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
|
|||
|
||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||
pte_unmap_unlock(orig_pte, ptl);
|
||||
cond_resched();
|
||||
return 0;
|
||||
}
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
|
|
|
@ -652,18 +652,9 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifndef pmd_move_must_withdraw
|
||||
static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
|
||||
spinlock_t *old_pmd_ptl)
|
||||
{
|
||||
/*
|
||||
* With split pmd lock we also need to move preallocated
|
||||
* PTE page table if new_pmd is on different PMD page table.
|
||||
*/
|
||||
return new_pmd_ptl != old_pmd_ptl;
|
||||
}
|
||||
#ifndef arch_needs_pgtable_deposit
|
||||
#define arch_needs_pgtable_deposit() (false)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This function is meant to be used by sites walking pagetables with
|
||||
* the mmap_sem hold in read mode to protect against MADV_DONTNEED and
|
||||
|
|
|
@ -107,11 +107,6 @@ struct mmu_gather {
|
|||
struct mmu_gather_batch local;
|
||||
struct page *__pages[MMU_GATHER_BUNDLE];
|
||||
unsigned int batch_count;
|
||||
/*
|
||||
* __tlb_adjust_range will track the new addr here,
|
||||
* that that we can adjust the range after the flush
|
||||
*/
|
||||
unsigned long addr;
|
||||
int page_size;
|
||||
};
|
||||
|
||||
|
@ -125,16 +120,11 @@ extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
|
|||
int page_size);
|
||||
|
||||
static inline void __tlb_adjust_range(struct mmu_gather *tlb,
|
||||
unsigned long address)
|
||||
unsigned long address,
|
||||
unsigned int range_size)
|
||||
{
|
||||
tlb->start = min(tlb->start, address);
|
||||
tlb->end = max(tlb->end, address + PAGE_SIZE);
|
||||
/*
|
||||
* Track the last address with which we adjusted the range. This
|
||||
* will be used later to adjust again after a mmu_flush due to
|
||||
* failed __tlb_remove_page
|
||||
*/
|
||||
tlb->addr = address;
|
||||
tlb->end = max(tlb->end, address + range_size);
|
||||
}
|
||||
|
||||
static inline void __tlb_reset_range(struct mmu_gather *tlb)
|
||||
|
@ -150,15 +140,11 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
|
|||
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
struct page *page, int page_size)
|
||||
{
|
||||
if (__tlb_remove_page_size(tlb, page, page_size)) {
|
||||
if (__tlb_remove_page_size(tlb, page, page_size))
|
||||
tlb_flush_mmu(tlb);
|
||||
tlb->page_size = page_size;
|
||||
__tlb_adjust_range(tlb, tlb->addr);
|
||||
__tlb_remove_page_size(tlb, page, page_size);
|
||||
}
|
||||
}
|
||||
|
||||
static bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
|
||||
static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
|
||||
{
|
||||
return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
|
||||
}
|
||||
|
@ -172,14 +158,21 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
|
|||
return tlb_remove_page_size(tlb, page, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *page)
|
||||
#ifndef tlb_remove_check_page_size_change
|
||||
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
|
||||
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
unsigned int page_size)
|
||||
{
|
||||
/* active->nr should be zero when we call this */
|
||||
VM_BUG_ON_PAGE(tlb->active->nr, page);
|
||||
tlb->page_size = PAGE_SIZE;
|
||||
__tlb_adjust_range(tlb, tlb->addr);
|
||||
return __tlb_remove_page(tlb, page);
|
||||
/*
|
||||
* We don't care about page size change, just update
|
||||
* mmu_gather page size here so that debug checks
|
||||
* doesn't throw false warning.
|
||||
*/
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
tlb->page_size = page_size;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* In the case of tlb vma handling, we can optimise these away in the
|
||||
|
@ -215,10 +208,16 @@ static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *pa
|
|||
*/
|
||||
#define tlb_remove_tlb_entry(tlb, ptep, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address); \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
__tlb_remove_tlb_entry(tlb, ptep, address); \
|
||||
} while (0)
|
||||
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, huge_page_size(h)); \
|
||||
__tlb_remove_tlb_entry(tlb, ptep, address); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
|
||||
* This is a nop so far, because only x86 needs it.
|
||||
|
@ -227,29 +226,47 @@ static inline bool __tlb_remove_pte_page(struct mmu_gather *tlb, struct page *pa
|
|||
#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address); \
|
||||
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
|
||||
#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \
|
||||
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* For things like page tables caches (ie caching addresses "inside" the
|
||||
* page tables, like x86 does), for legacy reasons, flushing an
|
||||
* individual page had better flush the page table caches behind it. This
|
||||
* is definitely how x86 works, for example. And if you have an
|
||||
* architected non-legacy page table cache (which I'm not aware of
|
||||
* anybody actually doing), you're going to have some architecturally
|
||||
* explicit flushing for that, likely *separate* from a regular TLB entry
|
||||
* flush, and thus you'd need more than just some range expansion..
|
||||
*
|
||||
* So if we ever find an architecture
|
||||
* that would want something that odd, I think it is up to that
|
||||
* architecture to do its own odd thing, not cause pain for others
|
||||
* http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
|
||||
*
|
||||
* For now w.r.t page table cache, mark the range_size as PAGE_SIZE
|
||||
*/
|
||||
|
||||
#define pte_free_tlb(tlb, ptep, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address); \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
__pte_free_tlb(tlb, ptep, address); \
|
||||
} while (0)
|
||||
|
||||
#ifndef __ARCH_HAS_4LEVEL_HACK
|
||||
#define pud_free_tlb(tlb, pudp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address); \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
__pud_free_tlb(tlb, pudp, address); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#define pmd_free_tlb(tlb, pmdp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address); \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
__pmd_free_tlb(tlb, pmdp, address); \
|
||||
} while (0)
|
||||
|
||||
|
|
|
@ -136,12 +136,13 @@ struct bdi_writeback {
|
|||
struct backing_dev_info {
|
||||
struct list_head bdi_list;
|
||||
unsigned long ra_pages; /* max readahead in PAGE_SIZE units */
|
||||
unsigned int capabilities; /* Device capabilities */
|
||||
unsigned long io_pages; /* max allowed IO size */
|
||||
congested_fn *congested_fn; /* Function pointer if device is md/dm */
|
||||
void *congested_data; /* Pointer to aux data for congested func */
|
||||
|
||||
char *name;
|
||||
|
||||
unsigned int capabilities; /* Device capabilities */
|
||||
unsigned int min_ratio;
|
||||
unsigned int max_ratio, max_prop_frac;
|
||||
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
#ifndef __CMA_H__
|
||||
#define __CMA_H__
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* There is always at least global CMA area and a few optional
|
||||
* areas configured in kernel .config.
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
* clobbered. The issue is as follows: while the inline asm might
|
||||
* access any memory it wants, the compiler could have fit all of
|
||||
* @ptr into memory registers instead, and since @ptr never escaped
|
||||
* from that, it proofed that the inline asm wasn't touching any of
|
||||
* from that, it proved that the inline asm wasn't touching any of
|
||||
* it. This version works well with both compilers, i.e. we're telling
|
||||
* the compiler that the inline asm absolutely may see the contents
|
||||
* of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495
|
||||
|
|
|
@ -189,6 +189,8 @@ static inline void deferred_split_huge_page(struct page *page) {}
|
|||
#define split_huge_pmd(__vma, __pmd, __address) \
|
||||
do { } while (0)
|
||||
|
||||
static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
unsigned long address, bool freeze, struct page *page) {}
|
||||
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
|
||||
unsigned long address, bool freeze, struct page *page) {}
|
||||
|
||||
|
|
|
@ -175,7 +175,7 @@ __printf(2, 3)
|
|||
struct kthread_worker *
|
||||
kthread_create_worker(unsigned int flags, const char namefmt[], ...);
|
||||
|
||||
struct kthread_worker *
|
||||
__printf(3, 4) struct kthread_worker *
|
||||
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
|
||||
const char namefmt[], ...);
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
@ -177,6 +178,13 @@ static inline bool vma_migratable(struct vm_area_struct *vma)
|
|||
if (vma->vm_flags & (VM_IO | VM_PFNMAP))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* DAX device mappings require predictable access latency, so avoid
|
||||
* incurring periodic faults.
|
||||
*/
|
||||
if (vma_is_dax(vma))
|
||||
return false;
|
||||
|
||||
#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
|
||||
if (vma->vm_flags & VM_HUGETLB)
|
||||
return false;
|
||||
|
|
|
@ -71,6 +71,7 @@ extern int early_init_dt_scan_chosen_stdout(void);
|
|||
extern void early_init_fdt_scan_reserved_mem(void);
|
||||
extern void early_init_fdt_reserve_self(void);
|
||||
extern void early_init_dt_add_memory_arch(u64 base, u64 size);
|
||||
extern int early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size);
|
||||
extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
|
||||
bool no_map);
|
||||
extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
|
||||
|
|
|
@@ -10,6 +10,8 @@
extern const char linux_banner[];
extern const char linux_proc_banner[];

+#define PRINTK_MAX_SINGLE_HEADER_LEN 2
+
static inline int printk_get_level(const char *buffer)
{
	if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {

@@ -31,6 +33,14 @@ static inline const char *printk_skip_level(const char *buffer)
	return buffer;
}

+static inline const char *printk_skip_headers(const char *buffer)
+{
+	while (printk_get_level(buffer))
+		buffer = printk_skip_level(buffer);
+
+	return buffer;
+}
+
#define CONSOLE_EXT_LOG_MAX	8192

/* printk's without a loglevel use this.. */

@@ -40,10 +50,15 @@ static inline const char *printk_skip_level(const char *buffer)
#define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
#define CONSOLE_LOGLEVEL_MIN	 1 /* Minimum loglevel we let people use */
#define CONSOLE_LOGLEVEL_QUIET	 4 /* Shhh ..., when booted with "quiet" */
-#define CONSOLE_LOGLEVEL_DEFAULT 7 /* anything MORE serious than KERN_DEBUG */
#define CONSOLE_LOGLEVEL_DEBUG	10 /* issue debug messages */
#define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */

+/*
+ * Default used to be hard-coded at 7, we're now allowing it to be set from
+ * kernel config.
+ */
+#define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT
+
extern int console_printk[];

#define console_loglevel (console_printk[0])
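For readers new to these helpers: printk_skip_headers() simply applies printk_skip_level() until no KERN_SOH-prefixed header remains, which matters for buffers that can carry more than one header. A minimal, illustrative sketch (the message contents are made up for the example):

#include <linux/kern_levels.h>
#include <linux/printk.h>

/* Illustrative only: a message that happens to carry two printk
 * headers, a loglevel header followed by KERN_CONT. */
static void example_skip_headers(void)
{
	const char *msg = KERN_INFO KERN_CONT "device probed\n";
	const char *text = printk_skip_headers(msg);

	/* text now points at "device probed\n"; a single call to
	 * printk_skip_level() would only have skipped the first header. */
	(void)text;
}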
@@ -80,14 +80,11 @@ static inline bool radix_tree_is_internal_node(void *ptr)
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
					  RADIX_TREE_MAP_SHIFT))

-/* Internally used bits of node->count */
-#define RADIX_TREE_COUNT_SHIFT	(RADIX_TREE_MAP_SHIFT + 1)
-#define RADIX_TREE_COUNT_MASK	((1UL << RADIX_TREE_COUNT_SHIFT) - 1)
-
struct radix_tree_node {
-	unsigned char	shift;	/* Bits remaining in each slot */
-	unsigned char	offset;	/* Slot offset in parent */
-	unsigned int	count;
+	unsigned char	shift;		/* Bits remaining in each slot */
+	unsigned char	offset;		/* Slot offset in parent */
+	unsigned char	count;		/* Total entry count */
+	unsigned char	exceptional;	/* Exceptional entry count */
	union {
		struct {
			/* Used when ascending tree */

@@ -248,20 +245,6 @@ static inline int radix_tree_exception(void *arg)
	return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
}

-/**
- * radix_tree_replace_slot - replace item in a slot
- * @pslot:	pointer to slot, returned by radix_tree_lookup_slot
- * @item:	new item to store in the slot.
- *
- * For use with radix_tree_lookup_slot(). Caller must hold tree write locked
- * across slot lookup and replacement.
- */
-static inline void radix_tree_replace_slot(void **pslot, void *item)
-{
-	BUG_ON(radix_tree_is_internal_node(item));
-	rcu_assign_pointer(*pslot, item);
-}
-
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
			unsigned order, struct radix_tree_node **nodep,
			void ***slotp);

@@ -276,7 +259,14 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
			  struct radix_tree_node **nodep, void ***slotp);
void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
-bool __radix_tree_delete_node(struct radix_tree_root *root,
+typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);
+void __radix_tree_replace(struct radix_tree_root *root,
+			  struct radix_tree_node *node,
+			  void **slot, void *item,
+			  radix_tree_update_node_t update_node, void *private);
+void radix_tree_replace_slot(struct radix_tree_root *root,
+			     void **slot, void *item);
+void __radix_tree_delete_node(struct radix_tree_root *root,
			      struct radix_tree_node *node);
void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
void *radix_tree_delete(struct radix_tree_root *, unsigned long);
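The reworked replacement entry points take the tree root (and, for the low-level variant, the node plus an optional per-node callback) so that entry accounting can stay inside the radix tree. A minimal, illustrative caller might look like the sketch below; the callback body and helper name are hypothetical, loosely modelled on how the page cache passes workingset_update_node():

#include <linux/radix-tree.h>

static void my_update_node(struct radix_tree_node *node, void *private)
{
	/* e.g. add or drop the node from a shrinker list once its
	 * entry counts change, as the page cache does. */
}

/* Replace whatever is stored at @index with @new_item, keeping the
 * node's count/exceptional accounting consistent. */
static void example_replace(struct radix_tree_root *root, unsigned long index,
			    void *new_item, void *private)
{
	struct radix_tree_node *node;
	void **slot;

	/* Caller must hold the tree write-locked across lookup + replace. */
	if (!__radix_tree_lookup(root, index, &node, &slot))
		return;

	__radix_tree_replace(root, node, slot, new_item,
			     my_update_node, private);
}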
@@ -137,11 +137,19 @@ static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
 * anon_vma helper functions.
 */
void anon_vma_init(void);	/* create anon_vma_cachep */
-int  anon_vma_prepare(struct vm_area_struct *);
+int  __anon_vma_prepare(struct vm_area_struct *);
void unlink_anon_vmas(struct vm_area_struct *);
int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);

+static inline int anon_vma_prepare(struct vm_area_struct *vma)
+{
+	if (likely(vma->anon_vma))
+		return 0;
+
+	return __anon_vma_prepare(vma);
+}
+
static inline void anon_vma_merge(struct vm_area_struct *vma,
				  struct vm_area_struct *next)
{
|
|||
/* leave room for more dump flags */
|
||||
#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
|
||||
#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
|
||||
#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
|
||||
/*
|
||||
* This one-shot flag is dropped due to necessity of changing exe once again
|
||||
* on NFS restore
|
||||
*/
|
||||
//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
|
||||
|
||||
#define MMF_HAS_UPROBES 19 /* has uprobes */
|
||||
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
|
||||
|
|
|
@@ -246,39 +246,7 @@ struct swap_info_struct {
void *workingset_eviction(struct address_space *mapping, struct page *page);
bool workingset_refault(void *shadow);
void workingset_activation(struct page *page);
-extern struct list_lru workingset_shadow_nodes;
-
-static inline unsigned int workingset_node_pages(struct radix_tree_node *node)
-{
-	return node->count & RADIX_TREE_COUNT_MASK;
-}
-
-static inline void workingset_node_pages_inc(struct radix_tree_node *node)
-{
-	node->count++;
-}
-
-static inline void workingset_node_pages_dec(struct radix_tree_node *node)
-{
-	VM_WARN_ON_ONCE(!workingset_node_pages(node));
-	node->count--;
-}
-
-static inline unsigned int workingset_node_shadows(struct radix_tree_node *node)
-{
-	return node->count >> RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_inc(struct radix_tree_node *node)
-{
-	node->count += 1U << RADIX_TREE_COUNT_SHIFT;
-}
-
-static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
-{
-	VM_WARN_ON_ONCE(!workingset_node_shadows(node));
-	node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
-}
+void workingset_update_node(struct radix_tree_node *node, void *private);

/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
@@ -82,6 +82,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
			const void *caller);

extern void vfree(const void *addr);
+extern void vfree_atomic(const void *addr);

extern void *vmap(struct page **pages, unsigned int count,
			unsigned long flags, pgprot_t prot);
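vfree_atomic() is the variant intended for callers that cannot sleep; a minimal, illustrative sketch (the buffer and surrounding context are made up):

#include <linux/vmalloc.h>

static void *example_buf;

/* Illustrative only: release a vmalloc'ed buffer from a path that must
 * not sleep (e.g. while interrupts are disabled), where plain vfree()
 * may sleep and so is not suitable. */
static void example_release_atomic(void)
{
	vfree_atomic(example_buf);
	example_buf = NULL;
}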
@@ -588,7 +588,7 @@ void __init prepare_namespace(void)
			saved_root_name);
		while (driver_probe_done() != 0 ||
			(ROOT_DEV = name_to_dev_t(saved_root_name)) == 0)
-			msleep(100);
+			msleep(5);
		async_synchronize_full();
	}

@@ -697,7 +697,7 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
	 * Write to all consoles.
	 */
	retlen = strlen(kdb_buffer);
-	cp = (char *) printk_skip_level(kdb_buffer);
+	cp = (char *) printk_skip_headers(kdb_buffer);
	if (!dbg_kdb_mode && kgdb_connected) {
		gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
	} else {
@@ -229,7 +229,7 @@ static inline void free_thread_stack(struct task_struct *tsk)
		}
		local_irq_restore(flags);

-		vfree(tsk->stack);
+		vfree_atomic(tsk->stack);
		return;
	}
#endif
@@ -106,7 +106,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
	 * complain:
	 */
	if (sysctl_hung_task_warnings) {
-		sysctl_hung_task_warnings--;
+		if (sysctl_hung_task_warnings > 0)
+			sysctl_hung_task_warnings--;
		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
			t->comm, t->pid, timeout);
		pr_err("      %s %s %.*s\n",
@@ -261,7 +261,8 @@ static void create_kthread(struct kthread_create_info *create)
	}
}

-static struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
+static __printf(4, 0)
+struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
						    void *data, int node,
						    const char namefmt[],
						    va_list args)

@@ -635,7 +636,7 @@ int kthread_worker_fn(void *worker_ptr)
}
EXPORT_SYMBOL_GPL(kthread_worker_fn);

-static struct kthread_worker *
+static __printf(3, 0) struct kthread_worker *
__kthread_create_worker(int cpu, unsigned int flags,
			const char namefmt[], va_list args)
{
@@ -67,7 +67,8 @@ static int vprintk_nmi(const char *fmt, va_list args)
again:
	len = atomic_read(&s->len);

-	if (len >= sizeof(s->buffer)) {
+	/* The trailing '\0' is not counted into len. */
+	if (len >= sizeof(s->buffer) - 1) {
		atomic_inc(&nmi_message_lost);
		return 0;
	}

@@ -79,7 +80,7 @@ static int vprintk_nmi(const char *fmt, va_list args)
	if (!len)
		smp_rmb();

-	add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
+	add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);

	/*
	 * Do it once again if the buffer has been flushed in the meantime.
@ -113,16 +114,51 @@ static void printk_nmi_flush_line(const char *text, int len)
|
|||
|
||||
}
|
||||
|
||||
/*
|
||||
* printk one line from the temporary buffer from @start index until
|
||||
* and including the @end index.
|
||||
*/
|
||||
static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s,
|
||||
int start, int end)
|
||||
/* printk part of the temporary buffer line by line */
|
||||
static int printk_nmi_flush_buffer(const char *start, size_t len)
|
||||
{
|
||||
const char *buf = s->buffer + start;
|
||||
const char *c, *end;
|
||||
bool header;
|
||||
|
||||
printk_nmi_flush_line(buf, (end - start) + 1);
|
||||
c = start;
|
||||
end = start + len;
|
||||
header = true;
|
||||
|
||||
/* Print line by line. */
|
||||
while (c < end) {
|
||||
if (*c == '\n') {
|
||||
printk_nmi_flush_line(start, c - start + 1);
|
||||
start = ++c;
|
||||
header = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle continuous lines or missing new line. */
|
||||
if ((c + 1 < end) && printk_get_level(c)) {
|
||||
if (header) {
|
||||
c = printk_skip_level(c);
|
||||
continue;
|
||||
}
|
||||
|
||||
printk_nmi_flush_line(start, c - start);
|
||||
start = c++;
|
||||
header = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
header = false;
|
||||
c++;
|
||||
}
|
||||
|
||||
/* Check if there was a partial line. Ignore pure header. */
|
||||
if (start < end && !header) {
|
||||
static const char newline[] = KERN_CONT "\n";
|
||||
|
||||
printk_nmi_flush_line(start, end - start);
|
||||
printk_nmi_flush_line(newline, strlen(newline));
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -135,8 +171,8 @@ static void __printk_nmi_flush(struct irq_work *work)
|
|||
__RAW_SPIN_LOCK_INITIALIZER(read_lock);
|
||||
struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work);
|
||||
unsigned long flags;
|
||||
size_t len, size;
|
||||
int i, last_i;
|
||||
size_t len;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* The lock has two functions. First, one reader has to flush all
|
||||
|
@ -154,12 +190,14 @@ static void __printk_nmi_flush(struct irq_work *work)
|
|||
/*
|
||||
* This is just a paranoid check that nobody has manipulated
|
||||
* the buffer an unexpected way. If we printed something then
|
||||
* @len must only increase.
|
||||
* @len must only increase. Also it should never overflow the
|
||||
* buffer size.
|
||||
*/
|
||||
if (i && i >= len) {
|
||||
if ((i && i >= len) || len > sizeof(s->buffer)) {
|
||||
const char *msg = "printk_nmi_flush: internal error\n";
|
||||
|
||||
printk_nmi_flush_line(msg, strlen(msg));
|
||||
len = 0;
|
||||
}
|
||||
|
||||
if (!len)
|
||||
|
@ -167,22 +205,7 @@ static void __printk_nmi_flush(struct irq_work *work)
|
|||
|
||||
/* Make sure that data has been written up to the @len */
|
||||
smp_rmb();
|
||||
|
||||
size = min(len, sizeof(s->buffer));
|
||||
last_i = i;
|
||||
|
||||
/* Print line by line. */
|
||||
for (; i < size; i++) {
|
||||
if (s->buffer[i] == '\n') {
|
||||
printk_nmi_flush_seq_line(s, last_i, i);
|
||||
last_i = i + 1;
|
||||
}
|
||||
}
|
||||
/* Check if there was a partial line. */
|
||||
if (last_i < size) {
|
||||
printk_nmi_flush_seq_line(s, last_i, size - 1);
|
||||
printk_nmi_flush_line("\n", strlen("\n"));
|
||||
}
|
||||
i += printk_nmi_flush_buffer(s->buffer + i, len - i);
|
||||
|
||||
/*
|
||||
* Check that nothing has got added in the meantime and truncate
|
||||
|
|
10	kernel/sys.c

@@ -1697,16 +1697,6 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
		fput(exe_file);
	}

-	/*
-	 * The symlink can be changed only once, just to disallow arbitrary
-	 * transitions malicious software might bring in. This means one
-	 * could make a snapshot over all processes running and monitor
-	 * /proc/pid/exe changes to notice unusual activity if needed.
-	 */
-	err = -EPERM;
-	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
-		goto exit;
-
	err = 0;
	/* set the new file, lockless */
	get_file(exe.file);
@@ -15,6 +15,21 @@ config PRINTK_TIME
	  The behavior is also controlled by the kernel command line
	  parameter printk.time=1. See Documentation/kernel-parameters.txt

+config CONSOLE_LOGLEVEL_DEFAULT
+	int "Default console loglevel (1-15)"
+	range 1 15
+	default "7"
+	help
+	  Default loglevel to determine what will be printed on the console.
+
+	  Setting a default here is equivalent to passing in loglevel=<x> in
+	  the kernel bootargs. loglevel=<x> continues to override whatever
+	  value is specified here as well.
+
+	  Note: This does not affect the log level of un-prefixed printk()
+	  usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT
+	  option.
+
config MESSAGE_LOGLEVEL_DEFAULT
	int "Default message log level (1-7)"
	range 1 7

@@ -26,6 +41,10 @@ config MESSAGE_LOGLEVEL_DEFAULT
	  that are auditing their logs closely may want to set it to a lower
	  priority.

+	  Note: This does not affect what message level gets printed on the console
+	  by default. To change that, use loglevel=<x> in the kernel bootargs,
+	  or pick a different CONSOLE_LOGLEVEL_DEFAULT configuration value.
+
config BOOT_PRINTK_DELAY
	bool "Delay each boot printk message by N milliseconds"
	depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY

@@ -1986,7 +2005,7 @@ config ARCH_HAS_DEVMEM_IS_ALLOWED

config STRICT_DEVMEM
	bool "Filter access to /dev/mem"
-	depends on MMU
+	depends on MMU && DEVMEM
	depends on ARCH_HAS_DEVMEM_IS_ALLOWED
	default y if TILE || PPC
	---help---
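As a quick illustration of the new knob (values picked arbitrarily), a quieter console can now be requested at build time or, as before, overridden on the kernel command line:

	# .config fragment: only messages more serious than KERN_NOTICE
	# reach the console by default
	CONFIG_CONSOLE_LOGLEVEL_DEFAULT=5

	# equivalent boot-time override
	loglevel=5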
11	lib/idr.c

@@ -927,6 +927,9 @@ EXPORT_SYMBOL(ida_pre_get);
 * and go back to the ida_pre_get() call. If the ida is full, it will
 * return %-ENOSPC.
 *
+ * Note that callers must ensure that concurrent access to @ida is not possible.
+ * See ida_simple_get() for a variant which takes care of locking.
+ *
 * @p_id returns a value in the range @starting_id ... %0x7fffffff.
 */
int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)

@@ -1073,6 +1076,9 @@ EXPORT_SYMBOL(ida_destroy);
 * Allocates an id in the range start <= id < end, or returns -ENOSPC.
 * On memory allocation failure, returns -ENOMEM.
 *
+ * Compared to ida_get_new_above() this function does its own locking, and
+ * should be used unless there are special requirements.
+ *
 * Use ida_simple_remove() to get rid of an id.
 */
int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,

@@ -1119,6 +1125,11 @@ EXPORT_SYMBOL(ida_simple_get);
 * ida_simple_remove - remove an allocated id.
 * @ida: the (initialized) ida.
 * @id: the id returned by ida_simple_get.
+ *
+ * Use to release an id allocated with ida_simple_get().
+ *
+ * Compared to ida_remove() this function does its own locking, and should be
+ * used unless there are special requirements.
 */
void ida_simple_remove(struct ida *ida, unsigned int id)
{
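To make the locking contrast concrete, a minimal, illustrative comparison of the two interfaces (the spinlock and identifiers are made up; error handling trimmed):

#include <linux/gfp.h>
#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDA(example_ida);
static DEFINE_SPINLOCK(example_lock);	/* only needed for the raw interface */

/* ida_simple_get() does its own locking; ask for any id >= 0. */
static int alloc_id_simple(void)
{
	return ida_simple_get(&example_ida, 0, 0, GFP_KERNEL);
}

/* ida_get_new_above() leaves serialization to the caller. */
static int alloc_id_raw(void)
{
	int id, err;

	do {
		if (!ida_pre_get(&example_ida, GFP_KERNEL))
			return -ENOMEM;
		spin_lock(&example_lock);
		err = ida_get_new_above(&example_ida, 0, &id);
		spin_unlock(&example_lock);
	} while (err == -EAGAIN);

	return err ? err : id;
}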
297	lib/radix-tree.c
|
@ -220,10 +220,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
|
|||
{
|
||||
unsigned long i;
|
||||
|
||||
pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n",
|
||||
pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d exceptional %d parent %p\n",
|
||||
node, node->offset,
|
||||
node->tags[0][0], node->tags[1][0], node->tags[2][0],
|
||||
node->shift, node->count, node->parent);
|
||||
node->shift, node->count, node->exceptional, node->parent);
|
||||
|
||||
for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
|
||||
unsigned long first = index | (i << node->shift);
|
||||
|
@ -325,7 +325,6 @@ static void radix_tree_node_rcu_free(struct rcu_head *head)
|
|||
tag_clear(node, i, 0);
|
||||
|
||||
node->slots[0] = NULL;
|
||||
node->count = 0;
|
||||
|
||||
kmem_cache_free(radix_tree_node_cachep, node);
|
||||
}
|
||||
|
@ -522,8 +521,13 @@ static int radix_tree_extend(struct radix_tree_root *root,
|
|||
node->offset = 0;
|
||||
node->count = 1;
|
||||
node->parent = NULL;
|
||||
if (radix_tree_is_internal_node(slot))
|
||||
if (radix_tree_is_internal_node(slot)) {
|
||||
entry_to_node(slot)->parent = node;
|
||||
} else {
|
||||
/* Moving an exceptional root->rnode to a node */
|
||||
if (radix_tree_exceptional_entry(slot))
|
||||
node->exceptional = 1;
|
||||
}
|
||||
node->slots[0] = slot;
|
||||
slot = node_to_entry(node);
|
||||
rcu_assign_pointer(root->rnode, slot);
|
||||
|
@ -533,6 +537,104 @@ static int radix_tree_extend(struct radix_tree_root *root,
|
|||
return maxshift + RADIX_TREE_MAP_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
* radix_tree_shrink - shrink radix tree to minimum height
|
||||
* @root radix tree root
|
||||
*/
|
||||
static inline void radix_tree_shrink(struct radix_tree_root *root,
|
||||
radix_tree_update_node_t update_node,
|
||||
void *private)
|
||||
{
|
||||
for (;;) {
|
||||
struct radix_tree_node *node = root->rnode;
|
||||
struct radix_tree_node *child;
|
||||
|
||||
if (!radix_tree_is_internal_node(node))
|
||||
break;
|
||||
node = entry_to_node(node);
|
||||
|
||||
/*
|
||||
* The candidate node has more than one child, or its child
|
||||
* is not at the leftmost slot, or the child is a multiorder
|
||||
* entry, we cannot shrink.
|
||||
*/
|
||||
if (node->count != 1)
|
||||
break;
|
||||
child = node->slots[0];
|
||||
if (!child)
|
||||
break;
|
||||
if (!radix_tree_is_internal_node(child) && node->shift)
|
||||
break;
|
||||
|
||||
if (radix_tree_is_internal_node(child))
|
||||
entry_to_node(child)->parent = NULL;
|
||||
|
||||
/*
|
||||
* We don't need rcu_assign_pointer(), since we are simply
|
||||
* moving the node from one part of the tree to another: if it
|
||||
* was safe to dereference the old pointer to it
|
||||
* (node->slots[0]), it will be safe to dereference the new
|
||||
* one (root->rnode) as far as dependent read barriers go.
|
||||
*/
|
||||
root->rnode = child;
|
||||
|
||||
/*
|
||||
* We have a dilemma here. The node's slot[0] must not be
|
||||
* NULLed in case there are concurrent lookups expecting to
|
||||
* find the item. However if this was a bottom-level node,
|
||||
* then it may be subject to the slot pointer being visible
|
||||
* to callers dereferencing it. If item corresponding to
|
||||
* slot[0] is subsequently deleted, these callers would expect
|
||||
* their slot to become empty sooner or later.
|
||||
*
|
||||
* For example, lockless pagecache will look up a slot, deref
|
||||
* the page pointer, and if the page has 0 refcount it means it
|
||||
* was concurrently deleted from pagecache so try the deref
|
||||
* again. Fortunately there is already a requirement for logic
|
||||
* to retry the entire slot lookup -- the indirect pointer
|
||||
* problem (replacing direct root node with an indirect pointer
|
||||
* also results in a stale slot). So tag the slot as indirect
|
||||
* to force callers to retry.
|
||||
*/
|
||||
node->count = 0;
|
||||
if (!radix_tree_is_internal_node(child)) {
|
||||
node->slots[0] = RADIX_TREE_RETRY;
|
||||
if (update_node)
|
||||
update_node(node, private);
|
||||
}
|
||||
|
||||
radix_tree_node_free(node);
|
||||
}
|
||||
}
|
||||
|
||||
static void delete_node(struct radix_tree_root *root,
|
||||
struct radix_tree_node *node,
|
||||
radix_tree_update_node_t update_node, void *private)
|
||||
{
|
||||
do {
|
||||
struct radix_tree_node *parent;
|
||||
|
||||
if (node->count) {
|
||||
if (node == entry_to_node(root->rnode))
|
||||
radix_tree_shrink(root, update_node, private);
|
||||
return;
|
||||
}
|
||||
|
||||
parent = node->parent;
|
||||
if (parent) {
|
||||
parent->slots[node->offset] = NULL;
|
||||
parent->count--;
|
||||
} else {
|
||||
root_tag_clear_all(root);
|
||||
root->rnode = NULL;
|
||||
}
|
||||
|
||||
radix_tree_node_free(node);
|
||||
|
||||
node = parent;
|
||||
} while (node);
|
||||
}
|
||||
|
||||
/**
|
||||
* __radix_tree_create - create a slot in a radix tree
|
||||
* @root: radix tree root
|
||||
|
@ -649,6 +751,8 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
|
|||
if (node) {
|
||||
unsigned offset = get_slot_offset(node, slot);
|
||||
node->count++;
|
||||
if (radix_tree_exceptional_entry(item))
|
||||
node->exceptional++;
|
||||
BUG_ON(tag_get(node, 0, offset));
|
||||
BUG_ON(tag_get(node, 1, offset));
|
||||
BUG_ON(tag_get(node, 2, offset));
|
||||
|
@ -746,6 +850,85 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
|
|||
}
|
||||
EXPORT_SYMBOL(radix_tree_lookup);
|
||||
|
||||
static void replace_slot(struct radix_tree_root *root,
|
||||
struct radix_tree_node *node,
|
||||
void **slot, void *item,
|
||||
bool warn_typeswitch)
|
||||
{
|
||||
void *old = rcu_dereference_raw(*slot);
|
||||
int count, exceptional;
|
||||
|
||||
WARN_ON_ONCE(radix_tree_is_internal_node(item));
|
||||
|
||||
count = !!item - !!old;
|
||||
exceptional = !!radix_tree_exceptional_entry(item) -
|
||||
!!radix_tree_exceptional_entry(old);
|
||||
|
||||
WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
|
||||
|
||||
if (node) {
|
||||
node->count += count;
|
||||
node->exceptional += exceptional;
|
||||
}
|
||||
|
||||
rcu_assign_pointer(*slot, item);
|
||||
}
|
||||
|
||||
/**
|
||||
* __radix_tree_replace - replace item in a slot
|
||||
* @root: radix tree root
|
||||
* @node: pointer to tree node
|
||||
* @slot: pointer to slot in @node
|
||||
* @item: new item to store in the slot.
|
||||
* @update_node: callback for changing leaf nodes
|
||||
* @private: private data to pass to @update_node
|
||||
*
|
||||
* For use with __radix_tree_lookup(). Caller must hold tree write locked
|
||||
* across slot lookup and replacement.
|
||||
*/
|
||||
void __radix_tree_replace(struct radix_tree_root *root,
|
||||
struct radix_tree_node *node,
|
||||
void **slot, void *item,
|
||||
radix_tree_update_node_t update_node, void *private)
|
||||
{
|
||||
/*
|
||||
* This function supports replacing exceptional entries and
|
||||
* deleting entries, but that needs accounting against the
|
||||
* node unless the slot is root->rnode.
|
||||
*/
|
||||
replace_slot(root, node, slot, item,
|
||||
!node && slot != (void **)&root->rnode);
|
||||
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
if (update_node)
|
||||
update_node(node, private);
|
||||
|
||||
delete_node(root, node, update_node, private);
|
||||
}
|
||||
|
||||
/**
|
||||
* radix_tree_replace_slot - replace item in a slot
|
||||
* @root: radix tree root
|
||||
* @slot: pointer to slot
|
||||
* @item: new item to store in the slot.
|
||||
*
|
||||
* For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
|
||||
* radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
|
||||
* across slot lookup and replacement.
|
||||
*
|
||||
* NOTE: This cannot be used to switch between non-entries (empty slots),
|
||||
* regular entries, and exceptional entries, as that requires accounting
|
||||
* inside the radix tree node. When switching from one type of entry or
|
||||
* deleting, use __radix_tree_lookup() and __radix_tree_replace().
|
||||
*/
|
||||
void radix_tree_replace_slot(struct radix_tree_root *root,
|
||||
void **slot, void *item)
|
||||
{
|
||||
replace_slot(root, NULL, slot, item, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* radix_tree_tag_set - set a tag on a radix tree node
|
||||
* @root: radix tree root
|
||||
|
@ -1393,75 +1576,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
|
|||
}
|
||||
#endif /* CONFIG_SHMEM && CONFIG_SWAP */
|
||||
|
||||
/**
|
||||
* radix_tree_shrink - shrink radix tree to minimum height
|
||||
* @root radix tree root
|
||||
*/
|
||||
static inline bool radix_tree_shrink(struct radix_tree_root *root)
|
||||
{
|
||||
bool shrunk = false;
|
||||
|
||||
for (;;) {
|
||||
struct radix_tree_node *node = root->rnode;
|
||||
struct radix_tree_node *child;
|
||||
|
||||
if (!radix_tree_is_internal_node(node))
|
||||
break;
|
||||
node = entry_to_node(node);
|
||||
|
||||
/*
|
||||
* The candidate node has more than one child, or its child
|
||||
* is not at the leftmost slot, or the child is a multiorder
|
||||
* entry, we cannot shrink.
|
||||
*/
|
||||
if (node->count != 1)
|
||||
break;
|
||||
child = node->slots[0];
|
||||
if (!child)
|
||||
break;
|
||||
if (!radix_tree_is_internal_node(child) && node->shift)
|
||||
break;
|
||||
|
||||
if (radix_tree_is_internal_node(child))
|
||||
entry_to_node(child)->parent = NULL;
|
||||
|
||||
/*
|
||||
* We don't need rcu_assign_pointer(), since we are simply
|
||||
* moving the node from one part of the tree to another: if it
|
||||
* was safe to dereference the old pointer to it
|
||||
* (node->slots[0]), it will be safe to dereference the new
|
||||
* one (root->rnode) as far as dependent read barriers go.
|
||||
*/
|
||||
root->rnode = child;
|
||||
|
||||
/*
|
||||
* We have a dilemma here. The node's slot[0] must not be
|
||||
* NULLed in case there are concurrent lookups expecting to
|
||||
* find the item. However if this was a bottom-level node,
|
||||
* then it may be subject to the slot pointer being visible
|
||||
* to callers dereferencing it. If item corresponding to
|
||||
* slot[0] is subsequently deleted, these callers would expect
|
||||
* their slot to become empty sooner or later.
|
||||
*
|
||||
* For example, lockless pagecache will look up a slot, deref
|
||||
* the page pointer, and if the page has 0 refcount it means it
|
||||
* was concurrently deleted from pagecache so try the deref
|
||||
* again. Fortunately there is already a requirement for logic
|
||||
* to retry the entire slot lookup -- the indirect pointer
|
||||
* problem (replacing direct root node with an indirect pointer
|
||||
* also results in a stale slot). So tag the slot as indirect
|
||||
* to force callers to retry.
|
||||
*/
|
||||
if (!radix_tree_is_internal_node(child))
|
||||
node->slots[0] = RADIX_TREE_RETRY;
|
||||
|
||||
radix_tree_node_free(node);
|
||||
shrunk = true;
|
||||
}
|
||||
|
||||
return shrunk;
|
||||
}
|
||||
|
||||
/**
|
||||
* __radix_tree_delete_node - try to free node after clearing a slot
|
||||
* @root: radix tree root
|
||||
|
@ -1470,39 +1584,11 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
|
|||
* After clearing the slot at @index in @node from radix tree
|
||||
* rooted at @root, call this function to attempt freeing the
|
||||
* node and shrinking the tree.
|
||||
*
|
||||
* Returns %true if @node was freed, %false otherwise.
|
||||
*/
|
||||
bool __radix_tree_delete_node(struct radix_tree_root *root,
|
||||
void __radix_tree_delete_node(struct radix_tree_root *root,
|
||||
struct radix_tree_node *node)
|
||||
{
|
||||
bool deleted = false;
|
||||
|
||||
do {
|
||||
struct radix_tree_node *parent;
|
||||
|
||||
if (node->count) {
|
||||
if (node == entry_to_node(root->rnode))
|
||||
deleted |= radix_tree_shrink(root);
|
||||
return deleted;
|
||||
}
|
||||
|
||||
parent = node->parent;
|
||||
if (parent) {
|
||||
parent->slots[node->offset] = NULL;
|
||||
parent->count--;
|
||||
} else {
|
||||
root_tag_clear_all(root);
|
||||
root->rnode = NULL;
|
||||
}
|
||||
|
||||
radix_tree_node_free(node);
|
||||
deleted = true;
|
||||
|
||||
node = parent;
|
||||
} while (node);
|
||||
|
||||
return deleted;
|
||||
delete_node(root, node, NULL, NULL);
|
||||
}
|
||||
|
||||
static inline void delete_sibling_entries(struct radix_tree_node *node,
|
||||
|
@ -1559,10 +1645,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
|
|||
node_tag_clear(root, node, tag, offset);
|
||||
|
||||
delete_sibling_entries(node, node_to_entry(slot), offset);
|
||||
node->slots[offset] = NULL;
|
||||
node->count--;
|
||||
|
||||
__radix_tree_delete_node(root, node);
|
||||
__radix_tree_replace(root, node, slot, NULL, NULL, NULL);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
|
23	lib/rbtree.c

@@ -296,11 +296,26 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
			 *
			 *   (p)           (p)
			 *   / \           / \
-			 *  N   S    -->  N   Sl
+			 *  N   S    -->  N   sl
			 *     / \             \
-			 *    sl  Sr            s
+			 *    sl  Sr            S
			 *                       \
			 *                        Sr
+			 *
+			 * Note: p might be red, and then both
+			 * p and sl are red after rotation(which
+			 * breaks property 4). This is fixed in
+			 * Case 4 (in __rb_rotate_set_parents()
+			 * which set sl the color of p
+			 * and set p RB_BLACK)
+			 *
+			 *   (p)            (sl)
+			 *   / \            /  \
+			 *  N   sl   -->   P    S
+			 *       \        /      \
+			 *        S      N        Sr
+			 *         \
+			 *          Sr
			 */
			tmp1 = tmp2->rb_right;
			WRITE_ONCE(sibling->rb_left, tmp1);

@@ -365,7 +380,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
				}
				break;
			}
-			/* Case 3 - right rotate at sibling */
+			/* Case 3 - left rotate at sibling */
			tmp1 = tmp2->rb_left;
			WRITE_ONCE(sibling->rb_right, tmp1);
			WRITE_ONCE(tmp2->rb_left, sibling);

@@ -377,7 +392,7 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root,
			tmp1 = sibling;
			sibling = tmp2;
		}
-		/* Case 4 - left rotate at parent + color flips */
+		/* Case 4 - right rotate at parent + color flips */
		tmp2 = sibling->rb_right;
		WRITE_ONCE(parent->rb_left, tmp2);
		WRITE_ONCE(sibling->rb_right, parent);
@@ -153,7 +153,7 @@ config MOVABLE_NODE
	bool "Enable to assign a node which has only movable memory"
	depends on HAVE_MEMBLOCK
	depends on NO_BOOTMEM
-	depends on X86_64
+	depends on X86_64 || OF_EARLY_FLATTREE || MEMORY_HOTPLUG
	depends on NUMA
	default n
	help

@@ -447,13 +447,9 @@ choice
	  benefit.
endchoice

-#
-# We don't deposit page tables on file THP mapping,
-# but Power makes use of them to address MMU quirk.
-#
config TRANSPARENT_HUGE_PAGECACHE
	def_bool y
-	depends on TRANSPARENT_HUGEPAGE && !PPC
+	depends on TRANSPARENT_HUGEPAGE

#
# UP and nommu archs use km based percpu allocator
@ -634,22 +634,6 @@ isolate_freepages_range(struct compact_control *cc,
|
|||
return pfn;
|
||||
}
|
||||
|
||||
/* Update the number of anon and file isolated pages in the zone */
|
||||
static void acct_isolated(struct zone *zone, struct compact_control *cc)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned int count[2] = { 0, };
|
||||
|
||||
if (list_empty(&cc->migratepages))
|
||||
return;
|
||||
|
||||
list_for_each_entry(page, &cc->migratepages, lru)
|
||||
count[!!page_is_file_cache(page)]++;
|
||||
|
||||
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]);
|
||||
mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]);
|
||||
}
|
||||
|
||||
/* Similar to reclaim, but different enough that they don't share logic */
|
||||
static bool too_many_isolated(struct zone *zone)
|
||||
{
|
||||
|
@ -866,6 +850,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
|||
|
||||
/* Successfully isolated */
|
||||
del_page_from_lru_list(page, lruvec, page_lru(page));
|
||||
inc_node_page_state(page,
|
||||
NR_ISOLATED_ANON + page_is_file_cache(page));
|
||||
|
||||
isolate_success:
|
||||
list_add(&page->lru, &cc->migratepages);
|
||||
|
@ -902,7 +888,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
|
|||
spin_unlock_irqrestore(zone_lru_lock(zone), flags);
|
||||
locked = false;
|
||||
}
|
||||
acct_isolated(zone, cc);
|
||||
putback_movable_pages(&cc->migratepages);
|
||||
cc->nr_migratepages = 0;
|
||||
cc->last_migrated_pfn = 0;
|
||||
|
@ -988,7 +973,6 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
|
|||
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
|
||||
break;
|
||||
}
|
||||
acct_isolated(cc->zone, cc);
|
||||
|
||||
return pfn;
|
||||
}
|
||||
|
@ -1258,10 +1242,8 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
|
|||
low_pfn = isolate_migratepages_block(cc, low_pfn,
|
||||
block_end_pfn, isolate_mode);
|
||||
|
||||
if (!low_pfn || cc->contended) {
|
||||
acct_isolated(zone, cc);
|
||||
if (!low_pfn || cc->contended)
|
||||
return ISOLATE_ABORT;
|
||||
}
|
||||
|
||||
/*
|
||||
* Either we isolated something and proceed with migration. Or
|
||||
|
@ -1271,7 +1253,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
|
|||
break;
|
||||
}
|
||||
|
||||
acct_isolated(zone, cc);
|
||||
/* Record where migration scanner will be restarted. */
|
||||
cc->migrate_pfn = low_pfn;
|
||||
|
||||
|
|
|
@ -59,6 +59,10 @@ void __dump_page(struct page *page, const char *reason)
|
|||
|
||||
pr_emerg("flags: %#lx(%pGp)\n", page->flags, &page->flags);
|
||||
|
||||
print_hex_dump(KERN_ALERT, "raw: ", DUMP_PREFIX_NONE, 32,
|
||||
sizeof(unsigned long), page,
|
||||
sizeof(struct page), false);
|
||||
|
||||
if (reason)
|
||||
pr_alert("page dumped because: %s\n", reason);
|
||||
|
||||
|
|
68	mm/filemap.c
|
@ -132,44 +132,29 @@ static int page_cache_tree_insert(struct address_space *mapping,
|
|||
if (!dax_mapping(mapping)) {
|
||||
if (shadowp)
|
||||
*shadowp = p;
|
||||
if (node)
|
||||
workingset_node_shadows_dec(node);
|
||||
} else {
|
||||
/* DAX can replace empty locked entry with a hole */
|
||||
WARN_ON_ONCE(p !=
|
||||
(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
|
||||
RADIX_DAX_ENTRY_LOCK));
|
||||
/* DAX accounts exceptional entries as normal pages */
|
||||
if (node)
|
||||
workingset_node_pages_dec(node);
|
||||
/* Wakeup waiters for exceptional entry lock */
|
||||
dax_wake_mapping_entry_waiter(mapping, page->index,
|
||||
false);
|
||||
}
|
||||
}
|
||||
radix_tree_replace_slot(slot, page);
|
||||
__radix_tree_replace(&mapping->page_tree, node, slot, page,
|
||||
workingset_update_node, mapping);
|
||||
mapping->nrpages++;
|
||||
if (node) {
|
||||
workingset_node_pages_inc(node);
|
||||
/*
|
||||
* Don't track node that contains actual pages.
|
||||
*
|
||||
* Avoid acquiring the list_lru lock if already
|
||||
* untracked. The list_empty() test is safe as
|
||||
* node->private_list is protected by
|
||||
* mapping->tree_lock.
|
||||
*/
|
||||
if (!list_empty(&node->private_list))
|
||||
list_lru_del(&workingset_shadow_nodes,
|
||||
&node->private_list);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void page_cache_tree_delete(struct address_space *mapping,
|
||||
struct page *page, void *shadow)
|
||||
{
|
||||
int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
|
||||
int i, nr;
|
||||
|
||||
/* hugetlb pages are represented by one entry in the radix tree */
|
||||
nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
|
||||
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
|
@ -182,44 +167,11 @@ static void page_cache_tree_delete(struct address_space *mapping,
|
|||
__radix_tree_lookup(&mapping->page_tree, page->index + i,
|
||||
&node, &slot);
|
||||
|
||||
VM_BUG_ON_PAGE(!node && nr != 1, page);
|
||||
|
||||
radix_tree_clear_tags(&mapping->page_tree, node, slot);
|
||||
|
||||
if (!node) {
|
||||
VM_BUG_ON_PAGE(nr != 1, page);
|
||||
/*
|
||||
* We need a node to properly account shadow
|
||||
* entries. Don't plant any without. XXX
|
||||
*/
|
||||
shadow = NULL;
|
||||
}
|
||||
|
||||
radix_tree_replace_slot(slot, shadow);
|
||||
|
||||
if (!node)
|
||||
break;
|
||||
|
||||
workingset_node_pages_dec(node);
|
||||
if (shadow)
|
||||
workingset_node_shadows_inc(node);
|
||||
else
|
||||
if (__radix_tree_delete_node(&mapping->page_tree, node))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Track node that only contains shadow entries. DAX mappings
|
||||
* contain no shadow entries and may contain other exceptional
|
||||
* entries so skip those.
|
||||
*
|
||||
* Avoid acquiring the list_lru lock if already tracked.
|
||||
* The list_empty() test is safe as node->private_list is
|
||||
* protected by mapping->tree_lock.
|
||||
*/
|
||||
if (!dax_mapping(mapping) && !workingset_node_pages(node) &&
|
||||
list_empty(&node->private_list)) {
|
||||
node->private_data = mapping;
|
||||
list_lru_add(&workingset_shadow_nodes,
|
||||
&node->private_list);
|
||||
}
|
||||
__radix_tree_replace(&mapping->page_tree, node, slot, shadow,
|
||||
workingset_update_node, mapping);
|
||||
}
|
||||
|
||||
if (shadow) {
|
||||
|
|
19	mm/gup.c
|
@ -632,7 +632,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
|
|||
return i;
|
||||
}
|
||||
|
||||
bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
|
||||
static bool vma_permits_fault(struct vm_area_struct *vma,
|
||||
unsigned int fault_flags)
|
||||
{
|
||||
bool write = !!(fault_flags & FAULT_FLAG_WRITE);
|
||||
bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
|
||||
|
@ -857,14 +858,12 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
|
|||
EXPORT_SYMBOL(get_user_pages_locked);
|
||||
|
||||
/*
|
||||
* Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
|
||||
* pass additional gup_flags as last parameter (like FOLL_HWPOISON).
|
||||
* Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows for
|
||||
* tsk, mm to be specified.
|
||||
*
|
||||
* NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
|
||||
* caller if required (just like with __get_user_pages). "FOLL_GET",
|
||||
* "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
|
||||
* according to the parameters "pages", "write", "force"
|
||||
* respectively.
|
||||
* caller if required (just like with __get_user_pages). "FOLL_GET"
|
||||
* is set implicitly if "pages" is non-NULL.
|
||||
*/
|
||||
__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
|
||||
unsigned long start, unsigned long nr_pages,
|
||||
|
@ -894,10 +893,8 @@ EXPORT_SYMBOL(__get_user_pages_unlocked);
|
|||
* get_user_pages_unlocked(tsk, mm, ..., pages);
|
||||
*
|
||||
* It is functionally equivalent to get_user_pages_fast so
|
||||
* get_user_pages_fast should be used instead, if the two parameters
|
||||
* "tsk" and "mm" are respectively equal to current and current->mm,
|
||||
* or if "force" shall be set to 1 (get_user_pages_fast misses the
|
||||
* "force" parameter).
|
||||
* get_user_pages_fast should be used instead if specific gup_flags
|
||||
* (e.g. FOLL_FORCE) are not required.
|
||||
*/
|
||||
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
|
||||
struct page **pages, unsigned int gup_flags)
|
||||
|
|
|
@ -285,6 +285,15 @@ static ssize_t use_zero_page_store(struct kobject *kobj,
|
|||
}
|
||||
static struct kobj_attribute use_zero_page_attr =
|
||||
__ATTR(use_zero_page, 0644, use_zero_page_show, use_zero_page_store);
|
||||
|
||||
static ssize_t hpage_pmd_size_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%lu\n", HPAGE_PMD_SIZE);
|
||||
}
|
||||
static struct kobj_attribute hpage_pmd_size_attr =
|
||||
__ATTR_RO(hpage_pmd_size);
|
||||
|
||||
#ifdef CONFIG_DEBUG_VM
|
||||
static ssize_t debug_cow_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
|
@ -307,6 +316,7 @@ static struct attribute *hugepage_attr[] = {
|
|||
&enabled_attr.attr,
|
||||
&defrag_attr.attr,
|
||||
&use_zero_page_attr.attr,
|
||||
&hpage_pmd_size_attr.attr,
|
||||
#if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
|
||||
&shmem_enabled_attr.attr,
|
||||
#endif
|
||||
|
@ -1323,6 +1333,8 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
|||
struct mm_struct *mm = tlb->mm;
|
||||
bool ret = false;
|
||||
|
||||
tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
|
||||
|
||||
ptl = pmd_trans_huge_lock(pmd, vma);
|
||||
if (!ptl)
|
||||
goto out_unlocked;
|
||||
|
@ -1378,12 +1390,23 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static inline void zap_deposited_table(struct mm_struct *mm, pmd_t *pmd)
|
||||
{
|
||||
pgtable_t pgtable;
|
||||
|
||||
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
|
||||
pte_free(mm, pgtable);
|
||||
atomic_long_dec(&mm->nr_ptes);
|
||||
}
|
||||
|
||||
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
pmd_t *pmd, unsigned long addr)
|
||||
{
|
||||
pmd_t orig_pmd;
|
||||
spinlock_t *ptl;
|
||||
|
||||
tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);
|
||||
|
||||
ptl = __pmd_trans_huge_lock(pmd, vma);
|
||||
if (!ptl)
|
||||
return 0;
|
||||
|
@ -1399,12 +1422,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
|||
if (vma_is_dax(vma)) {
|
||||
spin_unlock(ptl);
|
||||
if (is_huge_zero_pmd(orig_pmd))
|
||||
tlb_remove_page(tlb, pmd_page(orig_pmd));
|
||||
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
|
||||
} else if (is_huge_zero_pmd(orig_pmd)) {
|
||||
pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
|
||||
atomic_long_dec(&tlb->mm->nr_ptes);
|
||||
spin_unlock(ptl);
|
||||
tlb_remove_page(tlb, pmd_page(orig_pmd));
|
||||
tlb_remove_page_size(tlb, pmd_page(orig_pmd), HPAGE_PMD_SIZE);
|
||||
} else {
|
||||
struct page *page = pmd_page(orig_pmd);
|
||||
page_remove_rmap(page, true);
|
||||
|
@ -1417,6 +1440,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
|||
atomic_long_dec(&tlb->mm->nr_ptes);
|
||||
add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
|
||||
} else {
|
||||
if (arch_needs_pgtable_deposit())
|
||||
zap_deposited_table(tlb->mm, pmd);
|
||||
add_mm_counter(tlb->mm, MM_FILEPAGES, -HPAGE_PMD_NR);
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
|
@ -1425,6 +1450,21 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
|||
return 1;
|
||||
}
|
||||
|
||||
#ifndef pmd_move_must_withdraw
|
||||
static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
|
||||
spinlock_t *old_pmd_ptl,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
/*
|
||||
* With split pmd lock we also need to move preallocated
|
||||
* PTE page table if new_pmd is on different PMD page table.
|
||||
*
|
||||
* We also don't deposit and withdraw tables for file pages.
|
||||
*/
|
||||
return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma);
|
||||
}
|
||||
#endif
|
||||
|
||||
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
|
||||
unsigned long new_addr, unsigned long old_end,
|
||||
pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
|
||||
|
@ -1462,8 +1502,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
|
|||
force_flush = true;
|
||||
VM_BUG_ON(!pmd_none(*new_pmd));
|
||||
|
||||
if (pmd_move_must_withdraw(new_ptl, old_ptl) &&
|
||||
vma_is_anonymous(vma)) {
|
||||
if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) {
|
||||
pgtable_t pgtable;
|
||||
pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
|
||||
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
|
||||
|
@ -1589,6 +1628,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
|
|||
|
||||
if (!vma_is_anonymous(vma)) {
|
||||
_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
|
||||
/*
|
||||
* We are going to unmap this huge page. So
|
||||
* just go ahead and zap it
|
||||
*/
|
||||
if (arch_needs_pgtable_deposit())
|
||||
zap_deposited_table(mm, pmd);
|
||||
if (vma_is_dax(vma))
|
||||
return;
|
||||
page = pmd_page(_pmd);
|
||||
|
|
25	mm/hugetlb.c
|
@ -3286,6 +3286,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
|||
BUG_ON(start & ~huge_page_mask(h));
|
||||
BUG_ON(end & ~huge_page_mask(h));
|
||||
|
||||
/*
|
||||
* This is a hugetlb vma, all the pte entries should point
|
||||
* to huge page.
|
||||
*/
|
||||
tlb_remove_check_page_size_change(tlb, sz);
|
||||
tlb_start_vma(tlb, vma);
|
||||
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
|
||||
address = start;
|
||||
|
@ -3336,7 +3341,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
|||
}
|
||||
|
||||
pte = huge_ptep_get_and_clear(mm, address, ptep);
|
||||
tlb_remove_tlb_entry(tlb, ptep, address);
|
||||
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
|
||||
if (huge_pte_dirty(pte))
|
||||
set_page_dirty(page);
|
||||
|
||||
|
@ -3450,15 +3455,17 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
* Keep the pte_same checks anyway to make transition from the mutex easier.
|
||||
*/
|
||||
static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
unsigned long address, pte_t *ptep, pte_t pte,
|
||||
struct page *pagecache_page, spinlock_t *ptl)
|
||||
unsigned long address, pte_t *ptep,
|
||||
struct page *pagecache_page, spinlock_t *ptl)
|
||||
{
|
||||
pte_t pte;
|
||||
struct hstate *h = hstate_vma(vma);
|
||||
struct page *old_page, *new_page;
|
||||
int ret = 0, outside_reserve = 0;
|
||||
unsigned long mmun_start; /* For mmu_notifiers */
|
||||
unsigned long mmun_end; /* For mmu_notifiers */
|
||||
|
||||
pte = huge_ptep_get(ptep);
|
||||
old_page = pte_page(pte);
|
||||
|
||||
retry_avoidcopy:
|
||||
|
@ -3711,8 +3718,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
vma_end_reservation(h, vma, address);
|
||||
}
|
||||
|
||||
ptl = huge_pte_lockptr(h, mm, ptep);
|
||||
spin_lock(ptl);
|
||||
ptl = huge_pte_lock(h, mm, ptep);
|
||||
size = i_size_read(mapping->host) >> huge_page_shift(h);
|
||||
if (idx >= size)
|
||||
goto backout;
|
||||
|
@ -3733,7 +3739,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
hugetlb_count_add(pages_per_huge_page(h), mm);
|
||||
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
|
||||
/* Optimization, do the COW without a second fault */
|
||||
ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
|
||||
ret = hugetlb_cow(mm, vma, address, ptep, page, ptl);
|
||||
}
|
||||
|
||||
spin_unlock(ptl);
|
||||
|
@ -3888,8 +3894,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
|||
|
||||
if (flags & FAULT_FLAG_WRITE) {
|
||||
if (!huge_pte_write(entry)) {
|
||||
ret = hugetlb_cow(mm, vma, address, ptep, entry,
|
||||
pagecache_page, ptl);
|
||||
ret = hugetlb_cow(mm, vma, address, ptep,
|
||||
pagecache_page, ptl);
|
||||
goto out_put_page;
|
||||
}
|
||||
entry = huge_pte_mkdirty(entry);
|
||||
|
@ -4330,8 +4336,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
|
|||
if (!spte)
|
||||
goto out;
|
||||
|
||||
ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte);
|
||||
spin_lock(ptl);
|
||||
ptl = huge_pte_lock(hstate_vma(vma), mm, spte);
|
||||
if (pud_none(*pud)) {
|
||||
pud_populate(mm, pud,
|
||||
(pmd_t *)((unsigned long)spte & PAGE_MASK));
|
||||
|
|
|
@ -86,24 +86,9 @@ static void qlist_move_all(struct qlist_head *from, struct qlist_head *to)
|
|||
qlist_init(from);
|
||||
}
|
||||
|
||||
static void qlist_move(struct qlist_head *from, struct qlist_node *last,
|
||||
struct qlist_head *to, size_t size)
|
||||
{
|
||||
if (unlikely(last == from->tail)) {
|
||||
qlist_move_all(from, to);
|
||||
return;
|
||||
}
|
||||
if (qlist_empty(to))
|
||||
to->head = from->head;
|
||||
else
|
||||
to->tail->next = from->head;
|
||||
to->tail = last;
|
||||
from->head = last->next;
|
||||
last->next = NULL;
|
||||
from->bytes -= size;
|
||||
to->bytes += size;
|
||||
}
|
||||
|
||||
#define QUARANTINE_PERCPU_SIZE (1 << 20)
|
||||
#define QUARANTINE_BATCHES \
|
||||
(1024 > 4 * CONFIG_NR_CPUS ? 1024 : 4 * CONFIG_NR_CPUS)
|
||||
|
||||
/*
|
||||
* The object quarantine consists of per-cpu queues and a global queue,
|
||||
|
@ -111,11 +96,22 @@ static void qlist_move(struct qlist_head *from, struct qlist_node *last,
|
|||
*/
|
||||
static DEFINE_PER_CPU(struct qlist_head, cpu_quarantine);
|
||||
|
||||
static struct qlist_head global_quarantine;
|
||||
/* Round-robin FIFO array of batches. */
|
||||
static struct qlist_head global_quarantine[QUARANTINE_BATCHES];
|
||||
static int quarantine_head;
|
||||
static int quarantine_tail;
|
||||
/* Total size of all objects in global_quarantine across all batches. */
|
||||
static unsigned long quarantine_size;
|
||||
static DEFINE_SPINLOCK(quarantine_lock);
|
||||
|
||||
/* Maximum size of the global queue. */
|
||||
static unsigned long quarantine_size;
|
||||
static unsigned long quarantine_max_size;
|
||||
|
||||
/*
|
||||
* Target size of a batch in global_quarantine.
|
||||
* Usually equal to QUARANTINE_PERCPU_SIZE unless we have too much RAM.
|
||||
*/
|
||||
static unsigned long quarantine_batch_size;
|
||||
|
||||
/*
|
||||
* The fraction of physical memory the quarantine is allowed to occupy.
|
||||
|
@ -124,9 +120,6 @@ static unsigned long quarantine_size;
|
|||
*/
|
||||
#define QUARANTINE_FRACTION 32
|
||||
|
||||
#define QUARANTINE_LOW_SIZE (READ_ONCE(quarantine_size) * 3 / 4)
|
||||
#define QUARANTINE_PERCPU_SIZE (1 << 20)
|
||||
|
||||
static struct kmem_cache *qlink_to_cache(struct qlist_node *qlink)
|
||||
{
|
||||
return virt_to_head_page(qlink)->slab_cache;
|
||||
|
@ -191,21 +184,30 @@ void quarantine_put(struct kasan_free_meta *info, struct kmem_cache *cache)
|
|||
|
||||
if (unlikely(!qlist_empty(&temp))) {
|
||||
spin_lock_irqsave(&quarantine_lock, flags);
|
||||
qlist_move_all(&temp, &global_quarantine);
|
||||
WRITE_ONCE(quarantine_size, quarantine_size + temp.bytes);
|
||||
qlist_move_all(&temp, &global_quarantine[quarantine_tail]);
|
||||
if (global_quarantine[quarantine_tail].bytes >=
|
||||
READ_ONCE(quarantine_batch_size)) {
|
||||
int new_tail;
|
||||
|
||||
new_tail = quarantine_tail + 1;
|
||||
if (new_tail == QUARANTINE_BATCHES)
|
||||
new_tail = 0;
|
||||
if (new_tail != quarantine_head)
|
||||
quarantine_tail = new_tail;
|
||||
}
|
||||
spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
}
|
||||
}
|
||||
|
||||
void quarantine_reduce(void)
|
||||
{
|
||||
size_t new_quarantine_size, percpu_quarantines;
|
||||
size_t total_size, new_quarantine_size, percpu_quarantines;
|
||||
unsigned long flags;
|
||||
struct qlist_head to_free = QLIST_INIT;
|
||||
size_t size_to_free = 0;
|
||||
struct qlist_node *last;
|
||||
|
||||
if (likely(READ_ONCE(global_quarantine.bytes) <=
|
||||
READ_ONCE(quarantine_size)))
|
||||
if (likely(READ_ONCE(quarantine_size) <=
|
||||
READ_ONCE(quarantine_max_size)))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&quarantine_lock, flags);
|
||||
|
@ -214,24 +216,23 @@ void quarantine_reduce(void)
|
|||
* Update quarantine size in case of hotplug. Allocate a fraction of
|
||||
* the installed memory to quarantine minus per-cpu queue limits.
|
||||
*/
|
||||
new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
|
||||
total_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
|
||||
QUARANTINE_FRACTION;
|
||||
percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
|
||||
new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
|
||||
0 : new_quarantine_size - percpu_quarantines;
|
||||
WRITE_ONCE(quarantine_size, new_quarantine_size);
|
||||
new_quarantine_size = (total_size < percpu_quarantines) ?
|
||||
0 : total_size - percpu_quarantines;
|
||||
WRITE_ONCE(quarantine_max_size, new_quarantine_size);
|
||||
/* Aim at consuming at most 1/2 of slots in quarantine. */
|
||||
WRITE_ONCE(quarantine_batch_size, max((size_t)QUARANTINE_PERCPU_SIZE,
|
||||
2 * total_size / QUARANTINE_BATCHES));
|
||||
|
||||
last = global_quarantine.head;
|
||||
while (last) {
|
||||
struct kmem_cache *cache = qlink_to_cache(last);
|
||||
|
||||
size_to_free += cache->size;
|
||||
if (!last->next || size_to_free >
|
||||
global_quarantine.bytes - QUARANTINE_LOW_SIZE)
|
||||
break;
|
||||
last = last->next;
|
||||
if (likely(quarantine_size > quarantine_max_size)) {
|
||||
qlist_move_all(&global_quarantine[quarantine_head], &to_free);
|
||||
WRITE_ONCE(quarantine_size, quarantine_size - to_free.bytes);
|
||||
quarantine_head++;
|
||||
if (quarantine_head == QUARANTINE_BATCHES)
|
||||
quarantine_head = 0;
|
||||
}
|
||||
qlist_move(&global_quarantine, last, &to_free, size_to_free);
|
||||
|
||||
spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
|
||||
|
@ -275,13 +276,14 @@ static void per_cpu_remove_cache(void *arg)
|
|||
|
||||
void quarantine_remove_cache(struct kmem_cache *cache)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long flags, i;
|
||||
struct qlist_head to_free = QLIST_INIT;
|
||||
|
||||
on_each_cpu(per_cpu_remove_cache, cache, 1);
|
||||
|
||||
spin_lock_irqsave(&quarantine_lock, flags);
|
||||
qlist_move_cache(&global_quarantine, &to_free, cache);
|
||||
for (i = 0; i < QUARANTINE_BATCHES; i++)
|
||||
qlist_move_cache(&global_quarantine[i], &to_free, cache);
|
||||
spin_unlock_irqrestore(&quarantine_lock, flags);
|
||||
|
||||
qlist_free_all(&to_free, cache);
|
||||
|
|
|
@ -136,6 +136,8 @@ static void kasan_end_report(unsigned long *flags)
|
|||
pr_err("==================================================================\n");
|
||||
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
|
||||
spin_unlock_irqrestore(&report_lock, *flags);
|
||||
if (panic_on_warn)
|
||||
panic("panic_on_warn set ...\n");
|
||||
kasan_enable_current();
|
||||
}
|
||||
|
||||
|
|
|
@ -1242,6 +1242,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
|||
struct vm_area_struct *vma;
|
||||
unsigned long addr;
|
||||
pmd_t *pmd, _pmd;
|
||||
bool deposited = false;
|
||||
|
||||
i_mmap_lock_write(mapping);
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
|
@ -1266,10 +1267,26 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
|||
spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
|
||||
/* assume page table is clear */
|
||||
_pmd = pmdp_collapse_flush(vma, addr, pmd);
|
||||
/*
|
||||
* now deposit the pgtable for arch that need it
|
||||
* otherwise free it.
|
||||
*/
|
||||
if (arch_needs_pgtable_deposit()) {
|
||||
/*
|
||||
* The deposit should be visible only after
|
||||
* collapse is seen by others.
|
||||
*/
|
||||
smp_wmb();
|
||||
pgtable_trans_huge_deposit(vma->vm_mm, pmd,
|
||||
pmd_pgtable(_pmd));
|
||||
deposited = true;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
up_write(&vma->vm_mm->mmap_sem);
|
||||
atomic_long_dec(&vma->vm_mm->nr_ptes);
|
||||
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
|
||||
if (!deposited) {
|
||||
atomic_long_dec(&vma->vm_mm->nr_ptes);
|
||||
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
|
||||
}
|
||||
}
|
||||
}
|
||||
i_mmap_unlock_write(mapping);
|
||||
|
@ -1403,6 +1420,9 @@ static void collapse_shmem(struct mm_struct *mm,
|
|||
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
|
||||
slot = radix_tree_lookup_slot(&mapping->page_tree, index);
|
||||
VM_BUG_ON_PAGE(page != radix_tree_deref_slot_protected(slot,
|
||||
&mapping->tree_lock), page);
|
||||
VM_BUG_ON_PAGE(page_mapped(page), page);
|
||||
|
||||
/*
|
||||
|
@ -1423,9 +1443,10 @@ static void collapse_shmem(struct mm_struct *mm,
|
|||
list_add_tail(&page->lru, &pagelist);
|
||||
|
||||
/* Finally, replace with the new page. */
|
||||
radix_tree_replace_slot(slot,
|
||||
radix_tree_replace_slot(&mapping->page_tree, slot,
|
||||
new_page + (index % HPAGE_PMD_NR));
|
||||
|
||||
slot = radix_tree_iter_next(&iter);
|
||||
index++;
|
||||
continue;
|
||||
out_lru:
|
||||
|
@ -1521,9 +1542,11 @@ static void collapse_shmem(struct mm_struct *mm,
|
|||
if (!page || iter.index < page->index) {
|
||||
if (!nr_none)
|
||||
break;
|
||||
/* Put holes back where they were */
|
||||
radix_tree_replace_slot(slot, NULL);
|
||||
nr_none--;
|
||||
/* Put holes back where they were */
|
||||
radix_tree_delete(&mapping->page_tree,
|
||||
iter.index);
|
||||
slot = radix_tree_iter_next(&iter);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1532,11 +1555,13 @@ static void collapse_shmem(struct mm_struct *mm,
|
|||
/* Unfreeze the page. */
|
||||
list_del(&page->lru);
|
||||
page_ref_unfreeze(page, 2);
|
||||
radix_tree_replace_slot(slot, page);
|
||||
radix_tree_replace_slot(&mapping->page_tree,
|
||||
slot, page);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
putback_lru_page(page);
|
||||
unlock_page(page);
|
||||
spin_lock_irq(&mapping->tree_lock);
|
||||
slot = radix_tree_iter_next(&iter);
|
||||
}
|
||||
VM_BUG_ON(nr_none);
|
||||
spin_unlock_irq(&mapping->tree_lock);
|
||||
|
|
|
@@ -19,7 +19,7 @@
  *
  *
  * For more information on the algorithm and kmemleak usage, please see
- * Documentation/kmemleak.txt.
+ * Documentation/dev-tools/kmemleak.rst.
  *
  * Notes on locking
  * ----------------
@@ -281,6 +281,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	if (pmd_trans_unstable(pmd))
 		return 0;
 
+	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -2145,6 +2145,8 @@ struct memcg_kmem_cache_create_work {
 	struct work_struct work;
 };
 
+static struct workqueue_struct *memcg_kmem_cache_create_wq;
+
 static void memcg_kmem_cache_create_func(struct work_struct *w)
 {
 	struct memcg_kmem_cache_create_work *cw =
@@ -2176,7 +2178,7 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
 	cw->cachep = cachep;
 	INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
 
-	schedule_work(&cw->work);
+	queue_work(memcg_kmem_cache_create_wq, &cw->work);
 }
 
 static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
@@ -5774,6 +5776,17 @@ static int __init mem_cgroup_init(void)
 {
 	int cpu, node;
 
+#ifndef CONFIG_SLOB
+	/*
+	 * Kmem cache creation is mostly done with the slab_mutex held,
+	 * so use a special workqueue to avoid stalling all worker
+	 * threads in case lots of cgroups are created simultaneously.
+	 */
+	memcg_kmem_cache_create_wq =
+		alloc_ordered_workqueue("memcg_kmem_cache_create", 0);
+	BUG_ON(!memcg_kmem_cache_create_wq);
+#endif
+
 	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
 				  memcg_hotplug_cpu_dead);
mm/memory.c

@@ -300,15 +300,14 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->end);
-
-	if (!tlb->page_size)
-		tlb->page_size = page_size;
-	else {
-		if (page_size != tlb->page_size)
-			return true;
-	}
+	VM_WARN_ON(tlb->page_size != page_size);
 
 	batch = tlb->active;
+	/*
+	 * Add the page and check if we are full. If so
+	 * force a flush.
+	 */
+	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return true;
@@ -316,7 +315,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 	}
 	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
 
-	batch->pages[batch->nr++] = page;
 	return false;
 }
 
@@ -528,7 +526,11 @@ void free_pgd_range(struct mmu_gather *tlb,
 	end -= PMD_SIZE;
 	if (addr > end - 1)
 		return;
-
+	/*
+	 * We add page table cache pages with PAGE_SIZE,
+	 * (see pte_free_tlb()), flush the tlb if we need
+	 */
+	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -1118,8 +1120,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	pte_t *start_pte;
 	pte_t *pte;
 	swp_entry_t entry;
-	struct page *pending_page = NULL;
 
+	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 again:
 	init_rss_vec(rss);
 	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
@@ -1172,7 +1174,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				print_bad_pte(vma, addr, ptent, page);
 			if (unlikely(__tlb_remove_page(tlb, page))) {
 				force_flush = 1;
-				pending_page = page;
 				addr += PAGE_SIZE;
 				break;
 			}
@@ -1213,11 +1214,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	if (force_flush) {
 		force_flush = 0;
 		tlb_flush_mmu_free(tlb);
-		if (pending_page) {
-			/* remove the page with new size */
-			__tlb_remove_pte_page(tlb, pending_page);
-			pending_page = NULL;
-		}
 		if (addr != end)
 			goto again;
 	}
@@ -1240,7 +1236,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 		if (next - addr != HPAGE_PMD_SIZE) {
 			VM_BUG_ON_VMA(vma_is_anonymous(vma) &&
 			    !rwsem_is_locked(&tlb->mm->mmap_sem), vma);
-			split_huge_pmd(vma, pmd, addr);
+			__split_huge_pmd(vma, pmd, addr, false, NULL);
 		} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 			goto next;
 		/* fall through */
@@ -2939,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return true;
 }
 
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+	struct vm_area_struct *vma = fe->vma;
+
+	pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+	/*
+	 * We are going to consume the prealloc table,
+	 * count that as nr_ptes.
+	 */
+	atomic_long_inc(&vma->vm_mm->nr_ptes);
+	fe->prealloc_pte = 0;
+}
+
 static int do_set_pmd(struct fault_env *fe, struct page *page)
 {
 	struct vm_area_struct *vma = fe->vma;
@@ -2953,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = VM_FAULT_FALLBACK;
 	page = compound_head(page);
 
+	/*
+	 * Archs like ppc64 need additonal space to store information
+	 * related to pte entry. Use the preallocated table for that.
+	 */
+	if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+		fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+		if (!fe->prealloc_pte)
+			return VM_FAULT_OOM;
+		smp_wmb(); /* See comment in __pte_alloc() */
+	}
+
 	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 	if (unlikely(!pmd_none(*fe->pmd)))
 		goto out;
@@ -2966,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
+	/*
+	 * deposit and withdraw with pmd lock held
+	 */
+	if (arch_needs_pgtable_deposit())
+		deposit_prealloc_pte(fe);
 
 	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
 
@@ -2975,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = 0;
 	count_vm_event(THP_FILE_MAPPED);
 out:
+	/*
+	 * If we are going to fallback to pte mapping, do a
+	 * withdraw with pmd lock held.
+	 */
+	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+		fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+							       fe->pmd);
 	spin_unlock(fe->ptl);
 	return ret;
 }
@@ -3014,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 		ret = do_set_pmd(fe, page);
 		if (ret != VM_FAULT_FALLBACK)
-			return ret;
+			goto fault_handled;
 	}
 
 	if (!fe->pte) {
 		ret = pte_alloc_one_map(fe);
 		if (ret)
-			return ret;
+			goto fault_handled;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*fe->pte)))
-		return VM_FAULT_NOPAGE;
+	if (unlikely(!pte_none(*fe->pte))) {
+		ret = VM_FAULT_NOPAGE;
+		goto fault_handled;
+	}
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -3045,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 	/* no need to invalidate: a not-present page won't be cached */
 	update_mmu_cache(vma, fe->address, fe->pte);
+	ret = 0;
 
-	return 0;
+fault_handled:
+	/* preallocated pagetable is unused: free it */
+	if (fe->prealloc_pte) {
+		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+		fe->prealloc_pte = 0;
+	}
+	return ret;
 }
 
 static unsigned long fault_around_bytes __read_mostly =
@@ -3145,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
 
 	fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
 
-	/* preallocated pagetable is unused: free it */
-	if (fe->prealloc_pte) {
-		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
-		fe->prealloc_pte = 0;
-	}
 	/* Huge page is mapped? Page fault is solved */
 	if (pmd_trans_huge(*fe->pmd)) {
 		ret = VM_FAULT_NOPAGE;
@@ -3454,7 +3490,7 @@ static int wp_huge_pmd(struct fault_env *fe, pmd_t orig_pmd)
 
 	/* COW handled on pte level: split pmd */
 	VM_BUG_ON_VMA(fe->vma->vm_flags & VM_SHARED, fe->vma);
-	split_huge_pmd(fe->vma, fe->pmd, fe->address);
+	__split_huge_pmd(fe->vma, fe->pmd, fe->address, false, NULL);
 
 	return VM_FAULT_FALLBACK;
 }
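The alloc_set_pte() change above routes every exit through a single fault_handled label so the preallocated page table is always released. Below is a hedged userspace sketch of the same single-exit pattern; the function, its return codes, and malloc() standing in for the preallocated table are all invented for illustration and are not kernel API.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative return codes, loosely modeled on the fault handling above. */
enum { RET_OK = 0, RET_NOPAGE = 1, RET_FALLBACK = 2 };

static int handle_fault(int want_huge, int pte_already_set)
{
    int ret;
    void *prealloc = malloc(64);    /* stands in for fe->prealloc_pte */

    if (!prealloc)
        return -1;

    if (want_huge) {
        ret = RET_FALLBACK;         /* pretend the huge mapping fell back */
        if (ret != RET_FALLBACK)
            goto fault_handled;
    }

    if (pte_already_set) {
        ret = RET_NOPAGE;           /* re-check under the lock failed */
        goto fault_handled;
    }

    /* ... map the page ... */
    ret = RET_OK;

fault_handled:
    /* the preallocated table is released on every path that reaches here */
    free(prealloc);
    prealloc = NULL;
    return ret;
}

int main(void)
{
    printf("%d %d %d\n", handle_fault(0, 0), handle_fault(0, 1),
           handle_fault(1, 0));
    return 0;
}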
@@ -1727,26 +1727,6 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages)
 static int __init cmdline_parse_movable_node(char *p)
 {
 #ifdef CONFIG_MOVABLE_NODE
-	/*
-	 * Memory used by the kernel cannot be hot-removed because Linux
-	 * cannot migrate the kernel pages. When memory hotplug is
-	 * enabled, we should prevent memblock from allocating memory
-	 * for the kernel.
-	 *
-	 * ACPI SRAT records all hotpluggable memory ranges. But before
-	 * SRAT is parsed, we don't know about it.
-	 *
-	 * The kernel image is loaded into memory at very early time. We
-	 * cannot prevent this anyway. So on NUMA system, we set any
-	 * node the kernel resides in as un-hotpluggable.
-	 *
-	 * Since on modern servers, one node could have double-digit
-	 * gigabytes memory, we can assume the memory around the kernel
-	 * image is also un-hotpluggable. So before SRAT is parsed, just
-	 * allocate memory near the kernel image to try the best to keep
-	 * the kernel away from hotpluggable memory.
-	 */
-	memblock_set_bottom_up(true);
 	movable_node_enabled = true;
 #else
 	pr_warn("movable_node option not supported\n");
@@ -276,7 +276,9 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 			return ERR_PTR(-EINVAL);
 		}
 	} else if (mode == MPOL_LOCAL) {
-		if (!nodes_empty(*nodes))
+		if (!nodes_empty(*nodes) ||
+		    (flags & MPOL_F_STATIC_NODES) ||
+		    (flags & MPOL_F_RELATIVE_NODES))
 			return ERR_PTR(-EINVAL);
 		mode = MPOL_PREFERRED;
 	} else if (nodes_empty(*nodes))
@@ -496,7 +498,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		page = pmd_page(*pmd);
 		if (is_huge_zero_page(page)) {
 			spin_unlock(ptl);
-			split_huge_pmd(vma, pmd, addr);
+			__split_huge_pmd(vma, pmd, addr, false, NULL);
 		} else {
 			get_page(page);
 			spin_unlock(ptl);
@@ -1679,25 +1681,17 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
 static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
 	int nd)
 {
-	switch (policy->mode) {
-	case MPOL_PREFERRED:
-		if (!(policy->flags & MPOL_F_LOCAL))
-			nd = policy->v.preferred_node;
-		break;
-	case MPOL_BIND:
+	if (policy->mode == MPOL_PREFERRED && !(policy->flags & MPOL_F_LOCAL))
+		nd = policy->v.preferred_node;
+	else {
 		/*
-		 * Normally, MPOL_BIND allocations are node-local within the
-		 * allowed nodemask. However, if __GFP_THISNODE is set and the
-		 * current node isn't part of the mask, we use the zonelist for
-		 * the first node in the mask instead.
+		 * __GFP_THISNODE shouldn't even be used with the bind policy
+		 * because we might easily break the expectation to stay on the
+		 * requested node and not break the policy.
		 */
-		if (unlikely(gfp & __GFP_THISNODE) &&
-				unlikely(!node_isset(nd, policy->v.nodes)))
-			nd = first_node(policy->v.nodes);
-		break;
-	default:
-		BUG();
+		WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE));
 	}
 
 	return node_zonelist(nd, gfp);
 }
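Stripped of its switch statement, the rewritten policy_zonelist() reduces to one decision: use the preferred node unless the policy is marked local, and merely warn when __GFP_THISNODE is combined with MPOL_BIND. A standalone sketch of that decision follows; the enum values, flag bits, and struct layout are made up for illustration and do not match the kernel's definitions.

#include <stdio.h>

/* Illustrative stand-ins for the policy modes and flags. */
enum mpol_mode { MPOL_DEFAULT, MPOL_PREFERRED, MPOL_BIND };
#define MPOL_F_LOCAL   0x1
#define GFP_THISNODE   0x2

struct mempolicy {
    enum mpol_mode mode;
    unsigned flags;
    int preferred_node;
};

/* Pick the node whose zonelist the allocation should start from. */
static int pick_node(unsigned gfp, const struct mempolicy *pol, int local_node)
{
    int nd = local_node;

    if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
        nd = pol->preferred_node;
    else if (pol->mode == MPOL_BIND && (gfp & GFP_THISNODE))
        fprintf(stderr, "warning: __GFP_THISNODE used with MPOL_BIND\n");

    return nd;
}

int main(void)
{
    struct mempolicy pref  = { MPOL_PREFERRED, 0, 3 };
    struct mempolicy local = { MPOL_PREFERRED, MPOL_F_LOCAL, 3 };
    struct mempolicy bind  = { MPOL_BIND, 0, 0 };

    printf("%d %d %d\n", pick_node(0, &pref, 1),
           pick_node(0, &local, 1), pick_node(GFP_THISNODE, &bind, 1));
    return 0;
}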
mm/migrate.c

@@ -168,8 +168,6 @@ void putback_movable_pages(struct list_head *l)
 			continue;
 		}
 		list_del(&page->lru);
-		dec_node_page_state(page, NR_ISOLATED_ANON +
-				page_is_file_cache(page));
 		/*
 		 * We isolated non-lru movable page so here we can use
 		 * __PageMovable because LRU page's mapping cannot have
@@ -186,6 +184,8 @@ void putback_movable_pages(struct list_head *l)
 			put_page(page);
 		} else {
 			putback_lru_page(page);
+			dec_node_page_state(page, NR_ISOLATED_ANON +
+					page_is_file_cache(page));
 		}
 	}
 }
@@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		SetPageDirty(newpage);
 	}
 
-	radix_tree_replace_slot(pslot, newpage);
+	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
 	/*
 	 * Drop cache reference from old page by unfreezing
@@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	get_page(newpage);
 
-	radix_tree_replace_slot(pslot, newpage);
+	radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
 
 	page_ref_unfreeze(page, expected_count - 1);
 
@@ -1121,8 +1121,15 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 		 * restored.
 		 */
 		list_del(&page->lru);
-		dec_node_page_state(page, NR_ISOLATED_ANON +
-				page_is_file_cache(page));
+
+		/*
+		 * Compaction can migrate also non-LRU pages which are
+		 * not accounted to NR_ISOLATED_*. They can be recognized
+		 * as __PageMovable
+		 */
+		if (likely(!__PageMovable(page)))
+			dec_node_page_state(page, NR_ISOLATED_ANON +
+					page_is_file_cache(page));
 	}
 
 	/*
@@ -69,11 +69,17 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	pte_t *pte, oldpte;
 	spinlock_t *ptl;
 	unsigned long pages = 0;
+	int target_node = NUMA_NO_NODE;
 
 	pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
 	if (!pte)
 		return 0;
 
+	/* Get target node for single threaded private VMAs */
+	if (prot_numa && !(vma->vm_flags & VM_SHARED) &&
+	    atomic_read(&vma->vm_mm->mm_users) == 1)
+		target_node = numa_node_id();
+
 	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
@@ -95,6 +101,13 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				/* Avoid TLB flush if possible */
 				if (pte_protnone(oldpte))
 					continue;
+
+				/*
+				 * Don't mess with PTEs if page is already on the node
+				 * a single-threaded process is running on.
+				 */
+				if (target_node == page_to_nid(page))
+					continue;
 			}
 
 			ptent = ptep_modify_prot_start(mm, addr, pte);
@@ -163,7 +176,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
 
 		if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
 			if (next - addr != HPAGE_PMD_SIZE) {
-				split_huge_pmd(vma, pmd, addr);
+				__split_huge_pmd(vma, pmd, addr, false, NULL);
 				if (pmd_trans_unstable(pmd))
 					continue;
 			} else {
@@ -484,6 +497,8 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
 	return do_mprotect_pkey(start, len, prot, -1);
 }
 
+#ifdef CONFIG_ARCH_HAS_PKEYS
+
 SYSCALL_DEFINE4(pkey_mprotect, unsigned long, start, size_t, len,
 		unsigned long, prot, int, pkey)
 {
@@ -534,3 +549,5 @@ SYSCALL_DEFINE1(pkey_free, int, pkey)
 	 */
 	return ret;
 }
+
+#endif /* CONFIG_ARCH_HAS_PKEYS */
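The change_pte_range() hunks skip the NUMA-hinting PTE update when a single-threaded private mapping already has the page on the local node. The predicate is easy to isolate; here is a hedged sketch with invented stand-ins for the VMA fields (this is not the kernel's struct vm_area_struct).

#include <stdio.h>
#include <stdbool.h>

#define VM_SHARED      0x8
#define NUMA_NO_NODE   (-1)

struct vma_like {
    unsigned long vm_flags;
    int mm_users;              /* stand-in for atomic_read(&mm->mm_users) */
};

/* Decide which node, if any, lets us skip touching PTEs during prot_numa. */
static int numa_target_node(const struct vma_like *vma, bool prot_numa,
                            int this_node)
{
    if (prot_numa && !(vma->vm_flags & VM_SHARED) && vma->mm_users == 1)
        return this_node;
    return NUMA_NO_NODE;
}

static bool skip_pte(int target_node, int page_node)
{
    /* Don't mess with a PTE whose page is already on that node. */
    return target_node == page_node;
}

int main(void)
{
    struct vma_like priv = { 0, 1 }, shared = { VM_SHARED, 1 };
    int t1 = numa_target_node(&priv, true, 2);
    int t2 = numa_target_node(&shared, true, 2);

    printf("%d %d skip=%d\n", t1, t2, skip_pte(t1, 2));
    return 0;
}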
@@ -2058,8 +2058,12 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone,
  * potentially hurts the reliability of high-order allocations when under
  * intense memory pressure but failed atomic allocations should be easier
  * to recover from than an OOM.
+ *
+ * If @force is true, try to unreserve a pageblock even though highatomic
+ * pageblock is exhausted.
  */
-static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
+static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
+						bool force)
 {
 	struct zonelist *zonelist = ac->zonelist;
 	unsigned long flags;
@@ -2067,11 +2071,16 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
 	struct zone *zone;
 	struct page *page;
 	int order;
+	bool ret;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx,
 								ac->nodemask) {
-		/* Preserve at least one pageblock */
-		if (zone->nr_reserved_highatomic <= pageblock_nr_pages)
+		/*
+		 * Preserve at least one pageblock unless memory pressure
+		 * is really high.
+		 */
+		if (!force && zone->nr_reserved_highatomic <=
+					pageblock_nr_pages)
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -2085,13 +2094,25 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
 				continue;
 
 			/*
-			 * It should never happen but changes to locking could
-			 * inadvertently allow a per-cpu drain to add pages
-			 * to MIGRATE_HIGHATOMIC while unreserving so be safe
-			 * and watch for underflows.
+			 * In page freeing path, migratetype change is racy so
+			 * we can counter several free pages in a pageblock
+			 * in this loop althoug we changed the pageblock type
+			 * from highatomic to ac->migratetype. So we should
+			 * adjust the count once.
 			 */
-			zone->nr_reserved_highatomic -= min(pageblock_nr_pages,
-				zone->nr_reserved_highatomic);
+			if (get_pageblock_migratetype(page) ==
+							MIGRATE_HIGHATOMIC) {
+				/*
+				 * It should never happen but changes to
+				 * locking could inadvertently allow a per-cpu
+				 * drain to add pages to MIGRATE_HIGHATOMIC
+				 * while unreserving so be safe and watch for
+				 * underflows.
+				 */
+				zone->nr_reserved_highatomic -= min(
+						pageblock_nr_pages,
+						zone->nr_reserved_highatomic);
+			}
 
 			/*
 			 * Convert to ac->migratetype and avoid the normal
@@ -2103,12 +2124,16 @@ static void unreserve_highatomic_pageblock(const struct alloc_context *ac)
 			 * may increase.
 			 */
 			set_pageblock_migratetype(page, ac->migratetype);
-			move_freepages_block(zone, page, ac->migratetype);
-			spin_unlock_irqrestore(&zone->lock, flags);
-			return;
+			ret = move_freepages_block(zone, page, ac->migratetype);
+			if (ret) {
+				spin_unlock_irqrestore(&zone->lock, flags);
+				return ret;
+			}
 		}
 		spin_unlock_irqrestore(&zone->lock, flags);
 	}
+
+	return false;
 }
 
 /* Remove an element from the buddy allocator from the fallback list */
@@ -2133,7 +2158,8 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 
 	page = list_first_entry(&area->free_list[fallback_mt],
 					struct page, lru);
-	if (can_steal)
+	if (can_steal &&
+		get_pageblock_migratetype(page) != MIGRATE_HIGHATOMIC)
 		steal_suitable_fallback(zone, page, start_migratetype);
 
 	/* Remove the page from the freelists */
@@ -2192,7 +2218,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list,
 			int migratetype, bool cold)
 {
-	int i;
+	int i, alloced = 0;
 
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
@@ -2217,13 +2243,21 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		else
 			list_add_tail(&page->lru, list);
 		list = &page->lru;
+		alloced++;
 		if (is_migrate_cma(get_pcppage_migratetype(page)))
 			__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
 					      -(1 << order));
 	}
+
+	/*
+	 * i pages were removed from the buddy list even if some leak due
+	 * to check_pcp_refill failing so adjust NR_FREE_PAGES based
+	 * on i. Do not confuse with 'alloced' which is the number of
+	 * pages added to the pcp list.
+	 */
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
 	spin_unlock(&zone->lock);
-	return i;
+	return alloced;
 }
 
 #ifdef CONFIG_NUMA
@@ -2534,7 +2568,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
 		struct page *endpage = page + (1 << order) - 1;
 		for (; page < endpage; page += pageblock_nr_pages) {
 			int mt = get_pageblock_migratetype(page);
-			if (!is_migrate_isolate(mt) && !is_migrate_cma(mt))
+			if (!is_migrate_isolate(mt) && !is_migrate_cma(mt)
+				&& mt != MIGRATE_HIGHATOMIC)
 				set_pageblock_migratetype(page,
 							  MIGRATE_MOVABLE);
 		}
@@ -3305,7 +3340,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
 	 * Shrink them them and try again
 	 */
 	if (!page && !drained) {
-		unreserve_highatomic_pageblock(ac);
+		unreserve_highatomic_pageblock(ac, false);
 		drain_all_pages(NULL);
 		drained = true;
 		goto retry;
@@ -3422,8 +3457,10 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
 	 * Make sure we converge to OOM if we cannot make any progress
 	 * several times in the row.
 	 */
-	if (*no_progress_loops > MAX_RECLAIM_RETRIES)
-		return false;
+	if (*no_progress_loops > MAX_RECLAIM_RETRIES) {
+		/* Before OOM, exhaust highatomic_reserve */
+		return unreserve_highatomic_pageblock(ac, true);
+	}
 
 	/*
 	 * Keep reclaiming pages while there is a chance this will lead
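The rmqueue_bulk() change keeps two counters: i, the pages taken off the buddy list (and therefore subtracted from NR_FREE_PAGES even when a page later fails the pcp-refill check), and alloced, the pages actually handed to the per-cpu list. A toy sketch of why the two can differ; page_ok() is a placeholder standing in for check_pcp_refill(), not the real check.

#include <stdio.h>

/* Placeholder for check_pcp_refill(): pretend every third page is bad. */
static int page_ok(int page_id)
{
    return page_id % 3 != 0;
}

int main(void)
{
    int count = 10;
    int i, alloced = 0;
    int nr_free_pages = 1000;

    for (i = 0; i < count; i++) {
        if (!page_ok(i))
            continue;       /* page leaked back, never added to the pcp list */
        alloced++;
    }

    /*
     * i pages were removed from the buddy list even if some leaked,
     * so the free-page counter is adjusted by i, not by alloced.
     */
    nr_free_pages -= i;

    printf("removed %d, added to pcp %d, NR_FREE_PAGES now %d\n",
           i, alloced, nr_free_pages);
    return 0;
}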
mm/percpu.c

@@ -2093,6 +2093,8 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 	size_t pages_size;
 	struct page **pages;
 	int unit, i, j, rc;
+	int upa;
+	int nr_g0_units;
 
 	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
 
@@ -2100,7 +2102,12 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 	if (IS_ERR(ai))
 		return PTR_ERR(ai);
 	BUG_ON(ai->nr_groups != 1);
-	BUG_ON(ai->groups[0].nr_units != num_possible_cpus());
+	upa = ai->alloc_size/ai->unit_size;
+	nr_g0_units = roundup(num_possible_cpus(), upa);
+	if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) {
+		pcpu_free_alloc_info(ai);
+		return -EINVAL;
+	}
 
 	unit_pages = ai->unit_size >> PAGE_SHIFT;
 
@@ -2111,21 +2118,22 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 
 	/* allocate pages */
 	j = 0;
-	for (unit = 0; unit < num_possible_cpus(); unit++)
+	for (unit = 0; unit < num_possible_cpus(); unit++) {
+		unsigned int cpu = ai->groups[0].cpu_map[unit];
 		for (i = 0; i < unit_pages; i++) {
-			unsigned int cpu = ai->groups[0].cpu_map[unit];
 			void *ptr;
 
 			ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE);
 			if (!ptr) {
 				pr_warn("failed to allocate %s page for cpu%u\n",
-					psize_str, cpu);
+						psize_str, cpu);
 				goto enomem;
 			}
 			/* kmemleak tracks the percpu allocations separately */
 			kmemleak_free(ptr);
 			pages[j++] = virt_to_page(ptr);
 		}
+	}
 
 	/* allocate vm area, map the pages and copy static data */
 	vm.flags = VM_ALLOC;
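The pcpu_page_first_chunk() change replaces a BUG_ON() with a graceful -EINVAL when group 0 does not contain num_possible_cpus() rounded up to the per-allocation unit count. The check itself is a simple round-up; a small standalone sketch with invented sizes follows.

#include <stdio.h>

/* Round a up to the next multiple of b (b > 0). */
static unsigned roundup_to(unsigned a, unsigned b)
{
    return ((a + b - 1) / b) * b;
}

int main(void)
{
    unsigned alloc_size = 32768, unit_size = 8192;  /* invented sizes */
    unsigned possible_cpus = 6;
    unsigned nr_g0_units = 8;       /* what the allocation info reports */

    unsigned upa = alloc_size / unit_size;          /* units per allocation */
    unsigned expected = roundup_to(possible_cpus, upa);

    if (nr_g0_units != expected) {
        fprintf(stderr, "unexpected group-0 units: %u != %u\n",
                nr_g0_units, expected);
        return 1;                   /* mirrors the new -EINVAL path */
    }
    printf("group 0 holds %u units for %u CPUs (upa=%u)\n",
           nr_g0_units, possible_cpus, upa);
    return 0;
}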
@@ -207,12 +207,21 @@ int __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
  * memory at once.
  */
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
-		pgoff_t offset, unsigned long nr_to_read)
+			       pgoff_t offset, unsigned long nr_to_read)
 {
+	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+	struct file_ra_state *ra = &filp->f_ra;
+	unsigned long max_pages;
+
 	if (unlikely(!mapping->a_ops->readpage && !mapping->a_ops->readpages))
 		return -EINVAL;
 
-	nr_to_read = min(nr_to_read, inode_to_bdi(mapping->host)->ra_pages);
+	/*
+	 * If the request exceeds the readahead window, allow the read to
+	 * be up to the optimal hardware IO size
+	 */
+	max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages);
+	nr_to_read = min(nr_to_read, max_pages);
 	while (nr_to_read) {
 		int err;
 
@@ -369,9 +378,17 @@ ondemand_readahead(struct address_space *mapping,
 		   bool hit_readahead_marker, pgoff_t offset,
 		   unsigned long req_size)
 {
-	unsigned long max = ra->ra_pages;
+	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
+	unsigned long max_pages = ra->ra_pages;
 	pgoff_t prev_offset;
 
+	/*
+	 * If the request exceeds the readahead window, allow the read to
+	 * be up to the optimal hardware IO size
+	 */
+	if (req_size > max_pages && bdi->io_pages > max_pages)
+		max_pages = min(req_size, bdi->io_pages);
+
 	/*
 	 * start of file
 	 */
@@ -385,7 +402,7 @@ ondemand_readahead(struct address_space *mapping,
 	if ((offset == (ra->start + ra->size - ra->async_size) ||
 	     offset == (ra->start + ra->size))) {
 		ra->start += ra->size;
-		ra->size = get_next_ra_size(ra, max);
+		ra->size = get_next_ra_size(ra, max_pages);
 		ra->async_size = ra->size;
 		goto readit;
 	}
@@ -400,16 +417,16 @@ ondemand_readahead(struct address_space *mapping,
 		pgoff_t start;
 
 		rcu_read_lock();
-		start = page_cache_next_hole(mapping, offset + 1, max);
+		start = page_cache_next_hole(mapping, offset + 1, max_pages);
 		rcu_read_unlock();
 
-		if (!start || start - offset > max)
+		if (!start || start - offset > max_pages)
 			return 0;
 
 		ra->start = start;
 		ra->size = start - offset;	/* old async_size */
 		ra->size += req_size;
-		ra->size = get_next_ra_size(ra, max);
+		ra->size = get_next_ra_size(ra, max_pages);
 		ra->async_size = ra->size;
 		goto readit;
 	}
@@ -417,7 +434,7 @@ ondemand_readahead(struct address_space *mapping,
 	/*
 	 * oversize read
 	 */
-	if (req_size > max)
+	if (req_size > max_pages)
 		goto initial_readahead;
 
 	/*
@@ -433,7 +450,7 @@ ondemand_readahead(struct address_space *mapping,
 	 * Query the page cache and look for the traces(cached history pages)
 	 * that a sequential stream would leave behind.
 	 */
-	if (try_context_readahead(mapping, ra, offset, req_size, max))
+	if (try_context_readahead(mapping, ra, offset, req_size, max_pages))
 		goto readit;
 
 	/*
@@ -444,7 +461,7 @@ ondemand_readahead(struct address_space *mapping,
 
 initial_readahead:
 	ra->start = offset;
-	ra->size = get_init_ra_size(req_size, max);
+	ra->size = get_init_ra_size(req_size, max_pages);
 	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
 
 readit:
@@ -454,7 +471,7 @@ ondemand_readahead(struct address_space *mapping,
 	 * the resulted next readahead window into the current one.
 	 */
 	if (offset == ra->start && ra->size == ra->async_size) {
-		ra->async_size = get_next_ra_size(ra, max);
+		ra->async_size = get_next_ra_size(ra, max_pages);
 		ra->size += ra->async_size;
 	}
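Both readahead paths now widen the window from ra_pages to the device's optimal I/O size when the request is larger than the default window. The computation is a pair of comparisons; below is a standalone sketch with shortened names and invented values, not the kernel's file_ra_state or backing_dev_info fields.

#include <stdio.h>

/* Widen the readahead window up to the optimal hardware I/O size. */
static unsigned long readahead_max(unsigned long req_size,
                                   unsigned long ra_pages,
                                   unsigned long io_pages)
{
    unsigned long max_pages = ra_pages;

    if (req_size > max_pages && io_pages > max_pages)
        max_pages = req_size < io_pages ? req_size : io_pages;
    return max_pages;
}

int main(void)
{
    /* small request: stays at the default window */
    printf("%lu\n", readahead_max(16, 32, 256));
    /* large request on a device with a large optimal I/O size */
    printf("%lu\n", readahead_max(128, 32, 256));
    /* large request, but the device cannot do better than ra_pages */
    printf("%lu\n", readahead_max(128, 32, 16));
    return 0;
}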
mm/rmap.c

@@ -141,14 +141,15 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
 }
 
 /**
- * anon_vma_prepare - attach an anon_vma to a memory region
+ * __anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
  *
  * This makes sure the memory mapping described by 'vma' has
  * an 'anon_vma' attached to it, so that we can associate the
  * anonymous pages mapped into it with that anon_vma.
  *
- * The common case will be that we already have one, but if
+ * The common case will be that we already have one, which
+ * is handled inline by anon_vma_prepare(). But if
  * not we either need to find an adjacent mapping that we
  * can re-use the anon_vma from (very common when the only
  * reason for splitting a vma has been mprotect()), or we
@@ -167,48 +168,46 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
  *
  * This must be called with the mmap_sem held for reading.
  */
-int anon_vma_prepare(struct vm_area_struct *vma)
+int __anon_vma_prepare(struct vm_area_struct *vma)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct mm_struct *mm = vma->vm_mm;
+	struct anon_vma *anon_vma, *allocated;
 	struct anon_vma_chain *avc;
 
 	might_sleep();
-	if (unlikely(!anon_vma)) {
-		struct mm_struct *mm = vma->vm_mm;
-		struct anon_vma *allocated;
 
-		avc = anon_vma_chain_alloc(GFP_KERNEL);
-		if (!avc)
-			goto out_enomem;
+	avc = anon_vma_chain_alloc(GFP_KERNEL);
+	if (!avc)
+		goto out_enomem;
 
-		anon_vma = find_mergeable_anon_vma(vma);
-		allocated = NULL;
-		if (!anon_vma) {
-			anon_vma = anon_vma_alloc();
-			if (unlikely(!anon_vma))
-				goto out_enomem_free_avc;
-			allocated = anon_vma;
-		}
-
-		anon_vma_lock_write(anon_vma);
-		/* page_table_lock to protect against threads */
-		spin_lock(&mm->page_table_lock);
-		if (likely(!vma->anon_vma)) {
-			vma->anon_vma = anon_vma;
-			anon_vma_chain_link(vma, avc, anon_vma);
-			/* vma reference or self-parent link for new root */
-			anon_vma->degree++;
-			allocated = NULL;
-			avc = NULL;
-		}
-		spin_unlock(&mm->page_table_lock);
-		anon_vma_unlock_write(anon_vma);
-
-		if (unlikely(allocated))
-			put_anon_vma(allocated);
-		if (unlikely(avc))
-			anon_vma_chain_free(avc);
+	anon_vma = find_mergeable_anon_vma(vma);
+	allocated = NULL;
+	if (!anon_vma) {
+		anon_vma = anon_vma_alloc();
+		if (unlikely(!anon_vma))
+			goto out_enomem_free_avc;
+		allocated = anon_vma;
+	}
+
+	anon_vma_lock_write(anon_vma);
+	/* page_table_lock to protect against threads */
+	spin_lock(&mm->page_table_lock);
+	if (likely(!vma->anon_vma)) {
+		vma->anon_vma = anon_vma;
+		anon_vma_chain_link(vma, avc, anon_vma);
+		/* vma reference or self-parent link for new root */
+		anon_vma->degree++;
+		allocated = NULL;
+		avc = NULL;
+	}
+	spin_unlock(&mm->page_table_lock);
+	anon_vma_unlock_write(anon_vma);
+
+	if (unlikely(allocated))
+		put_anon_vma(allocated);
+	if (unlikely(avc))
+		anon_vma_chain_free(avc);
 
 	return 0;
 
 out_enomem_free_avc:
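Renaming anon_vma_prepare() to __anon_vma_prepare() only pays off together with an inline fast-path wrapper that keeps the common already-prepared case out of line; that wrapper lives in a header that is not part of this excerpt. A hedged sketch of the wrapper pattern in plain C follows (these are not the kernel's actual declarations).

#include <stdio.h>
#include <stdlib.h>

struct vma { void *anon_vma; };

/* Slow path: allocate and attach the state; called only when it is missing. */
static int prepare_slow(struct vma *vma)
{
    vma->anon_vma = malloc(1);
    printf("slow path taken\n");
    return vma->anon_vma ? 0 : -1;
}

/* Fast-path wrapper: the common case is a single pointer test. */
static inline int prepare(struct vma *vma)
{
    if (vma->anon_vma)
        return 0;
    return prepare_slow(vma);
}

int main(void)
{
    struct vma v = { NULL };

    prepare(&v);        /* first call goes through the slow path */
    prepare(&v);        /* second call returns immediately */
    free(v.anon_vma);
    return 0;
}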
mm/shmem.c

@@ -300,18 +300,19 @@ void shmem_uncharge(struct inode *inode, long pages)
 static int shmem_radix_tree_replace(struct address_space *mapping,
 			pgoff_t index, void *expected, void *replacement)
 {
+	struct radix_tree_node *node;
 	void **pslot;
 	void *item;
 
 	VM_BUG_ON(!expected);
 	VM_BUG_ON(!replacement);
-	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
-	if (!pslot)
+	item = __radix_tree_lookup(&mapping->page_tree, index, &node, &pslot);
+	if (!item)
 		return -ENOENT;
-	item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
 	if (item != expected)
 		return -ENOENT;
-	radix_tree_replace_slot(pslot, replacement);
+	__radix_tree_replace(&mapping->page_tree, node, pslot,
+			     replacement, NULL, NULL);
 	return 0;
 }
 
@@ -370,6 +371,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 
 int shmem_huge __read_mostly;
 
+#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
 static int shmem_parse_huge(const char *str)
 {
 	if (!strcmp(str, "never"))
@@ -407,6 +409,7 @@ static const char *shmem_format_huge(int huge)
 		return "bad_val";
 	}
 }
+#endif
 
 static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 		struct shrink_control *sc, unsigned long nr_to_split)
@@ -1539,7 +1542,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		struct mm_struct *fault_mm, int *fault_type)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct shmem_inode_info *info;
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct shmem_sb_info *sbinfo;
 	struct mm_struct *charge_mm;
 	struct mem_cgroup *memcg;
@@ -1589,7 +1592,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	 * Fast cache lookup did not find it:
 	 * bring it back from swap or allocate.
 	 */
-	info = SHMEM_I(inode);
 	sbinfo = SHMEM_SB(inode->i_sb);
 	charge_mm = fault_mm ? : current->mm;
 
@@ -1837,7 +1839,6 @@ alloc_nohuge: page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 		put_page(page);
 	}
 	if (error == -ENOSPC && !once++) {
-		info = SHMEM_I(inode);
 		spin_lock_irq(&info->lock);
 		shmem_recalc_inode(inode);
 		spin_unlock_irq(&info->lock);
mm/slab.c

@@ -227,13 +227,14 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
 	INIT_LIST_HEAD(&parent->slabs_full);
 	INIT_LIST_HEAD(&parent->slabs_partial);
 	INIT_LIST_HEAD(&parent->slabs_free);
+	parent->total_slabs = 0;
+	parent->free_slabs = 0;
 	parent->shared = NULL;
 	parent->alien = NULL;
 	parent->colour_next = 0;
 	spin_lock_init(&parent->list_lock);
 	parent->free_objects = 0;
 	parent->free_touched = 0;
-	parent->num_slabs = 0;
 }
 
 #define MAKE_LIST(cachep, listp, slab, nodeid) \
@@ -1366,7 +1367,6 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 {
 #if DEBUG
 	struct kmem_cache_node *n;
-	struct page *page;
 	unsigned long flags;
 	int node;
 	static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -1381,32 +1381,18 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
 		cachep->name, cachep->size, cachep->gfporder);
 
 	for_each_kmem_cache_node(cachep, node, n) {
-		unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
-		unsigned long active_slabs = 0, num_slabs = 0;
-		unsigned long num_slabs_partial = 0, num_slabs_free = 0;
-		unsigned long num_slabs_full;
+		unsigned long total_slabs, free_slabs, free_objs;
 
 		spin_lock_irqsave(&n->list_lock, flags);
-		num_slabs = n->num_slabs;
-		list_for_each_entry(page, &n->slabs_partial, lru) {
-			active_objs += page->active;
-			num_slabs_partial++;
-		}
-		list_for_each_entry(page, &n->slabs_free, lru)
-			num_slabs_free++;
-
-		free_objects += n->free_objects;
+		total_slabs = n->total_slabs;
+		free_slabs = n->free_slabs;
+		free_objs = n->free_objects;
 		spin_unlock_irqrestore(&n->list_lock, flags);
 
-		num_objs = num_slabs * cachep->num;
-		active_slabs = num_slabs - num_slabs_free;
-		num_slabs_full = num_slabs -
-			(num_slabs_partial + num_slabs_free);
-		active_objs += (num_slabs_full * cachep->num);
-
-		pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
-			node, active_slabs, num_slabs, active_objs, num_objs,
-			free_objects);
+		pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld\n",
+			node, total_slabs - free_slabs, total_slabs,
+			(total_slabs * cachep->num) - free_objs,
+			total_slabs * cachep->num);
 	}
 #endif
 }
@@ -2318,7 +2304,8 @@ static int drain_freelist(struct kmem_cache *cache,
 
 		page = list_entry(p, struct page, lru);
 		list_del(&page->lru);
-		n->num_slabs--;
+		n->free_slabs--;
+		n->total_slabs--;
 		/*
 		 * Safe to drop the lock. The slab is no longer linked
 		 * to the cache.
@@ -2332,7 +2319,7 @@ static int drain_freelist(struct kmem_cache *cache,
 	return nr_freed;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
 	int ret = 0;
 	int node;
@@ -2352,7 +2339,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
 
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
-	return __kmem_cache_shrink(cachep, false);
+	return __kmem_cache_shrink(cachep);
 }
 
 void __kmem_cache_release(struct kmem_cache *cachep)
@@ -2753,12 +2740,13 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
 	n = get_node(cachep, page_to_nid(page));
 
 	spin_lock(&n->list_lock);
-	if (!page->active)
+	n->total_slabs++;
+	if (!page->active) {
 		list_add_tail(&page->lru, &(n->slabs_free));
-	else
+		n->free_slabs++;
+	} else
 		fixup_slab_list(cachep, n, page, &list);
 
-	n->num_slabs++;
 	STATS_INC_GROWN(cachep);
 	n->free_objects += cachep->num - page->active;
 	spin_unlock(&n->list_lock);
@@ -2903,9 +2891,10 @@ static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
 
 	/* Move pfmemalloc slab to the end of list to speed up next search */
 	list_del(&page->lru);
-	if (!page->active)
+	if (!page->active) {
 		list_add_tail(&page->lru, &n->slabs_free);
-	else
+		n->free_slabs++;
+	} else
 		list_add_tail(&page->lru, &n->slabs_partial);
 
 	list_for_each_entry(page, &n->slabs_partial, lru) {
@@ -2913,9 +2902,12 @@ static noinline struct page *get_valid_first_slab(struct kmem_cache_node *n,
 		return page;
 	}
 
+	n->free_touched = 1;
 	list_for_each_entry(page, &n->slabs_free, lru) {
-		if (!PageSlabPfmemalloc(page))
+		if (!PageSlabPfmemalloc(page)) {
+			n->free_slabs--;
 			return page;
+		}
 	}
 
 	return NULL;
@@ -2925,16 +2917,18 @@ static struct page *get_first_slab(struct kmem_cache_node *n, bool pfmemalloc)
 {
 	struct page *page;
 
-	page = list_first_entry_or_null(&n->slabs_partial,
-			struct page, lru);
+	assert_spin_locked(&n->list_lock);
+	page = list_first_entry_or_null(&n->slabs_partial, struct page, lru);
 	if (!page) {
 		n->free_touched = 1;
-		page = list_first_entry_or_null(&n->slabs_free,
-				struct page, lru);
+		page = list_first_entry_or_null(&n->slabs_free, struct page,
+						lru);
+		if (page)
+			n->free_slabs--;
 	}
 
 	if (sk_memalloc_socks())
-		return get_valid_first_slab(n, page, pfmemalloc);
+		page = get_valid_first_slab(n, page, pfmemalloc);
 
 	return page;
 }
@@ -3434,9 +3428,10 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
 		STATS_DEC_ACTIVE(cachep);
 
 		/* fixup slab chains */
-		if (page->active == 0)
+		if (page->active == 0) {
 			list_add(&page->lru, &n->slabs_free);
-		else {
+			n->free_slabs++;
+		} else {
 			/* Unconditionally move a slab to the end of the
 			 * partial list on free - maximum time for the
 			 * other objects to be freed, too.
@@ -3450,7 +3445,8 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
 
 		page = list_last_entry(&n->slabs_free, struct page, lru);
 		list_move(&page->lru, list);
-		n->num_slabs--;
+		n->free_slabs--;
+		n->total_slabs--;
 	}
 }
 
@@ -4102,64 +4098,33 @@ static void cache_reap(struct work_struct *w)
 #ifdef CONFIG_SLABINFO
 void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
 {
-	struct page *page;
-	unsigned long active_objs;
-	unsigned long num_objs;
-	unsigned long active_slabs = 0;
-	unsigned long num_slabs, free_objects = 0, shared_avail = 0;
-	unsigned long num_slabs_partial = 0, num_slabs_free = 0;
-	unsigned long num_slabs_full = 0;
-	const char *name;
-	char *error = NULL;
+	unsigned long active_objs, num_objs, active_slabs;
+	unsigned long total_slabs = 0, free_objs = 0, shared_avail = 0;
+	unsigned long free_slabs = 0;
 	int node;
 	struct kmem_cache_node *n;
 
-	active_objs = 0;
-	num_slabs = 0;
 	for_each_kmem_cache_node(cachep, node, n) {
-
 		check_irq_on();
 		spin_lock_irq(&n->list_lock);
 
-		num_slabs += n->num_slabs;
+		total_slabs += n->total_slabs;
+		free_slabs += n->free_slabs;
+		free_objs += n->free_objects;
 
-		list_for_each_entry(page, &n->slabs_partial, lru) {
-			if (page->active == cachep->num && !error)
-				error = "slabs_partial accounting error";
-			if (!page->active && !error)
-				error = "slabs_partial accounting error";
-			active_objs += page->active;
-			num_slabs_partial++;
-		}
-
-		list_for_each_entry(page, &n->slabs_free, lru) {
-			if (page->active && !error)
-				error = "slabs_free accounting error";
-			num_slabs_free++;
-		}
-
-		free_objects += n->free_objects;
 		if (n->shared)
 			shared_avail += n->shared->avail;
 
 		spin_unlock_irq(&n->list_lock);
 	}
-	num_objs = num_slabs * cachep->num;
-	active_slabs = num_slabs - num_slabs_free;
-	num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
-	active_objs += (num_slabs_full * cachep->num);
-
-	if (num_objs - active_objs != free_objects && !error)
-		error = "free_objects accounting error";
-
-	name = cachep->name;
-	if (error)
-		pr_err("slab: cache %s error: %s\n", name, error);
+	num_objs = total_slabs * cachep->num;
+	active_slabs = total_slabs - free_slabs;
+	active_objs = num_objs - free_objs;
 
 	sinfo->active_objs = active_objs;
 	sinfo->num_objs = num_objs;
 	sinfo->active_slabs = active_slabs;
-	sinfo->num_slabs = num_slabs;
+	sinfo->num_slabs = total_slabs;
 	sinfo->shared_avail = shared_avail;
 	sinfo->limit = cachep->limit;
 	sinfo->batchcount = cachep->batchcount;
mm/slab.h

@@ -142,11 +142,26 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 #define SLAB_CACHE_FLAGS (0)
 #endif
 
+/* Common flags available with current configuration */
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
+/* Common flags permitted for kmem_cache_create */
+#define SLAB_FLAGS_PERMITTED (SLAB_CORE_FLAGS | \
+			      SLAB_RED_ZONE | \
+			      SLAB_POISON | \
+			      SLAB_STORE_USER | \
+			      SLAB_TRACE | \
+			      SLAB_CONSISTENCY_CHECKS | \
+			      SLAB_MEM_SPREAD | \
+			      SLAB_NOLEAKTRACE | \
+			      SLAB_RECLAIM_ACCOUNT | \
+			      SLAB_TEMPORARY | \
+			      SLAB_NOTRACK | \
+			      SLAB_ACCOUNT)
+
 int __kmem_cache_shutdown(struct kmem_cache *);
 void __kmem_cache_release(struct kmem_cache *);
-int __kmem_cache_shrink(struct kmem_cache *, bool);
+int __kmem_cache_shrink(struct kmem_cache *);
 void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
@@ -432,7 +447,8 @@ struct kmem_cache_node {
 	struct list_head slabs_partial;	/* partial list first, better asm code */
 	struct list_head slabs_full;
 	struct list_head slabs_free;
-	unsigned long num_slabs;
+	unsigned long total_slabs;	/* length of all slab lists */
+	unsigned long free_slabs;	/* length of free slab list only */
 	unsigned long free_objects;
 	unsigned int free_limit;
 	unsigned int colour_next;	/* Per-node cache coloring */
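With total_slabs and free_slabs maintained directly in kmem_cache_node, the /proc/slabinfo figures computed in get_slabinfo() become plain subtraction instead of list walks. The arithmetic in isolation, with invented numbers:

#include <stdio.h>

int main(void)
{
    unsigned long total_slabs = 120, free_slabs = 20;   /* invented counters */
    unsigned long objs_per_slab = 32, free_objs = 700;

    unsigned long num_objs = total_slabs * objs_per_slab;
    unsigned long active_slabs = total_slabs - free_slabs;
    unsigned long active_objs = num_objs - free_objs;

    printf("slabs: %lu/%lu, objs: %lu/%lu\n",
           active_slabs, total_slabs, active_objs, num_objs);
    return 0;
}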
@@ -404,6 +404,12 @@ kmem_cache_create(const char *name, size_t size, size_t align,
 		goto out_unlock;
 	}
 
+	/* Refuse requests with allocator specific flags */
+	if (flags & ~SLAB_FLAGS_PERMITTED) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
 	/*
 	 * Some allocators will constraint the set of valid flags to a subset
 	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
@@ -573,6 +579,29 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 	get_online_cpus();
 	get_online_mems();
 
+#ifdef CONFIG_SLUB
+	/*
+	 * In case of SLUB, we need to disable empty slab caching to
+	 * avoid pinning the offline memory cgroup by freeable kmem
+	 * pages charged to it. SLAB doesn't need this, as it
+	 * periodically purges unused slabs.
+	 */
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list) {
+		c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL;
+		if (c) {
+			c->cpu_partial = 0;
+			c->min_partial = 0;
+		}
+	}
+	mutex_unlock(&slab_mutex);
+	/*
+	 * kmem_cache->cpu_partial is checked locklessly (see
+	 * put_cpu_partial()). Make sure the change is visible.
+	 */
+	synchronize_sched();
+#endif
+
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		if (!is_root_cache(s))
@@ -584,7 +613,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 		if (!c)
 			continue;
 
-		__kmem_cache_shrink(c, true);
+		__kmem_cache_shrink(c);
 		arr->entries[idx] = NULL;
 	}
 	mutex_unlock(&slab_mutex);
@@ -755,7 +784,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 	get_online_cpus();
 	get_online_mems();
 	kasan_cache_shrink(cachep);
-	ret = __kmem_cache_shrink(cachep, false);
+	ret = __kmem_cache_shrink(cachep);
 	put_online_mems();
 	put_online_cpus();
 	return ret;
@@ -634,7 +634,7 @@ void __kmem_cache_release(struct kmem_cache *c)
 {
 }
 
-int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *d)
 {
 	return 0;
 }
Some files were not shown because too many files have changed in this diff.