Merge branch 'akpm' (patches from Andrew)

Merge updates from Andrew Morton:
 "Am experimenting with splitting MM up into identifiable subsystems
  perhaps with a view to gitifying it in complex ways. Also with more
  verbose "incoming" emails.

  Most of MM is here and a few other trees.

  Subsystems affected by this patch series:
   - hotfixes
   - iommu
   - scripts
   - arch/sh
   - ocfs2
   - mm:slab-generic
   - mm:slub
   - mm:kmemleak
   - mm:kasan
   - mm:cleanups
   - mm:debug
   - mm:pagecache
   - mm:swap
   - mm:memcg
   - mm:gup
   - mm:pagemap
   - mm:infrastructure
   - mm:vmalloc
   - mm:initialization
   - mm:pagealloc
   - mm:vmscan
   - mm:tools
   - mm:proc
   - mm:ras
   - mm:oom-kill

  hotfixes:
      mm: vmscan: scan anonymous pages on file refaults
      mm/nvdimm: add is_ioremap_addr and use that to check ioremap address
      mm/memcontrol: fix wrong statistics in memory.stat
      mm/z3fold.c: lock z3fold page before  __SetPageMovable()
      nilfs2: do not use unexported cpu_to_le32()/le32_to_cpu() in uapi header
      MAINTAINERS: nilfs2: update email address

  iommu:
      include/linux/dmar.h: replace single-char identifiers in macros

  scripts:
      scripts/decode_stacktrace: match basepath using shell prefix operator, not regex
      scripts/decode_stacktrace: look for modules with .ko.debug extension
      scripts/spelling.txt: drop "sepc" from the misspelling list
      scripts/spelling.txt: add spelling fix for prohibited
      scripts/decode_stacktrace: Accept dash/underscore in modules
      scripts/spelling.txt: add more spellings to spelling.txt

  arch/sh:
      arch/sh/configs/sdk7786_defconfig: remove CONFIG_LOGFS
      sh: config: remove left-over BACKLIGHT_LCD_SUPPORT
      sh: prevent warnings when using iounmap

  ocfs2:
      fs: ocfs: fix spelling mistake "hearbeating" -> "heartbeat"
      ocfs2/dlm: use struct_size() helper
      ocfs2: add last unlock times in locking_state
      ocfs2: add locking filter debugfs file
      ocfs2: add first lock wait time in locking_state
      ocfs: no need to check return value of debugfs_create functions
      fs/ocfs2/dlmglue.c: unneeded variable: "status"
      ocfs2: use kmemdup rather than duplicating its implementation

  mm:slab-generic:
    Patch series "mm/slab: Improved sanity checking":
      mm/slab: validate cache membership under freelist hardening
      mm/slab: sanity-check page type when looking up cache
      lkdtm/heap: add tests for freelist hardening

  mm:slub:
      mm/slub.c: avoid double string traverse in kmem_cache_flags()
      slub: don't panic for memcg kmem cache creation failure

  mm:kmemleak:
      mm/kmemleak.c: fix check for softirq context
      mm/kmemleak.c: change error at _write when kmemleak is disabled
      docs: kmemleak: add more documentation details

  mm:kasan:
      mm/kasan: print frame description for stack bugs
      Patch series "Bitops instrumentation for KASAN", v5:
        lib/test_kasan: add bitops tests
        x86: use static_cpu_has in uaccess region to avoid instrumentation
        asm-generic, x86: add bitops instrumentation for KASAN
      Patch series "mm/kasan: Add object validation in ksize()", v3:
        mm/kasan: introduce __kasan_check_{read,write}
        mm/kasan: change kasan_check_{read,write} to return boolean
        lib/test_kasan: Add test for double-kzfree detection
        mm/slab: refactor common ksize KASAN logic into slab_common.c
        mm/kasan: add object validation in ksize()

  mm:cleanups:
      include/linux/pfn_t.h: remove pfn_t_to_virt()
      Patch series "remove ARCH_SELECT_MEMORY_MODEL where it has no effect":
        arm: remove ARCH_SELECT_MEMORY_MODEL
        s390: remove ARCH_SELECT_MEMORY_MODEL
        sparc: remove ARCH_SELECT_MEMORY_MODEL
      mm/gup.c: make follow_page_mask() static
      mm/memory.c: trivial clean up in insert_page()
      mm: make !CONFIG_HUGE_PAGE wrappers into static inlines
      include/linux/mm_types.h: ifdef struct vm_area_struct::swap_readahead_info
      mm: remove the account_page_dirtied export
      mm/page_isolation.c: change the prototype of undo_isolate_page_range()
      include/linux/vmpressure.h: use spinlock_t instead of struct spinlock
      mm: remove the exporting of totalram_pages
      include/linux/pagemap.h: document trylock_page() return value

  mm:debug:
      mm/failslab.c: by default, do not fail allocations with direct reclaim only
      Patch series "debug_pagealloc improvements":
        mm, debug_pagelloc: use static keys to enable debugging
        mm, page_alloc: more extensive free page checking with debug_pagealloc
        mm, debug_pagealloc: use a page type instead of page_ext flag

  mm:pagecache:
      Patch series "fix filler_t callback type mismatches", v2:
        mm/filemap.c: fix an overly long line in read_cache_page
        mm/filemap: don't cast ->readpage to filler_t for do_read_cache_page
        jffs2: pass the correct prototype to read_cache_page
        9p: pass the correct prototype to read_cache_page
      mm/filemap.c: correct the comment about VM_FAULT_RETRY

  mm:swap:
      mm, swap: fix race between swapoff and some swap operations
      mm/swap_state.c: simplify total_swapcache_pages() with get_swap_device()
      mm, swap: use rbtree for swap_extent
      mm/mincore.c: fix race between swapoff and mincore

  mm:memcg:
      memcg, oom: no oom-kill for __GFP_RETRY_MAYFAIL
      memcg, fsnotify: no oom-kill for remote memcg charging
      mm, memcg: introduce memory.events.local
      mm: memcontrol: dump memory.stat during cgroup OOM
      Patch series "mm: reparent slab memory on cgroup removal", v7:
        mm: memcg/slab: postpone kmem_cache memcg pointer initialization to memcg_link_cache()
        mm: memcg/slab: rename slab delayed deactivation functions and fields
        mm: memcg/slab: generalize postponed non-root kmem_cache deactivation
        mm: memcg/slab: introduce __memcg_kmem_uncharge_memcg()
        mm: memcg/slab: unify SLAB and SLUB page accounting
        mm: memcg/slab: don't check the dying flag on kmem_cache creation
        mm: memcg/slab: synchronize access to kmem_cache dying flag using a spinlock
        mm: memcg/slab: rework non-root kmem_cache lifecycle management
        mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages
        mm: memcg/slab: reparent memcg kmem_caches on cgroup removal
      mm, memcg: add a memcg_slabinfo debugfs file

  mm:gup:
      Patch series "switch the remaining architectures to use generic GUP", v4:
        mm: use untagged_addr() for get_user_pages_fast addresses
        mm: simplify gup_fast_permitted
        mm: lift the x86_32 PAE version of gup_get_pte to common code
        MIPS: use the generic get_user_pages_fast code
        sh: add the missing pud_page definition
        sh: use the generic get_user_pages_fast code
        sparc64: add the missing pgd_page definition
        sparc64: define untagged_addr()
        sparc64: use the generic get_user_pages_fast code
        mm: rename CONFIG_HAVE_GENERIC_GUP to CONFIG_HAVE_FAST_GUP
        mm: reorder code blocks in gup.c
        mm: consolidate the get_user_pages* implementations
        mm: validate get_user_pages_fast flags
        mm: move the powerpc hugepd code to mm/gup.c
        mm: switch gup_hugepte to use try_get_compound_head
        mm: mark the page referenced in gup_hugepte
      mm/gup: speed up check_and_migrate_cma_pages() on huge page
      mm/gup.c: remove some BUG_ONs from get_gate_page()
      mm/gup.c: mark undo_dev_pagemap as __maybe_unused

  mm:pagemap:
      asm-generic, x86: introduce generic pte_{alloc,free}_one[_kernel]
      alpha: switch to generic version of pte allocation
      arm: switch to generic version of pte allocation
      arm64: switch to generic version of pte allocation
      csky: switch to generic version of pte allocation
      m68k: sun3: switch to generic version of pte allocation
      mips: switch to generic version of pte allocation
      nds32: switch to generic version of pte allocation
      nios2: switch to generic version of pte allocation
      parisc: switch to generic version of pte allocation
      riscv: switch to generic version of pte allocation
      um: switch to generic version of pte allocation
      unicore32: switch to generic version of pte allocation
      mm/pgtable: drop pgtable_t variable from pte_fn_t functions
      mm/memory.c: fail when offset == num in first check of __vm_map_pages()

  mm:infrastructure:
      mm/mmu_notifier: use hlist_add_head_rcu()

  mm:vmalloc:
      Patch series "Some cleanups for the KVA/vmalloc", v5:
        mm/vmalloc.c: remove "node" argument
        mm/vmalloc.c: preload a CPU with one object for split purpose
        mm/vmalloc.c: get rid of one single unlink_va() when merge
        mm/vmalloc.c: switch to WARN_ON() and move it under unlink_va()
      mm/vmalloc.c: spelling> s/informaion/information/

  mm:initialization:
      mm/large system hash: use vmalloc for size > MAX_ORDER when !hashdist
      mm/large system hash: clear hashdist when only one node with memory is booted

  mm:pagealloc:
      arm64: move jump_label_init() before parse_early_param()
      Patch series "add init_on_alloc/init_on_free boot options", v10:
        mm: security: introduce init_on_alloc=1 and init_on_free=1 boot options
        mm: init: report memory auto-initialization features at boot time

  mm:vmscan:
      mm: vmscan: remove double slab pressure by inc'ing sc->nr_scanned
      mm: vmscan: correct some vmscan counters for THP swapout

  mm:tools:
      tools/vm/slabinfo: order command line options
      tools/vm/slabinfo: add partial slab listing to -X
      tools/vm/slabinfo: add option to sort by partial slabs
      tools/vm/slabinfo: add sorting info to help menu

  mm:proc:
      proc: use down_read_killable mmap_sem for /proc/pid/maps
      proc: use down_read_killable mmap_sem for /proc/pid/smaps_rollup
      proc: use down_read_killable mmap_sem for /proc/pid/pagemap
      proc: use down_read_killable mmap_sem for /proc/pid/clear_refs
      proc: use down_read_killable mmap_sem for /proc/pid/map_files
      mm: use down_read_killable for locking mmap_sem in access_remote_vm
      mm: smaps: split PSS into components
      mm: vmalloc: show number of vmalloc pages in /proc/meminfo

  mm:ras:
      mm/memory-failure.c: clarify error message

  mm:oom-kill:
      mm: memcontrol: use CSS_TASK_ITER_PROCS at mem_cgroup_scan_tasks()
      mm, oom: refactor dump_tasks for memcg OOMs
      mm, oom: remove redundant task_in_mem_cgroup() check
      oom: decouple mems_allowed from oom_unkillable_task
      mm/oom_kill.c: remove redundant OOM score normalization in select_bad_process()"

* akpm: (147 commits)
  mm/oom_kill.c: remove redundant OOM score normalization in select_bad_process()
  oom: decouple mems_allowed from oom_unkillable_task
  mm, oom: remove redundant task_in_mem_cgroup() check
  mm, oom: refactor dump_tasks for memcg OOMs
  mm: memcontrol: use CSS_TASK_ITER_PROCS at mem_cgroup_scan_tasks()
  mm/memory-failure.c: clarify error message
  mm: vmalloc: show number of vmalloc pages in /proc/meminfo
  mm: smaps: split PSS into components
  mm: use down_read_killable for locking mmap_sem in access_remote_vm
  proc: use down_read_killable mmap_sem for /proc/pid/map_files
  proc: use down_read_killable mmap_sem for /proc/pid/clear_refs
  proc: use down_read_killable mmap_sem for /proc/pid/pagemap
  proc: use down_read_killable mmap_sem for /proc/pid/smaps_rollup
  proc: use down_read_killable mmap_sem for /proc/pid/maps
  tools/vm/slabinfo: add sorting info to help menu
  tools/vm/slabinfo: add option to sort by partial slabs
  tools/vm/slabinfo: add partial slab listing to -X
  tools/vm/slabinfo: order command line options
  mm: vmscan: correct some vmscan counters for THP swapout
  mm: vmscan: remove double slab pressure by inc'ing sc->nr_scanned
  ...
This commit is contained in:
Linus Torvalds 2019-07-12 11:40:28 -07:00
commit ef8f3d48af
173 changed files with 3710 additions and 3505 deletions

View File

@ -3,18 +3,28 @@ Date: August 2017
Contact: Daniel Colascione <dancol@google.com>
Description:
This file provides pre-summed memory information for a
process. The format is identical to /proc/pid/smaps,
process. The format is almost identical to /proc/pid/smaps,
except instead of an entry for each VMA in a process,
smaps_rollup has a single entry (tagged "[rollup]")
for which each field is the sum of the corresponding
fields from all the maps in /proc/pid/smaps.
For more details, see the procfs man page.
Additionally, the fields Pss_Anon, Pss_File and Pss_Shmem
are not present in /proc/pid/smaps. These fields represent
the sum of the Pss field of each type (anon, file, shmem).
For more details, see Documentation/filesystems/proc.txt
and the procfs man page.
Typical output looks like this:
00100000-ff709000 ---p 00000000 00:00 0 [rollup]
Size: 1192 kB
KernelPageSize: 4 kB
MMUPageSize: 4 kB
Rss: 884 kB
Pss: 385 kB
Pss_Anon: 301 kB
Pss_File: 80 kB
Pss_Shmem: 4 kB
Shared_Clean: 696 kB
Shared_Dirty: 0 kB
Private_Clean: 120 kB

View File

@ -1146,6 +1146,11 @@ PAGE_SIZE multiple when read back.
otherwise, a value change in this file generates a file
modified event.
Note that all fields in this file are hierarchical and the
file modified event can be generated due to an event down the
hierarchy. For for the local events at the cgroup level see
memory.events.local.
low
The number of times the cgroup is reclaimed due to
high memory pressure even though its usage is under
@ -1185,6 +1190,11 @@ PAGE_SIZE multiple when read back.
The number of processes belonging to this cgroup
killed by any kind of OOM killer.
memory.events.local
Similar to memory.events but the fields in the file are local
to the cgroup i.e. not hierarchical. The file modified event
generated on this file reflects only the local events.
memory.stat
A read-only flat-keyed file which exists on non-root cgroups.

View File

@ -805,12 +805,10 @@
tracking down these problems.
debug_pagealloc=
[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
parameter enables the feature at boot time. In
default, it is disabled. We can avoid allocating huge
chunk of memory for debug pagealloc if we don't enable
it at boot time and the system will work mostly same
with the kernel built without CONFIG_DEBUG_PAGEALLOC.
[KNL] When CONFIG_DEBUG_PAGEALLOC is set, this parameter
enables the feature at boot time. By default, it is
disabled and the system will work mostly the same as a
kernel built without CONFIG_DEBUG_PAGEALLOC.
on: enable the feature
debugpat [X86] Enable PAT debugging
@ -1670,6 +1668,15 @@
initrd= [BOOT] Specify the location of the initial ramdisk
init_on_alloc= [MM] Fill newly allocated pages and heap objects with
zeroes.
Format: 0 | 1
Default set by CONFIG_INIT_ON_ALLOC_DEFAULT_ON.
init_on_free= [MM] Fill freed pages and heap objects with zeroes.
Format: 0 | 1
Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON.
init_pkru= [x86] Specify the default memory protection keys rights
register contents for all processes. 0x55555554 by
default (disallow access to all but pkey 0). Can

View File

@ -54,7 +54,7 @@ The Linux kernel provides more basic utility functions.
Bit Operations
--------------
.. kernel-doc:: arch/x86/include/asm/bitops.h
.. kernel-doc:: include/asm-generic/bitops-instrumented.h
:internal:
Bitmap Operations

View File

@ -2,8 +2,8 @@ Kernel Memory Leak Detector
===========================
Kmemleak provides a way of detecting possible kernel memory leaks in a
way similar to a tracing garbage collector
(https://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
way similar to a `tracing garbage collector
<https://en.wikipedia.org/wiki/Tracing_garbage_collection>`_,
with the difference that the orphan objects are not freed but only
reported via /sys/kernel/debug/kmemleak. A similar method is used by the
Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in
@ -15,10 +15,13 @@ Usage
CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
thread scans the memory every 10 minutes (by default) and prints the
number of new unreferenced objects found. To display the details of all
the possible memory leaks::
number of new unreferenced objects found. If the ``debugfs`` isn't already
mounted, mount with::
# mount -t debugfs nodev /sys/kernel/debug/
To display the details of all the possible scanned memory leaks::
# cat /sys/kernel/debug/kmemleak
To trigger an intermediate memory scan::
@ -72,6 +75,9 @@ If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is
disabled by default. Passing ``kmemleak=on`` on the kernel command
line enables the function.
If you are getting errors like "Error while writing to stdout" or "write_loop:
Invalid argument", make sure kmemleak is properly enabled.
Basic Algorithm
---------------
@ -218,3 +224,37 @@ the pointer is calculated by other methods than the usual container_of
macro or the pointer is stored in a location not scanned by kmemleak.
Page allocations and ioremap are not tracked.
Testing with kmemleak-test
--------------------------
To check if you have all set up to use kmemleak, you can use the kmemleak-test
module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST
as module (it can't be used as bult-in) and boot the kernel with kmemleak
enabled. Load the module and perform a scan with::
# modprobe kmemleak-test
# echo scan > /sys/kernel/debug/kmemleak
Note that the you may not get results instantly or on the first scanning. When
kmemleak gets results, it'll log ``kmemleak: <count of leaks> new suspected
memory leaks``. Then read the file to see then::
# cat /sys/kernel/debug/kmemleak
unreferenced object 0xffff89862ca702e8 (size 32):
comm "modprobe", pid 2088, jiffies 4294680594 (age 375.486s)
hex dump (first 32 bytes):
6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
backtrace:
[<00000000e0a73ec7>] 0xffffffffc01d2036
[<000000000c5d2a46>] do_one_initcall+0x41/0x1df
[<0000000046db7e0a>] do_init_module+0x55/0x200
[<00000000542b9814>] load_module+0x203c/0x2480
[<00000000c2850256>] __do_sys_finit_module+0xba/0xe0
[<000000006564e7ef>] do_syscall_64+0x43/0x110
[<000000007c873fa6>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
...
Removing the module with ``rmmod kmemleak_test`` should also trigger some
kmemleak results.

View File

@ -154,9 +154,11 @@ Table 1-1: Process specific entries in /proc
symbol the task is blocked in - or "0" if not blocked.
pagemap Page table
stack Report full stack trace, enable via CONFIG_STACKTRACE
smaps an extension based on maps, showing the memory consumption of
smaps An extension based on maps, showing the memory consumption of
each mapping and flags associated with it
numa_maps an extension based on maps, showing the memory locality and
smaps_rollup Accumulated smaps stats for all mappings of the process. This
can be derived from smaps, but is faster and more convenient
numa_maps An extension based on maps, showing the memory locality and
binding policy as well as mem usage (in pages) of each mapping.
..............................................................................
@ -366,7 +368,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
exit_code the thread's exit_code in the form reported by the waitpid system call
..............................................................................
The /proc/PID/maps file containing the currently mapped memory regions and
The /proc/PID/maps file contains the currently mapped memory regions and
their access permissions.
The format is:
@ -417,11 +419,14 @@ is not associated with a file:
or if empty, the mapping is anonymous.
The /proc/PID/smaps is an extension based on maps, showing the memory
consumption for each of the process's mappings. For each of mappings there
is a series of lines such as the following:
consumption for each of the process's mappings. For each mapping (aka Virtual
Memory Area, or VMA) there is a series of lines such as the following:
08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash
Size: 1084 kB
KernelPageSize: 4 kB
MMUPageSize: 4 kB
Rss: 892 kB
Pss: 374 kB
Shared_Clean: 892 kB
@ -443,11 +448,14 @@ Locked: 0 kB
THPeligible: 0
VmFlags: rd ex mr mw me dw
the first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. The remaining lines show the size of the mapping
(size), the amount of the mapping that is currently resident in RAM (RSS), the
process' proportional share of this mapping (PSS), the number of clean and
dirty private pages in the mapping.
The first of these lines shows the same information as is displayed for the
mapping in /proc/PID/maps. Following lines show the size of the mapping
(size); the size of each page allocated when backing a VMA (KernelPageSize),
which is usually the same as the size in the page table entries; the page size
used by the MMU when backing a VMA (in most cases, the same as KernelPageSize);
the amount of the mapping that is currently resident in RAM (RSS); the
process' proportional share of this mapping (PSS); and the number of clean and
dirty shared and private pages in the mapping.
The "proportional set size" (PSS) of a process is the count of pages it has
in memory, where each page is divided by the number of processes sharing it.
@ -532,6 +540,19 @@ guarantees:
2) If there is something at a given vaddr during the entirety of the
life of the smaps/maps walk, there will be some output for it.
The /proc/PID/smaps_rollup file includes the same fields as /proc/PID/smaps,
but their values are the sums of the corresponding values for all mappings of
the process. Additionally, it contains these fields:
Pss_Anon
Pss_File
Pss_Shmem
They represent the proportional shares of anonymous, file, and shmem pages, as
described for smaps above. These fields are omitted in smaps since each
mapping identifies the type (anon, file, or shmem) of all pages it contains.
Thus all information in smaps_rollup can be derived from smaps, but at a
significantly higher cost.
The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
bits on both physical and virtual pages associated with a process, and the

View File

@ -11260,7 +11260,7 @@ F: include/uapi/linux/nfs*
F: include/uapi/linux/sunrpc/
NILFS2 FILESYSTEM
M: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
M: Ryusuke Konishi <konishi.ryusuke@gmail.com>
L: linux-nilfs@vger.kernel.org
W: https://nilfs.sourceforge.io/
W: https://nilfs.osdn.jp/

View File

@ -5,6 +5,8 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
/*
* Allocate and free page tables. The xxx_kernel() versions are
* used to allocate a kernel page table - this turns on ASN bits
@ -41,7 +43,7 @@ pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pmd_t *
pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{
pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
pmd_t *ret = (pmd_t *)__get_free_page(GFP_PGTABLE_USER);
return ret;
}
@ -51,42 +53,6 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
free_page((unsigned long)pmd);
}
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
}
static inline void
pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm)
{
pte_t *pte = pte_alloc_one_kernel(mm);
struct page *page;
if (!pte)
return NULL;
page = virt_to_page(pte);
if (!pgtable_page_ctor(page)) {
__free_page(page);
return NULL;
}
return page;
}
static inline void
pte_free(struct mm_struct *mm, pgtable_t page)
{
pgtable_page_dtor(page);
__free_page(page);
}
#define check_pgt_cache() do { } while (0)
#endif /* _ALPHA_PGALLOC_H */

View File

@ -75,6 +75,7 @@ config ARM
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && MMU
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP if ARM_LPAE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL
@ -1622,16 +1623,9 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool ARCH_SPARSEMEM_ENABLE
config ARCH_SELECT_MEMORY_MODEL
def_bool ARCH_SPARSEMEM_ENABLE
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
config HAVE_GENERIC_GUP
def_bool y
depends on ARM_LPAE
config HIGHMEM
bool "High Memory Support"
depends on MMU

View File

@ -54,8 +54,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
static inline void clean_pte_table(pte_t *pte)
{
clean_dcache_area(pte + PTE_HWTABLE_PTRS, PTE_HWTABLE_SIZE);
@ -77,54 +75,41 @@ static inline void clean_pte_table(pte_t *pte)
* | h/w pt 1 |
* +------------+
*/
#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
#define __HAVE_ARCH_PTE_ALLOC_ONE
#include <asm-generic/pgalloc.h>
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
pte_t *pte = __pte_alloc_one_kernel(mm);
pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_pte_table(pte);
return pte;
}
#ifdef CONFIG_HIGHPTE
#define PGTABLE_HIGHMEM __GFP_HIGHMEM
#else
#define PGTABLE_HIGHMEM 0
#endif
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
#ifdef CONFIG_HIGHPTE
pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
#else
pte = alloc_pages(PGALLOC_GFP, 0);
#endif
pte = __pte_alloc_one(mm, GFP_PGTABLE_USER | PGTABLE_HIGHMEM);
if (!pte)
return NULL;
if (!PageHighMem(pte))
clean_pte_table(page_address(pte));
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
return NULL;
}
return pte;
}
/*
* Free one PTE table.
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
if (pte)
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
pmdval_t prot)
{

View File

@ -8,8 +8,7 @@
#include <asm/mach/map.h>
#include <asm/mmu_context.h>
static int __init set_permissions(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
pte_t pte = *ptep;

View File

@ -493,8 +493,7 @@ void __init dma_contiguous_remap(void)
}
}
static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
void *data)
static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data)
{
struct page *page = virt_to_page(addr);
pgprot_t prot = *(pgprot_t *)data;

View File

@ -729,7 +729,7 @@ static void __init *early_alloc(unsigned long sz)
static void *__init late_alloc(unsigned long sz)
{
void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
BUG();

View File

@ -14,8 +14,7 @@ struct page_change_data {
pgprot_t clear_mask;
};
static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
struct page_change_data *cdata = data;
pte_t pte = *ptep;

View File

@ -143,6 +143,7 @@ config ARM64
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
@ -267,9 +268,6 @@ config ZONE_DMA32
bool "Support DMA32 zone" if EXPERT
default y
config HAVE_GENERIC_GUP
def_bool y
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y

View File

@ -13,18 +13,23 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
#define check_pgt_cache() do { } while (0)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t))
#if CONFIG_PGTABLE_LEVELS > 2
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
gfp_t gfp = GFP_PGTABLE_USER;
struct page *page;
page = alloc_page(PGALLOC_GFP);
if (mm == &init_mm)
gfp = GFP_PGTABLE_KERNEL;
page = alloc_page(gfp);
if (!page)
return NULL;
if (!pgtable_pmd_page_ctor(page)) {
@ -61,7 +66,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return (pud_t *)__get_free_page(PGALLOC_GFP);
return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
}
static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
@ -89,42 +94,6 @@ static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_page(PGALLOC_GFP);
}
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_pages(PGALLOC_GFP, 0);
if (!pte)
return NULL;
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
return NULL;
}
return pte;
}
/*
* Free a PTE table.
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
{
if (ptep)
free_page((unsigned long)ptep);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
pmdval_t prot)
{

View File

@ -82,8 +82,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
return 0;
}
static int __init set_permissions(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
efi_memory_desc_t *md = data;
pte_t pte = READ_ONCE(*ptep);

View File

@ -283,6 +283,11 @@ void __init setup_arch(char **cmdline_p)
setup_machine_fdt(__fdt_pointer);
/*
* Initialise the static keys early as they may be enabled by the
* cpufeature code and early parameters.
*/
jump_label_init();
parse_early_param();
/*

View File

@ -420,11 +420,6 @@ void __init smp_cpus_done(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
/*
* Initialise the static keys early as they may be enabled by the
* cpufeature code.
*/
jump_label_init();
cpuinfo_store_boot_cpu();
/*

View File

@ -362,7 +362,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
static phys_addr_t __pgd_pgtable_alloc(int shift)
{
void *ptr = (void *)__get_free_page(PGALLOC_GFP);
void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
BUG_ON(!ptr);
/* Ensure the zeroed page is visible to the page table walker */

View File

@ -19,8 +19,7 @@ struct page_change_data {
bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
struct page_change_data *cdata = data;
pte_t pte = READ_ONCE(*ptep);

View File

@ -19,10 +19,12 @@ static struct kmem_cache *pgd_cache __ro_after_init;
pgd_t *pgd_alloc(struct mm_struct *mm)
{
gfp_t gfp = GFP_PGTABLE_USER;
if (PGD_SIZE == PAGE_SIZE)
return (pgd_t *)__get_free_page(PGALLOC_GFP);
return (pgd_t *)__get_free_page(gfp);
else
return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
return kmem_cache_alloc(pgd_cache, gfp);
}
void pgd_free(struct mm_struct *mm, pgd_t *pgd)

View File

@ -8,6 +8,9 @@
#include <linux/mm.h>
#include <linux/sched.h>
#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
@ -39,33 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
return pte;
}
static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
if (!pte)
return NULL;
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
return NULL;
}
return pte;
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_pages((unsigned long)pte, PTE_ORDER);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_pages(pte, PTE_ORDER);
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
free_pages((unsigned long)pgd, PGD_ORDER);

View File

@ -13,55 +13,18 @@
#include <asm/tlb.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
extern const char bad_pmd_string[];
#define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); })
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long) pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t page)
{
pgtable_page_dtor(page);
__free_page(page);
}
#define __pte_free_tlb(tlb,pte,addr) \
do { \
pgtable_page_dtor(pte); \
tlb_remove_page((tlb), pte); \
} while (0)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
unsigned long page = __get_free_page(GFP_KERNEL);
if (!page)
return NULL;
memset((void *)page, 0, PAGE_SIZE);
return (pte_t *) (page);
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_KERNEL, 0);
if (page == NULL)
return NULL;
clear_highpage(page);
if (!pgtable_page_ctor(page)) {
__free_page(page);
return NULL;
}
return page;
}
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
pmd_val(*pmd) = __pa((unsigned long)pte);

View File

@ -34,6 +34,7 @@ config MIPS
select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
select HANDLE_DOMAIN_IRQ
select HAVE_ARCH_COMPILER_H
select HAVE_ARCH_JUMP_LABEL
@ -52,6 +53,7 @@ config MIPS
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER

View File

@ -13,6 +13,8 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
@ -50,37 +52,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
}
static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
if (!pte)
return NULL;
clear_highpage(pte);
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
return NULL;
}
return pte;
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_pages((unsigned long)pte, PTE_ORDER);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_pages(pte, PTE_ORDER);
}
#define __pte_free_tlb(tlb,pte,address) \
do { \
pgtable_page_dtor(pte); \

View File

@ -20,6 +20,7 @@
#include <asm/cmpxchg.h>
#include <asm/io.h>
#include <asm/pgtable-bits.h>
#include <asm/cpu-features.h>
struct mm_struct;
struct vm_area_struct;
@ -626,6 +627,8 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#define gup_fast_permitted(start, end) (!cpu_has_dc_aliases)
#include <asm-generic/pgtable.h>
/*

View File

@ -7,7 +7,6 @@ obj-y += cache.o
obj-y += context.o
obj-y += extable.o
obj-y += fault.o
obj-y += gup.o
obj-y += init.o
obj-y += mmap.o
obj-y += page.o

View File

@ -1,303 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Lockless get_user_pages_fast for MIPS
*
* Copyright (C) 2008 Nick Piggin
* Copyright (C) 2008 Novell Inc.
* Copyright (C) 2011 Ralf Baechle
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <asm/cpu-features.h>
#include <asm/pgtable.h>
static inline pte_t gup_get_pte(pte_t *ptep)
{
#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32)
pte_t pte;
retry:
pte.pte_low = ptep->pte_low;
smp_rmb();
pte.pte_high = ptep->pte_high;
smp_rmb();
if (unlikely(pte.pte_low != ptep->pte_low))
goto retry;
return pte;
#else
return READ_ONCE(*ptep);
#endif
}
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
pte_t *ptep = pte_offset_map(&pmd, addr);
do {
pte_t pte = gup_get_pte(ptep);
struct page *page;
if (!pte_present(pte) ||
pte_special(pte) || (write && !pte_write(pte))) {
pte_unmap(ptep);
return 0;
}
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
get_page(page);
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
} while (ptep++, addr += PAGE_SIZE, addr != end);
pte_unmap(ptep - 1);
return 1;
}
static inline void get_head_page_multiple(struct page *page, int nr)
{
VM_BUG_ON(page != compound_head(page));
VM_BUG_ON(page_count(page) == 0);
page_ref_add(page, nr);
SetPageReferenced(page);
}
static int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
pte_t pte = *(pte_t *)&pmd;
struct page *head, *page;
int refs;
if (write && !pte_write(pte))
return 0;
/* hugepages are never "special" */
VM_BUG_ON(pte_special(pte));
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
refs = 0;
head = pte_page(pte);
page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
get_head_page_multiple(head, refs);
return 1;
}
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pmd_t *pmdp;
pmdp = pmd_offset(&pud, addr);
do {
pmd_t pmd = *pmdp;
next = pmd_addr_end(addr, end);
if (pmd_none(pmd))
return 0;
if (unlikely(pmd_huge(pmd))) {
if (!gup_huge_pmd(pmd, addr, next, write, pages,nr))
return 0;
} else {
if (!gup_pte_range(pmd, addr, next, write, pages,nr))
return 0;
}
} while (pmdp++, addr = next, addr != end);
return 1;
}
static int gup_huge_pud(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
pte_t pte = *(pte_t *)&pud;
struct page *head, *page;
int refs;
if (write && !pte_write(pte))
return 0;
/* hugepages are never "special" */
VM_BUG_ON(pte_special(pte));
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
refs = 0;
head = pte_page(pte);
page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
get_head_page_multiple(head, refs);
return 1;
}
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
pudp = pud_offset(&pgd, addr);
do {
pud_t pud = *pudp;
next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
if (unlikely(pud_huge(pud))) {
if (!gup_huge_pud(pud, addr, next, write, pages,nr))
return 0;
} else {
if (!gup_pmd_range(pud, addr, next, write, pages,nr))
return 0;
}
} while (pudp++, addr = next, addr != end);
return 1;
}
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
* Note a difference with get_user_pages_fast: this always returns the
* number of pages pinned, 0 if no pages were pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
unsigned long flags;
pgd_t *pgdp;
int nr = 0;
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (unlikely(!access_ok((void __user *)start, len)))
return 0;
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
* important workloads (eg. DB2), and whether limiting the batch
* size will decrease performance.
*
* It seems like we're in the clear for the moment. Direct-IO is
* the main guy that batches up lots of get_user_pages, and even
* they are limited to 64-at-a-time which is not so many.
*/
/*
* This doesn't prevent pagetable teardown, but does prevent
* the pagetables and pages from being freed.
*
* So long as we atomically load page table pointers versus teardown,
* we can follow the address down to the page and take a ref on it.
*/
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = *pgdp;
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
return nr;
}
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
* @nr_pages: number of pages from start to pin
* @gup_flags: flags modifying pin behaviour
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long.
*
* Attempt to pin user pages in memory without taking mm->mmap_sem.
* If not successful, it will fall back to taking the lock and
* calling get_user_pages().
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno.
*/
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
pgd_t *pgdp;
int ret, nr = 0;
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (end < start || cpu_has_dc_aliases)
goto slow_irqon;
/* XXX: batch / limit 'nr' */
local_irq_disable();
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = *pgdp;
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
slow:
local_irq_enable();
slow_irqon:
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
pages, gup_flags);
/* Have to be a bit careful with return values */
if (nr > 0) {
if (ret < 0)
ret = nr;
else
ret += nr;
}
return ret;
}

View File

@ -9,6 +9,9 @@
#include <asm/tlbflush.h>
#include <asm/proc-fns.h>
#define __HAVE_ARCH_PTE_ALLOC_ONE
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
/*
* Since we have only two-level page tables, these are trivial
*/
@ -22,43 +25,17 @@ extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
#define check_pgt_cache() do { } while (0)
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
pte =
(pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL |
__GFP_ZERO);
return pte;
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
pgtable_t pte;
pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0);
pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (pte)
cpu_dcache_wb_page((unsigned long)page_address(pte));
return pte;
}
/*
* Free one PTE table.
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte)
{
if (pte) {
free_page((unsigned long)pte);
}
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
__free_page(pte);
}
/*
* Populate the pmdp entry with a pointer to the pte. This pmd is part
* of the mm address space.

View File

@ -12,6 +12,8 @@
#include <linux/mm.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
{
@ -37,41 +39,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_pages((unsigned long)pgd, PGD_ORDER);
}
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
return pte;
}
static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_pages(GFP_KERNEL, PTE_ORDER);
if (pte) {
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
return NULL;
}
clear_highpage(pte);
}
return pte;
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_pages((unsigned long)pte, PTE_ORDER);
}
static inline void pte_free(struct mm_struct *mm, struct page *pte)
{
pgtable_page_dtor(pte);
__free_pages(pte, PTE_ORDER);
}
#define __pte_free_tlb(tlb, pte, addr) \
do { \
pgtable_page_dtor(pte); \

View File

@ -10,6 +10,8 @@
#include <asm/cache.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
/* Allocate the top level pgd (page directory)
*
* Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we
@ -122,37 +124,6 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
pmd_populate_kernel(mm, pmd, page_address(pte_page))
#define pmd_pgtable(pmd) pmd_page(pmd)
static inline pgtable_t
pte_alloc_one(struct mm_struct *mm)
{
struct page *page = alloc_page(GFP_KERNEL|__GFP_ZERO);
if (!page)
return NULL;
if (!pgtable_page_ctor(page)) {
__free_page(page);
return NULL;
}
return page;
}
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, struct page *pte)
{
pgtable_page_dtor(pte);
pte_free_kernel(mm, page_address(pte));
}
#define check_pgt_cache() do { } while (0)
#endif

View File

@ -125,6 +125,7 @@ config PPC
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API if PPC64
@ -185,12 +186,12 @@ config PPC
select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL
select HAVE_EBPF_JIT if PPC64
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
select HAVE_GENERIC_GUP
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT

View File

@ -140,6 +140,20 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
}
#endif
#ifdef CONFIG_PPC64
#define is_ioremap_addr is_ioremap_addr
static inline bool is_ioremap_addr(const void *x)
{
#ifdef CONFIG_MMU
unsigned long addr = (unsigned long)x;
return addr >= IOREMAP_BASE && addr < IOREMAP_END;
#else
return false;
#endif
}
#endif /* CONFIG_PPC64 */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */

View File

@ -827,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
* a pending interrupt, this is a SW error and PAPR sepcifies
* a pending interrupt, this is a SW error and PAPR specifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the

View File

@ -511,13 +511,6 @@ struct page *follow_huge_pd(struct vm_area_struct *vma,
return page;
}
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
unsigned long sz)
{
unsigned long __boundary = (addr + sz) & ~(sz-1);
return (__boundary - 1 < end - 1) ? __boundary : end;
}
#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
@ -665,68 +658,3 @@ void flush_dcache_icache_hugepage(struct page *page)
}
}
}
static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long pte_end;
struct page *head, *page;
pte_t pte;
int refs;
pte_end = (addr + sz) & ~(sz-1);
if (pte_end < end)
end = pte_end;
pte = READ_ONCE(*ptep);
if (!pte_access_permitted(pte, write))
return 0;
/* hugepages are never "special" */
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
refs = 0;
head = pte_page(pte);
page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
if (!page_cache_add_speculative(head, refs)) {
*nr -= refs;
return 0;
}
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
/* Could be optimized better */
*nr -= refs;
while (refs--)
put_page(head);
return 0;
}
return 1;
}
int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
unsigned long end, int write, struct page **pages, int *nr)
{
pte_t *ptep;
unsigned long sz = 1UL << hugepd_shift(hugepd);
unsigned long next;
ptep = hugepte_offset(hugepd, addr, pdshift);
do {
next = hugepte_addr_end(addr, end, sz);
if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
return 0;
} while (ptep++, addr = next, addr != end);
return 1;
}

View File

@ -10,6 +10,8 @@
#include <linux/mm.h>
#include <asm/tlb.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
{
@ -74,33 +76,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_page(
GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
}
static inline struct page *pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
if (likely(pte != NULL))
pgtable_page_ctor(pte);
return pte;
}
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
#define __pte_free_tlb(tlb, pte, buf) \
do { \
pgtable_page_dtor(pte); \

View File

@ -139,6 +139,7 @@ config S390
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FAST_GUP
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
@ -146,7 +147,6 @@ config S390
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_GUP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
@ -641,9 +641,6 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
config ARCH_SELECT_MEMORY_MODEL
def_bool y
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y if SPARSEMEM

View File

@ -1270,14 +1270,8 @@ static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
#define pte_unmap(pte) do { } while (0)
static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
unsigned long len, end;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (end < start)
return false;
return end <= current->mm->context.asce_limit;
}
#define gup_fast_permitted gup_fast_permitted

View File

@ -15,6 +15,7 @@ config SUPERH
select HAVE_ARCH_TRACEHOOK
select HAVE_PERF_EVENTS
select HAVE_DEBUG_BUGVERBOSE
select HAVE_FAST_GUP if MMU
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select ARCH_HAS_GCOV_PROFILE_ALL
@ -64,6 +65,7 @@ config SUPERH
config SUPERH32
def_bool "$(ARCH)" = "sh"
select ARCH_32BIT_OFF_T
select GUP_GET_PTE_LOW_HIGH if X2TLB
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_IOREMAP_PROT if MMU && !X2TLB

View File

@ -40,7 +40,6 @@ CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_HIT=y
CONFIG_FB_SH_MOBILE_LCDC=y
CONFIG_BACKLIGHT_LCD_SUPPORT=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FONTS=y
CONFIG_FONT_PEARL_8x8=y

View File

@ -191,7 +191,6 @@ CONFIG_CONFIGFS_FS=y
CONFIG_JFFS2_FS=m
CONFIG_JFFS2_FS_XATTR=y
CONFIG_UBIFS_FS=m
CONFIG_LOGFS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_ROMFS_FS=m

View File

@ -85,7 +85,6 @@ CONFIG_WATCHDOG=y
CONFIG_SH_WDT=y
CONFIG_SSB=y
CONFIG_FB=y
CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y

View File

@ -369,7 +369,11 @@ static inline int iounmap_fixed(void __iomem *addr) { return -EINVAL; }
#define ioremap_nocache ioremap
#define ioremap_uc ioremap
#define iounmap __iounmap
static inline void iounmap(void __iomem *addr)
{
__iounmap(addr);
}
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem

View File

@ -38,6 +38,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
return pud_val(pud);
}
/* only used by the stubbed out hugetlb gup code, should never be called */
#define pud_page(pud) NULL
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{

View File

@ -149,6 +149,43 @@ extern void paging_init(void);
extern void page_table_range_init(unsigned long start, unsigned long end,
pgd_t *pgd);
static inline bool __pte_access_permitted(pte_t pte, u64 prot)
{
return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot;
}
#ifdef CONFIG_X2TLB
static inline bool pte_access_permitted(pte_t pte, bool write)
{
u64 prot = _PAGE_PRESENT;
prot |= _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
if (write)
prot |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
return __pte_access_permitted(pte, prot);
}
#elif defined(CONFIG_SUPERH64)
static inline bool pte_access_permitted(pte_t pte, bool write)
{
u64 prot = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
if (write)
prot |= _PAGE_WRITE;
return __pte_access_permitted(pte, prot);
}
#else
static inline bool pte_access_permitted(pte_t pte, bool write)
{
u64 prot = _PAGE_PRESENT | _PAGE_USER;
if (write)
prot |= _PAGE_RW;
return __pte_access_permitted(pte, prot);
}
#endif
#define pte_access_permitted pte_access_permitted
/* arch/sh/mm/mmap.c */
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN

View File

@ -17,7 +17,7 @@ cacheops-$(CONFIG_CPU_SHX3) += cache-shx3.o
obj-y += $(cacheops-y)
mmu-y := nommu.o extable_32.o
mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o gup.o ioremap.o kmap.o \
mmu-$(CONFIG_MMU) := extable_$(BITS).o fault.o ioremap.o kmap.o \
pgtable.o tlbex_$(BITS).o tlbflush_$(BITS).o
obj-y += $(mmu-y)

View File

@ -1,277 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Lockless get_user_pages_fast for SuperH
*
* Copyright (C) 2009 - 2010 Paul Mundt
*
* Cloned from the x86 and PowerPC versions, by:
*
* Copyright (C) 2008 Nick Piggin
* Copyright (C) 2008 Novell Inc.
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
#include <asm/pgtable.h>
static inline pte_t gup_get_pte(pte_t *ptep)
{
#ifndef CONFIG_X2TLB
return READ_ONCE(*ptep);
#else
/*
* With get_user_pages_fast, we walk down the pagetables without
* taking any locks. For this we would like to load the pointers
* atomically, but that is not possible with 64-bit PTEs. What
* we do have is the guarantee that a pte will only either go
* from not present to present, or present to not present or both
* -- it will not switch to a completely different present page
* without a TLB flush in between; something that we are blocking
* by holding interrupts off.
*
* Setting ptes from not present to present goes:
* ptep->pte_high = h;
* smp_wmb();
* ptep->pte_low = l;
*
* And present to not present goes:
* ptep->pte_low = 0;
* smp_wmb();
* ptep->pte_high = 0;
*
* We must ensure here that the load of pte_low sees l iff pte_high
* sees h. We load pte_high *after* loading pte_low, which ensures we
* don't see an older value of pte_high. *Then* we recheck pte_low,
* which ensures that we haven't picked up a changed pte high. We might
* have got rubbish values from pte_low and pte_high, but we are
* guaranteed that pte_low will not have the present bit set *unless*
* it is 'l'. And get_user_pages_fast only operates on present ptes, so
* we're safe.
*
* gup_get_pte should not be used or copied outside gup.c without being
* very careful -- it does not atomically load the pte or anything that
* is likely to be useful for you.
*/
pte_t pte;
retry:
pte.pte_low = ptep->pte_low;
smp_rmb();
pte.pte_high = ptep->pte_high;
smp_rmb();
if (unlikely(pte.pte_low != ptep->pte_low))
goto retry;
return pte;
#endif
}
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
* register pressure.
*/
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
u64 mask, result;
pte_t *ptep;
#ifdef CONFIG_X2TLB
result = _PAGE_PRESENT | _PAGE_EXT(_PAGE_EXT_KERN_READ | _PAGE_EXT_USER_READ);
if (write)
result |= _PAGE_EXT(_PAGE_EXT_KERN_WRITE | _PAGE_EXT_USER_WRITE);
#elif defined(CONFIG_SUPERH64)
result = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
if (write)
result |= _PAGE_WRITE;
#else
result = _PAGE_PRESENT | _PAGE_USER;
if (write)
result |= _PAGE_RW;
#endif
mask = result | _PAGE_SPECIAL;
ptep = pte_offset_map(&pmd, addr);
do {
pte_t pte = gup_get_pte(ptep);
struct page *page;
if ((pte_val(pte) & mask) != result) {
pte_unmap(ptep);
return 0;
}
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
page = pte_page(pte);
get_page(page);
__flush_anon_page(page, addr);
flush_dcache_page(page);
pages[*nr] = page;
(*nr)++;
} while (ptep++, addr += PAGE_SIZE, addr != end);
pte_unmap(ptep - 1);
return 1;
}
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pmd_t *pmdp;
pmdp = pmd_offset(&pud, addr);
do {
pmd_t pmd = *pmdp;
next = pmd_addr_end(addr, end);
if (pmd_none(pmd))
return 0;
if (!gup_pte_range(pmd, addr, next, write, pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
return 1;
}
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
pudp = pud_offset(&pgd, addr);
do {
pud_t pud = *pudp;
next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
if (!gup_pmd_range(pud, addr, next, write, pages, nr))
return 0;
} while (pudp++, addr = next, addr != end);
return 1;
}
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
* Note a difference with get_user_pages_fast: this always returns the
* number of pages pinned, 0 if no pages were pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
unsigned long flags;
pgd_t *pgdp;
int nr = 0;
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (unlikely(!access_ok((void __user *)start, len)))
return 0;
/*
* This doesn't prevent pagetable teardown, but does prevent
* the pagetables and pages from being freed.
*/
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = *pgdp;
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
return nr;
}
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
* @nr_pages: number of pages from start to pin
* @gup_flags: flags modifying pin behaviour
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long.
*
* Attempt to pin user pages in memory without taking mm->mmap_sem.
* If not successful, it will fall back to taking the lock and
* calling get_user_pages().
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* were pinned, returns -errno.
*/
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
pgd_t *pgdp;
int nr = 0;
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
if (end < start)
goto slow_irqon;
local_irq_disable();
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = *pgdp;
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
{
int ret;
slow:
local_irq_enable();
slow_irqon:
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(start,
(end - start) >> PAGE_SHIFT, pages,
gup_flags);
/* Have to be a bit careful with return values */
if (nr > 0) {
if (ret < 0)
ret = nr;
else
ret += nr;
}
return ret;
}
}

View File

@ -28,6 +28,7 @@ config SPARC
select RTC_DRV_M48T59
select RTC_SYSTOHC
select HAVE_ARCH_JUMP_LABEL if SPARC64
select HAVE_FAST_GUP if SPARC64
select GENERIC_IRQ_SHOW
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_PCI_IOMAP
@ -300,9 +301,6 @@ config NODES_SPAN_OTHER_NODES
def_bool y
depends on NEED_MULTIPLE_NODES
config ARCH_SELECT_MEMORY_MODEL
def_bool y if SPARC64
config ARCH_SPARSEMEM_ENABLE
def_bool y if SPARC64
select SPARSEMEM_VMEMMAP_ENABLE

View File

@ -864,6 +864,9 @@ static inline unsigned long pud_page_vaddr(pud_t pud)
#define pgd_present(pgd) (pgd_val(pgd) != 0U)
#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0UL)
/* only used by the stubbed out hugetlb gup code, should never be called */
#define pgd_page(pgd) NULL
static inline unsigned long pud_large(pud_t pud)
{
pte_t pte = __pte(pud_val(pud));
@ -1075,6 +1078,46 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
}
#define io_remap_pfn_range io_remap_pfn_range
static inline unsigned long untagged_addr(unsigned long start)
{
if (adi_capable()) {
long addr = start;
/* If userspace has passed a versioned address, kernel
* will not find it in the VMAs since it does not store
* the version tags in the list of VMAs. Storing version
* tags in list of VMAs is impractical since they can be
* changed any time from userspace without dropping into
* kernel. Any address search in VMAs will be done with
* non-versioned addresses. Ensure the ADI version bits
* are dropped here by sign extending the last bit before
* ADI bits. IOMMU does not implement version tags.
*/
return (addr << (long)adi_nbits()) >> (long)adi_nbits();
}
return start;
}
#define untagged_addr untagged_addr
static inline bool pte_access_permitted(pte_t pte, bool write)
{
u64 prot;
if (tlb_type == hypervisor) {
prot = _PAGE_PRESENT_4V | _PAGE_P_4V;
if (write)
prot |= _PAGE_WRITE_4V;
} else {
prot = _PAGE_PRESENT_4U | _PAGE_P_4U;
if (write)
prot |= _PAGE_WRITE_4U;
}
return (pte_val(pte) & (prot | _PAGE_SPECIAL)) == prot;
}
#define pte_access_permitted pte_access_permitted
#include <asm/tlbflush.h>
#include <asm-generic/pgtable.h>

View File

@ -5,7 +5,7 @@
asflags-y := -ansi
ccflags-y := -Werror
obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o gup.o
obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o
obj-y += fault_$(BITS).o
obj-y += init_$(BITS).o
obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o

View File

@ -1,340 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Lockless get_user_pages_fast for sparc, cribbed from powerpc
*
* Copyright (C) 2008 Nick Piggin
* Copyright (C) 2008 Novell Inc.
*/
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>
#include <asm/adi.h>
/*
* The performance critical leaf functions are made noinline otherwise gcc
* inlines everything into a single function which results in too much
* register pressure.
*/
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long mask, result;
pte_t *ptep;
if (tlb_type == hypervisor) {
result = _PAGE_PRESENT_4V|_PAGE_P_4V;
if (write)
result |= _PAGE_WRITE_4V;
} else {
result = _PAGE_PRESENT_4U|_PAGE_P_4U;
if (write)
result |= _PAGE_WRITE_4U;
}
mask = result | _PAGE_SPECIAL;
ptep = pte_offset_kernel(&pmd, addr);
do {
struct page *page, *head;
pte_t pte = *ptep;
if ((pte_val(pte) & mask) != result)
return 0;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
/* The hugepage case is simplified on sparc64 because
* we encode the sub-page pfn offsets into the
* hugepage PTEs. We could optimize this in the future
* use page_cache_add_speculative() for the hugepage case.
*/
page = pte_page(pte);
head = compound_head(page);
if (!page_cache_get_speculative(head))
return 0;
if (unlikely(pte_val(pte) != pte_val(*ptep))) {
put_page(head);
return 0;
}
pages[*nr] = page;
(*nr)++;
} while (ptep++, addr += PAGE_SIZE, addr != end);
return 1;
}
static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages,
int *nr)
{
struct page *head, *page;
int refs;
if (!(pmd_val(pmd) & _PAGE_VALID))
return 0;
if (write && !pmd_write(pmd))
return 0;
refs = 0;
page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
head = compound_head(page);
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
if (!page_cache_add_speculative(head, refs)) {
*nr -= refs;
return 0;
}
if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
*nr -= refs;
while (refs--)
put_page(head);
return 0;
}
return 1;
}
static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
unsigned long end, int write, struct page **pages,
int *nr)
{
struct page *head, *page;
int refs;
if (!(pud_val(pud) & _PAGE_VALID))
return 0;
if (write && !pud_write(pud))
return 0;
refs = 0;
page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
head = compound_head(page);
do {
VM_BUG_ON(compound_head(page) != head);
pages[*nr] = page;
(*nr)++;
page++;
refs++;
} while (addr += PAGE_SIZE, addr != end);
if (!page_cache_add_speculative(head, refs)) {
*nr -= refs;
return 0;
}
if (unlikely(pud_val(pud) != pud_val(*pudp))) {
*nr -= refs;
while (refs--)
put_page(head);
return 0;
}
return 1;
}
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pmd_t *pmdp;
pmdp = pmd_offset(&pud, addr);
do {
pmd_t pmd = *pmdp;
next = pmd_addr_end(addr, end);
if (pmd_none(pmd))
return 0;
if (unlikely(pmd_large(pmd))) {
if (!gup_huge_pmd(pmdp, pmd, addr, next,
write, pages, nr))
return 0;
} else if (!gup_pte_range(pmd, addr, next, write,
pages, nr))
return 0;
} while (pmdp++, addr = next, addr != end);
return 1;
}
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
pudp = pud_offset(&pgd, addr);
do {
pud_t pud = *pudp;
next = pud_addr_end(addr, end);
if (pud_none(pud))
return 0;
if (unlikely(pud_large(pud))) {
if (!gup_huge_pud(pudp, pud, addr, next,
write, pages, nr))
return 0;
} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
return 0;
} while (pudp++, addr = next, addr != end);
return 1;
}
/*
* Note a difference with get_user_pages_fast: this always returns the
* number of pages pinned, 0 if no pages were pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next, flags;
pgd_t *pgdp;
int nr = 0;
#ifdef CONFIG_SPARC64
if (adi_capable()) {
long addr = start;
/* If userspace has passed a versioned address, kernel
* will not find it in the VMAs since it does not store
* the version tags in the list of VMAs. Storing version
* tags in list of VMAs is impractical since they can be
* changed any time from userspace without dropping into
* kernel. Any address search in VMAs will be done with
* non-versioned addresses. Ensure the ADI version bits
* are dropped here by sign extending the last bit before
* ADI bits. IOMMU does not implement version tags.
*/
addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
start = addr;
}
#endif
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
local_irq_save(flags);
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = *pgdp;
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
return nr;
}
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
{
struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
unsigned long next;
pgd_t *pgdp;
int nr = 0;
#ifdef CONFIG_SPARC64
if (adi_capable()) {
long addr = start;
/* If userspace has passed a versioned address, kernel
* will not find it in the VMAs since it does not store
* the version tags in the list of VMAs. Storing version
* tags in list of VMAs is impractical since they can be
* changed any time from userspace without dropping into
* kernel. Any address search in VMAs will be done with
* non-versioned addresses. Ensure the ADI version bits
* are dropped here by sign extending the last bit before
* ADI bits. IOMMU does not implements version tags,
*/
addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
start = addr;
}
#endif
start &= PAGE_MASK;
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
/*
* XXX: batch / limit 'nr', to avoid large irq off latency
* needs some instrumenting to determine the common sizes used by
* important workloads (eg. DB2), and whether limiting the batch size
* will decrease performance.
*
* It seems like we're in the clear for the moment. Direct-IO is
* the main guy that batches up lots of get_user_pages, and even
* they are limited to 64-at-a-time which is not so many.
*/
/*
* This doesn't prevent pagetable teardown, but does prevent
* the pagetables from being freed on sparc.
*
* So long as we atomically load page table pointers versus teardown,
* we can follow the address down to the the page and take a ref on it.
*/
local_irq_disable();
pgdp = pgd_offset(mm, addr);
do {
pgd_t pgd = *pgdp;
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE,
pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
return nr;
{
int ret;
slow:
local_irq_enable();
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(start,
(end - start) >> PAGE_SHIFT, pages,
gup_flags);
/* Have to be a bit careful with return values */
if (nr > 0) {
if (ret < 0)
ret = nr;
else
ret += nr;
}
return ret;
}
}

View File

@ -10,6 +10,8 @@
#include <linux/mm.h>
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
@ -25,20 +27,6 @@
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
extern pgtable_t pte_alloc_one(struct mm_struct *);
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
free_page((unsigned long) pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
#define __pte_free_tlb(tlb,pte, address) \
do { \
pgtable_page_dtor(pte); \

View File

@ -208,28 +208,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
}
pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
return pte;
}
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_page(GFP_KERNEL|__GFP_ZERO);
if (!pte)
return NULL;
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
return NULL;
}
return pte;
}
#ifdef CONFIG_3_LEVEL_PGTABLES
pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
{

View File

@ -14,6 +14,10 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
#define __HAVE_ARCH_PTE_ALLOC_ONE
#include <asm-generic/pgalloc.h>
#define check_pgt_cache() do { } while (0)
#define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_PRESENT)
@ -25,17 +29,14 @@ extern void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd);
#define pgd_alloc(mm) get_pgd_slow(mm)
#define pgd_free(mm, pgd) free_pgd_slow(mm, pgd)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
/*
* Allocate one PTE table.
*/
static inline pte_t *
pte_alloc_one_kernel(struct mm_struct *mm)
{
pte_t *pte;
pte_t *pte = __pte_alloc_one_kernel(mm);
pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
clean_dcache_area(pte, PTRS_PER_PTE * sizeof(pte_t));
@ -47,35 +48,14 @@ pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_pages(PGALLOC_GFP, 0);
pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
if (!pte)
return NULL;
if (!PageHighMem(pte)) {
void *page = page_address(pte);
clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t));
}
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
}
if (!PageHighMem(pte))
clean_pte_table(page_address(pte));
return pte;
}
/*
* Free one PTE table.
*/
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
if (pte)
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
static inline void __pmd_populate(pmd_t *pmdp, unsigned long pmdval)
{
set_pmd(pmdp, __pmd(pmdval));

View File

@ -32,7 +32,7 @@
*/
#define GPIO_GEDR (PKUNITY_GPIO_BASE + 0x0018)
/*
* Sepcial Voltage Detect Reg GPIO_GPIR.
* Special Voltage Detect Reg GPIO_GPIR.
*/
#define GPIO_GPIR (PKUNITY_GPIO_BASE + 0x0020)

View File

@ -123,6 +123,7 @@ config X86
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
select GUP_GET_PTE_LOW_HIGH if X86_PAE
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
@ -158,6 +159,7 @@ config X86
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA
select HAVE_EXIT_THREAD
select HAVE_FAST_GUP
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
@ -2906,9 +2908,6 @@ config HAVE_ATOMIC_IOMAP
config X86_DEV_DMA_OPS
bool
config HAVE_GENERIC_GUP
def_bool y
source "drivers/firmware/Kconfig"
source "arch/x86/kvm/Kconfig"

View File

@ -358,7 +358,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(ptr_to_compat(&frame->uc), &frame->puc);
/* Create the ucontext. */
if (boot_cpu_has(X86_FEATURE_XSAVE))
if (static_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);

View File

@ -49,23 +49,8 @@
#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7))
/**
* set_bit - Atomically set a bit in memory
* @nr: the bit to set
* @addr: the address to start counting from
*
* This function is atomic and may not be reordered. See __set_bit()
* if you do not require the atomic guarantees.
*
* Note: there are no guarantees that this function will not be reordered
* on non x86 architectures, so if you are writing portable code,
* make sure not to rely on its reordering guarantees.
*
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
static __always_inline void
set_bit(long nr, volatile unsigned long *addr)
arch_set_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0"
@ -78,32 +63,14 @@ set_bit(long nr, volatile unsigned long *addr)
}
}
/**
* __set_bit - Set a bit in memory
* @nr: the bit to set
* @addr: the address to start counting from
*
* Unlike set_bit(), this function is non-atomic and may be reordered.
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch___set_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(bts) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
* clear_bit - Clears a bit in memory
* @nr: Bit to clear
* @addr: Address to start counting from
*
* clear_bit() is atomic and may not be reordered. However, it does
* not contain a memory barrier, so if it is used for locking purposes,
* you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
* in order to ensure changes are visible on other processors.
*/
static __always_inline void
clear_bit(long nr, volatile unsigned long *addr)
arch_clear_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0"
@ -115,26 +82,21 @@ clear_bit(long nr, volatile unsigned long *addr)
}
}
/*
* clear_bit_unlock - Clears a bit in memory
* @nr: Bit to clear
* @addr: Address to start counting from
*
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void
arch_clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
arch_clear_bit(nr, addr);
}
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch___clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btr) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
static __always_inline bool
arch_clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
bool negative;
asm volatile(LOCK_PREFIX "andb %2,%1"
@ -143,48 +105,23 @@ static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile
: "ir" ((char) ~(1 << nr)) : "memory");
return negative;
}
#define arch_clear_bit_unlock_is_negative_byte \
arch_clear_bit_unlock_is_negative_byte
// Let everybody know we have it
#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
/*
* __clear_bit_unlock - Clears a bit in memory
* @nr: Bit to clear
* @addr: Address to start counting from
*
* __clear_bit() is non-atomic and implies release semantics before the memory
* operation. It can be used for an unlock if no other CPUs can concurrently
* modify other bits in the word.
*/
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
static __always_inline void
arch___clear_bit_unlock(long nr, volatile unsigned long *addr)
{
__clear_bit(nr, addr);
arch___clear_bit(nr, addr);
}
/**
* __change_bit - Toggle a bit in memory
* @nr: the bit to change
* @addr: the address to start counting from
*
* Unlike change_bit(), this function is non-atomic and may be reordered.
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch___change_bit(long nr, volatile unsigned long *addr)
{
asm volatile(__ASM_SIZE(btc) " %1,%0" : : ADDR, "Ir" (nr) : "memory");
}
/**
* change_bit - Toggle a bit in memory
* @nr: Bit to change
* @addr: Address to start counting from
*
* change_bit() is atomic and may not be reordered.
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@ -196,42 +133,20 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
}
}
/**
* test_and_set_bit - Set a bit and return its old value
* @nr: Bit to set
* @addr: Address to count from
*
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline bool
arch_test_and_set_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
/**
* test_and_set_bit_lock - Set a bit and return its old value for lock
* @nr: Bit to set
* @addr: Address to count from
*
* This is the same as test_and_set_bit on x86.
*/
static __always_inline bool
test_and_set_bit_lock(long nr, volatile unsigned long *addr)
arch_test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
return test_and_set_bit(nr, addr);
return arch_test_and_set_bit(nr, addr);
}
/**
* __test_and_set_bit - Set a bit and return its old value
* @nr: Bit to set
* @addr: Address to count from
*
* This operation is non-atomic and can be reordered.
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
static __always_inline bool
arch___test_and_set_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@ -242,28 +157,13 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
return oldbit;
}
/**
* test_and_clear_bit - Clear a bit and return its old value
* @nr: Bit to clear
* @addr: Address to count from
*
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline bool
arch_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
/**
* __test_and_clear_bit - Clear a bit and return its old value
* @nr: Bit to clear
* @addr: Address to count from
*
* This operation is non-atomic and can be reordered.
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*
/*
* Note: the operation is performed atomically with respect to
* the local CPU, but not other CPUs. Portable code should not
* rely on this behaviour.
@ -271,7 +171,8 @@ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *
* accessed from a hypervisor on the same CPU if running in a VM: don't change
* this without also updating arch/x86/kernel/kvm.c
*/
static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
static __always_inline bool
arch___test_and_clear_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@ -282,8 +183,8 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
return oldbit;
}
/* WARNING: non atomic and it can be reordered! */
static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline bool
arch___test_and_change_bit(long nr, volatile unsigned long *addr)
{
bool oldbit;
@ -295,15 +196,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
return oldbit;
}
/**
* test_and_change_bit - Change a bit and return its old value
* @nr: Bit to change
* @addr: Address to count from
*
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
static __always_inline bool
arch_test_and_change_bit(long nr, volatile unsigned long *addr)
{
return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
@ -326,16 +220,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
return oldbit;
}
#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
* test_bit - Determine whether a bit is set
* @nr: bit number to test
* @addr: Address to start counting from
*/
static bool test_bit(int nr, const volatile unsigned long *addr);
#endif
#define test_bit(nr, addr) \
#define arch_test_bit(nr, addr) \
(__builtin_constant_p((nr)) \
? constant_test_bit((nr), (addr)) \
: variable_test_bit((nr), (addr)))
@ -504,6 +389,8 @@ static __always_inline int fls64(__u64 x)
#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops-instrumented.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic-setbit.h>

View File

@ -6,6 +6,9 @@
#include <linux/mm.h> /* for struct page */
#include <linux/pagemap.h>
#define __HAVE_ARCH_PTE_ALLOC_ONE
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
static inline int __paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
#ifdef CONFIG_PARAVIRT_XXL
@ -47,24 +50,8 @@ extern gfp_t __userpte_alloc_gfp;
extern pgd_t *pgd_alloc(struct mm_struct *);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
extern pte_t *pte_alloc_one_kernel(struct mm_struct *);
extern pgtable_t pte_alloc_one(struct mm_struct *);
/* Should really implement gc for free page table pages. This could be
done with a reference count in struct page. */
static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
{
BUG_ON((unsigned long)pte & (PAGE_SIZE-1));
free_page((unsigned long)pte);
}
static inline void pte_free(struct mm_struct *mm, struct page *pte)
{
pgtable_page_dtor(pte);
__free_page(pte);
}
extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,

View File

@ -285,53 +285,6 @@ static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
#define __pte_to_swp_entry(pte) (__swp_entry(__pteval_swp_type(pte), \
__pteval_swp_offset(pte)))
#define gup_get_pte gup_get_pte
/*
* WARNING: only to be used in the get_user_pages_fast() implementation.
*
* With get_user_pages_fast(), we walk down the pagetables without taking
* any locks. For this we would like to load the pointers atomically,
* but that is not possible (without expensive cmpxchg8b) on PAE. What
* we do have is the guarantee that a PTE will only either go from not
* present to present, or present to not present or both -- it will not
* switch to a completely different present page without a TLB flush in
* between; something that we are blocking by holding interrupts off.
*
* Setting ptes from not present to present goes:
*
* ptep->pte_high = h;
* smp_wmb();
* ptep->pte_low = l;
*
* And present to not present goes:
*
* ptep->pte_low = 0;
* smp_wmb();
* ptep->pte_high = 0;
*
* We must ensure here that the load of pte_low sees 'l' iff pte_high
* sees 'h'. We load pte_high *after* loading pte_low, which ensures we
* don't see an older value of pte_high. *Then* we recheck pte_low,
* which ensures that we haven't picked up a changed pte high. We might
* have gotten rubbish values from pte_low and pte_high, but we are
* guaranteed that pte_low will not have the present bit set *unless*
* it is 'l'. Because get_user_pages_fast() only operates on present ptes
* we're safe.
*/
static inline pte_t gup_get_pte(pte_t *ptep)
{
pte_t pte;
do {
pte.pte_low = ptep->pte_low;
smp_rmb();
pte.pte_high = ptep->pte_high;
smp_rmb();
} while (unlikely(pte.pte_low != ptep->pte_low));
return pte;
}
#include <asm/pgtable-invert.h>
#endif /* _ASM_X86_PGTABLE_3LEVEL_H */

View File

@ -259,14 +259,8 @@ extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
#define gup_fast_permitted gup_fast_permitted
static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
unsigned long len, end;
len = (unsigned long)nr_pages << PAGE_SHIFT;
end = start + len;
if (end < start)
return false;
if (end >> __VIRTUAL_MASK_SHIFT)
return false;
return true;

View File

@ -391,7 +391,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(&frame->uc, &frame->puc);
/* Create the ucontext. */
if (boot_cpu_has(X86_FEATURE_XSAVE))
if (static_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);

View File

@ -650,7 +650,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
/*
* The idea using the light way get the spte on x86_32 guest is from
* gup_get_pte(arch/x86/mm/gup.c).
* gup_get_pte (mm/gup.c).
*
* An spte tlb flush may be pending, because kvm_set_pte_rmapp
* coalesces them and we are running out of the MMU lock. Therefore

View File

@ -13,33 +13,17 @@ phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1;
EXPORT_SYMBOL(physical_mask);
#endif
#define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO)
#ifdef CONFIG_HIGHPTE
#define PGALLOC_USER_GFP __GFP_HIGHMEM
#define PGTABLE_HIGHMEM __GFP_HIGHMEM
#else
#define PGALLOC_USER_GFP 0
#define PGTABLE_HIGHMEM 0
#endif
gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
{
return (pte_t *)__get_free_page(PGALLOC_GFP & ~__GFP_ACCOUNT);
}
gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;
pgtable_t pte_alloc_one(struct mm_struct *mm)
{
struct page *pte;
pte = alloc_pages(__userpte_alloc_gfp, 0);
if (!pte)
return NULL;
if (!pgtable_page_ctor(pte)) {
__free_page(pte);
return NULL;
}
return pte;
return __pte_alloc_one(mm, __userpte_alloc_gfp);
}
static int __init setup_userpte(char *arg)
@ -235,7 +219,7 @@ static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
{
int i;
bool failed = false;
gfp_t gfp = PGALLOC_GFP;
gfp_t gfp = GFP_PGTABLE_USER;
if (mm == &init_mm)
gfp &= ~__GFP_ACCOUNT;
@ -399,14 +383,14 @@ static inline pgd_t *_pgd_alloc(void)
* We allocate one page for pgd.
*/
if (!SHARED_KERNEL_PMD)
return (pgd_t *)__get_free_pages(PGALLOC_GFP,
return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
PGD_ALLOCATION_ORDER);
/*
* Now PAE kernel is not running as a Xen domain. We can allocate
* a 32-byte slab for pgd to save memory space.
*/
return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
return kmem_cache_alloc(pgd_cache, GFP_PGTABLE_USER);
}
static inline void _pgd_free(pgd_t *pgd)
@ -424,7 +408,8 @@ void __init pgd_cache_init(void)
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
PGD_ALLOCATION_ORDER);
}
static inline void _pgd_free(pgd_t *pgd)

View File

@ -2700,8 +2700,7 @@ struct remap_data {
struct mmu_update *mmu_update;
};
static int remap_area_pfn_pte_fn(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
{
struct remap_data *rmd = data;
pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));

View File

@ -175,7 +175,7 @@ static const unsigned int r8a77470_crit_mod_clks[] __initconst = {
*---------------------------------------------------
* 0 0 20 x80 x78 x50
* 0 1 26 x60 x60 x56
* 1 0 Prohibitted setting
* 1 0 Prohibited setting
* 1 1 30 x52 x52 x50
*
* *1 : Table 7.4 indicates VCO output (PLL0 = VCO)

View File

@ -35,8 +35,7 @@ struct remap_pfn {
pgprot_t prot;
};
static int remap_pfn(pte_t *pte, pgtable_t token,
unsigned long addr, void *data)
static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
{
struct remap_pfn *r = data;

View File

@ -127,7 +127,7 @@ __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
res = (void *)pbundle->internal_buffer + pbundle->internal_used;
pbundle->internal_used =
ALIGN(new_used, sizeof(*pbundle->internal_buffer));
if (flags & __GFP_ZERO)
if (want_init_on_alloc(flags))
memset(res, 0, size);
return res;
}

View File

@ -121,6 +121,9 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(READ_AFTER_FREE),
CRASHTYPE(WRITE_BUDDY_AFTER_FREE),
CRASHTYPE(READ_BUDDY_AFTER_FREE),
CRASHTYPE(SLAB_FREE_DOUBLE),
CRASHTYPE(SLAB_FREE_CROSS),
CRASHTYPE(SLAB_FREE_PAGE),
CRASHTYPE(SOFTLOCKUP),
CRASHTYPE(HARDLOCKUP),
CRASHTYPE(SPINLOCKUP),
@ -427,6 +430,7 @@ static int __init lkdtm_module_init(void)
lkdtm_bugs_init(&recur_count);
lkdtm_perms_init();
lkdtm_usercopy_init();
lkdtm_heap_init();
/* Register debugfs interface */
lkdtm_debugfs_root = debugfs_create_dir("provoke-crash", NULL);
@ -473,6 +477,7 @@ static void __exit lkdtm_module_exit(void)
debugfs_remove_recursive(lkdtm_debugfs_root);
/* Handle test-specific clean-up. */
lkdtm_heap_exit();
lkdtm_usercopy_exit();
if (lkdtm_kprobe != NULL)

View File

@ -7,6 +7,10 @@
#include <linux/slab.h>
#include <linux/sched.h>
static struct kmem_cache *double_free_cache;
static struct kmem_cache *a_cache;
static struct kmem_cache *b_cache;
/*
* This tries to stay within the next largest power-of-2 kmalloc cache
* to avoid actually overwriting anything important if it's not detected
@ -146,3 +150,71 @@ void lkdtm_READ_BUDDY_AFTER_FREE(void)
kfree(val);
}
void lkdtm_SLAB_FREE_DOUBLE(void)
{
int *val;
val = kmem_cache_alloc(double_free_cache, GFP_KERNEL);
if (!val) {
pr_info("Unable to allocate double_free_cache memory.\n");
return;
}
/* Just make sure we got real memory. */
*val = 0x12345678;
pr_info("Attempting double slab free ...\n");
kmem_cache_free(double_free_cache, val);
kmem_cache_free(double_free_cache, val);
}
void lkdtm_SLAB_FREE_CROSS(void)
{
int *val;
val = kmem_cache_alloc(a_cache, GFP_KERNEL);
if (!val) {
pr_info("Unable to allocate a_cache memory.\n");
return;
}
/* Just make sure we got real memory. */
*val = 0x12345679;
pr_info("Attempting cross-cache slab free ...\n");
kmem_cache_free(b_cache, val);
}
void lkdtm_SLAB_FREE_PAGE(void)
{
unsigned long p = __get_free_page(GFP_KERNEL);
pr_info("Attempting non-Slab slab free ...\n");
kmem_cache_free(NULL, (void *)p);
free_page(p);
}
/*
* We have constructors to keep the caches distinctly separated without
* needing to boot with "slab_nomerge".
*/
static void ctor_double_free(void *region)
{ }
static void ctor_a(void *region)
{ }
static void ctor_b(void *region)
{ }
void __init lkdtm_heap_init(void)
{
double_free_cache = kmem_cache_create("lkdtm-heap-double_free",
64, 0, 0, ctor_double_free);
a_cache = kmem_cache_create("lkdtm-heap-a", 64, 0, 0, ctor_a);
b_cache = kmem_cache_create("lkdtm-heap-b", 64, 0, 0, ctor_b);
}
void __exit lkdtm_heap_exit(void)
{
kmem_cache_destroy(double_free_cache);
kmem_cache_destroy(a_cache);
kmem_cache_destroy(b_cache);
}

View File

@ -29,11 +29,16 @@ void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
void lkdtm_UNSET_SMEP(void);
/* lkdtm_heap.c */
void __init lkdtm_heap_init(void);
void __exit lkdtm_heap_exit(void);
void lkdtm_OVERWRITE_ALLOCATION(void);
void lkdtm_WRITE_AFTER_FREE(void);
void lkdtm_READ_AFTER_FREE(void);
void lkdtm_WRITE_BUDDY_AFTER_FREE(void);
void lkdtm_READ_BUDDY_AFTER_FREE(void);
void lkdtm_SLAB_FREE_DOUBLE(void);
void lkdtm_SLAB_FREE_CROSS(void);
void lkdtm_SLAB_FREE_PAGE(void);
/* lkdtm_perms.c */
void __init lkdtm_perms_init(void);

View File

@ -598,7 +598,7 @@ enum ht_channel_width {
HT_CHANNEL_WIDTH_MAX,
};
/* Ref: 802.11i sepc D10.0 7.3.2.25.1
/* Ref: 802.11i spec D10.0 7.3.2.25.1
* Cipher Suites Encryption Algorithms
*/
enum rt_enc_alg {

View File

@ -2965,7 +2965,7 @@ lpfc_stop_hba_timers(struct lpfc_hba *phba)
del_timer_sync(&phba->fcp_poll_timer);
break;
case LPFC_PCI_DEV_OC:
/* Stop any OneConnect device sepcific driver timers */
/* Stop any OneConnect device specific driver timers */
lpfc_sli4_stop_fcf_redisc_wait_timer(phba);
break;
default:

View File

@ -45,7 +45,7 @@ static u32 phy_CalculateBitShift(u32 BitMask)
/**
* Function: PHY_QueryBBReg
*
* OverView: Read "sepcific bits" from BB register
* OverView: Read "specific bits" from BB register
*
* Input:
* struct adapter * Adapter,

View File

@ -264,8 +264,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
/* ------------------------------------------------------------------ */
static int find_grant_ptes(pte_t *pte, pgtable_t token,
unsigned long addr, void *data)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
@ -292,8 +291,7 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token,
}
#ifdef CONFIG_X86
static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token,
unsigned long addr, void *data)
static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
{
set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
return 0;

View File

@ -731,8 +731,7 @@ struct remap_pfn {
unsigned long i;
};
static int remap_pfn_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data)
{
struct remap_pfn *r = data;
struct page *page = r->pages[r->i];
@ -966,8 +965,7 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
* on a per pfn/pte basis. Mapping calls that fail with ENOENT
* can be then retried until success.
*/
static int is_mapped_fn(pte_t *pte, struct page *pmd_page,
unsigned long addr, void *data)
static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
{
return pte_none(*pte) ? 0 : -EBUSY;
}

View File

@ -93,8 +93,7 @@ static void setup_hparams(unsigned long gfn, void *data)
info->fgfn++;
}
static int remap_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
static int remap_pte_fn(pte_t *ptep, unsigned long addr, void *data)
{
struct remap_data *info = data;
struct page *page = info->pages[info->index++];

View File

@ -35,8 +35,9 @@
* @page: structure to page
*
*/
static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
static int v9fs_fid_readpage(void *data, struct page *page)
{
struct p9_fid *fid = data;
struct inode *inode = page->mapping->host;
struct bio_vec bvec = {.bv_page = page, .bv_len = PAGE_SIZE};
struct iov_iter to;
@ -107,7 +108,8 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
return ret;
ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
ret = read_cache_pages(mapping, pages, v9fs_fid_readpage,
filp->private_data);
p9_debug(P9_DEBUG_VFS, " = %d\n", ret);
return ret;
}

View File

@ -109,9 +109,9 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
return ret;
}
int jffs2_do_readpage_unlock(struct inode *inode, struct page *pg)
int jffs2_do_readpage_unlock(void *data, struct page *pg)
{
int ret = jffs2_do_readpage_nolock(inode, pg);
int ret = jffs2_do_readpage_nolock(data, pg);
unlock_page(pg);
return ret;
}

View File

@ -687,7 +687,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
struct page *pg;
pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT,
(void *)jffs2_do_readpage_unlock, inode);
jffs2_do_readpage_unlock, inode);
if (IS_ERR(pg))
return (void *)pg;

View File

@ -155,7 +155,7 @@ extern const struct file_operations jffs2_file_operations;
extern const struct inode_operations jffs2_file_inode_operations;
extern const struct address_space_operations jffs2_file_address_operations;
int jffs2_fsync(struct file *, loff_t, loff_t, int);
int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
int jffs2_do_readpage_unlock(void *data, struct page *pg);
/* ioctl.c */
long jffs2_ioctl(struct file *, unsigned int, unsigned long);

View File

@ -288,10 +288,13 @@ struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
/*
* For queues with unlimited length lost events are not expected and
* can possibly have security implications. Avoid losing events when
* memory is short.
* memory is short. For the limited size queues, avoid OOM killer in the
* target monitoring memcg as it may have security repercussion.
*/
if (group->max_events == UINT_MAX)
gfp |= __GFP_NOFAIL;
else
gfp |= __GFP_RETRY_MAYFAIL;
/* Whoever is interested in the event, pays for the allocation. */
memalloc_use_memcg(group->memcg);

View File

@ -90,9 +90,13 @@ int inotify_handle_event(struct fsnotify_group *group,
i_mark = container_of(inode_mark, struct inotify_inode_mark,
fsn_mark);
/* Whoever is interested in the event, pays for the allocation. */
/*
* Whoever is interested in the event, pays for the allocation. Do not
* trigger OOM killer in the target monitoring memcg as it may have
* security repercussion.
*/
memalloc_use_memcg(group->memcg);
event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT);
event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
memalloc_unuse_memcg();
if (unlikely(!event)) {

View File

@ -6191,17 +6191,17 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
if (le16_to_cpu(tl->tl_used)) {
trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
*tl_copy = kmalloc(tl_bh->b_size, GFP_KERNEL);
/*
* Assuming the write-out below goes well, this copy will be
* passed back to recovery for processing.
*/
*tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
if (!(*tl_copy)) {
status = -ENOMEM;
mlog_errno(status);
goto bail;
}
/* Assuming the write-out below goes well, this copy
* will be passed back to recovery for processing. */
memcpy(*tl_copy, tl_bh->b_data, tl_bh->b_size);
/* All we need to do to clear the truncate log is set
* tl_used. */
tl->tl_used = 0;

View File

@ -242,57 +242,29 @@ static struct dentry *blockcheck_debugfs_create(const char *name,
static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
{
if (stats) {
debugfs_remove(stats->b_debug_check);
stats->b_debug_check = NULL;
debugfs_remove(stats->b_debug_failure);
stats->b_debug_failure = NULL;
debugfs_remove(stats->b_debug_recover);
stats->b_debug_recover = NULL;
debugfs_remove(stats->b_debug_dir);
debugfs_remove_recursive(stats->b_debug_dir);
stats->b_debug_dir = NULL;
}
}
static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
struct dentry *parent)
{
int rc = -EINVAL;
if (!stats)
goto out;
stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
if (!stats->b_debug_dir)
goto out;
stats->b_debug_check =
blockcheck_debugfs_create("blocks_checked",
stats->b_debug_dir,
blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir,
&stats->b_check_count);
stats->b_debug_failure =
blockcheck_debugfs_create("checksums_failed",
stats->b_debug_dir,
blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir,
&stats->b_failure_count);
stats->b_debug_recover =
blockcheck_debugfs_create("ecc_recoveries",
stats->b_debug_dir,
blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir,
&stats->b_recover_count);
if (stats->b_debug_check && stats->b_debug_failure &&
stats->b_debug_recover)
rc = 0;
out:
if (rc)
ocfs2_blockcheck_debug_remove(stats);
return rc;
}
#else
static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
struct dentry *parent)
{
return 0;
}
static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
@ -301,10 +273,10 @@ static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *
#endif /* CONFIG_DEBUG_FS */
/* Always-called wrappers for starting and stopping the debugfs files */
int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
struct dentry *parent)
{
return ocfs2_blockcheck_debug_install(stats, parent);
ocfs2_blockcheck_debug_install(stats, parent);
}
void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)

View File

@ -25,9 +25,6 @@ struct ocfs2_blockcheck_stats {
* ocfs2_blockcheck_stats_debugfs_install()
*/
struct dentry *b_debug_dir; /* Parent of the debugfs files */
struct dentry *b_debug_check; /* Exposes b_check_count */
struct dentry *b_debug_failure; /* Exposes b_failure_count */
struct dentry *b_debug_recover; /* Exposes b_recover_count */
};
@ -56,7 +53,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_blockcheck_stats *stats);
/* Debug Initialization */
int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
void ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
struct dentry *parent);
void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);

View File

@ -92,10 +92,6 @@ static struct o2hb_debug_buf *o2hb_db_failedregions;
#define O2HB_DEBUG_REGION_PINNED "pinned"
static struct dentry *o2hb_debug_dir;
static struct dentry *o2hb_debug_livenodes;
static struct dentry *o2hb_debug_liveregions;
static struct dentry *o2hb_debug_quorumregions;
static struct dentry *o2hb_debug_failedregions;
static LIST_HEAD(o2hb_all_regions);
@ -1184,7 +1180,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
if (atomic_read(&reg->hr_steady_iterations) != 0) {
if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) {
printk(KERN_NOTICE "o2hb: Unable to stabilize "
"heartbeart on region %s (%s)\n",
"heartbeat on region %s (%s)\n",
config_item_name(&reg->hr_item),
reg->hr_dev_name);
atomic_set(&reg->hr_steady_iterations, 0);
@ -1391,11 +1387,7 @@ static const struct file_operations o2hb_debug_fops = {
void o2hb_exit(void)
{
debugfs_remove(o2hb_debug_failedregions);
debugfs_remove(o2hb_debug_quorumregions);
debugfs_remove(o2hb_debug_liveregions);
debugfs_remove(o2hb_debug_livenodes);
debugfs_remove(o2hb_debug_dir);
debugfs_remove_recursive(o2hb_debug_dir);
kfree(o2hb_db_livenodes);
kfree(o2hb_db_liveregions);
kfree(o2hb_db_quorumregions);
@ -1419,79 +1411,37 @@ static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
&o2hb_debug_fops);
}
static int o2hb_debug_init(void)
static void o2hb_debug_init(void)
{
int ret = -ENOMEM;
o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
if (!o2hb_debug_dir) {
mlog_errno(ret);
goto bail;
}
o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES,
o2hb_debug_dir,
&o2hb_db_livenodes,
sizeof(*o2hb_db_livenodes),
O2HB_DB_TYPE_LIVENODES,
sizeof(o2hb_live_node_bitmap),
O2NM_MAX_NODES,
o2hb_live_node_bitmap);
if (!o2hb_debug_livenodes) {
mlog_errno(ret);
goto bail;
}
o2hb_debug_create(O2HB_DEBUG_LIVENODES, o2hb_debug_dir,
&o2hb_db_livenodes, sizeof(*o2hb_db_livenodes),
O2HB_DB_TYPE_LIVENODES, sizeof(o2hb_live_node_bitmap),
O2NM_MAX_NODES, o2hb_live_node_bitmap);
o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS,
o2hb_debug_dir,
&o2hb_db_liveregions,
sizeof(*o2hb_db_liveregions),
o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, o2hb_debug_dir,
&o2hb_db_liveregions, sizeof(*o2hb_db_liveregions),
O2HB_DB_TYPE_LIVEREGIONS,
sizeof(o2hb_live_region_bitmap),
O2NM_MAX_REGIONS,
sizeof(o2hb_live_region_bitmap), O2NM_MAX_REGIONS,
o2hb_live_region_bitmap);
if (!o2hb_debug_liveregions) {
mlog_errno(ret);
goto bail;
}
o2hb_debug_quorumregions =
o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS,
o2hb_debug_dir,
o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, o2hb_debug_dir,
&o2hb_db_quorumregions,
sizeof(*o2hb_db_quorumregions),
O2HB_DB_TYPE_QUORUMREGIONS,
sizeof(o2hb_quorum_region_bitmap),
O2NM_MAX_REGIONS,
sizeof(o2hb_quorum_region_bitmap), O2NM_MAX_REGIONS,
o2hb_quorum_region_bitmap);
if (!o2hb_debug_quorumregions) {
mlog_errno(ret);
goto bail;
}
o2hb_debug_failedregions =
o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS,
o2hb_debug_dir,
o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, o2hb_debug_dir,
&o2hb_db_failedregions,
sizeof(*o2hb_db_failedregions),
O2HB_DB_TYPE_FAILEDREGIONS,
sizeof(o2hb_failed_region_bitmap),
O2NM_MAX_REGIONS,
sizeof(o2hb_failed_region_bitmap), O2NM_MAX_REGIONS,
o2hb_failed_region_bitmap);
if (!o2hb_debug_failedregions) {
mlog_errno(ret);
goto bail;
}
ret = 0;
bail:
if (ret)
o2hb_exit();
return ret;
}
int o2hb_init(void)
void o2hb_init(void)
{
int i;
@ -1511,7 +1461,7 @@ int o2hb_init(void)
o2hb_dependent_users = 0;
return o2hb_debug_init();
o2hb_debug_init();
}
/* if we're already in a callback then we're already serialized by the sem */

View File

@ -63,7 +63,7 @@ void o2hb_unregister_callback(const char *region_uuid,
void o2hb_fill_node_map(unsigned long *map,
unsigned bytes);
void o2hb_exit(void);
int o2hb_init(void);
void o2hb_init(void);
int o2hb_check_node_heartbeating_no_sem(u8 node_num);
int o2hb_check_node_heartbeating_from_callback(u8 node_num);
void o2hb_stop_all_regions(void);

View File

@ -38,10 +38,6 @@
#define SHOW_SOCK_STATS 1
static struct dentry *o2net_dentry;
static struct dentry *sc_dentry;
static struct dentry *nst_dentry;
static struct dentry *stats_dentry;
static struct dentry *nodes_dentry;
static DEFINE_SPINLOCK(o2net_debug_lock);
@ -490,36 +486,23 @@ static const struct file_operations nodes_fops = {
void o2net_debugfs_exit(void)
{
debugfs_remove(nodes_dentry);
debugfs_remove(stats_dentry);
debugfs_remove(sc_dentry);
debugfs_remove(nst_dentry);
debugfs_remove(o2net_dentry);
debugfs_remove_recursive(o2net_dentry);
}
int o2net_debugfs_init(void)
void o2net_debugfs_init(void)
{
umode_t mode = S_IFREG|S_IRUSR;
o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
if (o2net_dentry)
nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode,
o2net_dentry, NULL, &nst_seq_fops);
if (nst_dentry)
sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode,
o2net_dentry, NULL, &sc_seq_fops);
if (sc_dentry)
stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode,
o2net_dentry, NULL, &stats_seq_fops);
if (stats_dentry)
nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode,
o2net_dentry, NULL, &nodes_fops);
if (nodes_dentry)
return 0;
o2net_debugfs_exit();
mlog_errno(-ENOMEM);
return -ENOMEM;
debugfs_create_file(NST_DEBUG_NAME, mode, o2net_dentry, NULL,
&nst_seq_fops);
debugfs_create_file(SC_DEBUG_NAME, mode, o2net_dentry, NULL,
&sc_seq_fops);
debugfs_create_file(STATS_DEBUG_NAME, mode, o2net_dentry, NULL,
&stats_seq_fops);
debugfs_create_file(NODES_DEBUG_NAME, mode, o2net_dentry, NULL,
&nodes_fops);
}
#endif /* CONFIG_DEBUG_FS */

View File

@ -828,9 +828,7 @@ static int __init init_o2nm(void)
{
int ret = -1;
ret = o2hb_init();
if (ret)
goto out;
o2hb_init();
ret = o2net_init();
if (ret)

View File

@ -76,7 +76,7 @@ static void o2quo_fence_self(void)
};
}
/* Indicate that a timeout occurred on a hearbeat region write. The
/* Indicate that a timeout occurred on a heartbeat region write. The
* other nodes in the cluster may consider us dead at that time so we
* want to "fence" ourselves so that we don't scribble on the disk
* after they think they've recovered us. This can't solve all

View File

@ -1762,7 +1762,7 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
(msecs_to_jiffies(o2net_reconnect_delay()) + 1);
if (node_num != o2nm_this_node()) {
/* believe it or not, accept and node hearbeating testing
/* believe it or not, accept and node heartbeating testing
* can succeed for this node before we got here.. so
* only use set_nn_state to clear the persistent error
* if that hasn't already happened */
@ -2129,8 +2129,7 @@ int o2net_init(void)
o2quo_init();
if (o2net_debugfs_init())
goto out;
o2net_debugfs_init();
o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL);
o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL);

View File

@ -109,16 +109,15 @@ struct o2net_send_tracking;
struct o2net_sock_container;
#ifdef CONFIG_DEBUG_FS
int o2net_debugfs_init(void);
void o2net_debugfs_init(void);
void o2net_debugfs_exit(void);
void o2net_debug_add_nst(struct o2net_send_tracking *nst);
void o2net_debug_del_nst(struct o2net_send_tracking *nst);
void o2net_debug_add_sc(struct o2net_sock_container *sc);
void o2net_debug_del_sc(struct o2net_sock_container *sc);
#else
static inline int o2net_debugfs_init(void)
static inline void o2net_debugfs_init(void)
{
return 0;
}
static inline void o2net_debugfs_exit(void)
{

View File

@ -851,7 +851,7 @@ static const struct file_operations debug_state_fops = {
/* end - debug state funcs */
/* files in subroot */
int dlm_debug_init(struct dlm_ctxt *dlm)
void dlm_debug_init(struct dlm_ctxt *dlm)
{
struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt;
@ -860,10 +860,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_state_fops);
if (!dc->debug_state_dentry) {
mlog_errno(-ENOMEM);
goto bail;
}
/* for dumping lockres */
dc->debug_lockres_dentry =
@ -871,20 +867,12 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_lockres_fops);
if (!dc->debug_lockres_dentry) {
mlog_errno(-ENOMEM);
goto bail;
}
/* for dumping mles */
dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE,
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_mle_fops);
if (!dc->debug_mle_dentry) {
mlog_errno(-ENOMEM);
goto bail;
}
/* for dumping lockres on the purge list */
dc->debug_purgelist_dentry =
@ -892,15 +880,6 @@ int dlm_debug_init(struct dlm_ctxt *dlm)
S_IFREG|S_IRUSR,
dlm->dlm_debugfs_subroot,
dlm, &debug_purgelist_fops);
if (!dc->debug_purgelist_dentry) {
mlog_errno(-ENOMEM);
goto bail;
}
return 0;
bail:
return -ENOMEM;
}
void dlm_debug_shutdown(struct dlm_ctxt *dlm)
@ -920,24 +899,16 @@ void dlm_debug_shutdown(struct dlm_ctxt *dlm)
/* subroot - domain dir */
int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
{
dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
dlm_debugfs_root);
if (!dlm->dlm_debugfs_subroot) {
mlog_errno(-ENOMEM);
goto bail;
}
dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt),
GFP_KERNEL);
if (!dlm->dlm_debug_ctxt) {
mlog_errno(-ENOMEM);
goto bail;
return -ENOMEM;
}
dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name,
dlm_debugfs_root);
return 0;
bail:
dlm_destroy_debugfs_subroot(dlm);
return -ENOMEM;
}
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
@ -946,14 +917,9 @@ void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
}
/* debugfs root */
int dlm_create_debugfs_root(void)
void dlm_create_debugfs_root(void)
{
dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL);
if (!dlm_debugfs_root) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
return 0;
}
void dlm_destroy_debugfs_root(void)

View File

@ -28,20 +28,19 @@ struct debug_lockres {
struct dlm_lock_resource *dl_res;
};
int dlm_debug_init(struct dlm_ctxt *dlm);
void dlm_debug_init(struct dlm_ctxt *dlm);
void dlm_debug_shutdown(struct dlm_ctxt *dlm);
int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm);
void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm);
int dlm_create_debugfs_root(void);
void dlm_create_debugfs_root(void);
void dlm_destroy_debugfs_root(void);
#else
static inline int dlm_debug_init(struct dlm_ctxt *dlm)
static inline void dlm_debug_init(struct dlm_ctxt *dlm)
{
return 0;
}
static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm)
{
@ -53,9 +52,8 @@ static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm)
static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm)
{
}
static inline int dlm_create_debugfs_root(void)
static inline void dlm_create_debugfs_root(void)
{
return 0;
}
static inline void dlm_destroy_debugfs_root(void)
{

View File

@ -1881,11 +1881,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
goto bail;
}
status = dlm_debug_init(dlm);
if (status < 0) {
mlog_errno(status);
goto bail;
}
dlm_debug_init(dlm);
snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
@ -2346,9 +2342,7 @@ static int __init dlm_init(void)
goto error;
}
status = dlm_create_debugfs_root();
if (status)
goto error;
dlm_create_debugfs_root();
return 0;
error:

View File

@ -2161,7 +2161,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
* think that $RECOVERY is currently mastered by a dead node. If so,
* we wait a short time to allow that node to get notified by its own
* heartbeat stack, then check again. All $RECOVERY lock resources
* mastered by dead nodes are purged when the hearbeat callback is
* mastered by dead nodes are purged when the heartbeat callback is
* fired, so we can know for sure that it is safe to continue once
* the node returns a live node or no node. */
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,

View File

@ -1109,7 +1109,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
{
u64 mig_cookie = be64_to_cpu(mres->mig_cookie);
int mres_total_locks = be32_to_cpu(mres->total_locks);
int sz, ret = 0, status = 0;
int ret = 0, status = 0;
u8 orig_flags = mres->flags,
orig_master = mres->master;
@ -1117,9 +1117,6 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
if (!mres->num_locks)
return 0;
sz = sizeof(struct dlm_migratable_lockres) +
(mres->num_locks * sizeof(struct dlm_migratable_lock));
/* add an all-done flag if we reached the last lock */
orig_flags = mres->flags;
BUG_ON(total_locks > mres_total_locks);
@ -1133,7 +1130,8 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
/* send it */
ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
sz, send_to, &status);
struct_size(mres, ml, mres->num_locks),
send_to, &status);
if (ret < 0) {
/* XXX: negative status is not handled.
* this will end up killing this node. */

View File

@ -426,6 +426,7 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
static void ocfs2_init_lock_stats(struct ocfs2_lock_res *res)
{
res->l_lock_refresh = 0;
res->l_lock_wait = 0;
memset(&res->l_lock_prmode, 0, sizeof(struct ocfs2_lock_stats));
memset(&res->l_lock_exmode, 0, sizeof(struct ocfs2_lock_stats));
}
@ -460,6 +461,8 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level,
if (ret)
stats->ls_fail++;
stats->ls_last = ktime_to_us(ktime_get_real());
}
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
@ -467,6 +470,21 @@ static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
lockres->l_lock_refresh++;
}
static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
{
struct ocfs2_mask_waiter *mw;
if (list_empty(&lockres->l_mask_waiters)) {
lockres->l_lock_wait = 0;
return;
}
mw = list_first_entry(&lockres->l_mask_waiters,
struct ocfs2_mask_waiter, mw_item);
lockres->l_lock_wait =
ktime_to_us(ktime_mono_to_real(mw->mw_lock_start));
}
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
mw->mw_lock_start = ktime_get();
@ -482,6 +500,9 @@ static inline void ocfs2_update_lock_stats(struct ocfs2_lock_res *res,
static inline void ocfs2_track_lock_refresh(struct ocfs2_lock_res *lockres)
{
}
static inline void ocfs2_track_lock_wait(struct ocfs2_lock_res *lockres)
{
}
static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
{
}
@ -875,6 +896,7 @@ static void lockres_set_flags(struct ocfs2_lock_res *lockres,
list_del_init(&mw->mw_item);
mw->mw_status = 0;
complete(&mw->mw_complete);
ocfs2_track_lock_wait(lockres);
}
}
static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
@ -1386,6 +1408,7 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
mw->mw_mask = mask;
mw->mw_goal = goal;
ocfs2_track_lock_wait(lockres);
}
/* returns 0 if the mw that was removed was already satisfied, -EBUSY
@ -1402,6 +1425,7 @@ static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
list_del_init(&mw->mw_item);
init_completion(&mw->mw_complete);
ocfs2_track_lock_wait(lockres);
}
return ret;
@ -2989,6 +3013,8 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void)
kref_init(&dlm_debug->d_refcnt);
INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking);
dlm_debug->d_locking_state = NULL;
dlm_debug->d_locking_filter = NULL;
dlm_debug->d_filter_secs = 0;
out:
return dlm_debug;
}
@ -3079,17 +3105,43 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
* - Lock stats printed
* New in version 3
* - Max time in lock stats is in usecs (instead of nsecs)
* New in version 4
* - Add last pr/ex unlock times and first lock wait time in usecs
*/
#define OCFS2_DLM_DEBUG_STR_VERSION 3
#define OCFS2_DLM_DEBUG_STR_VERSION 4
static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
{
int i;
char *lvb;
struct ocfs2_lock_res *lockres = v;
#ifdef CONFIG_OCFS2_FS_STATS
u64 now, last;
struct ocfs2_dlm_debug *dlm_debug =
((struct ocfs2_dlm_seq_priv *)m->private)->p_dlm_debug;
#endif
if (!lockres)
return -EINVAL;
#ifdef CONFIG_OCFS2_FS_STATS
if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) {
now = ktime_to_us(ktime_get_real());
if (lockres->l_lock_prmode.ls_last >
lockres->l_lock_exmode.ls_last)
last = lockres->l_lock_prmode.ls_last;
else
last = lockres->l_lock_exmode.ls_last;
/*
* Use d_filter_secs field to filter lock resources dump,
* the default d_filter_secs(0) value filters nothing,
* otherwise, only dump the last N seconds active lock
* resources.
*/
if (div_u64(now - last, 1000000) > dlm_debug->d_filter_secs)
return 0;
}
#endif
seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
@ -3131,6 +3183,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
# define lock_max_prmode(_l) ((_l)->l_lock_prmode.ls_max)
# define lock_max_exmode(_l) ((_l)->l_lock_exmode.ls_max)
# define lock_refresh(_l) ((_l)->l_lock_refresh)
# define lock_last_prmode(_l) ((_l)->l_lock_prmode.ls_last)
# define lock_last_exmode(_l) ((_l)->l_lock_exmode.ls_last)
# define lock_wait(_l) ((_l)->l_lock_wait)
#else
# define lock_num_prmode(_l) (0)
# define lock_num_exmode(_l) (0)
@ -3141,6 +3196,9 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
# define lock_max_prmode(_l) (0)
# define lock_max_exmode(_l) (0)
# define lock_refresh(_l) (0)
# define lock_last_prmode(_l) (0ULL)
# define lock_last_exmode(_l) (0ULL)
# define lock_wait(_l) (0ULL)
#endif
/* The following seq_print was added in version 2 of this output */
seq_printf(m, "%u\t"
@ -3151,7 +3209,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
"%llu\t"
"%u\t"
"%u\t"
"%u\t",
"%u\t"
"%llu\t"
"%llu\t"
"%llu\t",
lock_num_prmode(lockres),
lock_num_exmode(lockres),
lock_num_prmode_failed(lockres),
@ -3160,7 +3221,10 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
lock_total_exmode(lockres),
lock_max_prmode(lockres),
lock_max_exmode(lockres),
lock_refresh(lockres));
lock_refresh(lockres),
lock_last_prmode(lockres),
lock_last_exmode(lockres),
lock_wait(lockres));
/* End the line */
seq_printf(m, "\n");
@ -3214,9 +3278,8 @@ static const struct file_operations ocfs2_dlm_debug_fops = {
.llseek = seq_lseek,
};
static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
{
int ret = 0;
struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug;
dlm_debug->d_locking_state = debugfs_create_file("locking_state",
@ -3224,16 +3287,11 @@ static int ocfs2_dlm_init_debug(struct ocfs2_super *osb)
osb->osb_debug_root,
osb,
&ocfs2_dlm_debug_fops);
if (!dlm_debug->d_locking_state) {
ret = -EINVAL;
mlog(ML_ERROR,
"Unable to create locking state debugfs file.\n");
goto out;
}
ocfs2_get_dlm_debug(dlm_debug);
out:
return ret;
dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter",
0600,
osb->osb_debug_root,
&dlm_debug->d_filter_secs);
}
static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
@ -3242,6 +3300,7 @@ static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
if (dlm_debug) {
debugfs_remove(dlm_debug->d_locking_state);
debugfs_remove(dlm_debug->d_locking_filter);
ocfs2_put_dlm_debug(dlm_debug);
}
}
@ -3256,11 +3315,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
goto local;
}
status = ocfs2_dlm_init_debug(osb);
if (status < 0) {
mlog_errno(status);
goto bail;
}
ocfs2_dlm_init_debug(osb);
/* launch downconvert thread */
osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc-%s",
@ -4352,7 +4407,6 @@ static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb)
static int ocfs2_downconvert_thread(void *arg)
{
int status = 0;
struct ocfs2_super *osb = arg;
/* only quit once we've been asked to stop and there is no more
@ -4370,7 +4424,7 @@ static int ocfs2_downconvert_thread(void *arg)
}
osb->dc_task = NULL;
return status;
return 0;
}
void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb)

Some files were not shown because too many files have changed in this diff Show More