Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
 "A large amount of MM, plenty more to come.

  Subsystems affected by this patch series:

   - tools
   - kthread
   - kbuild
   - scripts
   - ocfs2
   - vfs
   - mm: slub, kmemleak, pagecache, gup, swap, memcg, pagemap, mremap,
     sparsemem, kasan, pagealloc, vmscan, compaction, mempolicy,
     hugetlbfs, hugetlb"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (155 commits)
  include/linux/huge_mm.h: check PageTail in hpage_nr_pages even when !THP
  mm/hugetlb: fix build failure with HUGETLB_PAGE but not HUGEBTLBFS
  selftests/vm: fix map_hugetlb length used for testing read and write
  mm/hugetlb: remove unnecessary memory fetch in PageHeadHuge()
  mm/hugetlb.c: clean code by removing unnecessary initialization
  hugetlb_cgroup: add hugetlb_cgroup reservation docs
  hugetlb_cgroup: add hugetlb_cgroup reservation tests
  hugetlb: support file_region coalescing again
  hugetlb_cgroup: support noreserve mappings
  hugetlb_cgroup: add accounting for shared mappings
  hugetlb: disable region_add file_region coalescing
  hugetlb_cgroup: add reservation accounting for private mappings
  mm/hugetlb_cgroup: fix hugetlb_cgroup migration
  hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations
  hugetlb_cgroup: add hugetlb_cgroup reservation counter
  hugetlbfs: Use i_mmap_rwsem to address page fault/truncate race
  hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization
  mm/memblock.c: remove redundant assignment to variable max_addr
  mm: mempolicy: require at least one nodeid for MPOL_PREFERRED
  mm: mempolicy: use VM_BUG_ON_VMA in queue_pages_test_walk()
  ...
commit 6cad420cc6

@@ -2,13 +2,6 @@
HugeTLB Controller
==================

The HugeTLB controller allows to limit the HugeTLB usage per control group and
enforces the controller limit during page fault. Since HugeTLB doesn't
support page reclaim, enforcing the limit at page fault time implies that,
the application will get SIGBUS signal if it tries to access HugeTLB pages
beyond its limit. This requires the application to know beforehand how much
HugeTLB pages it would require for its use.

HugeTLB controller can be created by first mounting the cgroup filesystem.

  # mount -t cgroup -o hugetlb none /sys/fs/cgroup
@@ -28,10 +21,14 @@ process (bash) into it.

Brief summary of control files::

  hugetlb.<hugepagesize>.limit_in_bytes            # set/show limit of "hugepagesize" hugetlb usage
  hugetlb.<hugepagesize>.max_usage_in_bytes        # show max "hugepagesize" hugetlb usage recorded
  hugetlb.<hugepagesize>.usage_in_bytes            # show current usage for "hugepagesize" hugetlb
  hugetlb.<hugepagesize>.failcnt                   # show the number of allocation failure due to HugeTLB limit
  hugetlb.<hugepagesize>.rsvd.limit_in_bytes       # set/show limit of "hugepagesize" hugetlb reservations
  hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes   # show max "hugepagesize" hugetlb reservations and no-reserve faults
  hugetlb.<hugepagesize>.rsvd.usage_in_bytes       # show current reservations and no-reserve faults for "hugepagesize" hugetlb
  hugetlb.<hugepagesize>.rsvd.failcnt              # show the number of allocation failure due to HugeTLB reservation limit
  hugetlb.<hugepagesize>.limit_in_bytes            # set/show limit of "hugepagesize" hugetlb faults
  hugetlb.<hugepagesize>.max_usage_in_bytes        # show max "hugepagesize" hugetlb usage recorded
  hugetlb.<hugepagesize>.usage_in_bytes            # show current usage for "hugepagesize" hugetlb
  hugetlb.<hugepagesize>.failcnt                   # show the number of allocation failure due to HugeTLB usage limit

For a system supporting three hugepage sizes (64k, 32M and 1G), the control
files include::
@@ -40,11 +37,95 @@ files include::

  hugetlb.1GB.max_usage_in_bytes
  hugetlb.1GB.usage_in_bytes
  hugetlb.1GB.failcnt
  hugetlb.1GB.rsvd.limit_in_bytes
  hugetlb.1GB.rsvd.max_usage_in_bytes
  hugetlb.1GB.rsvd.usage_in_bytes
  hugetlb.1GB.rsvd.failcnt
  hugetlb.64KB.limit_in_bytes
  hugetlb.64KB.max_usage_in_bytes
  hugetlb.64KB.usage_in_bytes
  hugetlb.64KB.failcnt
  hugetlb.64KB.rsvd.limit_in_bytes
  hugetlb.64KB.rsvd.max_usage_in_bytes
  hugetlb.64KB.rsvd.usage_in_bytes
  hugetlb.64KB.rsvd.failcnt
  hugetlb.32MB.limit_in_bytes
  hugetlb.32MB.max_usage_in_bytes
  hugetlb.32MB.usage_in_bytes
  hugetlb.32MB.failcnt
  hugetlb.32MB.rsvd.limit_in_bytes
  hugetlb.32MB.rsvd.max_usage_in_bytes
  hugetlb.32MB.rsvd.usage_in_bytes
  hugetlb.32MB.rsvd.failcnt


1. Page fault accounting

  hugetlb.<hugepagesize>.limit_in_bytes
  hugetlb.<hugepagesize>.max_usage_in_bytes
  hugetlb.<hugepagesize>.usage_in_bytes
  hugetlb.<hugepagesize>.failcnt

The HugeTLB controller allows users to limit the HugeTLB usage (page fault) per
control group and enforces the limit during page fault. Since HugeTLB
doesn't support page reclaim, enforcing the limit at page fault time implies
that the application will get a SIGBUS signal if it tries to fault in HugeTLB
pages beyond its limit. Therefore the application needs to know exactly how many
HugeTLB pages it uses beforehand, and the sysadmin needs to make sure that
there are enough available on the machine for all the users to avoid processes
getting SIGBUS.


2. Reservation accounting

  hugetlb.<hugepagesize>.rsvd.limit_in_bytes
  hugetlb.<hugepagesize>.rsvd.max_usage_in_bytes
  hugetlb.<hugepagesize>.rsvd.usage_in_bytes
  hugetlb.<hugepagesize>.rsvd.failcnt

The HugeTLB controller allows users to limit the HugeTLB reservations per control
group and enforces the controller limit at reservation time, and at fault time for
HugeTLB memory for which no reservation exists. Since reservation limits are
enforced at reservation time (on mmap or shmget), reservation limits never cause
the application to get a SIGBUS signal if the memory was reserved beforehand. For
MAP_NORESERVE allocations, the reservation limit behaves the same as the fault
limit, enforcing memory usage at fault time and causing the application to
receive a SIGBUS if it crosses its limit.

Reservation limits are superior to the page fault limits described above, since
reservation limits are enforced at reservation time (on mmap or shmget), and
never cause the application to get a SIGBUS signal if the memory was reserved
beforehand. This allows for easier fallback to alternatives such as
non-HugeTLB memory. In the case of page fault accounting, it's very
hard to avoid processes getting SIGBUS since the sysadmin needs to know precisely
the HugeTLB usage of all the tasks in the system and make sure there are enough
pages to satisfy all requests. Avoiding tasks getting SIGBUS on overcommitted
systems is practically impossible with page fault accounting.


3. Caveats with shared memory

For shared HugeTLB memory, both HugeTLB reservation and page faults are charged
to the first task that causes the memory to be reserved or faulted, and all
subsequent uses of this reserved or faulted memory are done without charging.

Shared HugeTLB memory is only uncharged when it is unreserved or deallocated.
This is usually when the HugeTLB file is deleted, and not when the task that
caused the reservation or fault has exited.


4. Caveats with HugeTLB cgroup offline

When a HugeTLB cgroup goes offline with some reservations or faults still
charged to it, the behavior is as follows:

  - the fault charges are charged to the parent HugeTLB cgroup (reparented),
  - the reservation charges remain on the offline HugeTLB cgroup.

This means that if a HugeTLB cgroup gets offlined while there are still HugeTLB
reservations charged to it, that cgroup persists as a zombie until all HugeTLB
reservations are uncharged. HugeTLB reservations behave in this manner to match
the memory controller, whose cgroups also persist as zombies until all charged
memory is uncharged. Also, the tracking of HugeTLB reservations is a bit more
complex compared to the tracking of HugeTLB faults, so it is significantly
harder to reparent reservations at offline time.
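
As a minimal usage sketch (the cgroup name "app" and the 2MB hugepage size are
illustrative only, and the paths assume the mount command shown earlier)::

  mkdir /sys/fs/cgroup/app
  # Cap reservations at 10 x 2MB huge pages for this group.
  echo $((10 * 2 * 1024 * 1024)) > /sys/fs/cgroup/app/hugetlb.2MB.rsvd.limit_in_bytes
  # Move the current shell into the group, then watch its reservation usage.
  echo $$ > /sys/fs/cgroup/app/tasks
  cat /sys/fs/cgroup/app/hugetlb.2MB.rsvd.usage_in_bytes
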
@@ -188,6 +188,17 @@ cgroup v2 currently supports the following mount options.
        modified through remount from the init namespace. The mount
        option is ignored on non-init namespace mounts.

  memory_recursiveprot
        Recursively apply memory.min and memory.low protection to
        entire subtrees, without requiring explicit downward
        propagation into leaf cgroups. This allows protecting entire
        subtrees from one another, while retaining free competition
        within those subtrees. This should have been the default
        behavior but is a mount-option to avoid regressing setups
        relying on the original semantics (e.g. specifying bogusly
        high 'bypass' protection values at higher tree levels).

Organizing Processes and Threads
--------------------------------

@@ -128,6 +128,9 @@ allowed to examine the unevictable lru (mlocked pages) for pages to compact.
This should be used on systems where stalls for minor page faults are an
acceptable trade for large contiguous free memory. Set to 0 to prevent
compaction from moving pages that are unevictable. Default value is 1.
On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault due
to compaction, which would block the task from becoming active until the fault
is resolved.

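A quick way to toggle this from userspace (illustrative; the setting is the
vm.compact_unevictable_allowed sysctl that this entry documents)::

  # keep compaction away from mlocked pages on a latency-sensitive system
  sysctl -w vm.compact_unevictable_allowed=0
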

dirty_background_bytes

@@ -73,6 +73,9 @@ File Mapping and Page Cache

.. kernel-doc:: mm/truncate.c
   :export:

.. kernel-doc:: include/linux/pagemap.h
   :internal:

Memory pools
============

@@ -52,8 +52,22 @@ Which flags are set by each wrapper

For these pin_user_pages*() functions, FOLL_PIN is OR'd in with whatever gup
flags the caller provides. The caller is required to pass in a non-null struct
pages* array, and the function then pin pages by incrementing each by a special
value. For now, that value is +1, just like get_user_pages*().::
pages* array, and the function then pins pages by incrementing each by a special
value: GUP_PIN_COUNTING_BIAS.

For huge pages (and in fact, any compound page of more than 2 pages), the
GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting
is achieved, by using the 3rd struct page in the compound page. A new struct
page field, hpage_pinned_refcount, has been added in order to support this.

This approach for compound pages avoids the counting upper limit problems that
are discussed below. Those limitations would have been aggravated severely by
huge pages, because each tail page adds a refcount to the head page. And in
fact, testing revealed that, without a separate hpage_pinned_refcount field,
page overflows were seen in some huge page stress tests.

This also means that huge pages and compound pages (of order > 1) do not suffer
from the false positives problem that is mentioned below.::

Function
--------
@@ -99,27 +113,6 @@ pages:
This also leads to limitations: there are only 31-10==21 bits available for a
counter that increments 10 bits at a time.

TODO: for 1GB and larger huge pages, this is cutting it close. That's because
when pin_user_pages() follows such pages, it increments the head page by "1"
(where "1" used to mean "+1" for get_user_pages(), but now means "+1024" for
pin_user_pages()) for each tail page. So if you have a 1GB huge page:

* There are 256K (18 bits) worth of 4 KB tail pages.
* There are 21 bits available to count up via GUP_PIN_COUNTING_BIAS (that is,
  10 bits at a time)
* There are 21 - 18 == 3 bits available to count. Except that there aren't,
  because you need to allow for a few normal get_page() calls on the head page,
  as well. Fortunately, the approach of using addition, rather than "hard"
  bitfields, within page->_refcount, allows for sharing these bits gracefully.
  But we're still looking at about 8 references.

This, however, is a missing feature more than anything else, because it's easily
solved by addressing an obvious inefficiency in the original get_user_pages()
approach of retrieving pages: stop treating all the pages as if they were
PAGE_SIZE. Retrieve huge pages as huge pages. The callers need to be aware of
this, so some work is required. Once that's in place, this limitation mostly
disappears from view, because there will be ample refcounting range available.

* Callers must specifically request "dma-pinned tracking of pages". In other
  words, just calling get_user_pages() will not suffice; a new set of functions,
  pin_user_page() and related, must be used.
@@ -173,8 +166,8 @@ CASE 4: Pinning for struct page manipulation only
-------------------------------------------------
Here, normal GUP calls are sufficient, so neither flag needs to be set.

page_dma_pinned(): the whole point of pinning
=============================================
page_maybe_dma_pinned(): the whole point of pinning
===================================================

The whole point of marking pages as "DMA-pinned" or "gup-pinned" is to be able
to query, "is this page DMA-pinned?" That allows code such as page_mkclean()

@@ -186,7 +179,7 @@ and debates (see the References at the end of this document). It's a TODO item
here: fill in the details once that's worked out. Meanwhile, it's safe to say
that having this available: ::

    static inline bool page_dma_pinned(struct page *page)
    static inline bool page_maybe_dma_pinned(struct page *page)

...is a prerequisite to solving the long-running gup+DMA problem.
@@ -215,12 +208,42 @@ has the following new calls to exercise the new pin*() wrapper functions:
You can monitor how many total dma-pinned pages have been acquired and released
since the system was booted, via two new /proc/vmstat entries: ::

    /proc/vmstat/nr_foll_pin_requested
    /proc/vmstat/nr_foll_pin_requested
    /proc/vmstat/nr_foll_pin_acquired
    /proc/vmstat/nr_foll_pin_released

Those are both going to show zero, unless CONFIG_DEBUG_VM is set. This is
because there is a noticeable performance drop in unpin_user_page(), when they
are activated.
Under normal conditions, these two values will be equal unless there are any
long-term [R]DMA pins in place, or during pin/unpin transitions.

* nr_foll_pin_acquired: This is the number of logical pins that have been
  acquired since the system was powered on. For huge pages, the head page is
  pinned once for each page (head page and each tail page) within the huge page.
  This follows the same sort of behavior that get_user_pages() uses for huge
  pages: the head page is refcounted once for each tail or head page in the huge
  page, when get_user_pages() is applied to a huge page.

* nr_foll_pin_released: The number of logical pins that have been released since
  the system was powered on. Note that pages are released (unpinned) on a
  PAGE_SIZE granularity, even if the original pin was applied to a huge page.
  Because of the pin count behavior described above in "nr_foll_pin_acquired",
  the accounting balances out, so that after doing this::

    pin_user_pages(huge_page);
    for (each page in huge_page)
        unpin_user_page(page);

...the following is expected::

    nr_foll_pin_released == nr_foll_pin_acquired

(...unless it was already out of balance due to a long-term RDMA pin being in
place.)
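
For example, a quick way to read both counters at once (illustrative; it simply
filters the two nr_foll_pin_* lines out of /proc/vmstat)::

    grep foll_pin /proc/vmstat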

Other diagnostics
=================

dump_page() has been enhanced slightly to handle these new counting fields, and
to better report on compound pages in general. Specifically, for compound pages
with order > 1, the exact (hpage_pinned_refcount) pincount is reported.

References
==========
@@ -228,5 +251,6 @@ References
* `Some slow progress on get_user_pages() (Apr 2, 2019) <https://lwn.net/Articles/784574/>`_
* `DMA and get_user_pages() (LPC: Dec 12, 2018) <https://lwn.net/Articles/774411/>`_
* `The trouble with get_user_pages() (Apr 30, 2018) <https://lwn.net/Articles/753027/>`_
* `LWN kernel index: get_user_pages() <https://lwn.net/Kernel/Index/#Memory_management-get_user_pages>`_

John Hubbard, October, 2019

@ -1,17 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
generated-y += syscall_table.h
|
||||
generic-y += compat.h
|
||||
generic-y += exec.h
|
||||
generic-y += export.h
|
||||
generic-y += fb.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += current.h
|
||||
generic-y += kprobes.h
|
||||
|
|
|
@ -89,7 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
|
|||
const struct exception_table_entry *fixup;
|
||||
int si_code = SEGV_MAPERR;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
|
||||
(or is suppressed by the PALcode). Support that for older CPUs
|
||||
|
@ -150,7 +150,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
|
|||
the fault. */
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -169,7 +169,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/* No need to up_read(&mm->mmap_sem) as we would
|
||||
* have already released it in __lock_page_or_retry
|
||||
|
|
|
@ -1,28 +1,7 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += bugs.h
|
||||
generic-y += compat.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += extable.h
|
||||
generic-y += ftrace.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += parport.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -100,7 +100,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
|
|||
(regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
|
||||
exec = 1;
|
||||
|
||||
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
flags = FAULT_FLAG_DEFAULT;
|
||||
if (user_mode(regs))
|
||||
flags |= FAULT_FLAG_USER;
|
||||
if (write)
|
||||
|
@ -133,29 +133,20 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
|
|||
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
/*
|
||||
* Fault retry nuances
|
||||
*/
|
||||
if (unlikely(fault & VM_FAULT_RETRY)) {
|
||||
/* Quick path to respond to signals */
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If fault needs to be retried, handle any pending signals
|
||||
* first (by returning to user mode).
|
||||
* mmap_sem already relinquished by core mm for RETRY case
|
||||
*/
|
||||
if (fatal_signal_pending(current)) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* retry state machine
|
||||
*/
|
||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
/*
|
||||
* Fault retry nuances, mmap_sem already relinquished by core mm
|
||||
*/
|
||||
if (unlikely((fault & VM_FAULT_RETRY) &&
|
||||
(flags & FAULT_FLAG_ALLOW_RETRY))) {
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
bad_area:
|
||||
|
|
|
@ -1,22 +1,10 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += early_ioremap.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += flat.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += parport.h
|
||||
generic-y += preempt.h
|
||||
generic-y += seccomp.h
|
||||
generic-y += serial.h
|
||||
generic-y += trace_clock.h
|
||||
|
||||
generated-y += mach-types.h
|
||||
generated-y += unistd-nr.h
|
||||
|
|
|
@ -241,7 +241,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
|||
struct mm_struct *mm;
|
||||
int sig, code;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
if (kprobe_page_fault(regs, fsr))
|
||||
return 0;
|
||||
|
@ -295,7 +295,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
|||
* signal first. We do not need to release the mmap_sem because
|
||||
* it would already be released in __lock_page_or_retry in
|
||||
* mm/filemap.c. */
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return 0;
|
||||
|
@ -319,9 +319,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
|||
regs, addr);
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation. */
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
|
|
|
@ -1,26 +1,8 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += bugs.h
|
||||
generic-y += delay.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += early_ioremap.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += qrwlock.h
|
||||
generic-y += qspinlock.h
|
||||
generic-y += serial.h
|
||||
generic-y += set_memory.h
|
||||
generic-y += switch_to.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
|
|
|
@ -446,7 +446,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
|
|||
struct mm_struct *mm = current->mm;
|
||||
vm_fault_t fault, major = 0;
|
||||
unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
|
||||
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int mm_flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
if (kprobe_page_fault(regs, esr))
|
||||
return 0;
|
||||
|
@ -513,25 +513,15 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
|
|||
fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
|
||||
major |= fault & VM_FAULT_MAJOR;
|
||||
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
/*
|
||||
* If we need to retry but a fatal signal is pending,
|
||||
* handle the signal first. We do not need to release
|
||||
* the mmap_sem because it would already be released
|
||||
* in __lock_page_or_retry in mm/filemap.c.
|
||||
*/
|
||||
if (fatal_signal_pending(current)) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return 0;
|
||||
}
|
||||
/* Quick path to respond to signals */
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
|
||||
* starvation.
|
||||
*/
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||
mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
mm_flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
|
|
|
@ -1,42 +1,5 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += atomic.h
|
||||
generic-y += barrier.h
|
||||
generic-y += bugs.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += futex.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += io.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += mmu.h
|
||||
generic-y += mmu_context.h
|
||||
generic-y += pci.h
|
||||
generic-y += percpu.h
|
||||
generic-y += pgalloc.h
|
||||
generic-y += preempt.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += tlbflush.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -1,44 +1,8 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += asm-offsets.h
|
||||
generic-y += bugs.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += delay.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += fb.h
|
||||
generic-y += futex.h
|
||||
generic-y += gpio.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += linkage.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += module.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += qrwlock.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += timex.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += vmlinux.lds.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -1,54 +1,8 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += asm-offsets.h
|
||||
generic-y += barrier.h
|
||||
generic-y += bugs.h
|
||||
generic-y += cacheflush.h
|
||||
generic-y += checksum.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += delay.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += ftrace.h
|
||||
generic-y += futex.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += linkage.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += mmu.h
|
||||
generic-y += mmu_context.h
|
||||
generic-y += module.h
|
||||
generic-y += parport.h
|
||||
generic-y += pci.h
|
||||
generic-y += percpu.h
|
||||
generic-y += pgalloc.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += spinlock.h
|
||||
generic-y += timex.h
|
||||
generic-y += tlbflush.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += uaccess.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -1,39 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += barrier.h
|
||||
generic-y += bug.h
|
||||
generic-y += bugs.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += ftrace.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += iomap.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += pci.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -41,7 +41,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
|
|||
int si_code = SEGV_MAPERR;
|
||||
vm_fault_t fault;
|
||||
const struct exception_table_entry *fixup;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
/*
|
||||
* If we're in an interrupt or have no user context,
|
||||
|
@ -91,7 +91,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
|
|||
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
/* The most common case -- we are done. */
|
||||
|
@ -102,7 +102,6 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
|
|
|
@ -1,12 +1,5 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generated-y += syscall_table.h
|
||||
generic-y += compat.h
|
||||
generic-y += exec.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += preempt.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += vtime.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
|
|
@ -65,7 +65,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
|
|||
struct mm_struct *mm = current->mm;
|
||||
unsigned long mask;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
|
||||
| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
|
||||
|
@ -141,7 +141,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -167,7 +167,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/* No need to up_read(&mm->mmap_sem) as we would
|
||||
|
|
|
@ -1,32 +1,8 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generated-y += syscall_table.h
|
||||
generic-y += barrier.h
|
||||
generic-y += compat.h
|
||||
generic-y += device.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += futex.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += spinlock.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -71,7 +71,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
struct mm_struct *mm = current->mm;
|
||||
struct vm_area_struct * vma;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
pr_debug("do page fault:\nregs->sr=%#x, regs->pc=%#lx, address=%#lx, %ld, %p\n",
|
||||
regs->sr, regs->pc, address, error_code, mm ? mm->pgd : NULL);
|
||||
|
@ -138,7 +138,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
fault = handle_mm_fault(vma, address, flags);
|
||||
pr_debug("handle_mm_fault returns %x\n", fault);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return 0;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -162,9 +162,6 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation. */
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,40 +1,11 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generated-y += syscall_table.h
|
||||
generic-y += bitops.h
|
||||
generic-y += bug.h
|
||||
generic-y += bugs.h
|
||||
generic-y += compat.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += linkage.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += parport.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += syscalls.h
|
||||
generic-y += tlb.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -91,7 +91,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
int code = SEGV_MAPERR;
|
||||
int is_write = error_code & ESR_S;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
regs->ear = address;
|
||||
regs->esr = error_code;
|
||||
|
@ -217,7 +217,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -236,7 +236,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/*
|
||||
|
|
|
@ -4,23 +4,10 @@ generated-y += syscall_table_32_o32.h
|
|||
generated-y += syscall_table_64_n32.h
|
||||
generated-y += syscall_table_64_n64.h
|
||||
generated-y += syscall_table_64_o32.h
|
||||
generic-y += current.h
|
||||
generic-y += device.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += export.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += parport.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += qrwlock.h
|
||||
generic-y += qspinlock.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -44,7 +44,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
|
|||
const int field = sizeof(unsigned long) * 2;
|
||||
int si_code;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
|
||||
|
||||
|
@ -154,7 +154,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
|
@ -178,7 +178,6 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
|
|||
tsk->min_flt++;
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,46 +1,9 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += asm-offsets.h
|
||||
generic-y += atomic.h
|
||||
generic-y += bitops.h
|
||||
generic-y += bug.h
|
||||
generic-y += bugs.h
|
||||
generic-y += checksum.h
|
||||
generic-y += cmpxchg.h
|
||||
generic-y += compat.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += export.h
|
||||
generic-y += fb.h
|
||||
generic-y += gpio.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += parport.h
|
||||
generic-y += pci.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += switch_to.h
|
||||
generic-y += timex.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += xor.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
|
|
@ -80,7 +80,7 @@ void do_page_fault(unsigned long entry, unsigned long addr,
|
|||
int si_code;
|
||||
vm_fault_t fault;
|
||||
unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
|
||||
tsk = current;
|
||||
|
@ -214,7 +214,7 @@ void do_page_fault(unsigned long entry, unsigned long addr,
|
|||
* signal first. We do not need to release the mmap_sem because it
|
||||
* would already be released in __lock_page_or_retry in mm/filemap.c.
|
||||
*/
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
if (!user_mode(regs))
|
||||
goto no_context;
|
||||
return;
|
||||
|
@ -246,7 +246,6 @@ void do_page_fault(unsigned long entry, unsigned long addr,
|
|||
1, regs, addr);
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/* No need to up_read(&mm->mmap_sem) as we would
|
||||
|
|
|
@ -1,45 +1,7 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += atomic.h
|
||||
generic-y += barrier.h
|
||||
generic-y += bitops.h
|
||||
generic-y += bug.h
|
||||
generic-y += bugs.h
|
||||
generic-y += cmpxchg.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += ftrace.h
|
||||
generic-y += futex.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += module.h
|
||||
generic-y += pci.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += spinlock.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -47,7 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
|
|||
struct mm_struct *mm = tsk->mm;
|
||||
int code = SEGV_MAPERR;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
cause >>= 2;
|
||||
|
||||
|
@ -133,7 +133,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -157,9 +157,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation. */
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,45 +1,9 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += barrier.h
|
||||
generic-y += bug.h
|
||||
generic-y += bugs.h
|
||||
generic-y += checksum.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += ftrace.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += module.h
|
||||
generic-y += pci.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += qspinlock_types.h
|
||||
generic-y += qspinlock.h
|
||||
generic-y += qrwlock_types.h
|
||||
generic-y += qrwlock.h
|
||||
generic-y += sections.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += switch_to.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -50,7 +50,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
struct vm_area_struct *vma;
|
||||
int si_code;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
tsk = current;
|
||||
|
||||
|
@ -161,7 +161,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -181,7 +181,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
else
|
||||
tsk->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/* No need to up_read(&mm->mmap_sem) as we would
|
||||
|
|
|
@ -2,26 +2,8 @@
|
|||
generated-y += syscall_table_32.h
|
||||
generated-y += syscall_table_64.h
|
||||
generated-y += syscall_table_c32.h
|
||||
generic-y += current.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += seccomp.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -274,7 +274,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
|
|||
if (!mm)
|
||||
goto no_context;
|
||||
|
||||
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
flags = FAULT_FLAG_DEFAULT;
|
||||
if (user_mode(regs))
|
||||
flags |= FAULT_FLAG_USER;
|
||||
|
||||
|
@ -304,7 +304,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
|
|||
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -328,14 +328,12 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
|
||||
/*
|
||||
* No need to up_read(&mm->mmap_sem) as we would
|
||||
* have already released it in __lock_page_or_retry
|
||||
* in mm/filemap.c.
|
||||
*/
|
||||
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,12 +3,8 @@ generated-y += syscall_table_32.h
|
|||
generated-y += syscall_table_64.h
|
||||
generated-y += syscall_table_c32.h
|
||||
generated-y += syscall_table_spu.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += export.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += preempt.h
|
||||
generic-y += vtime.h
|
||||
generic-y += early_ioremap.h
|
||||
|
|
|
@ -381,18 +381,6 @@ bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
|
|||
* So do not enforce things if the VMA is not from the current mm, or if we are
|
||||
* in a kernel thread.
|
||||
*/
|
||||
static inline bool vma_is_foreign(struct vm_area_struct *vma)
|
||||
{
|
||||
if (!current->mm)
|
||||
return true;
|
||||
|
||||
/* if it is not our ->mm, it has to be foreign */
|
||||
if (current->mm != vma->vm_mm)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
|
||||
bool execute, bool foreign)
|
||||
{
|
||||
|
|
|
@ -434,7 +434,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
{
|
||||
struct vm_area_struct * vma;
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
int is_exec = TRAP(regs) == 0x400;
|
||||
int is_user = user_mode(regs);
|
||||
int is_write = page_fault_is_write(error_code);
|
||||
|
@ -582,28 +582,18 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
|
|||
|
||||
major |= fault & VM_FAULT_MAJOR;
|
||||
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return user_mode(regs) ? 0 : SIGBUS;
|
||||
|
||||
/*
|
||||
* Handle the retry right now, the mmap_sem has been released in that
|
||||
* case.
|
||||
*/
|
||||
if (unlikely(fault & VM_FAULT_RETRY)) {
|
||||
/* We retry only once */
|
||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||
/*
|
||||
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation.
|
||||
*/
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
if (!fatal_signal_pending(current))
|
||||
goto retry;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* User mode? Just return to handle the fatal exception otherwise
|
||||
* return to bad_page_fault
|
||||
*/
|
||||
return is_user ? 0 : SIGBUS;
|
||||
}
|
||||
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
|
|
@ -360,7 +360,7 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
|
|||
|
||||
for (i = 0; i < scns_per_block; i++) {
|
||||
pfn = PFN_DOWN(phys_addr);
|
||||
if (!pfn_present(pfn)) {
|
||||
if (!pfn_in_present_section(pfn)) {
|
||||
phys_addr += MIN_MEMORY_BLOCK_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -1,35 +1,7 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += bugs.h
|
||||
generic-y += checksum.h
|
||||
generic-y += compat.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += extable.h
|
||||
generic-y += flat.h
|
||||
generic-y += dma.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += fb.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += vmlinux.lds.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -30,7 +30,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
|
|||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm;
|
||||
unsigned long addr, cause;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
int code = SEGV_MAPERR;
|
||||
vm_fault_t fault;
|
||||
|
||||
|
@ -117,7 +117,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
|
|||
* signal first. We do not need to release the mmap_sem because it
|
||||
* would already be released in __lock_page_or_retry in mm/filemap.c.
|
||||
*/
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -144,11 +144,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
|
|||
1, regs, addr);
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
/*
|
||||
* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation.
|
||||
*/
|
||||
flags &= ~(FAULT_FLAG_ALLOW_RETRY);
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/*
|
||||
|
|
|
@ -5,21 +5,6 @@ generated-y += syscall_table.h
|
|||
generated-y += unistd_nr.h
|
||||
|
||||
generic-y += asm-offsets.h
|
||||
generic-y += cacheflush.h
|
||||
generic-y += device.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += div64.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += export.h
|
||||
generic-y += fb.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
|
|
@ -429,7 +429,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
|
|||
|
||||
address = trans_exc_code & __FAIL_ADDR_MASK;
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
flags = FAULT_FLAG_DEFAULT;
|
||||
if (user_mode(regs))
|
||||
flags |= FAULT_FLAG_USER;
|
||||
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
|
||||
|
@ -480,8 +480,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
|
|||
* the fault.
|
||||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
/* No reason to continue if interrupted by SIGKILL. */
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
fault = VM_FAULT_SIGNAL;
|
||||
if (flags & FAULT_FLAG_RETRY_NOWAIT)
|
||||
goto out_up;
|
||||
|
@ -514,10 +513,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
|
|||
fault = VM_FAULT_PFAULT;
|
||||
goto out_up;
|
||||
}
|
||||
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation. */
|
||||
flags &= ~(FAULT_FLAG_ALLOW_RETRY |
|
||||
FAULT_FLAG_RETRY_NOWAIT);
|
||||
flags &= ~FAULT_FLAG_RETRY_NOWAIT;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
down_read(&mm->mmap_sem);
|
||||
goto retry;
|
||||
|
|
|
@ -1,22 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generated-y += syscall_table.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += delay.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += parport.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += serial.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -302,25 +302,25 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
|
|||
* Pagefault was interrupted by SIGKILL. We have no reason to
|
||||
* continue pagefault.
|
||||
*/
|
||||
if (fatal_signal_pending(current)) {
|
||||
if (!(fault & VM_FAULT_RETRY))
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
if (!user_mode(regs))
|
||||
no_context(regs, error_code, address);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Release mmap_sem first if necessary */
|
||||
if (!(fault & VM_FAULT_RETRY))
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
||||
if (!(fault & VM_FAULT_ERROR))
|
||||
return 0;
|
||||
|
||||
if (fault & VM_FAULT_OOM) {
|
||||
/* Kernel mode? Handle exceptions or die: */
|
||||
if (!user_mode(regs)) {
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
no_context(regs, error_code, address);
|
||||
return 1;
|
||||
}
|
||||
up_read(¤t->mm->mmap_sem);
|
||||
|
||||
/*
|
||||
* We ran out of memory, call the OOM killer, and return the
|
||||
|
@ -380,7 +380,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
|
|||
struct mm_struct *mm;
|
||||
struct vm_area_struct * vma;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
tsk = current;
|
||||
mm = tsk->mm;
|
||||
|
@ -481,7 +481,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
|
|||
regs, address);
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/*
|
||||
|
|
|
@ -4,21 +4,7 @@
|
|||
generated-y += syscall_table_32.h
|
||||
generated-y += syscall_table_64.h
|
||||
generated-y += syscall_table_c32.h
|
||||
generic-y += div64.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += export.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += linkage.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += module.h
|
||||
generic-y += preempt.h
|
||||
generic-y += serial.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += word-at-a-time.h
|
||||
|
|
|
@ -168,7 +168,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
|
|||
int from_user = !(regs->psr & PSR_PS);
|
||||
int code;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
if (text_fault)
|
||||
address = regs->pc;
|
||||
|
@ -237,7 +237,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -261,7 +261,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
|
|||
1, regs, address);
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/* No need to up_read(&mm->mmap_sem) as we would
|
||||
|
|
|
@ -271,7 +271,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
|
|||
int si_code, fault_code;
|
||||
vm_fault_t fault;
|
||||
unsigned long address, mm_rss;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
fault_code = get_thread_fault_code();
|
||||
|
||||
|
@ -425,7 +425,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
|
|||
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
goto exit_exception;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -449,7 +449,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
|
|||
1, regs, address);
|
||||
}
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/* No need to up_read(&mm->mmap_sem) as we would
|
||||
|
|
|
@ -33,7 +33,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
|
|||
pmd_t *pmd;
|
||||
pte_t *pte;
|
||||
int err = -EFAULT;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
*code_out = SEGV_MAPERR;
|
||||
|
||||
|
@ -97,7 +97,6 @@ int handle_page_fault(unsigned long address, unsigned long ip,
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
goto retry;
|
||||
|
|
|
@ -1,41 +1,7 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generic-y += atomic.h
|
||||
generic-y += bugs.h
|
||||
generic-y += compat.h
|
||||
generic-y += current.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += ftrace.h
|
||||
generic-y += futex.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += module.h
|
||||
generic-y += parport.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += sections.h
|
||||
generic-y += serial.h
|
||||
generic-y += shmparam.h
|
||||
generic-y += syscalls.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += unaligned.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -202,7 +202,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
|||
struct mm_struct *mm;
|
||||
int sig, code;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
tsk = current;
|
||||
mm = tsk->mm;
|
||||
|
@ -250,7 +250,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
|||
* signal first. We do not need to release the mmap_sem because
|
||||
* it would already be released in __lock_page_or_retry in
|
||||
* mm/filemap.c. */
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return 0;
|
||||
|
||||
if (!(fault & VM_FAULT_ERROR) && (flags & FAULT_FLAG_ALLOW_RETRY)) {
|
||||
|
@ -259,9 +259,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
|
|||
else
|
||||
tsk->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
|
||||
* of starvation. */
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,5 +10,3 @@ generated-y += xen-hypercalls.h
|
|||
generic-y += early_ioremap.h
|
||||
generic-y += export.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
|
|
|
@ -213,21 +213,6 @@ static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
|
|||
* So do not enforce things if the VMA is not from the current
|
||||
* mm, or if we are in a kernel thread.
|
||||
*/
|
||||
static inline bool vma_is_foreign(struct vm_area_struct *vma)
|
||||
{
|
||||
if (!current->mm)
|
||||
return true;
|
||||
/*
|
||||
* Should PKRU be enforced on the access to this VMA? If
|
||||
* the VMA is from another process, then PKRU has no
|
||||
* relevance and should not be enforced.
|
||||
*/
|
||||
if (current->mm != vma->vm_mm)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
|
||||
bool write, bool execute, bool foreign)
|
||||
{
|
||||
|
|
|
@ -1310,7 +1310,7 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
struct task_struct *tsk;
|
||||
struct mm_struct *mm;
|
||||
vm_fault_t fault, major = 0;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
tsk = current;
|
||||
mm = tsk->mm;
|
||||
|
@ -1464,27 +1464,23 @@ void do_user_addr_fault(struct pt_regs *regs,
|
|||
fault = handle_mm_fault(vma, address, flags);
|
||||
major |= fault & VM_FAULT_MAJOR;
|
||||
|
||||
/* Quick path to respond to signals */
|
||||
if (fault_signal_pending(fault, regs)) {
|
||||
if (!user_mode(regs))
|
||||
no_context(regs, hw_error_code, address, SIGBUS,
|
||||
BUS_ADRERR);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to retry the mmap_sem has already been released,
|
||||
* and if there is a fatal signal pending there is no guarantee
|
||||
* that we made any progress. Handle this case first.
|
||||
*/
|
||||
if (unlikely(fault & VM_FAULT_RETRY)) {
|
||||
/* Retry at most once */
|
||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
if (!fatal_signal_pending(tsk))
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* User mode? Just return to handle the fatal exception */
|
||||
if (flags & FAULT_FLAG_USER)
|
||||
return;
|
||||
|
||||
/* Not returning to user mode? Handle exceptions or die: */
|
||||
no_context(regs, hw_error_code, address, SIGBUS, BUS_ADRERR);
|
||||
return;
|
||||
if (unlikely((fault & VM_FAULT_RETRY) &&
|
||||
(flags & FAULT_FLAG_ALLOW_RETRY))) {
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
up_read(&mm->mmap_sem);
|
||||
|
|
|
@ -1,36 +1,10 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
generated-y += syscall_table.h
|
||||
generic-y += bug.h
|
||||
generic-y += compat.h
|
||||
generic-y += device.h
|
||||
generic-y += div64.h
|
||||
generic-y += dma-mapping.h
|
||||
generic-y += emergency-restart.h
|
||||
generic-y += exec.h
|
||||
generic-y += extable.h
|
||||
generic-y += fb.h
|
||||
generic-y += hardirq.h
|
||||
generic-y += hw_irq.h
|
||||
generic-y += irq_regs.h
|
||||
generic-y += irq_work.h
|
||||
generic-y += kdebug.h
|
||||
generic-y += kmap_types.h
|
||||
generic-y += kprobes.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += local.h
|
||||
generic-y += local64.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mm-arch-hooks.h
|
||||
generic-y += mmiowb.h
|
||||
generic-y += param.h
|
||||
generic-y += percpu.h
|
||||
generic-y += preempt.h
|
||||
generic-y += qrwlock.h
|
||||
generic-y += qspinlock.h
|
||||
generic-y += sections.h
|
||||
generic-y += topology.h
|
||||
generic-y += trace_clock.h
|
||||
generic-y += user.h
|
||||
generic-y += vga.h
|
||||
generic-y += word-at-a-time.h
|
||||
generic-y += xor.h
|
||||
|
|
|
@ -43,7 +43,7 @@ void do_page_fault(struct pt_regs *regs)
|
|||
|
||||
int is_write, is_exec;
|
||||
vm_fault_t fault;
|
||||
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
|
||||
unsigned int flags = FAULT_FLAG_DEFAULT;
|
||||
|
||||
code = SEGV_MAPERR;
|
||||
|
||||
|
@ -110,7 +110,7 @@ void do_page_fault(struct pt_regs *regs)
|
|||
*/
|
||||
fault = handle_mm_fault(vma, address, flags);
|
||||
|
||||
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
|
||||
if (fault_signal_pending(fault, regs))
|
||||
return;
|
||||
|
||||
if (unlikely(fault & VM_FAULT_ERROR)) {
|
||||
|
@ -128,7 +128,6 @@ void do_page_fault(struct pt_regs *regs)
|
|||
else
|
||||
current->min_flt++;
|
||||
if (fault & VM_FAULT_RETRY) {
|
||||
flags &= ~FAULT_FLAG_ALLOW_RETRY;
|
||||
flags |= FAULT_FLAG_TRIED;
|
||||
|
||||
/* No need to up_read(&mm->mmap_sem) as we would
|
||||
|
|
|
@ -772,7 +772,7 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
|
|||
* memory block could have several absent sections from start.
|
||||
* skip pfn range from absent section
|
||||
*/
|
||||
if (!pfn_present(pfn)) {
|
||||
if (!pfn_in_present_section(pfn)) {
|
||||
pfn = round_down(pfn + PAGES_PER_SECTION,
|
||||
PAGES_PER_SECTION) - 1;
|
||||
continue;
|
||||
|
|
|
@ -59,9 +59,10 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
|
|||
|
||||
/*
|
||||
* If possible, avoid waiting for GPU with mmap_sem
|
||||
* held.
|
||||
* held. We only do this if the fault allows retry and this
|
||||
* is the first attempt.
|
||||
*/
|
||||
if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||
if (fault_flag_allow_retry_first(vmf->flags)) {
|
||||
ret = VM_FAULT_RETRY;
|
||||
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
|
||||
goto out_unlock;
|
||||
|
@ -135,7 +136,12 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
|
|||
* for the buffer to become unreserved.
|
||||
*/
|
||||
if (unlikely(!dma_resv_trylock(bo->base.resv))) {
|
||||
if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||
/*
|
||||
* If the fault allows retry and this is the first
|
||||
* fault attempt, we try to release the mmap_sem
|
||||
* before waiting
|
||||
*/
|
||||
if (fault_flag_allow_retry_first(vmf->flags)) {
|
||||
if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
|
||||
ttm_bo_get(bo);
|
||||
up_read(&vmf->vma->vm_mm->mmap_sem);
|
||||
|
|
|
@ -368,8 +368,6 @@ bool fs_validate_description(const char *name,
|
|||
const struct fs_parameter_spec *param, *p2;
|
||||
bool good = true;
|
||||
|
||||
pr_notice("*** VALIDATE %s ***\n", name);
|
||||
|
||||
for (param = desc; param->name; param++) {
|
||||
/* Check for duplicate parameter names */
|
||||
for (p2 = desc; p2 < param; p2++) {
|
||||
|
|
|
@ -393,10 +393,9 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
|
|||
* In this case, we first scan the range and release found pages.
|
||||
* After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
|
||||
* maps and global counts. Page faults can not race with truncation
|
||||
* in this routine. hugetlb_no_page() prevents page faults in the
|
||||
* truncated range. It checks i_size before allocation, and again after
|
||||
* with the page table lock for the page held. The same lock must be
|
||||
* acquired to unmap a page.
|
||||
* in this routine. hugetlb_no_page() holds i_mmap_rwsem and prevents
|
||||
* page faults in the truncated range by checking i_size. i_size is
|
||||
* modified while holding i_mmap_rwsem.
|
||||
* hole punch is indicated if end is not LLONG_MAX
|
||||
* In the hole punch case we scan the range and release found pages.
|
||||
* Only when releasing a page is the associated region/reserv map
|
||||
|
@ -436,7 +435,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
|
|||
|
||||
index = page->index;
|
||||
hash = hugetlb_fault_mutex_hash(mapping, index);
|
||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
||||
if (!truncate_op) {
|
||||
/*
|
||||
* Only need to hold the fault mutex in the
|
||||
* hole punch case. This prevents races with
|
||||
* page faults. Races are not possible in the
|
||||
* case of truncation.
|
||||
*/
|
||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
||||
}
|
||||
|
||||
/*
|
||||
* If page is mapped, it was faulted in after being
|
||||
|
@ -450,7 +457,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
|
|||
if (unlikely(page_mapped(page))) {
|
||||
BUG_ON(truncate_op);
|
||||
|
||||
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
||||
i_mmap_lock_write(mapping);
|
||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
||||
hugetlb_vmdelete_list(&mapping->i_mmap,
|
||||
index * pages_per_huge_page(h),
|
||||
(index + 1) * pages_per_huge_page(h));
|
||||
|
@ -477,7 +486,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
|
|||
}
|
||||
|
||||
unlock_page(page);
|
||||
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
||||
if (!truncate_op)
|
||||
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
|
||||
}
|
||||
huge_pagevec_release(&pvec);
|
||||
cond_resched();
|
||||
|
@ -515,8 +525,8 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
|
|||
BUG_ON(offset & ~huge_page_mask(h));
|
||||
pgoff = offset >> PAGE_SHIFT;
|
||||
|
||||
i_size_write(inode, offset);
|
||||
i_mmap_lock_write(mapping);
|
||||
i_size_write(inode, offset);
|
||||
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
|
||||
hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
|
||||
i_mmap_unlock_write(mapping);
|
||||
|
@ -638,7 +648,11 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
|
|||
/* addr is the offset within the file (zero based) */
|
||||
addr = index * hpage_size;
|
||||
|
||||
/* mutex taken here, fault path and hole punch */
|
||||
/*
|
||||
* fault mutex taken here, protects against fault path
|
||||
* and hole punch. inode_lock previously taken protects
|
||||
* against truncation.
|
||||
*/
|
||||
hash = hugetlb_fault_mutex_hash(mapping, index);
|
||||
mutex_lock(&hugetlb_fault_mutex_table[hash]);
|
||||
|
||||
|
|
|
@ -1060,7 +1060,6 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
|
|||
brelse(bhs[i]);
|
||||
bhs[i] = NULL;
|
||||
}
|
||||
mlog_errno(status);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
@ -3942,7 +3941,7 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
|
|||
* above.
|
||||
*
|
||||
* This leaf needs to have space, either by the empty 1st
|
||||
* extent record, or by virtue of an l_next_rec < l_count.
|
||||
* extent record, or by virtue of an l_next_free_rec < l_count.
|
||||
*/
|
||||
ocfs2_rotate_leaf(el, insert_rec);
|
||||
}
|
||||
|
|
|
@ -101,8 +101,6 @@ static struct o2hb_callback {
|
|||
|
||||
static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
|
||||
|
||||
#define O2HB_DEFAULT_BLOCK_BITS 9
|
||||
|
||||
enum o2hb_heartbeat_modes {
|
||||
O2HB_HEARTBEAT_LOCAL = 0,
|
||||
O2HB_HEARTBEAT_GLOBAL,
|
||||
|
@ -1309,7 +1307,7 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
|
|||
|
||||
case O2HB_DB_TYPE_REGION_NUMBER:
|
||||
reg = (struct o2hb_region *)db->db_data;
|
||||
out += snprintf(buf + out, PAGE_SIZE - out, "%d\n",
|
||||
out += scnprintf(buf + out, PAGE_SIZE - out, "%d\n",
|
||||
reg->hr_region_num);
|
||||
goto done;
|
||||
|
||||
|
@ -1319,12 +1317,12 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
|
|||
/* If 0, it has never been set before */
|
||||
if (lts)
|
||||
lts = jiffies_to_msecs(jiffies - lts);
|
||||
out += snprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts);
|
||||
out += scnprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts);
|
||||
goto done;
|
||||
|
||||
case O2HB_DB_TYPE_REGION_PINNED:
|
||||
reg = (struct o2hb_region *)db->db_data;
|
||||
out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
|
||||
out += scnprintf(buf + out, PAGE_SIZE - out, "%u\n",
|
||||
!!reg->hr_item_pinned);
|
||||
goto done;
|
||||
|
||||
|
@ -1333,8 +1331,8 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
|
|||
}
|
||||
|
||||
while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len)
|
||||
out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
|
||||
out += snprintf(buf + out, PAGE_SIZE - out, "\n");
|
||||
out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i);
|
||||
out += scnprintf(buf + out, PAGE_SIZE - out, "\n");
|
||||
|
||||
done:
|
||||
i_size_write(inode, out);
|
||||
|
|
|
@ -443,8 +443,8 @@ static int o2net_fill_bitmap(char *buf, int len)
|
|||
o2net_fill_node_map(map, sizeof(map));
|
||||
|
||||
while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
|
||||
out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
|
||||
out += snprintf(buf + out, PAGE_SIZE - out, "\n");
|
||||
out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i);
|
||||
out += scnprintf(buf + out, PAGE_SIZE - out, "\n");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
|
|
@ -1570,15 +1570,13 @@ static void o2net_start_connect(struct work_struct *work)
|
|||
struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
|
||||
int ret = 0, stop;
|
||||
unsigned int timeout;
|
||||
unsigned int noio_flag;
|
||||
unsigned int nofs_flag;
|
||||
|
||||
/*
|
||||
* sock_create allocates the sock with GFP_KERNEL. We must set
|
||||
* per-process flag PF_MEMALLOC_NOIO so that all allocations done
|
||||
* by this process are done as if GFP_NOIO was specified. So we
|
||||
* are not reentering filesystem while doing memory reclaim.
|
||||
* sock_create allocates the sock with GFP_KERNEL. We must
|
||||
* prevent the filesystem from being reentered by memory reclaim.
|
||||
*/
|
||||
noio_flag = memalloc_noio_save();
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
/* if we're greater we initiate tx, otherwise we accept */
|
||||
if (o2nm_this_node() <= o2net_num_from_nn(nn))
|
||||
goto out;
|
||||
|
@ -1683,7 +1681,7 @@ static void o2net_start_connect(struct work_struct *work)
|
|||
if (mynode)
|
||||
o2nm_node_put(mynode);
|
||||
|
||||
memalloc_noio_restore(noio_flag);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1810,15 +1808,13 @@ static int o2net_accept_one(struct socket *sock, int *more)
|
|||
struct o2nm_node *local_node = NULL;
|
||||
struct o2net_sock_container *sc = NULL;
|
||||
struct o2net_node *nn;
|
||||
unsigned int noio_flag;
|
||||
unsigned int nofs_flag;
|
||||
|
||||
/*
|
||||
* sock_create_lite allocates the sock with GFP_KERNEL. We must set
|
||||
* per-process flag PF_MEMALLOC_NOIO so that all allocations done
|
||||
* by this process are done as if GFP_NOIO was specified. So we
|
||||
* are not reentering filesystem while doing memory reclaim.
|
||||
* sock_create_lite allocates the sock with GFP_KERNEL. We must
|
||||
* prevent the filesystem from being reentered by memory reclaim.
|
||||
*/
|
||||
noio_flag = memalloc_noio_save();
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
|
||||
BUG_ON(sock == NULL);
|
||||
*more = 0;
|
||||
|
@ -1934,7 +1930,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
|
|||
if (sc)
|
||||
sc_put(sc);
|
||||
|
||||
memalloc_noio_restore(noio_flag);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1948,7 +1944,6 @@ static void o2net_accept_many(struct work_struct *work)
|
|||
{
|
||||
struct socket *sock = o2net_listen_sock;
|
||||
int more;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* It is critical to note that due to interrupt moderation
|
||||
|
@ -1963,7 +1958,7 @@ static void o2net_accept_many(struct work_struct *work)
|
|||
*/
|
||||
|
||||
for (;;) {
|
||||
err = o2net_accept_one(sock, &more);
|
||||
o2net_accept_one(sock, &more);
|
||||
if (!more)
|
||||
break;
|
||||
cond_resched();
|
||||
|
|
|
@ -32,7 +32,7 @@ struct o2net_msg
|
|||
__be32 status;
|
||||
__be32 key;
|
||||
__be32 msg_num;
|
||||
__u8 buf[0];
|
||||
__u8 buf[];
|
||||
};
|
||||
|
||||
typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data,
|
||||
|
|
|
@ -676,7 +676,7 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
|
|||
int ra_ptr = 0; /* Current index into readahead
|
||||
buffer */
|
||||
int num = 0;
|
||||
int nblocks, i, err;
|
||||
int nblocks, i;
|
||||
|
||||
sb = dir->i_sb;
|
||||
|
||||
|
@ -708,7 +708,7 @@ static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
|
|||
num++;
|
||||
|
||||
bh = NULL;
|
||||
err = ocfs2_read_dir_block(dir, b++, &bh,
|
||||
ocfs2_read_dir_block(dir, b++, &bh,
|
||||
OCFS2_BH_READAHEAD);
|
||||
bh_use[ra_max] = bh;
|
||||
}
|
||||
|
|
|
@ -564,7 +564,7 @@ struct dlm_migratable_lockres
|
|||
// 48 bytes
|
||||
u8 lvb[DLM_LVB_LEN];
|
||||
// 112 bytes
|
||||
struct dlm_migratable_lock ml[0]; // 16 bytes each, begins at byte 112
|
||||
struct dlm_migratable_lock ml[]; // 16 bytes each, begins at byte 112
|
||||
};
|
||||
#define DLM_MIG_LOCKRES_MAX_LEN \
|
||||
(sizeof(struct dlm_migratable_lockres) + \
|
||||
|
@ -601,7 +601,7 @@ struct dlm_convert_lock
|
|||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
|
||||
s8 lvb[0];
|
||||
s8 lvb[];
|
||||
};
|
||||
#define DLM_CONVERT_LOCK_MAX_LEN (sizeof(struct dlm_convert_lock)+DLM_LVB_LEN)
|
||||
|
||||
|
@ -616,7 +616,7 @@ struct dlm_unlock_lock
|
|||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
|
||||
s8 lvb[0];
|
||||
s8 lvb[];
|
||||
};
|
||||
#define DLM_UNLOCK_LOCK_MAX_LEN (sizeof(struct dlm_unlock_lock)+DLM_LVB_LEN)
|
||||
|
||||
|
@ -632,7 +632,7 @@ struct dlm_proxy_ast
|
|||
|
||||
u8 name[O2NM_MAX_NAME_LEN];
|
||||
|
||||
s8 lvb[0];
|
||||
s8 lvb[];
|
||||
};
|
||||
#define DLM_PROXY_AST_MAX_LEN (sizeof(struct dlm_proxy_ast)+DLM_LVB_LEN)
|
||||
|
||||
|
|
|
@ -244,11 +244,11 @@ static int stringify_lockname(const char *lockname, int locklen, char *buf,
|
|||
memcpy((__be64 *)&inode_blkno_be,
|
||||
(char *)&lockname[OCFS2_DENTRY_LOCK_INO_START],
|
||||
sizeof(__be64));
|
||||
out += snprintf(buf + out, len - out, "%.*s%08x",
|
||||
out += scnprintf(buf + out, len - out, "%.*s%08x",
|
||||
OCFS2_DENTRY_LOCK_INO_START - 1, lockname,
|
||||
(unsigned int)be64_to_cpu(inode_blkno_be));
|
||||
} else
|
||||
out += snprintf(buf + out, len - out, "%.*s",
|
||||
out += scnprintf(buf + out, len - out, "%.*s",
|
||||
locklen, lockname);
|
||||
return out;
|
||||
}
|
||||
|
@ -260,7 +260,7 @@ static int stringify_nodemap(unsigned long *nodemap, int maxnodes,
|
|||
int i = -1;
|
||||
|
||||
while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes)
|
||||
out += snprintf(buf + out, len - out, "%d ", i);
|
||||
out += scnprintf(buf + out, len - out, "%d ", i);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -278,34 +278,34 @@ static int dump_mle(struct dlm_master_list_entry *mle, char *buf, int len)
|
|||
mle_type = "MIG";
|
||||
|
||||
out += stringify_lockname(mle->mname, mle->mnamelen, buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"\t%3s\tmas=%3u\tnew=%3u\tevt=%1d\tuse=%1d\tref=%3d\n",
|
||||
mle_type, mle->master, mle->new_master,
|
||||
!list_empty(&mle->hb_events),
|
||||
!!mle->inuse,
|
||||
kref_read(&mle->mle_refs));
|
||||
|
||||
out += snprintf(buf + out, len - out, "Maybe=");
|
||||
out += scnprintf(buf + out, len - out, "Maybe=");
|
||||
out += stringify_nodemap(mle->maybe_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "Vote=");
|
||||
out += scnprintf(buf + out, len - out, "Vote=");
|
||||
out += stringify_nodemap(mle->vote_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "Response=");
|
||||
out += scnprintf(buf + out, len - out, "Response=");
|
||||
out += stringify_nodemap(mle->response_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "Node=");
|
||||
out += scnprintf(buf + out, len - out, "Node=");
|
||||
out += stringify_nodemap(mle->node_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -353,7 +353,7 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
int out = 0;
|
||||
unsigned long total = 0;
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Dumping Purgelist for Domain: %s\n", dlm->name);
|
||||
|
||||
spin_lock(&dlm->spinlock);
|
||||
|
@ -365,13 +365,13 @@ static int debug_purgelist_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
out += stringify_lockname(res->lockname.name,
|
||||
res->lockname.len,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\t%ld\n",
|
||||
out += scnprintf(buf + out, len - out, "\t%ld\n",
|
||||
(jiffies - res->last_used)/HZ);
|
||||
spin_unlock(&res->spinlock);
|
||||
}
|
||||
spin_unlock(&dlm->spinlock);
|
||||
|
||||
out += snprintf(buf + out, len - out, "Total on list: %lu\n", total);
|
||||
out += scnprintf(buf + out, len - out, "Total on list: %lu\n", total);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -410,7 +410,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
int i, out = 0;
|
||||
unsigned long total = 0, longest = 0, bucket_count = 0;
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Dumping MLEs for Domain: %s\n", dlm->name);
|
||||
|
||||
spin_lock(&dlm->master_lock);
|
||||
|
@ -428,7 +428,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
}
|
||||
spin_unlock(&dlm->master_lock);
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Total: %lu, Longest: %lu\n", total, longest);
|
||||
return out;
|
||||
}
|
||||
|
@ -467,7 +467,7 @@ static int dump_lock(struct dlm_lock *lock, int list_type, char *buf, int len)
|
|||
|
||||
#define DEBUG_LOCK_VERSION 1
|
||||
spin_lock(&lock->spinlock);
|
||||
out = snprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d,"
|
||||
out = scnprintf(buf, len, "LOCK:%d,%d,%d,%d,%d,%d:%lld,%d,%d,%d,%d,%d,"
|
||||
"%d,%d,%d,%d\n",
|
||||
DEBUG_LOCK_VERSION,
|
||||
list_type, lock->ml.type, lock->ml.convert_type,
|
||||
|
@ -491,13 +491,13 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len)
|
|||
int i;
|
||||
int out = 0;
|
||||
|
||||
out += snprintf(buf + out, len - out, "NAME:");
|
||||
out += scnprintf(buf + out, len - out, "NAME:");
|
||||
out += stringify_lockname(res->lockname.name, res->lockname.len,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
#define DEBUG_LRES_VERSION 1
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"LRES:%d,%d,%d,%ld,%d,%d,%d,%d,%d,%d,%d\n",
|
||||
DEBUG_LRES_VERSION,
|
||||
res->owner, res->state, res->last_used,
|
||||
|
@ -509,17 +509,17 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len)
|
|||
kref_read(&res->refs));
|
||||
|
||||
/* refmap */
|
||||
out += snprintf(buf + out, len - out, "RMAP:");
|
||||
out += scnprintf(buf + out, len - out, "RMAP:");
|
||||
out += stringify_nodemap(res->refmap, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* lvb */
|
||||
out += snprintf(buf + out, len - out, "LVBX:");
|
||||
out += scnprintf(buf + out, len - out, "LVBX:");
|
||||
for (i = 0; i < DLM_LVB_LEN; i++)
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%02x", (unsigned char)res->lvb[i]);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* granted */
|
||||
list_for_each_entry(lock, &res->granted, list)
|
||||
|
@ -533,7 +533,7 @@ static int dump_lockres(struct dlm_lock_resource *res, char *buf, int len)
|
|||
list_for_each_entry(lock, &res->blocked, list)
|
||||
out += dump_lock(lock, 2, buf + out, len - out);
|
||||
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -683,41 +683,41 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
}
|
||||
|
||||
/* Domain: xxxxxxxxxx Key: 0xdfbac769 */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Domain: %s Key: 0x%08x Protocol: %d.%d\n",
|
||||
dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major,
|
||||
dlm->dlm_locking_proto.pv_minor);
|
||||
|
||||
/* Thread Pid: xxx Node: xxx State: xxxxx */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Thread Pid: %d Node: %d State: %s\n",
|
||||
task_pid_nr(dlm->dlm_thread_task), dlm->node_num, state);
|
||||
|
||||
/* Number of Joins: xxx Joining Node: xxx */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Number of Joins: %d Joining Node: %d\n",
|
||||
dlm->num_joins, dlm->joining_node);
|
||||
|
||||
/* Domain Map: xx xx xx */
|
||||
out += snprintf(buf + out, len - out, "Domain Map: ");
|
||||
out += scnprintf(buf + out, len - out, "Domain Map: ");
|
||||
out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* Exit Domain Map: xx xx xx */
|
||||
out += snprintf(buf + out, len - out, "Exit Domain Map: ");
|
||||
out += scnprintf(buf + out, len - out, "Exit Domain Map: ");
|
||||
out += stringify_nodemap(dlm->exit_domain_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* Live Map: xx xx xx */
|
||||
out += snprintf(buf + out, len - out, "Live Map: ");
|
||||
out += scnprintf(buf + out, len - out, "Live Map: ");
|
||||
out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* Lock Resources: xxx (xxx) */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Lock Resources: %d (%d)\n",
|
||||
atomic_read(&dlm->res_cur_count),
|
||||
atomic_read(&dlm->res_tot_count));
|
||||
|
@ -729,29 +729,29 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
cur_mles += atomic_read(&dlm->mle_cur_count[i]);
|
||||
|
||||
/* MLEs: xxx (xxx) */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"MLEs: %d (%d)\n", cur_mles, tot_mles);
|
||||
|
||||
/* Blocking: xxx (xxx) */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
" Blocking: %d (%d)\n",
|
||||
atomic_read(&dlm->mle_cur_count[DLM_MLE_BLOCK]),
|
||||
atomic_read(&dlm->mle_tot_count[DLM_MLE_BLOCK]));
|
||||
|
||||
/* Mastery: xxx (xxx) */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
" Mastery: %d (%d)\n",
|
||||
atomic_read(&dlm->mle_cur_count[DLM_MLE_MASTER]),
|
||||
atomic_read(&dlm->mle_tot_count[DLM_MLE_MASTER]));
|
||||
|
||||
/* Migration: xxx (xxx) */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
" Migration: %d (%d)\n",
|
||||
atomic_read(&dlm->mle_cur_count[DLM_MLE_MIGRATION]),
|
||||
atomic_read(&dlm->mle_tot_count[DLM_MLE_MIGRATION]));
|
||||
|
||||
/* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Lists: Dirty=%s Purge=%s PendingASTs=%s "
|
||||
"PendingBASTs=%s\n",
|
||||
(list_empty(&dlm->dirty_list) ? "Empty" : "InUse"),
|
||||
|
@ -760,12 +760,12 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
(list_empty(&dlm->pending_basts) ? "Empty" : "InUse"));
|
||||
|
||||
/* Purge Count: xxx Refs: xxx */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Purge Count: %d Refs: %d\n", dlm->purge_count,
|
||||
kref_read(&dlm->dlm_refs));
|
||||
|
||||
/* Dead Node: xxx */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Dead Node: %d\n", dlm->reco.dead_node);
|
||||
|
||||
/* What about DLM_RECO_STATE_FINALIZE? */
|
||||
|
@ -775,19 +775,19 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
state = "INACTIVE";
|
||||
|
||||
/* Recovery Pid: xxxx Master: xxx State: xxxx */
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"Recovery Pid: %d Master: %d State: %s\n",
|
||||
task_pid_nr(dlm->dlm_reco_thread_task),
|
||||
dlm->reco.new_master, state);
|
||||
|
||||
/* Recovery Map: xx xx */
|
||||
out += snprintf(buf + out, len - out, "Recovery Map: ");
|
||||
out += scnprintf(buf + out, len - out, "Recovery Map: ");
|
||||
out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES,
|
||||
buf + out, len - out);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
|
||||
/* Recovery Node State: */
|
||||
out += snprintf(buf + out, len - out, "Recovery Node State:\n");
|
||||
out += scnprintf(buf + out, len - out, "Recovery Node State:\n");
|
||||
list_for_each_entry(node, &dlm->reco.node_data, list) {
|
||||
switch (node->state) {
|
||||
case DLM_RECO_NODE_DATA_INIT:
|
||||
|
@ -815,7 +815,7 @@ static int debug_state_print(struct dlm_ctxt *dlm, char *buf, int len)
|
|||
state = "BAD";
|
||||
break;
|
||||
}
|
||||
out += snprintf(buf + out, len - out, "\t%u - %s\n",
|
||||
out += scnprintf(buf + out, len - out, "\t%u - %s\n",
|
||||
node->node_num, state);
|
||||
}
|
||||
|
||||
|
|
|
@ -2749,8 +2749,6 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
|
|||
return ret;
|
||||
}
|
||||
|
||||
#define DLM_MIGRATION_RETRY_MS 100
|
||||
|
||||
/*
|
||||
* Should be called only after beginning the domain leave process.
|
||||
* There should not be any remaining locks on nonlocal lock resources,
|
||||
|
|
|
@ -39,8 +39,6 @@
|
|||
static int dlm_thread(void *data);
|
||||
static void dlm_flush_asts(struct dlm_ctxt *dlm);
|
||||
|
||||
#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num)
|
||||
|
||||
/* will exit holding res->spinlock, but may drop in function */
|
||||
/* waits until flags are cleared on res->state */
|
||||
void __dlm_wait_on_lockres_flags(struct dlm_lock_resource *res, int flags)
|
||||
|
@ -680,7 +678,6 @@ static void dlm_flush_asts(struct dlm_ctxt *dlm)
|
|||
|
||||
#define DLM_THREAD_TIMEOUT_MS (4 * 1000)
|
||||
#define DLM_THREAD_MAX_DIRTY 100
|
||||
#define DLM_THREAD_MAX_ASTS 10
|
||||
|
||||
static int dlm_thread(void *data)
|
||||
{
|
||||
|
|
|
@ -2133,7 +2133,7 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
|
|||
}
|
||||
|
||||
#define OCFS2_SEC_BITS 34
|
||||
#define OCFS2_SEC_SHIFT (64 - 34)
|
||||
#define OCFS2_SEC_SHIFT (64 - OCFS2_SEC_BITS)
|
||||
#define OCFS2_NSEC_MASK ((1ULL << OCFS2_SEC_SHIFT) - 1)
|
||||
|
||||
/* LVB only has room for 64 bits of time here so we pack it for
|
||||
|
|
|
@ -91,7 +91,7 @@ enum ocfs2_replay_state {
|
|||
struct ocfs2_replay_map {
|
||||
unsigned int rm_slots;
|
||||
enum ocfs2_replay_state rm_state;
|
||||
unsigned char rm_replay_slots[0];
|
||||
unsigned char rm_replay_slots[];
|
||||
};
|
||||
|
||||
static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
|
||||
|
|
|
@ -406,7 +406,7 @@ static int ocfs2_mknod(struct inode *dir,
|
|||
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
goto roll_back;
|
||||
}
|
||||
|
||||
if (si.enable) {
|
||||
|
@ -414,7 +414,7 @@ static int ocfs2_mknod(struct inode *dir,
|
|||
meta_ac, data_ac);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
goto roll_back;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -427,7 +427,7 @@ static int ocfs2_mknod(struct inode *dir,
|
|||
OCFS2_I(dir)->ip_blkno);
|
||||
if (status) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
goto roll_back;
|
||||
}
|
||||
|
||||
dl = dentry->d_fsdata;
|
||||
|
@ -437,12 +437,19 @@ static int ocfs2_mknod(struct inode *dir,
|
|||
&lookup);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto leave;
|
||||
goto roll_back;
|
||||
}
|
||||
|
||||
insert_inode_hash(inode);
|
||||
d_instantiate(dentry, inode);
|
||||
status = 0;
|
||||
|
||||
roll_back:
|
||||
if (status < 0 && S_ISDIR(mode)) {
|
||||
ocfs2_add_links_count(dirfe, -1);
|
||||
drop_nlink(dir);
|
||||
}
|
||||
|
||||
leave:
|
||||
if (status < 0 && did_quota_inode)
|
||||
dquot_free_inode(inode);
|
||||
|
|
|
@ -470,7 +470,7 @@ struct ocfs2_extent_list {
|
|||
__le16 l_reserved1;
|
||||
__le64 l_reserved2; /* Pad to
|
||||
sizeof(ocfs2_extent_rec) */
|
||||
/*10*/ struct ocfs2_extent_rec l_recs[0]; /* Extent records */
|
||||
/*10*/ struct ocfs2_extent_rec l_recs[]; /* Extent records */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -484,7 +484,7 @@ struct ocfs2_chain_list {
|
|||
__le16 cl_count; /* Total chains in this list */
|
||||
__le16 cl_next_free_rec; /* Next unused chain slot */
|
||||
__le64 cl_reserved1;
|
||||
/*10*/ struct ocfs2_chain_rec cl_recs[0]; /* Chain records */
|
||||
/*10*/ struct ocfs2_chain_rec cl_recs[]; /* Chain records */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -496,7 +496,7 @@ struct ocfs2_truncate_log {
|
|||
/*00*/ __le16 tl_count; /* Total records in this log */
|
||||
__le16 tl_used; /* Number of records in use */
|
||||
__le32 tl_reserved1;
|
||||
/*08*/ struct ocfs2_truncate_rec tl_recs[0]; /* Truncate records */
|
||||
/*08*/ struct ocfs2_truncate_rec tl_recs[]; /* Truncate records */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -640,7 +640,7 @@ struct ocfs2_local_alloc
|
|||
__le16 la_size; /* Size of included bitmap, in bytes */
|
||||
__le16 la_reserved1;
|
||||
__le64 la_reserved2;
|
||||
/*10*/ __u8 la_bitmap[0];
|
||||
/*10*/ __u8 la_bitmap[];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -653,7 +653,7 @@ struct ocfs2_inline_data
|
|||
* for data, starting at id_data */
|
||||
__le16 id_reserved0;
|
||||
__le32 id_reserved1;
|
||||
__u8 id_data[0]; /* Start of user data */
|
||||
__u8 id_data[]; /* Start of user data */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -798,7 +798,7 @@ struct ocfs2_dx_entry_list {
|
|||
* possible in de_entries */
|
||||
__le16 de_num_used; /* Current number of
|
||||
* de_entries entries */
|
||||
struct ocfs2_dx_entry de_entries[0]; /* Indexed dir entries
|
||||
struct ocfs2_dx_entry de_entries[]; /* Indexed dir entries
|
||||
* in a packed array of
|
||||
* length de_num_used */
|
||||
};
|
||||
|
@ -935,7 +935,7 @@ struct ocfs2_refcount_list {
|
|||
__le16 rl_used; /* Current number of used records */
|
||||
__le32 rl_reserved2;
|
||||
__le64 rl_reserved1; /* Pad to sizeof(ocfs2_refcount_record) */
|
||||
/*10*/ struct ocfs2_refcount_rec rl_recs[0]; /* Refcount records */
|
||||
/*10*/ struct ocfs2_refcount_rec rl_recs[]; /* Refcount records */
|
||||
};
|
||||
|
||||
|
||||
|
@ -1021,7 +1021,7 @@ struct ocfs2_xattr_header {
|
|||
buckets. A block uses
|
||||
xb_check and sets
|
||||
this field to zero.) */
|
||||
struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
|
||||
struct ocfs2_xattr_entry xh_entries[]; /* xattr entry list. */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1207,7 +1207,7 @@ struct ocfs2_local_disk_dqinfo {
|
|||
/* Header of one chunk of a quota file */
|
||||
struct ocfs2_local_disk_chunk {
|
||||
__le32 dqc_free; /* Number of free entries in the bitmap */
|
||||
__u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
|
||||
__u8 dqc_bitmap[]; /* Bitmap of entries in the corresponding
|
||||
* chunk of quota file */
|
||||
};
|
||||
|
||||
|
|
|
@ -154,6 +154,7 @@ ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
|
|||
}
|
||||
|
||||
static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
|
||||
__acquires(&rf->rf_lock)
|
||||
{
|
||||
struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
|
||||
|
||||
|
@ -161,6 +162,7 @@ static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
|
|||
}
|
||||
|
||||
static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
|
||||
__releases(&rf->rf_lock)
|
||||
{
|
||||
struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
|
||||
|
||||
|
|
|
@ -33,9 +33,6 @@
|
|||
|
||||
static DEFINE_SPINLOCK(resv_lock);
|
||||
|
||||
#define OCFS2_MIN_RESV_WINDOW_BITS 8
|
||||
#define OCFS2_MAX_RESV_WINDOW_BITS 1024
|
||||
|
||||
int ocfs2_dir_resv_allowed(struct ocfs2_super *osb)
|
||||
{
|
||||
return (osb->osb_resv_level && osb->osb_dir_resv_level);
|
||||
|
|
|
@ -656,8 +656,6 @@ static int ocfs2_sysfs_init(void)
|
|||
* and easier to preserve the name.
|
||||
*/
|
||||
|
||||
#define FS_OCFS2_NM 1
|
||||
|
||||
static struct ctl_table ocfs2_nm_table[] = {
|
||||
{
|
||||
.procname = "hb_ctl_path",
|
||||
|
|
|
@ -2509,9 +2509,6 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
|
|||
|
||||
bail:
|
||||
brelse(group_bh);
|
||||
|
||||
if (status)
|
||||
mlog_errno(status);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
@ -2582,8 +2579,6 @@ static int _ocfs2_free_clusters(handle_t *handle,
|
|||
num_clusters);
|
||||
|
||||
out:
|
||||
if (status)
|
||||
mlog_errno(status);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
|
|
@ -220,31 +220,31 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
|
|||
int i, out = 0;
|
||||
unsigned long flags;
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
|
||||
"Device", osb->dev_str, osb->uuid_str,
|
||||
osb->fs_generation, osb->vol_label);
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => State: %d Flags: 0x%lX\n", "Volume",
|
||||
atomic_read(&osb->vol_state), osb->osb_flags);
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => Block: %lu Cluster: %d\n", "Sizes",
|
||||
osb->sb->s_blocksize, osb->s_clustersize);
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => Compat: 0x%X Incompat: 0x%X "
|
||||
"ROcompat: 0x%X\n",
|
||||
"Features", osb->s_feature_compat,
|
||||
osb->s_feature_incompat, osb->s_feature_ro_compat);
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount",
|
||||
osb->s_mount_opt, osb->s_atime_quantum);
|
||||
|
||||
if (cconn) {
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => Stack: %s Name: %*s "
|
||||
"Version: %d.%d\n", "Cluster",
|
||||
(*osb->osb_cluster_stack == '\0' ?
|
||||
|
@ -255,7 +255,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
|
|||
}
|
||||
|
||||
spin_lock_irqsave(&osb->dc_task_lock, flags);
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => Pid: %d Count: %lu WakeSeq: %lu "
|
||||
"WorkSeq: %lu\n", "DownCnvt",
|
||||
(osb->dc_task ? task_pid_nr(osb->dc_task) : -1),
|
||||
|
@ -264,32 +264,32 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
|
|||
spin_unlock_irqrestore(&osb->dc_task_lock, flags);
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:",
|
||||
out += scnprintf(buf + out, len - out, "%10s => Pid: %d Nodes:",
|
||||
"Recovery",
|
||||
(osb->recovery_thread_task ?
|
||||
task_pid_nr(osb->recovery_thread_task) : -1));
|
||||
if (rm->rm_used == 0)
|
||||
out += snprintf(buf + out, len - out, " None\n");
|
||||
out += scnprintf(buf + out, len - out, " None\n");
|
||||
else {
|
||||
for (i = 0; i < rm->rm_used; i++)
|
||||
out += snprintf(buf + out, len - out, " %d",
|
||||
out += scnprintf(buf + out, len - out, " %d",
|
||||
rm->rm_entries[i]);
|
||||
out += snprintf(buf + out, len - out, "\n");
|
||||
out += scnprintf(buf + out, len - out, "\n");
|
||||
}
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => Pid: %d Interval: %lu\n", "Commit",
|
||||
(osb->commit_task ? task_pid_nr(osb->commit_task) : -1),
|
||||
osb->osb_commit_interval);
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => State: %d TxnId: %lu NumTxns: %d\n",
|
||||
"Journal", osb->journal->j_state,
|
||||
osb->journal->j_trans_id,
|
||||
atomic_read(&osb->journal->j_num_trans));
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => GlobalAllocs: %d LocalAllocs: %d "
|
||||
"SubAllocs: %d LAWinMoves: %d SAExtends: %d\n",
|
||||
"Stats",
|
||||
|
@ -299,7 +299,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
|
|||
atomic_read(&osb->alloc_stats.moves),
|
||||
atomic_read(&osb->alloc_stats.bg_extends));
|
||||
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => State: %u Descriptor: %llu Size: %u bits "
|
||||
"Default: %u bits\n",
|
||||
"LocalAlloc", osb->local_alloc_state,
|
||||
|
@ -307,7 +307,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
|
|||
osb->local_alloc_bits, osb->local_alloc_default_bits);
|
||||
|
||||
spin_lock(&osb->osb_lock);
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s => InodeSlot: %d StolenInodes: %d, "
|
||||
"MetaSlot: %d StolenMeta: %d\n", "Steal",
|
||||
osb->s_inode_steal_slot,
|
||||
|
@ -316,20 +316,20 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
|
|||
atomic_read(&osb->s_num_meta_stolen));
|
||||
spin_unlock(&osb->osb_lock);
|
||||
|
||||
out += snprintf(buf + out, len - out, "OrphanScan => ");
|
||||
out += snprintf(buf + out, len - out, "Local: %u Global: %u ",
|
||||
out += scnprintf(buf + out, len - out, "OrphanScan => ");
|
||||
out += scnprintf(buf + out, len - out, "Local: %u Global: %u ",
|
||||
os->os_count, os->os_seqno);
|
||||
out += snprintf(buf + out, len - out, " Last Scan: ");
|
||||
out += scnprintf(buf + out, len - out, " Last Scan: ");
|
||||
if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE)
|
||||
out += snprintf(buf + out, len - out, "Disabled\n");
|
||||
out += scnprintf(buf + out, len - out, "Disabled\n");
|
||||
else
|
||||
out += snprintf(buf + out, len - out, "%lu seconds ago\n",
|
||||
out += scnprintf(buf + out, len - out, "%lu seconds ago\n",
|
||||
(unsigned long)(ktime_get_seconds() - os->os_scantime));
|
||||
|
||||
out += snprintf(buf + out, len - out, "%10s => %3s %10s\n",
|
||||
out += scnprintf(buf + out, len - out, "%10s => %3s %10s\n",
|
||||
"Slots", "Num", "RecoGen");
|
||||
for (i = 0; i < osb->max_slots; ++i) {
|
||||
out += snprintf(buf + out, len - out,
|
||||
out += scnprintf(buf + out, len - out,
|
||||
"%10s %c %3d %10d\n",
|
||||
" ",
|
||||
(i == osb->slot_num ? '*' : ' '),
|
||||
|
|
|
@ -146,7 +146,7 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
|
|||
struct page *page = buf->page;
|
||||
|
||||
if (page_count(page) == 1) {
|
||||
memcg_kmem_uncharge(page, 0);
|
||||
memcg_kmem_uncharge_page(page, 0);
|
||||
__SetPageLocked(page);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -334,6 +334,30 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Should pair with userfaultfd_signal_pending() */
|
||||
static inline long userfaultfd_get_blocking_state(unsigned int flags)
|
||||
{
|
||||
if (flags & FAULT_FLAG_INTERRUPTIBLE)
|
||||
return TASK_INTERRUPTIBLE;
|
||||
|
||||
if (flags & FAULT_FLAG_KILLABLE)
|
||||
return TASK_KILLABLE;
|
||||
|
||||
return TASK_UNINTERRUPTIBLE;
|
||||
}
|
||||
|
||||
/* Should pair with userfaultfd_get_blocking_state() */
|
||||
static inline bool userfaultfd_signal_pending(unsigned int flags)
|
||||
{
|
||||
if (flags & FAULT_FLAG_INTERRUPTIBLE)
|
||||
return signal_pending(current);
|
||||
|
||||
if (flags & FAULT_FLAG_KILLABLE)
|
||||
return fatal_signal_pending(current);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* The locking rules involved in returning VM_FAULT_RETRY depending on
|
||||
* FAULT_FLAG_ALLOW_RETRY, FAULT_FLAG_RETRY_NOWAIT and
|
||||
|
@ -355,7 +379,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
|
|||
struct userfaultfd_ctx *ctx;
|
||||
struct userfaultfd_wait_queue uwq;
|
||||
vm_fault_t ret = VM_FAULT_SIGBUS;
|
||||
bool must_wait, return_to_userland;
|
||||
bool must_wait;
|
||||
long blocking_state;
|
||||
|
||||
/*
|
||||
|
@ -462,11 +486,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
|
|||
uwq.ctx = ctx;
|
||||
uwq.waken = false;
|
||||
|
||||
return_to_userland =
|
||||
(vmf->flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) ==
|
||||
(FAULT_FLAG_USER|FAULT_FLAG_KILLABLE);
|
||||
blocking_state = return_to_userland ? TASK_INTERRUPTIBLE :
|
||||
TASK_KILLABLE;
|
||||
blocking_state = userfaultfd_get_blocking_state(vmf->flags);
|
||||
|
||||
spin_lock_irq(&ctx->fault_pending_wqh.lock);
|
||||
/*
|
||||
|
@ -492,8 +512,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
|
|||
up_read(&mm->mmap_sem);
|
||||
|
||||
if (likely(must_wait && !READ_ONCE(ctx->released) &&
|
||||
(return_to_userland ? !signal_pending(current) :
|
||||
!fatal_signal_pending(current)))) {
|
||||
!userfaultfd_signal_pending(vmf->flags))) {
|
||||
wake_up_poll(&ctx->fd_wqh, EPOLLIN);
|
||||
schedule();
|
||||
ret |= VM_FAULT_MAJOR;
|
||||
|
@ -515,8 +534,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
|
|||
set_current_state(blocking_state);
|
||||
if (READ_ONCE(uwq.waken) ||
|
||||
READ_ONCE(ctx->released) ||
|
||||
(return_to_userland ? signal_pending(current) :
|
||||
fatal_signal_pending(current)))
|
||||
userfaultfd_signal_pending(vmf->flags))
|
||||
break;
|
||||
schedule();
|
||||
}
|
||||
|
@ -524,30 +542,6 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
|
|||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
if (return_to_userland) {
|
||||
if (signal_pending(current) &&
|
||||
!fatal_signal_pending(current)) {
|
||||
/*
|
||||
* If we got a SIGSTOP or SIGCONT and this is
|
||||
* a normal userland page fault, just let
|
||||
* userland return so the signal will be
|
||||
* handled and gdb debugging works. The page
|
||||
* fault code immediately after we return from
|
||||
* this function is going to release the
|
||||
* mmap_sem and it's not depending on it
|
||||
* (unlike gup would if we were not to return
|
||||
* VM_FAULT_RETRY).
|
||||
*
|
||||
* If a fatal signal is pending we still take
|
||||
* the streamlined VM_FAULT_RETRY failure path
|
||||
* and there's no need to retake the mmap_sem
|
||||
* in such case.
|
||||
*/
|
||||
down_read(&mm->mmap_sem);
|
||||
ret = VM_FAULT_NOPAGE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we race with the list_del; list_add in
|
||||
* userfaultfd_ctx_read(), however because we don't ever run
|
||||
|
|
|
@@ -4,6 +4,58 @@
# (This file is not included when SRCARCH=um since UML borrows several
# asm headers from the host architecutre.)

mandatory-y += atomic.h
mandatory-y += barrier.h
mandatory-y += bitops.h
mandatory-y += bug.h
mandatory-y += bugs.h
mandatory-y += cacheflush.h
mandatory-y += checksum.h
mandatory-y += compat.h
mandatory-y += current.h
mandatory-y += delay.h
mandatory-y += device.h
mandatory-y += div64.h
mandatory-y += dma-contiguous.h
mandatory-y += dma-mapping.h
mandatory-y += dma.h
mandatory-y += emergency-restart.h
mandatory-y += exec.h
mandatory-y += fb.h
mandatory-y += ftrace.h
mandatory-y += futex.h
mandatory-y += hardirq.h
mandatory-y += hw_irq.h
mandatory-y += io.h
mandatory-y += irq.h
mandatory-y += irq_regs.h
mandatory-y += irq_work.h
mandatory-y += kdebug.h
mandatory-y += kmap_types.h
mandatory-y += kprobes.h
mandatory-y += linkage.h
mandatory-y += local.h
mandatory-y += mm-arch-hooks.h
mandatory-y += mmiowb.h
mandatory-y += mmu.h
mandatory-y += mmu_context.h
mandatory-y += module.h
mandatory-y += msi.h
mandatory-y += pci.h
mandatory-y += percpu.h
mandatory-y += pgalloc.h
mandatory-y += preempt.h
mandatory-y += sections.h
mandatory-y += serial.h
mandatory-y += shmparam.h
mandatory-y += simd.h
mandatory-y += switch_to.h
mandatory-y += timex.h
mandatory-y += tlbflush.h
mandatory-y += topology.h
mandatory-y += trace_clock.h
mandatory-y += uaccess.h
mandatory-y += unaligned.h
mandatory-y += vga.h
mandatory-y += word-at-a-time.h
mandatory-y += xor.h
@@ -94,6 +94,11 @@ enum {
* Enable legacy local memory.events.
*/
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),

/*
* Enable recursive subtree protection
*/
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
};

/* cftype->flags */
@@ -526,6 +526,11 @@ static inline void i_mmap_lock_write(struct address_space *mapping)
down_write(&mapping->i_mmap_rwsem);
}

static inline int i_mmap_trylock_write(struct address_space *mapping)
{
return down_write_trylock(&mapping->i_mmap_rwsem);
}

static inline void i_mmap_unlock_write(struct address_space *mapping)
{
up_write(&mapping->i_mmap_rwsem);
@@ -485,6 +485,12 @@ static inline void arch_free_page(struct page *page, int order) { }
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
static inline int arch_make_page_accessible(struct page *page)
{
return 0;
}
#endif

struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
@@ -87,8 +87,6 @@ extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PUD_SIZE ((1UL) << HPAGE_PUD_SHIFT)
#define HPAGE_PUD_MASK (~(HPAGE_PUD_SIZE - 1))

extern bool is_vma_temporary_stack(struct vm_area_struct *vma);

extern unsigned long transparent_hugepage_flags;

/*
@@ -100,7 +98,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
if (vma->vm_flags & VM_NOHUGEPAGE)
return false;

if (is_vma_temporary_stack(vma))
if (vma_is_temporary_stack(vma))
return false;

if (test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
@@ -289,7 +287,11 @@ static inline struct list_head *page_deferred_list(struct page *page)
#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })

#define hpage_nr_pages(x) 1
static inline int hpage_nr_pages(struct page *page)
{
VM_BUG_ON_PAGE(PageTail(page), page);
return 1;
}

static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
{
@@ -46,7 +46,52 @@ struct resv_map {
long adds_in_progress;
struct list_head region_cache;
long region_cache_count;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* On private mappings, the counter to uncharge reservations is stored
* here. If these fields are 0, then either the mapping is shared, or
* cgroup accounting is disabled for this resv_map.
*/
struct page_counter *reservation_counter;
unsigned long pages_per_hpage;
struct cgroup_subsys_state *css;
#endif
};

/*
* Region tracking -- allows tracking of reservations and instantiated pages
* across the pages in a mapping.
*
* The region data structures are embedded into a resv_map and protected
* by a resv_map's lock. The set of regions within the resv_map represent
* reservations for huge pages, or huge pages that have already been
* instantiated within the map. The from and to elements are huge page
* indicies into the associated mapping. from indicates the starting index
* of the region. to represents the first index past the end of the region.
*
* For example, a file region structure with from == 0 and to == 4 represents
* four huge pages in a mapping. It is important to note that the to element
* represents the first element past the end of the region. This is used in
* arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
*
* Interval notation of the form [from, to) will be used to indicate that
* the endpoint from is inclusive and to is exclusive.
*/
struct file_region {
struct list_head link;
long from;
long to;
#ifdef CONFIG_CGROUP_HUGETLB
/*
* On shared mappings, each reserved region appears as a struct
* file_region in resv_map. These fields hold the info needed to
* uncharge each reservation.
*/
struct page_counter *reservation_counter;
struct cgroup_subsys_state *css;
#endif
};

extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);

@@ -109,6 +154,8 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);

pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);

struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);

extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;

@@ -151,6 +198,12 @@ static inline unsigned long hugetlb_total_pages(void)
return 0;
}

static inline struct address_space *hugetlb_page_mapping_lock_write(
struct page *hpage)
{
return NULL;
}

static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
pte_t *ptep)
{
@@ -390,7 +443,10 @@ static inline bool is_file_hugepages(struct file *file)
return is_file_shm_hugepages(file);
}


static inline struct hstate *hstate_inode(struct inode *i)
{
return HUGETLBFS_SB(i->i_sb)->hstate;
}
#else /* !CONFIG_HUGETLBFS */

#define is_file_hugepages(file) false
@@ -402,6 +458,10 @@ hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
return ERR_PTR(-ENOSYS);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
return NULL;
}
#endif /* !CONFIG_HUGETLBFS */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
@@ -432,8 +492,8 @@ struct hstate {
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
#ifdef CONFIG_CGROUP_HUGETLB
/* cgroup control files */
struct cftype cgroup_files_dfl[5];
struct cftype cgroup_files_legacy[5];
struct cftype cgroup_files_dfl[7];
struct cftype cgroup_files_legacy[9];
#endif
char name[HSTATE_NAME_LEN];
};
@@ -472,11 +532,6 @@ extern unsigned int default_hstate_idx;

#define default_hstate (hstates[default_hstate_idx])

static inline struct hstate *hstate_inode(struct inode *i)
{
return HUGETLBFS_SB(i->i_sb)->hstate;
}

static inline struct hstate *hstate_file(struct file *f)
{
return hstate_inode(file_inode(f));
@@ -729,11 +784,6 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
return NULL;
}

static inline struct hstate *hstate_inode(struct inode *i)
{
return NULL;
}

static inline struct hstate *page_hstate(struct page *page)
{
return NULL;
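As the comment block above spells out, file_region ranges use half-open [from, to) intervals, so the size of a region is a plain subtraction. An illustrative helper, not part of this diff (the name file_region_pages is made up here for the example):

    /* Illustrative only: [from, to) covers to - from huge pages. */
    static long file_region_pages(const struct file_region *rg)
    {
            return rg->to - rg->from;   /* e.g. [0, 4) covers 4 huge pages */
    }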
@@ -18,34 +18,96 @@
#include <linux/mmdebug.h>

struct hugetlb_cgroup;
struct resv_map;
struct file_region;

/*
* Minimum page order trackable by hugetlb cgroup.
* At least 3 pages are necessary for all the tracking information.
* At least 4 pages are necessary for all the tracking information.
* The second tail page (hpage[2]) is the fault usage cgroup.
* The third tail page (hpage[3]) is the reservation usage cgroup.
*/
#define HUGETLB_CGROUP_MIN_ORDER 2

#ifdef CONFIG_CGROUP_HUGETLB
enum hugetlb_memory_event {
HUGETLB_MAX,
HUGETLB_NR_MEMORY_EVENTS,
};

static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
struct hugetlb_cgroup {
struct cgroup_subsys_state css;

/*
* the counter to account for hugepages from hugetlb.
*/
struct page_counter hugepage[HUGE_MAX_HSTATE];

/*
* the counter to account for hugepage reservations from hugetlb.
*/
struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];

atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];

/* Handle for "hugetlb.events" */
struct cgroup_file events_file[HUGE_MAX_HSTATE];

/* Handle for "hugetlb.events.local" */
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
};

static inline struct hugetlb_cgroup *
__hugetlb_cgroup_from_page(struct page *page, bool rsvd)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);

if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
return NULL;
return (struct hugetlb_cgroup *)page[2].private;
if (rsvd)
return (struct hugetlb_cgroup *)page[3].private;
else
return (struct hugetlb_cgroup *)page[2].private;
}

static inline
int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
{
return __hugetlb_cgroup_from_page(page, false);
}

static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_rsvd(struct page *page)
{
return __hugetlb_cgroup_from_page(page, true);
}

static inline int __set_hugetlb_cgroup(struct page *page,
struct hugetlb_cgroup *h_cg, bool rsvd)
{
VM_BUG_ON_PAGE(!PageHuge(page), page);

if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
return -1;
page[2].private = (unsigned long)h_cg;
if (rsvd)
page[3].private = (unsigned long)h_cg;
else
page[2].private = (unsigned long)h_cg;
return 0;
}

static inline int set_hugetlb_cgroup(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return __set_hugetlb_cgroup(page, h_cg, false);
}

static inline int set_hugetlb_cgroup_rsvd(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return __set_hugetlb_cgroup(page, h_cg, true);
}

static inline bool hugetlb_cgroup_disabled(void)
{
return !cgroup_subsys_enabled(hugetlb_cgrp_subsys);
@@ -53,25 +115,67 @@ static inline bool hugetlb_cgroup_disabled(void)

extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr);
extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page);
extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page);
extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page);
extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
struct page *page);

extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
extern void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg);
extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
unsigned long start,
unsigned long end);

extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
unsigned long nr_pages);

extern void hugetlb_cgroup_file_init(void) __init;
extern void hugetlb_cgroup_migrate(struct page *oldhpage,
struct page *newhpage);

#else
static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
struct file_region *rg,
unsigned long nr_pages)
{
}

static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
{
return NULL;
}

static inline
int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg)
static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_resv(struct page *page)
{
return NULL;
}

static inline struct hugetlb_cgroup *
hugetlb_cgroup_from_page_rsvd(struct page *page)
{
return NULL;
}

static inline int set_hugetlb_cgroup(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return 0;
}

static inline int set_hugetlb_cgroup_rsvd(struct page *page,
struct hugetlb_cgroup *h_cg)
{
return 0;
}
@@ -81,28 +185,57 @@ static inline bool hugetlb_cgroup_disabled(void)
return true;
}

static inline int
hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
{
return 0;
}

static inline void
hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page)
static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx,
unsigned long nr_pages,
struct hugetlb_cgroup **ptr)
{
return 0;
}

static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page)
{
}

static inline void
hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page)
hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg,
struct page *page)
{
}

static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page)
{
}

static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx,
unsigned long nr_pages,
struct page *page)
{
}
static inline void hugetlb_cgroup_uncharge_cgroup(int idx,
unsigned long nr_pages,
struct hugetlb_cgroup *h_cg)
{
}

static inline void
hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg)
hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg)
{
}

static inline void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
unsigned long start,
unsigned long end)
{
}

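The updated comment above explains why HUGETLB_CGROUP_MIN_ORDER stays at 2 even though a fourth struct page is now needed: an order-2 compound page already provides four struct pages, so both page[2] (fault usage cgroup) and page[3] (reservation usage cgroup) exist. A hedged sketch of that arithmetic; hugetlb_cgroup_trackable is a made-up name used only for illustration:

    /* Illustrative only: an order-n huge page spans 1 << n struct pages. */
    static inline bool hugetlb_cgroup_trackable(unsigned int order)
    {
            return (1UL << order) >= 4;  /* i.e. order >= HUGETLB_CGROUP_MIN_ORDER */
    }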
@@ -190,7 +190,7 @@ void kasan_init_tags(void);

void *kasan_reset_tag(const void *addr);

void kasan_report(unsigned long addr, size_t size,
bool kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip);

#else /* CONFIG_KASAN_SW_TAGS */
@@ -165,7 +165,8 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
do { \
kthread_init_work(&(dwork)->work, (fn)); \
timer_setup(&(dwork)->timer, \
kthread_delayed_work_timer_fn, 0); \
kthread_delayed_work_timer_fn, \
TIMER_IRQSAFE); \
} while (0)

int kthread_worker_fn(void *worker_ptr);
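For context, the initializer above is normally reached through kthread_init_delayed_work(); the change only adds TIMER_IRQSAFE to the embedded timer. A minimal usage sketch under that assumption (my_work_fn and example_init are placeholders):

    static void my_work_fn(struct kthread_work *work)
    {
            /* runs in the kthread worker's context */
    }

    static struct kthread_delayed_work dwork;

    static void example_init(void)
    {
            kthread_init_delayed_work(&dwork, my_work_fn);
    }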
@@ -1367,12 +1367,11 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
void memcg_kmem_put_cache(struct kmem_cache *cachep);

#ifdef CONFIG_MEMCG_KMEM
int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge(struct page *page, int order);
int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
struct mem_cgroup *memcg);
void __memcg_kmem_uncharge_memcg(struct mem_cgroup *memcg,
unsigned int nr_pages);
int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
unsigned int nr_pages);
void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
void __memcg_kmem_uncharge_page(struct page *page, int order);

extern struct static_key_false memcg_kmem_enabled_key;
extern struct workqueue_struct *memcg_kmem_cache_wq;
@@ -1394,32 +1393,33 @@ static inline bool memcg_kmem_enabled(void)
return static_branch_unlikely(&memcg_kmem_enabled_key);
}

static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
int order)
{
if (memcg_kmem_enabled())
return __memcg_kmem_charge(page, gfp, order);
return __memcg_kmem_charge_page(page, gfp, order);
return 0;
}

static inline void memcg_kmem_uncharge(struct page *page, int order)
static inline void memcg_kmem_uncharge_page(struct page *page, int order)
{
if (memcg_kmem_enabled())
__memcg_kmem_uncharge(page, order);
__memcg_kmem_uncharge_page(page, order);
}

static inline int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp,
int order, struct mem_cgroup *memcg)
static inline int memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
unsigned int nr_pages)
{
if (memcg_kmem_enabled())
return __memcg_kmem_charge_memcg(page, gfp, order, memcg);
return __memcg_kmem_charge(memcg, gfp, nr_pages);
return 0;
}

static inline void memcg_kmem_uncharge_memcg(struct page *page, int order,
struct mem_cgroup *memcg)
static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg,
unsigned int nr_pages)
{
if (memcg_kmem_enabled())
__memcg_kmem_uncharge_memcg(memcg, 1 << order);
__memcg_kmem_uncharge(memcg, nr_pages);
}

/*
@@ -1436,21 +1436,23 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p);

#else

static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp,
int order)
{
return 0;
}

static inline void memcg_kmem_uncharge(struct page *page, int order)
static inline void memcg_kmem_uncharge_page(struct page *page, int order)
{
}

static inline int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
static inline int __memcg_kmem_charge_page(struct page *page, gfp_t gfp,
int order)
{
return 0;
}

static inline void __memcg_kmem_uncharge(struct page *page, int order)
static inline void __memcg_kmem_uncharge_page(struct page *page, int order)
{
}

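The net effect of this hunk is a split of the kmem charging API: the page-based entry points keep an order argument and gain a _page suffix, while the memcg-based ones now take a page count. Converting between the two forms is a 1 << order shift, as the old uncharge path above already did. A hedged sketch of the two call styles, not taken from the diff itself (the wrapper names are invented):

    /* Illustrative only: two independent examples, not a real call sequence. */
    static int charge_with_page(struct page *page, gfp_t gfp, int order)
    {
            return memcg_kmem_charge_page(page, gfp, order);
    }

    static int charge_with_memcg(struct mem_cgroup *memcg, gfp_t gfp, int order)
    {
            /* the memcg-based variant now counts pages, not an order */
            return memcg_kmem_charge(memcg, gfp, 1U << order);
    }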
@@ -173,34 +173,7 @@ extern int mpol_parse_str(char *str, struct mempolicy **mpol);
extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);

/* Check if a vma is migratable */
static inline bool vma_migratable(struct vm_area_struct *vma)
{
if (vma->vm_flags & (VM_IO | VM_PFNMAP))
return false;

/*
* DAX device mappings require predictable access latency, so avoid
* incurring periodic faults.
*/
if (vma_is_dax(vma))
return false;

#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
if (vma->vm_flags & VM_HUGETLB)
return false;
#endif

/*
* Migration allocates pages in the highest zone. If we cannot
* do so then migration (at least from node to node) is not
* possible.
*/
if (vma->vm_file &&
gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
< policy_zone)
return false;
return true;
}
extern bool vma_migratable(struct vm_area_struct *vma);

extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
extern void mpol_put_task_policy(struct task_struct *);
@@ -27,6 +27,7 @@
#include <linux/memremap.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/sched.h>

struct mempolicy;
struct anon_vma;
@@ -356,10 +357,12 @@ extern unsigned int kobjsize(const void *objp);

/*
* Special vmas that are non-mergable, non-mlock()able.
* Note: mm/huge_memory.c VM_NO_THP depends on this definition.
*/
#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)

/* This mask prevents VMA from being scanned with khugepaged */
#define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)

/* This mask defines which mm->def_flags a process can inherit its parent */
#define VM_INIT_DEF_MASK VM_NOHUGEPAGE

@@ -378,15 +381,75 @@ extern unsigned int kobjsize(const void *objp);
*/
extern pgprot_t protection_map[16];

#define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */
#define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */
#define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */
#define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */
#define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */
#define FAULT_FLAG_TRIED 0x20 /* Second try */
#define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */
#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */
#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */
/**
* Fault flag definitions.
*
* @FAULT_FLAG_WRITE: Fault was a write fault.
* @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
* @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
* @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_sem and wait when retrying.
* @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
* @FAULT_FLAG_TRIED: The fault has been tried once.
* @FAULT_FLAG_USER: The fault originated in userspace.
* @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
* @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
* @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
*
* About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify
* whether we would allow page faults to retry by specifying these two
* fault flags correctly. Currently there can be three legal combinations:
*
* (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and
* this is the first try
*
* (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and
* we've already tried at least once
*
* (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry
*
* The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never
* be used. Note that page faults can be allowed to retry for multiple times,
* in which case we'll have an initial fault with flags (a) then later on
* continuous faults with flags (b). We should always try to detect pending
* signals before a retry to make sure the continuous page faults can still be
* interrupted if necessary.
*/
#define FAULT_FLAG_WRITE 0x01
#define FAULT_FLAG_MKWRITE 0x02
#define FAULT_FLAG_ALLOW_RETRY 0x04
#define FAULT_FLAG_RETRY_NOWAIT 0x08
#define FAULT_FLAG_KILLABLE 0x10
#define FAULT_FLAG_TRIED 0x20
#define FAULT_FLAG_USER 0x40
#define FAULT_FLAG_REMOTE 0x80
#define FAULT_FLAG_INSTRUCTION 0x100
#define FAULT_FLAG_INTERRUPTIBLE 0x200

/*
* The default fault flags that should be used by most of the
* arch-specific page fault handlers.
*/
#define FAULT_FLAG_DEFAULT (FAULT_FLAG_ALLOW_RETRY | \
FAULT_FLAG_KILLABLE | \
FAULT_FLAG_INTERRUPTIBLE)

/**
* fault_flag_allow_retry_first - check ALLOW_RETRY the first time
*
* This is mostly used for places where we want to try to avoid taking
* the mmap_sem for too long a time when waiting for another condition
* to change, in which case we can try to be polite to release the
* mmap_sem in the first round to avoid potential starvation of other
* processes that would also want the mmap_sem.
*
* Return: true if the page fault allows retry and this is the first
* attempt of the fault handling; false otherwise.
*/
static inline bool fault_flag_allow_retry_first(unsigned int flags)
{
return (flags & FAULT_FLAG_ALLOW_RETRY) &&
(!(flags & FAULT_FLAG_TRIED));
}

#define FAULT_FLAG_TRACE \
{ FAULT_FLAG_WRITE, "WRITE" }, \
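The new FAULT_FLAG_DEFAULT and fault_flag_allow_retry_first() above are aimed at arch page fault handlers. A hedged sketch of the intended usage pattern; do_example_fault is invented and the real handlers also re-take mmap_sem before retrying, which is omitted here:

    static vm_fault_t do_example_fault(struct vm_area_struct *vma,
                                       unsigned long address, bool write)
    {
            unsigned int flags = FAULT_FLAG_DEFAULT;  /* (a) ALLOW_RETRY, !TRIED */
            vm_fault_t fault;

            if (write)
                    flags |= FAULT_FLAG_WRITE;
    retry:
            fault = handle_mm_fault(vma, address, flags);
            if (fault & VM_FAULT_RETRY) {
                    /* later attempts are combination (b): ALLOW_RETRY and TRIED */
                    flags |= FAULT_FLAG_TRIED;
                    /* a real handler re-acquires mmap_sem here before retrying */
                    goto retry;
            }
            return fault;
    }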
@@ -397,7 +460,8 @@ extern pgprot_t protection_map[16];
{ FAULT_FLAG_TRIED, "TRIED" }, \
{ FAULT_FLAG_USER, "USER" }, \
{ FAULT_FLAG_REMOTE, "REMOTE" }, \
{ FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }
{ FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \
{ FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }

/*
* vm_fault is filled by the the pagefault handler and passed to the vma's
@@ -541,6 +605,30 @@ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
return !vma->vm_ops;
}

static inline bool vma_is_temporary_stack(struct vm_area_struct *vma)
{
int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);

if (!maybe_stack)
return false;

if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
VM_STACK_INCOMPLETE_SETUP)
return true;

return false;
}

static inline bool vma_is_foreign(struct vm_area_struct *vma)
{
if (!current->mm)
return true;

if (current->mm != vma->vm_mm)
return true;

return false;
}
#ifdef CONFIG_SHMEM
/*
* The vma_is_shmem is not inline because it is used only by slow
@@ -770,6 +858,24 @@ static inline unsigned int compound_order(struct page *page)
return page[1].compound_order;
}

static inline bool hpage_pincount_available(struct page *page)
{
/*
* Can the page->hpage_pinned_refcount field be used? That field is in
* the 3rd page of the compound page, so the smallest (2-page) compound
* pages cannot support it.
*/
page = compound_head(page);
return PageCompound(page) && compound_order(page) > 1;
}

static inline int compound_pincount(struct page *page)
{
VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
page = compound_head(page);
return atomic_read(compound_pincount_ptr(page));
}

static inline void set_compound_order(struct page *page, unsigned int order)
{
page[1].compound_order = order;
@@ -1001,6 +1107,8 @@ static inline void get_page(struct page *page)
page_ref_inc(page);
}

bool __must_check try_grab_page(struct page *page, unsigned int flags);

static inline __must_check bool try_get_page(struct page *page)
{
page = compound_head(page);
@@ -1029,29 +1137,87 @@ static inline void put_page(struct page *page)
__put_page(page);
}

/**
* unpin_user_page() - release a gup-pinned page
* @page: pointer to page to be released
/*
* GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload
* the page's refcount so that two separate items are tracked: the original page
* reference count, and also a new count of how many pin_user_pages() calls were
* made against the page. ("gup-pinned" is another term for the latter).
*
* Pages that were pinned via pin_user_pages*() must be released via either
* unpin_user_page(), or one of the unpin_user_pages*() routines. This is so
* that eventually such pages can be separately tracked and uniquely handled. In
* particular, interactions with RDMA and filesystems need special handling.
* With this scheme, pin_user_pages() becomes special: such pages are marked as
* distinct from normal pages. As such, the unpin_user_page() call (and its
* variants) must be used in order to release gup-pinned pages.
*
* unpin_user_page() and put_page() are not interchangeable, despite this early
* implementation that makes them look the same. unpin_user_page() calls must
* be perfectly matched up with pin*() calls.
* Choice of value:
*
* By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference
* counts with respect to pin_user_pages() and unpin_user_page() becomes
* simpler, due to the fact that adding an even power of two to the page
* refcount has the effect of using only the upper N bits, for the code that
* counts up using the bias value. This means that the lower bits are left for
* the exclusive use of the original code that increments and decrements by one
* (or at least, by much smaller values than the bias value).
*
* Of course, once the lower bits overflow into the upper bits (and this is
* OK, because subtraction recovers the original values), then visual inspection
* no longer suffices to directly view the separate counts. However, for normal
* applications that don't have huge page reference counts, this won't be an
* issue.
*
* Locking: the lockless algorithm described in page_cache_get_speculative()
* and page_cache_gup_pin_speculative() provides safe operation for
* get_user_pages and page_mkclean and other calls that race to set up page
* table entries.
*/
static inline void unpin_user_page(struct page *page)
{
put_page(page);
}
#define GUP_PIN_COUNTING_BIAS (1U << 10)

void unpin_user_page(struct page *page);
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
bool make_dirty);

void unpin_user_pages(struct page **pages, unsigned long npages);

/**
* page_maybe_dma_pinned() - report if a page is pinned for DMA.
*
* This function checks if a page has been pinned via a call to
* pin_user_pages*().
*
* For non-huge pages, the return value is partially fuzzy: false is not fuzzy,
* because it means "definitely not pinned for DMA", but true means "probably
* pinned for DMA, but possibly a false positive due to having at least
* GUP_PIN_COUNTING_BIAS worth of normal page references".
*
* False positives are OK, because: a) it's unlikely for a page to get that many
* refcounts, and b) all the callers of this routine are expected to be able to
* deal gracefully with a false positive.
*
* For huge pages, the result will be exactly correct. That's because we have
* more tracking data available: the 3rd struct page in the compound page is
* used to track the pincount (instead using of the GUP_PIN_COUNTING_BIAS
* scheme).
*
* For more information, please see Documentation/vm/pin_user_pages.rst.
*
* @page: pointer to page to be queried.
* @Return: True, if it is likely that the page has been "dma-pinned".
* False, if the page is definitely not dma-pinned.
*/
static inline bool page_maybe_dma_pinned(struct page *page)
{
if (hpage_pincount_available(page))
return compound_pincount(page) > 0;

/*
* page_ref_count() is signed. If that refcount overflows, then
* page_ref_count() returns a negative value, and callers will avoid
* further incrementing the refcount.
*
* Here, for that overflow case, use the signed bit to count a little
* bit higher via unsigned math, and thus still get an accurate result.
*/
return ((unsigned int)page_ref_count(compound_head(page))) >=
GUP_PIN_COUNTING_BIAS;
}

#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
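A small worked example of the bias arithmetic described above, with illustrative numbers only: each pin_user_pages() reference adds GUP_PIN_COUNTING_BIAS (1024) to the refcount, so ordinary references stay in the low bits and pins accumulate in the high bits.

    /* Illustrative only, not part of the diff. */
    static void pin_bias_example(void)
    {
            unsigned int refcount = 3;              /* three normal references   */

            refcount += GUP_PIN_COUNTING_BIAS;      /* one FOLL_PIN pin -> 1027  */
            refcount += GUP_PIN_COUNTING_BIAS;      /* a second pin     -> 2051  */

            /* page_maybe_dma_pinned(): >= 1024 reads as "probably pinned",     */
            /* and refcount / GUP_PIN_COUNTING_BIAS == 2 approximates the pins. */
    }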
@@ -2364,26 +2530,7 @@ struct vm_unmapped_area_info {
unsigned long align_offset;
};

extern unsigned long unmapped_area(struct vm_unmapped_area_info *info);
extern unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info);

/*
* Search for an unmapped address range.
*
* We are looking for a range that:
* - does not intersect with any VMA;
* - is contained within the [low_limit, high_limit) interval;
* - is at least the desired size.
* - satisfies (begin_addr & align_mask) == (align_offset & align_mask)
*/
static inline unsigned long
vm_unmapped_area(struct vm_unmapped_area_info *info)
{
if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
return unmapped_area_topdown(info);
else
return unmapped_area(info);
}
extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);

/* truncate.c */
extern void truncate_inode_pages(struct address_space *, loff_t);
@@ -137,7 +137,7 @@ struct page {
};
struct { /* Second tail page of compound page */
unsigned long _compound_pad_1; /* compound_head */
unsigned long _compound_pad_2;
atomic_t hpage_pinned_refcount;
/* For both global and memcg */
struct list_head deferred_list;
};
@@ -226,6 +226,11 @@ static inline atomic_t *compound_mapcount_ptr(struct page *page)
return &page[1].compound_mapcount;
}

static inline atomic_t *compound_pincount_ptr(struct page *page)
{
return &page[2].hpage_pinned_refcount;
}

/*
* Used for sizing the vmemmap region on some architectures
*/
@@ -243,6 +243,8 @@ enum node_stat_item {
NR_DIRTIED, /* page dirtyings since bootup */
NR_WRITTEN, /* page writings since bootup */
NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */
NR_FOLL_PIN_ACQUIRED, /* via: pin_user_page(), gup flag: FOLL_PIN */
NR_FOLL_PIN_RELEASED, /* pages returned via unpin_user_page() */
NR_VM_NODE_STAT_ITEMS
};

@@ -1372,7 +1374,7 @@ static inline int pfn_valid(unsigned long pfn)
}
#endif

static inline int pfn_present(unsigned long pfn)
static inline int pfn_in_present_section(unsigned long pfn)
{
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
@@ -1409,7 +1411,7 @@ void sparse_init(void);
#else
#define sparse_init() do {} while (0)
#define sparse_index_init(_sec, _nid) do {} while (0)
#define pfn_present pfn_valid
#define pfn_in_present_section pfn_valid
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */

@@ -102,6 +102,15 @@ static inline void page_ref_sub(struct page *page, int nr)
__page_ref_mod(page, -nr);
}

static inline int page_ref_sub_return(struct page *page, int nr)
{
int ret = atomic_sub_return(nr, &page->_refcount);

if (page_ref_tracepoint_active(__tracepoint_page_ref_mod_and_return))
__page_ref_mod_and_return(page, -nr, ret);
return ret;
}

static inline void page_ref_inc(struct page *page)
{
atomic_inc(&page->_refcount);
@@ -33,8 +33,8 @@ enum mapping_flags {

/**
* mapping_set_error - record a writeback error in the address_space
* @mapping - the mapping in which an error should be set
* @error - the error to set in the mapping
* @mapping: the mapping in which an error should be set
* @error: the error to set in the mapping
*
* When writeback fails in some way, we must record that error so that
* userspace can be informed when fsync and the like are called. We endeavor
@@ -70,11 +70,9 @@ static inline void mapping_clear_unevictable(struct address_space *mapping)
clear_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline int mapping_unevictable(struct address_space *mapping)
static inline bool mapping_unevictable(struct address_space *mapping)
{
if (mapping)
return test_bit(AS_UNEVICTABLE, &mapping->flags);
return !!mapping;
return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
}

static inline void mapping_set_exiting(struct address_space *mapping)
@@ -305,9 +303,9 @@ static inline struct page *find_lock_page(struct address_space *mapping,
* atomic allocation!
*/
static inline struct page *find_or_create_page(struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
pgoff_t index, gfp_t gfp_mask)
{
return pagecache_get_page(mapping, offset,
return pagecache_get_page(mapping, index,
FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
gfp_mask);
}
@@ -333,14 +331,19 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
mapping_gfp_mask(mapping));
}

static inline struct page *find_subpage(struct page *page, pgoff_t offset)
/*
* Given the page we found in the page cache, return the page corresponding
* to this index in the file
*/
static inline struct page *find_subpage(struct page *head, pgoff_t index)
{
if (PageHuge(page))
return page;
/* HugeTLBfs wants the head page regardless */
if (PageHuge(head))
return head;

VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(PageTail(head), head);

return page + (offset & (compound_nr(page) - 1));
return head + (index & (compound_nr(head) - 1));
}

struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
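The reworked find_subpage() above is plain index arithmetic on the head page: the low bits of the file index select the tail page within the compound page. As an illustrative worked example, for an order-9 THP compound_nr(head) is 512, so index 1000 maps to head + (1000 & 511) = head + 488, while a hugetlbfs page is always returned as the head page regardless of the index.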
@@ -10,6 +10,8 @@
#include <linux/cred.h>
#include <linux/refcount.h>
#include <linux/posix-timers.h>
#include <linux/mm_types.h>
#include <asm/ptrace.h>

/*
* Types defining task->signal and task->sighand and APIs using them:
@@ -376,6 +378,20 @@ static inline int signal_pending_state(long state, struct task_struct *p)
return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
}

/*
* This should only be used in fault handlers to decide whether we
* should stop the current fault routine to handle the signals
* instead, especially with the case where we've got interrupted with
* a VM_FAULT_RETRY.
*/
static inline bool fault_signal_pending(vm_fault_t fault_flags,
struct pt_regs *regs)
{
return unlikely((fault_flags & VM_FAULT_RETRY) &&
(fatal_signal_pending(current) ||
(user_mode(regs) && signal_pending(current))));
}

/*
* Reevaluate whether the task has signals pending delivery.
* Wake the task if so.
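fault_signal_pending() above is meant to be checked right after handle_mm_fault() in arch fault handlers, before any retry. A hedged sketch of that call site; example_handle_fault is invented and the surrounding retry bookkeeping is elided:

    /* Sketch of the intended call site in an arch fault handler (names invented). */
    static void example_handle_fault(struct pt_regs *regs,
                                     struct vm_area_struct *vma,
                                     unsigned long address, unsigned int flags)
    {
            vm_fault_t fault = handle_mm_fault(vma, address, flags);

            if (fault_signal_pending(fault, regs)) {
                    /*
                     * A fatal signal, or any signal on a user-mode fault,
                     * interrupted the retry: bail out instead of retrying.
                     */
                    return;
            }

            /* ...otherwise inspect fault and retry with FAULT_FLAG_TRIED... */
    }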
@@ -374,7 +374,6 @@ extern int sysctl_min_slab_ratio;
#define node_reclaim_mode 0
#endif

extern int page_evictable(struct page *page);
extern void check_move_unevictable_pages(struct pagevec *pvec);

extern int kswapd_run(int nid);
@@ -130,20 +130,11 @@ static inline int numa_node_id(void)
* Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem().
*/
DECLARE_PER_CPU(int, _numa_mem_);
extern int _node_numa_mem_[MAX_NUMNODES];

#ifndef set_numa_mem
static inline void set_numa_mem(int node)
{
this_cpu_write(_numa_mem_, node);
_node_numa_mem_[numa_node_id()] = node;
}
#endif

#ifndef node_to_mem_node
static inline int node_to_mem_node(int node)
{
return _node_numa_mem_[node];
}
#endif

@@ -166,7 +157,6 @@ static inline int cpu_to_mem(int cpu)
static inline void set_cpu_numa_mem(int cpu, int node)
{
per_cpu(_numa_mem_, cpu) = node;
_node_numa_mem_[cpu_to_node(cpu)] = node;
}
#endif

@@ -180,13 +170,6 @@ static inline int numa_mem_id(void)
}
#endif

#ifndef node_to_mem_node
static inline int node_to_mem_node(int node)
{
return node;
}
#endif

#ifndef cpu_to_mem
static inline int cpu_to_mem(int cpu)
{
@@ -0,0 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mmap

#if !defined(_TRACE_MMAP_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MMAP_H

#include <linux/tracepoint.h>

TRACE_EVENT(vm_unmapped_area,

TP_PROTO(unsigned long addr, struct vm_unmapped_area_info *info),

TP_ARGS(addr, info),

TP_STRUCT__entry(
__field(unsigned long, addr)
__field(unsigned long, total_vm)
__field(unsigned long, flags)
__field(unsigned long, length)
__field(unsigned long, low_limit)
__field(unsigned long, high_limit)
__field(unsigned long, align_mask)
__field(unsigned long, align_offset)
),

TP_fast_assign(
__entry->addr = addr;
__entry->total_vm = current->mm->total_vm;
__entry->flags = info->flags;
__entry->length = info->length;
__entry->low_limit = info->low_limit;
__entry->high_limit = info->high_limit;
__entry->align_mask = info->align_mask;
__entry->align_offset = info->align_offset;
),

TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n",
IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr,
IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0,
__entry->total_vm, __entry->flags, __entry->length,
__entry->low_limit, __entry->high_limit, __entry->align_mask,
__entry->align_offset)
);
#endif

/* This part must be outside protection */
#include <trace/define_trace.h>
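TRACE_EVENT(vm_unmapped_area, ...) generates a trace_vm_unmapped_area() emitter for the unmapped-area search paths, and TP_printk splits __entry->addr into an address on success and an errno when IS_ERR_VALUE() holds. A hedged sketch of the emitting side; the surrounding function is illustrative and not the diff's actual mm/mmap.c change:

    /* Sketch only: where the tracepoint would plausibly be emitted. */
    static unsigned long example_vm_unmapped_area(struct vm_unmapped_area_info *info)
    {
            unsigned long addr;

            if (info->flags & VM_UNMAPPED_AREA_TOPDOWN)
                    addr = unmapped_area_topdown(info);
            else
                    addr = unmapped_area(info);

            trace_vm_unmapped_area(addr, info);  /* addr may carry an -errno value */
            return addr;
    }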