Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

 - membarrier updates (Mathieu Desnoyers)
 - SMP balancing optimizations (Mel Gorman)
 - stats update optimizations (Peter Zijlstra)
 - RT scheduler race fixes (Steven Rostedt)
 - misc fixes and updates

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Use a recently used CPU as an idle candidate and the basis for SIS
  sched/fair: Do not migrate if the prev_cpu is idle
  sched/fair: Restructure wake_affine*() to return a CPU id
  sched/fair: Remove unnecessary parameters from wake_affine_idle()
  sched/rt: Make update_curr_rt() more accurate
  sched/rt: Up the root domain ref count when passing it around via IPIs
  sched/rt: Use container_of() to get root domain in rto_push_irq_work_func()
  sched/core: Optimize update_stats_*()
  sched/core: Optimize ttwu_stat()
  membarrier/selftest: Test private expedited sync core command
  membarrier/arm64: Provide core serializing command
  membarrier/x86: Provide core serializing command
  membarrier: Provide core serializing command, *_SYNC_CORE
  lockin/x86: Implement sync_core_before_usermode()
  locking: Introduce sync_core_before_usermode()
  membarrier/selftest: Test global expedited command
  membarrier: Provide GLOBAL_EXPEDITED command
  membarrier: Document scheduler barrier requirements
  powerpc, membarrier: Skip memory barrier in switch_mm()
  membarrier/selftest: Test private expedited command
commit ab2d92ad88
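As a quick illustration of the new membarrier commands listed above, the hedged user-space sketch below registers for and issues the expedited commands through the raw membarrier(2) system call. It assumes a kernel containing this series and the <linux/membarrier.h> header it updates; the program and its structure are illustrative, not part of the patch.

/* Hedged sketch: probe and exercise the new expedited membarrier commands.
 * Commands that are unsupported return -EINVAL; the private sync-core
 * command returns -EPERM until the process has registered for it.
 */
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

static int sys_membarrier(int cmd, int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	int supported = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);

	if (supported < 0)
		return 1;	/* membarrier(2) not available */

	if (supported & MEMBARRIER_CMD_GLOBAL_EXPEDITED) {
		/* Registration only records intent; it always returns 0. */
		sys_membarrier(MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 0);
		sys_membarrier(MEMBARRIER_CMD_GLOBAL_EXPEDITED, 0);
	}

	if (supported & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
		/* Using the command before registering yields -EPERM. */
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0) < 0 &&
		    errno == EPERM)
			printf("sync-core command requires registration first\n");
		sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0);
		sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0);
	}
	return 0;
}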
@@ -9025,6 +9025,7 @@ L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	kernel/sched/membarrier.c
 F:	include/uapi/linux/membarrier.h
+F:	arch/powerpc/include/asm/membarrier.h

 MEMORY MANAGEMENT
 L:	linux-mm@kvack.org
@@ -16,6 +16,7 @@ config ARM64
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
 	select ARCH_HAS_KCOV
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
@@ -324,6 +324,10 @@ alternative_else_nop_endif
 	ldp	x28, x29, [sp, #16 * 14]
 	ldr	lr, [sp, #S_LR]
 	add	sp, sp, #S_FRAME_SIZE		// restore sp
+	/*
+	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on eret context synchronization
+	 * when returning from IPI handler, and when returning to user-space.
+	 */

 	.if	\el == 0
 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
@@ -141,6 +141,7 @@ config PPC
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_PHYS_TO_DMA
 	select ARCH_HAS_PMEM_API if PPC64
+	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
@@ -0,0 +1,27 @@
+#ifndef _ASM_POWERPC_MEMBARRIER_H
+#define _ASM_POWERPC_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+	/*
+	 * Only need the full barrier when switching between processes.
+	 * Barrier when switching from kernel to userspace is not
+	 * required here, given that it is implied by mmdrop(). Barrier
+	 * when switching from userspace to kernel is not needed after
+	 * store to rq->curr.
+	 */
+	if (likely(!(atomic_read(&next->membarrier_state) &
+		     (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+		      MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
+		return;
+
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * after storing to rq->curr, before going back to user-space.
+	 */
+	smp_mb();
+}
+
+#endif /* _ASM_POWERPC_MEMBARRIER_H */
@@ -12,6 +12,7 @@

 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/sched/mm.h>

 #include <asm/mmu_context.h>

@@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
		 *
		 * On the read side the barrier is in pte_xchg(), which orders
		 * the store to the PTE vs the load of mm_cpumask.
+		 *
+		 * This full barrier is needed by membarrier when switching
+		 * between processes after store to rq->curr, before user-space
+		 * memory accesses.
		 */
		smp_mb();

@@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,

	if (new_on_cpu)
		radix_kvm_prefetch_workaround(next);
+	else
+		membarrier_arch_switch_mm(prev, next, tsk);

	/*
	 * The actual HW switching method differs between the various
@@ -55,6 +55,7 @@ config X86
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV if X86_64
 	select ARCH_HAS_PHYS_TO_DMA
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PMEM_API if X86_64
 	select ARCH_HAS_REFCOUNT
 	select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
@@ -62,6 +63,7 @@ config X86
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
+	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAS_ZONE_DEVICE if X86_64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -566,6 +566,11 @@ restore_all:
 .Lrestore_nocheck:
 	RESTORE_REGS 4				# skip orig_eax/error_code
 .Lirq_return:
+	/*
+	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
+	 * when returning from IPI handler and when returning from
+	 * scheduler to user-space.
+	 */
 	INTERRUPT_RETURN

 .section .fixup, "ax"
@@ -691,6 +691,10 @@ GLOBAL(restore_regs_and_return_to_kernel)
 	POP_EXTRA_REGS
 	POP_C_REGS
 	addq	$8, %rsp	/* skip regs->orig_ax */
+	/*
+	 * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
+	 * when returning from IPI handler.
+	 */
 	INTERRUPT_RETURN

 ENTRY(native_iret)
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SYNC_CORE_H
+#define _ASM_X86_SYNC_CORE_H
+
+#include <linux/preempt.h>
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+
+/*
+ * Ensure that a core serializing instruction is issued before returning
+ * to user-mode. x86 implements return to user-space through sysexit,
+ * sysrel, and sysretq, which are not core serializing.
+ */
+static inline void sync_core_before_usermode(void)
+{
+	/* With PTI, we unconditionally serialize before running user code. */
+	if (static_cpu_has(X86_FEATURE_PTI))
+		return;
+	/*
+	 * Return from interrupt and NMI is done through iret, which is core
+	 * serializing.
+	 */
+	if (in_irq() || in_nmi())
+		return;
+	sync_core();
+}
+
+#endif /* _ASM_X86_SYNC_CORE_H */
@@ -229,6 +229,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 #endif
	this_cpu_write(cpu_tlbstate.is_lazy, false);

+	/*
+	 * The membarrier system call requires a full memory barrier and
+	 * core serialization before returning to user-space, after
+	 * storing to rq->curr. Writing to CR3 provides that full
+	 * memory barrier and core serializing instruction.
+	 */
	if (real_prev == next) {
		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
			   next->context.ctx_id);
@@ -555,6 +555,14 @@ struct task_struct {
	unsigned long			wakee_flip_decay_ts;
	struct task_struct		*last_wakee;

+	/*
+	 * recent_used_cpu is initially set as the last CPU used by a task
+	 * that wakes affine another task. Waker/wakee relationships can
+	 * push tasks around a CPU where each wakeup moves to the next one.
+	 * Tracking a recently used CPU allows a quick search for a recently
+	 * used CPU that may be idle.
+	 */
+	int				recent_used_cpu;
	int				wake_cpu;
 #endif
	int				on_rq;
@@ -7,6 +7,7 @@
 #include <linux/sched.h>
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
+#include <linux/sync_core.h>

 /*
  * Routines for handling mm_structs
@@ -194,18 +195,48 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)

 #ifdef CONFIG_MEMBARRIER
 enum {
	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY		= (1U << 0),
-	MEMBARRIER_STATE_SWITCH_MM				= (1U << 1),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED			= (1U << 1),
+	MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY			= (1U << 2),
+	MEMBARRIER_STATE_GLOBAL_EXPEDITED			= (1U << 3),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY	= (1U << 4),
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE		= (1U << 5),
 };

+enum {
+	MEMBARRIER_FLAG_SYNC_CORE	= (1U << 0),
+};
+
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+#include <asm/membarrier.h>
+#endif
+
+static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
+{
+	if (likely(!(atomic_read(&mm->membarrier_state) &
+		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
+		return;
+	sync_core_before_usermode();
+}
+
 static inline void membarrier_execve(struct task_struct *t)
 {
	atomic_set(&t->mm->membarrier_state, 0);
 }
 #else
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+}
+#endif
 static inline void membarrier_execve(struct task_struct *t)
 {
 }
+static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
+{
+}
 #endif

 #endif /* _LINUX_SCHED_MM_H */
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SYNC_CORE_H
+#define _LINUX_SYNC_CORE_H
+
+#ifdef CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
+#include <asm/sync_core.h>
+#else
+/*
+ * This is a dummy sync_core_before_usermode() implementation that can be used
+ * on all architectures which return to user-space through core serializing
+ * instructions.
+ * If your architecture returns to user-space through non-core-serializing
+ * instructions, you need to write your own functions.
+ */
+static inline void sync_core_before_usermode(void)
+{
+}
+#endif
+
+#endif /* _LINUX_SYNC_CORE_H */
@@ -31,7 +31,7 @@
  * enum membarrier_cmd - membarrier system call command
  * @MEMBARRIER_CMD_QUERY:   Query the set of supported commands. It returns
  *                          a bitmask of valid commands.
- * @MEMBARRIER_CMD_SHARED:  Execute a memory barrier on all running threads.
+ * @MEMBARRIER_CMD_GLOBAL:  Execute a memory barrier on all running threads.
  *                          Upon return from system call, the caller thread
  *                          is ensured that all running threads have passed
  *                          through a state where all memory accesses to
@@ -40,6 +40,28 @@
  *                          (non-running threads are de facto in such a
  *                          state). This covers threads from all processes
  *                          running on the system. This command returns 0.
+ * @MEMBARRIER_CMD_GLOBAL_EXPEDITED:
+ *                          Execute a memory barrier on all running threads
+ *                          of all processes which previously registered
+ *                          with MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.
+ *                          Upon return from system call, the caller thread
+ *                          is ensured that all running threads have passed
+ *                          through a state where all memory accesses to
+ *                          user-space addresses match program order between
+ *                          entry to and return from the system call
+ *                          (non-running threads are de facto in such a
+ *                          state). This only covers threads from processes
+ *                          which registered with
+ *                          MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.
+ *                          This command returns 0. Given that
+ *                          registration is about the intent to receive
+ *                          the barriers, it is valid to invoke
+ *                          MEMBARRIER_CMD_GLOBAL_EXPEDITED from a
+ *                          non-registered process.
+ * @MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
+ *                          Register the process intent to receive
+ *                          MEMBARRIER_CMD_GLOBAL_EXPEDITED memory
+ *                          barriers. Always returns 0.
  * @MEMBARRIER_CMD_PRIVATE_EXPEDITED:
  *                          Execute a memory barrier on each running
  *                          thread belonging to the same process as the current
@@ -51,7 +73,7 @@
  *                          to and return from the system call
  *                          (non-running threads are de facto in such a
  *                          state). This only covers threads from the
- *                          same processes as the caller thread. This
+ *                          same process as the caller thread. This
  *                          command returns 0 on success. The
  *                          "expedited" commands complete faster than
  *                          the non-expedited ones, they never block,
@@ -64,18 +86,54 @@
  *                          Register the process intent to use
  *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED. Always
  *                          returns 0.
+ * @MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
+ *                          In addition to provide memory ordering
+ *                          guarantees described in
+ *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED, ensure
+ *                          the caller thread, upon return from system
+ *                          call, that all its running threads siblings
+ *                          have executed a core serializing
+ *                          instruction. (architectures are required to
+ *                          guarantee that non-running threads issue
+ *                          core serializing instructions before they
+ *                          resume user-space execution). This only
+ *                          covers threads from the same process as the
+ *                          caller thread. This command returns 0 on
+ *                          success. The "expedited" commands complete
+ *                          faster than the non-expedited ones, they
+ *                          never block, but have the downside of
+ *                          causing extra overhead. If this command is
+ *                          not implemented by an architecture, -EINVAL
+ *                          is returned. A process needs to register its
+ *                          intent to use the private expedited sync
+ *                          core command prior to using it, otherwise
+ *                          this command returns -EPERM.
+ * @MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
+ *                          Register the process intent to use
+ *                          MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE.
+ *                          If this command is not implemented by an
+ *                          architecture, -EINVAL is returned.
+ *                          Returns 0 on success.
+ * @MEMBARRIER_CMD_SHARED:
+ *                          Alias to MEMBARRIER_CMD_GLOBAL. Provided for
+ *                          header backward compatibility.
  *
  * Command to be passed to the membarrier system call. The commands need to
  * be a single bit each, except for MEMBARRIER_CMD_QUERY which is assigned to
  * the value 0.
  */
 enum membarrier_cmd {
	MEMBARRIER_CMD_QUERY					= 0,
-	MEMBARRIER_CMD_SHARED					= (1 << 0),
-	/* reserved for MEMBARRIER_CMD_SHARED_EXPEDITED (1 << 1) */
-	/* reserved for MEMBARRIER_CMD_PRIVATE (1 << 2) */
+	MEMBARRIER_CMD_GLOBAL					= (1 << 0),
+	MEMBARRIER_CMD_GLOBAL_EXPEDITED				= (1 << 1),
+	MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED		= (1 << 2),
	MEMBARRIER_CMD_PRIVATE_EXPEDITED			= (1 << 3),
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED		= (1 << 4),
+	MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE		= (1 << 5),
+	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE	= (1 << 6),
+
+	/* Alias for header backward compatibility. */
+	MEMBARRIER_CMD_SHARED			= MEMBARRIER_CMD_GLOBAL,
 };

 #endif /* _UAPI_LINUX_MEMBARRIER_H */
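To make the command semantics documented above concrete, here is a hedged illustration (not part of this patch) of the asymmetric-fence pattern that the expedited commands are designed for: reader threads run only a compiler barrier on their fast path, while the writer thread issues MEMBARRIER_CMD_PRIVATE_EXPEDITED on its slow path, which is equivalent to every running sibling thread having executed a full memory barrier. The identifiers reader_fast_path()/writer_slow_path() and the shared flag are illustrative only, and the process is assumed to have already registered with MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED.

/* Hedged illustration of the asymmetric-fence pattern for the private
 * expedited command. Assumes prior registration; only shows the shape
 * of the calls, not a complete synchronization algorithm.
 */
#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdatomic.h>

static _Atomic int flag;	/* hypothetical shared state */

static int reader_fast_path(void)
{
	/* Relaxed load plus a compiler-only barrier; the writer pays for
	 * the heavyweight ordering via membarrier(2). */
	int f = atomic_load_explicit(&flag, memory_order_relaxed);

	atomic_signal_fence(memory_order_seq_cst);
	return f;
}

static void writer_slow_path(void)
{
	atomic_store_explicit(&flag, 1, memory_order_relaxed);
	/* After this returns, every running sibling thread has executed
	 * the equivalent of a full memory barrier. */
	syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
}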
@@ -1412,6 +1412,12 @@ config USERFAULTFD
	  Enable the userfaultfd() system call that allows to intercept and
	  handle page faults in userland.

+config ARCH_HAS_MEMBARRIER_CALLBACKS
+	bool
+
+config ARCH_HAS_MEMBARRIER_SYNC_CORE
+	bool
+
 config EMBEDDED
	bool "Embedded system"
	option allnoconfig_y
@@ -1915,3 +1921,6 @@ config ASN1
	  functions to call on what tags.

 source "kernel/Kconfig.locks"
+
+config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
+	bool
@@ -606,6 +606,11 @@ static void __mmdrop(struct mm_struct *mm)

 void mmdrop(struct mm_struct *mm)
 {
+	/*
+	 * The implicit full barrier implied by atomic_dec_and_test() is
+	 * required by the membarrier system call before returning to
+	 * user-space, after storing to rq->curr.
+	 */
	if (unlikely(atomic_dec_and_test(&mm->mm_count)))
		__mmdrop(mm);
 }
@@ -1630,16 +1630,16 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)

 #ifdef CONFIG_SMP
	if (cpu == rq->cpu) {
-		schedstat_inc(rq->ttwu_local);
-		schedstat_inc(p->se.statistics.nr_wakeups_local);
+		__schedstat_inc(rq->ttwu_local);
+		__schedstat_inc(p->se.statistics.nr_wakeups_local);
	} else {
		struct sched_domain *sd;

-		schedstat_inc(p->se.statistics.nr_wakeups_remote);
+		__schedstat_inc(p->se.statistics.nr_wakeups_remote);
		rcu_read_lock();
		for_each_domain(rq->cpu, sd) {
			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-				schedstat_inc(sd->ttwu_wake_remote);
+				__schedstat_inc(sd->ttwu_wake_remote);
				break;
			}
		}
@@ -1647,14 +1647,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
	}

	if (wake_flags & WF_MIGRATED)
-		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
+		__schedstat_inc(p->se.statistics.nr_wakeups_migrate);
 #endif /* CONFIG_SMP */

-	schedstat_inc(rq->ttwu_count);
-	schedstat_inc(p->se.statistics.nr_wakeups);
+	__schedstat_inc(rq->ttwu_count);
+	__schedstat_inc(p->se.statistics.nr_wakeups);

	if (wake_flags & WF_SYNC)
-		schedstat_inc(p->se.statistics.nr_wakeups_sync);
+		__schedstat_inc(p->se.statistics.nr_wakeups_sync);
 }

 static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@ -2461,6 +2461,7 @@ void wake_up_new_task(struct task_struct *p)
	 * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
	 * as we're not fully set-up yet.
	 */
+	p->recent_used_cpu = task_cpu(p);
	__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
 #endif
	rq = __task_rq_lock(p, &rf);
@@ -2698,23 +2699,27 @@ static struct rq *finish_task_switch(struct task_struct *prev)
	prev_state = prev->state;
	vtime_task_switch(prev);
	perf_event_task_sched_in(prev, current);
-	/*
-	 * The membarrier system call requires a full memory barrier
-	 * after storing to rq->curr, before going back to user-space.
-	 *
-	 * TODO: This smp_mb__after_unlock_lock can go away if PPC end
-	 * up adding a full barrier to switch_mm(), or we should figure
-	 * out if a smp_mb__after_unlock_lock is really the proper API
-	 * to use.
-	 */
-	smp_mb__after_unlock_lock();
	finish_task(prev);
	finish_lock_switch(rq);
	finish_arch_post_lock_switch();

	fire_sched_in_preempt_notifiers(current);
-	if (mm)
+	/*
+	 * When switching through a kernel thread, the loop in
+	 * membarrier_{private,global}_expedited() may have observed that
+	 * kernel thread and not issued an IPI. It is therefore possible to
+	 * schedule between user->kernel->user threads without passing though
+	 * switch_mm(). Membarrier requires a barrier after storing to
+	 * rq->curr, before returning to userspace, so provide them here:
+	 *
+	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
+	 *   provided by mmdrop(),
+	 * - a sync_core for SYNC_CORE.
+	 */
+	if (mm) {
+		membarrier_mm_sync_core_before_usermode(mm);
		mmdrop(mm);
+	}
	if (unlikely(prev_state == TASK_DEAD)) {
		if (prev->sched_class->task_dead)
			prev->sched_class->task_dead(prev);
@@ -2818,6 +2823,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
	 */
	arch_start_context_switch(prev);

+	/*
+	 * If mm is non-NULL, we pass through switch_mm(). If mm is
+	 * NULL, we will pass through mmdrop() in finish_task_switch().
+	 * Both of these contain the full memory barrier required by
+	 * membarrier after storing to rq->curr, before returning to
+	 * user-space.
+	 */
	if (!mm) {
		next->active_mm = oldmm;
		mmgrab(oldmm);
@@ -3354,6 +3366,9 @@ static void __sched notrace __schedule(bool preempt)
	 * Make sure that signal_pending_state()->signal_pending() below
	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
	 * done by the caller to avoid the race with signal_wake_up().
+	 *
+	 * The membarrier system call requires a full memory barrier
+	 * after coming from user-space, before storing to rq->curr.
	 */
	rq_lock(rq, &rf);
	smp_mb__after_spinlock();
@@ -3401,17 +3416,16 @@ static void __sched notrace __schedule(bool preempt)
		/*
		 * The membarrier system call requires each architecture
		 * to have a full memory barrier after updating
-		 * rq->curr, before returning to user-space. For TSO
-		 * (e.g. x86), the architecture must provide its own
-		 * barrier in switch_mm(). For weakly ordered machines
-		 * for which spin_unlock() acts as a full memory
-		 * barrier, finish_lock_switch() in common code takes
-		 * care of this barrier. For weakly ordered machines for
-		 * which spin_unlock() acts as a RELEASE barrier (only
-		 * arm64 and PowerPC), arm64 has a full barrier in
-		 * switch_to(), and PowerPC has
-		 * smp_mb__after_unlock_lock() before
-		 * finish_lock_switch().
+		 * rq->curr, before returning to user-space.
+		 *
+		 * Here are the schemes providing that barrier on the
+		 * various architectures:
+		 * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
+		 *   switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
+		 * - finish_lock_switch() for weakly-ordered
+		 *   architectures where spin_unlock is a full barrier,
+		 * - switch_to() for arm64 (weakly-ordered, spin_unlock
+		 *   is a RELEASE barrier),
		 */
		++*switch_count;

@@ -871,7 +871,7 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
	    likely(wait_start > prev_wait_start))
		wait_start -= prev_wait_start;

-	schedstat_set(se->statistics.wait_start, wait_start);
+	__schedstat_set(se->statistics.wait_start, wait_start);
 }

 static inline void
@@ -893,17 +893,17 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
			 * time stamp can be adjusted to accumulate wait time
			 * prior to migration.
			 */
-			schedstat_set(se->statistics.wait_start, delta);
+			__schedstat_set(se->statistics.wait_start, delta);
			return;
		}
		trace_sched_stat_wait(p, delta);
	}

-	schedstat_set(se->statistics.wait_max,
+	__schedstat_set(se->statistics.wait_max,
		      max(schedstat_val(se->statistics.wait_max), delta));
-	schedstat_inc(se->statistics.wait_count);
-	schedstat_add(se->statistics.wait_sum, delta);
-	schedstat_set(se->statistics.wait_start, 0);
+	__schedstat_inc(se->statistics.wait_count);
+	__schedstat_add(se->statistics.wait_sum, delta);
+	__schedstat_set(se->statistics.wait_start, 0);
 }

 static inline void
@@ -928,10 +928,10 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
			delta = 0;

		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
-			schedstat_set(se->statistics.sleep_max, delta);
+			__schedstat_set(se->statistics.sleep_max, delta);

-		schedstat_set(se->statistics.sleep_start, 0);
-		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.sleep_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);

		if (tsk) {
			account_scheduler_latency(tsk, delta >> 10, 1);
@@ -945,15 +945,15 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
			delta = 0;

		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
-			schedstat_set(se->statistics.block_max, delta);
+			__schedstat_set(se->statistics.block_max, delta);

-		schedstat_set(se->statistics.block_start, 0);
-		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(se->statistics.block_start, 0);
+		__schedstat_add(se->statistics.sum_sleep_runtime, delta);

		if (tsk) {
			if (tsk->in_iowait) {
-				schedstat_add(se->statistics.iowait_sum, delta);
-				schedstat_inc(se->statistics.iowait_count);
+				__schedstat_add(se->statistics.iowait_sum, delta);
+				__schedstat_inc(se->statistics.iowait_count);
				trace_sched_stat_iowait(tsk, delta);
			}

@@ -1012,10 +1012,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
		struct task_struct *tsk = task_of(se);

		if (tsk->state & TASK_INTERRUPTIBLE)
-			schedstat_set(se->statistics.sleep_start,
+			__schedstat_set(se->statistics.sleep_start,
				      rq_clock(rq_of(cfs_rq)));
		if (tsk->state & TASK_UNINTERRUPTIBLE)
-			schedstat_set(se->statistics.block_start,
+			__schedstat_set(se->statistics.block_start,
				      rq_clock(rq_of(cfs_rq)));
	}
 }
@@ -5692,27 +5692,31 @@ static int wake_wide(struct task_struct *p)
 * scheduling latency of the CPUs. This seems to work
 * for the overloaded case.
 */
-static bool
-wake_affine_idle(struct sched_domain *sd, struct task_struct *p,
-		 int this_cpu, int prev_cpu, int sync)
+static int
+wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 {
	/*
	 * If this_cpu is idle, it implies the wakeup is from interrupt
	 * context. Only allow the move if cache is shared. Otherwise an
	 * interrupt intensive workload could force all tasks onto one
	 * node depending on the IO topology or IRQ affinity settings.
+	 *
+	 * If the prev_cpu is idle and cache affine then avoid a migration.
+	 * There is no guarantee that the cache hot data from an interrupt
+	 * is more important than cache hot data on the prev_cpu and from
+	 * a cpufreq perspective, it's better to have higher utilisation
+	 * on one CPU.
	 */
	if (idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return true;
+		return idle_cpu(prev_cpu) ? prev_cpu : this_cpu;

	if (sync && cpu_rq(this_cpu)->nr_running == 1)
-		return true;
+		return this_cpu;

-	return false;
+	return nr_cpumask_bits;
 }

-static bool
+static int
 wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
		   int this_cpu, int prev_cpu, int sync)
 {
@@ -5726,7 +5730,7 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
		unsigned long current_load = task_h_load(current);

		if (current_load > this_eff_load)
-			return true;
+			return this_cpu;

		this_eff_load -= current_load;
	}
@@ -5743,28 +5747,28 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
	prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
	prev_eff_load *= capacity_of(this_cpu);

-	return this_eff_load <= prev_eff_load;
+	return this_eff_load <= prev_eff_load ? this_cpu : nr_cpumask_bits;
 }

 static int wake_affine(struct sched_domain *sd, struct task_struct *p,
		       int prev_cpu, int sync)
 {
	int this_cpu = smp_processor_id();
-	bool affine = false;
+	int target = nr_cpumask_bits;

-	if (sched_feat(WA_IDLE) && !affine)
-		affine = wake_affine_idle(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_IDLE))
+		target = wake_affine_idle(this_cpu, prev_cpu, sync);

-	if (sched_feat(WA_WEIGHT) && !affine)
-		affine = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
+	if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
+		target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);

	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
-	if (affine) {
-		schedstat_inc(sd->ttwu_move_affine);
-		schedstat_inc(p->se.statistics.nr_wakeups_affine);
-	}
+	if (target == nr_cpumask_bits)
+		return prev_cpu;

-	return affine;
+	schedstat_inc(sd->ttwu_move_affine);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	return target;
 }

 static inline unsigned long task_util(struct task_struct *p);
@@ -6193,7 +6197,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
	struct sched_domain *sd;
-	int i;
+	int i, recent_used_cpu;

	if (idle_cpu(target))
		return target;
@@ -6204,6 +6208,21 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
		return prev;

+	/* Check a recently used CPU as a potential idle candidate */
+	recent_used_cpu = p->recent_used_cpu;
+	if (recent_used_cpu != prev &&
+	    recent_used_cpu != target &&
+	    cpus_share_cache(recent_used_cpu, target) &&
+	    idle_cpu(recent_used_cpu) &&
+	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
+		/*
+		 * Replace recent_used_cpu with prev as it is a potential
+		 * candidate for the next wake.
+		 */
+		p->recent_used_cpu = prev;
+		return recent_used_cpu;
+	}
+
	sd = rcu_dereference(per_cpu(sd_llc, target));
	if (!sd)
		return target;
@@ -6357,8 +6376,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
		if (cpu == prev_cpu)
			goto pick_cpu;

-		if (wake_affine(affine_sd, p, prev_cpu, sync))
-			new_cpu = cpu;
+		new_cpu = wake_affine(affine_sd, p, prev_cpu, sync);
	}

	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
@@ -6372,9 +6390,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

	if (!sd) {
 pick_cpu:
-		if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
+		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+
+			if (want_affine)
+				current->recent_used_cpu = cpu;
+		}
	} else {
		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
	}
@@ -26,24 +26,110 @@
  * Bitmask made from a "or" of all commands within enum membarrier_cmd,
  * except MEMBARRIER_CMD_QUERY.
  */
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
+#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	\
+	(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE	\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
+#else
+#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK	0
+#endif
+
 #define MEMBARRIER_CMD_BITMASK	\
-	(MEMBARRIER_CMD_SHARED | MEMBARRIER_CMD_PRIVATE_EXPEDITED	\
-	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
+	(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED	\
+	| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED			\
+	| MEMBARRIER_CMD_PRIVATE_EXPEDITED				\
+	| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED			\
+	| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)

 static void ipi_mb(void *info)
 {
	smp_mb();	/* IPIs should be serializing but paranoid. */
 }

-static int membarrier_private_expedited(void)
+static int membarrier_global_expedited(void)
 {
	int cpu;
	bool fallback = false;
	cpumask_var_t tmpmask;

-	if (!(atomic_read(&current->mm->membarrier_state)
-			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
-		return -EPERM;
+	if (num_online_cpus() == 1)
+		return 0;
+
+	/*
+	 * Matches memory barriers around rq->curr modification in
+	 * scheduler.
+	 */
+	smp_mb();	/* system call entry is not a mb. */
+
+	/*
+	 * Expedited membarrier commands guarantee that they won't
+	 * block, hence the GFP_NOWAIT allocation flag and fallback
+	 * implementation.
+	 */
+	if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
+		/* Fallback for OOM. */
+		fallback = true;
+	}
+
+	cpus_read_lock();
+	for_each_online_cpu(cpu) {
+		struct task_struct *p;
+
+		/*
+		 * Skipping the current CPU is OK even through we can be
+		 * migrated at any point. The current CPU, at the point
+		 * where we read raw_smp_processor_id(), is ensured to
+		 * be in program order with respect to the caller
+		 * thread. Therefore, we can skip this CPU from the
+		 * iteration.
+		 */
+		if (cpu == raw_smp_processor_id())
+			continue;
+		rcu_read_lock();
+		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
+				   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
+			if (!fallback)
+				__cpumask_set_cpu(cpu, tmpmask);
+			else
+				smp_call_function_single(cpu, ipi_mb, NULL, 1);
+		}
+		rcu_read_unlock();
+	}
+	if (!fallback) {
+		preempt_disable();
+		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+		preempt_enable();
+		free_cpumask_var(tmpmask);
+	}
+	cpus_read_unlock();
+
+	/*
+	 * Memory barrier on the caller thread _after_ we finished
+	 * waiting for the last IPI. Matches memory barriers around
+	 * rq->curr modification in scheduler.
+	 */
+	smp_mb();	/* exit from system call is not a mb */
+	return 0;
+}
+
+static int membarrier_private_expedited(int flags)
+{
+	int cpu;
+	bool fallback = false;
+	cpumask_var_t tmpmask;
+
+	if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
+			return -EINVAL;
+		if (!(atomic_read(&current->mm->membarrier_state) &
+		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
+			return -EPERM;
+	} else {
+		if (!(atomic_read(&current->mm->membarrier_state) &
+		      MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
+			return -EPERM;
+	}

	if (num_online_cpus() == 1)
		return 0;
@@ -105,21 +191,69 @@ static int membarrier_private_expedited(void)
	return 0;
 }

-static void membarrier_register_private_expedited(void)
+static int membarrier_register_global_expedited(void)
 {
	struct task_struct *p = current;
	struct mm_struct *mm = p->mm;

+	if (atomic_read(&mm->membarrier_state) &
+	    MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
+		return 0;
+	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
+	if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
+		/*
+		 * For single mm user, single threaded process, we can
+		 * simply issue a memory barrier after setting
+		 * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
+		 * no memory access following registration is reordered
+		 * before registration.
+		 */
+		smp_mb();
+	} else {
+		/*
+		 * For multi-mm user threads, we need to ensure all
+		 * future scheduler executions will observe the new
+		 * thread flag state for this mm.
+		 */
+		synchronize_sched();
+	}
+	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
+		  &mm->membarrier_state);
+	return 0;
+}
+
+static int membarrier_register_private_expedited(int flags)
+{
+	struct task_struct *p = current;
+	struct mm_struct *mm = p->mm;
+	int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
+
+	if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
+		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
+			return -EINVAL;
+		state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+	}
+
	/*
	 * We need to consider threads belonging to different thread
	 * groups, which use the same mm. (CLONE_VM but not
	 * CLONE_THREAD).
	 */
-	if (atomic_read(&mm->membarrier_state)
-			& MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
-		return;
-	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
-		  &mm->membarrier_state);
+	if (atomic_read(&mm->membarrier_state) & state)
+		return 0;
+	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
+	if (flags & MEMBARRIER_FLAG_SYNC_CORE)
+		atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
+			  &mm->membarrier_state);
+	if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
+		/*
+		 * Ensure all future scheduler executions will observe the
+		 * new thread flag state for this process.
+		 */
+		synchronize_sched();
+	}
+	atomic_or(state, &mm->membarrier_state);
+	return 0;
 }

 /**
@@ -159,21 +293,28 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
		int cmd_mask = MEMBARRIER_CMD_BITMASK;

		if (tick_nohz_full_enabled())
-			cmd_mask &= ~MEMBARRIER_CMD_SHARED;
+			cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
		return cmd_mask;
	}
-	case MEMBARRIER_CMD_SHARED:
-		/* MEMBARRIER_CMD_SHARED is not compatible with nohz_full. */
+	case MEMBARRIER_CMD_GLOBAL:
+		/* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
		if (tick_nohz_full_enabled())
			return -EINVAL;
		if (num_online_cpus() > 1)
			synchronize_sched();
		return 0;
+	case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
+		return membarrier_global_expedited();
+	case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
+		return membarrier_register_global_expedited();
	case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
-		return membarrier_private_expedited();
+		return membarrier_private_expedited(0);
	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
-		membarrier_register_private_expedited();
-		return 0;
+		return membarrier_register_private_expedited(0);
+	case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
+		return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
+	case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
+		return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
	default:
		return -EINVAL;
	}
@@ -950,12 +950,13 @@ static void update_curr_rt(struct rq *rq)
 {
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
+	u64 now = rq_clock_task(rq);
	u64 delta_exec;

	if (curr->sched_class != &rt_sched_class)
		return;

-	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
+	delta_exec = now - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
		return;

@@ -968,7 +969,7 @@ static void update_curr_rt(struct rq *rq)
	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);

-	curr->se.exec_start = rq_clock_task(rq);
+	curr->se.exec_start = now;
	cgroup_account_cputime(curr, delta_exec);

	sched_rt_avg_update(rq, delta_exec);
@@ -1907,9 +1908,8 @@ static void push_rt_tasks(struct rq *rq)
 * the rt_loop_next will cause the iterator to perform another scan.
 *
 */
-static int rto_next_cpu(struct rq *rq)
+static int rto_next_cpu(struct root_domain *rd)
 {
-	struct root_domain *rd = rq->rd;
	int next;
	int cpu;

@@ -1985,19 +1985,24 @@ static void tell_cpu_to_push(struct rq *rq)
	 * Otherwise it is finishing up and an ipi needs to be sent.
	 */
	if (rq->rd->rto_cpu < 0)
-		cpu = rto_next_cpu(rq);
+		cpu = rto_next_cpu(rq->rd);

	raw_spin_unlock(&rq->rd->rto_lock);

	rto_start_unlock(&rq->rd->rto_loop_start);

-	if (cpu >= 0)
+	if (cpu >= 0) {
+		/* Make sure the rd does not get freed while pushing */
+		sched_get_rd(rq->rd);
		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
+	}
 }

 /* Called from hardirq context */
 void rto_push_irq_work_func(struct irq_work *work)
 {
+	struct root_domain *rd =
+		container_of(work, struct root_domain, rto_push_work);
	struct rq *rq;
	int cpu;

@@ -2013,18 +2018,20 @@ void rto_push_irq_work_func(struct irq_work *work)
		raw_spin_unlock(&rq->lock);
	}

-	raw_spin_lock(&rq->rd->rto_lock);
+	raw_spin_lock(&rd->rto_lock);

	/* Pass the IPI to the next rt overloaded queue */
-	cpu = rto_next_cpu(rq);
+	cpu = rto_next_cpu(rd);

-	raw_spin_unlock(&rq->rd->rto_lock);
+	raw_spin_unlock(&rd->rto_lock);

-	if (cpu < 0)
+	if (cpu < 0) {
+		sched_put_rd(rd);
		return;
+	}

	/* Try the next RT overloaded CPU */
-	irq_work_queue_on(&rq->rd->rto_push_work, cpu);
+	irq_work_queue_on(&rd->rto_push_work, cpu);
 }
 #endif /* HAVE_RT_PUSH_IPI */
@@ -691,6 +691,8 @@ extern struct mutex sched_domains_mutex;
 extern void init_defrootdomain(void);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
+extern void sched_get_rd(struct root_domain *rd);
+extern void sched_put_rd(struct root_domain *rd);

 #ifdef HAVE_RT_PUSH_IPI
 extern void rto_push_irq_work_func(struct irq_work *work);
@@ -31,8 +31,11 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 	rq->rq_sched_info.run_delay += delta;
 }
 #define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
+#define __schedstat_inc(var) do { var++; } while (0)
 #define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0)
+#define __schedstat_add(var, amt) do { var += (amt); } while (0)
 #define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0)
+#define __schedstat_set(var, val) do { var = (val); } while (0)
 #define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
 #define schedstat_val(var) (var)
 #define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)
@@ -48,8 +51,11 @@ static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
 #define schedstat_enabled() 0
+#define __schedstat_inc(var) do { } while (0)
 #define schedstat_inc(var) do { } while (0)
+#define __schedstat_add(var, amt) do { } while (0)
 #define schedstat_add(var, amt) do { } while (0)
+#define __schedstat_set(var, val) do { } while (0)
 #define schedstat_set(var, val) do { } while (0)
 #define schedstat_val(var) 0
 #define schedstat_val_or_zero(var) 0
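The new double-underscore helpers update a statistic unconditionally, while the plain schedstat_*() forms still test schedstat_enabled() first; the benefit is that a caller can evaluate the static branch once and then update several fields without re-checking it each time. A rough user-space sketch of that calling pattern follows; the plain bool stands in for the kernel's static key, and update_stats_wait_end() here is an illustrative caller, not the scheduler's function:

    #include <stdbool.h>
    #include <stdio.h>

    /* User-space stand-ins for the schedstat macros; the real ones use a
     * static branch, a plain bool is enough to show the shape. */
    static bool sched_schedstats = true;

    #define schedstat_enabled()      (sched_schedstats)
    #define __schedstat_inc(var)     do { (var)++; } while (0)
    #define __schedstat_add(var, amt) do { (var) += (amt); } while (0)

    struct sched_statistics { unsigned long wait_count, wait_sum; };

    static void update_stats_wait_end(struct sched_statistics *st, unsigned long delta)
    {
        /* Test the predicate once ... */
        if (!schedstat_enabled())
            return;
        /* ... then use the raw __schedstat_*() helpers for every field. */
        __schedstat_inc(st->wait_count);
        __schedstat_add(st->wait_sum, delta);
    }

    int main(void)
    {
        struct sched_statistics st = { 0, 0 };

        update_stats_wait_end(&st, 42);
        printf("count=%lu sum=%lu\n", st.wait_count, st.wait_sum);
        return 0;
    }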
@@ -259,6 +259,19 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
 		call_rcu_sched(&old_rd->rcu, free_rootdomain);
 }
 
+void sched_get_rd(struct root_domain *rd)
+{
+	atomic_inc(&rd->refcount);
+}
+
+void sched_put_rd(struct root_domain *rd)
+{
+	if (!atomic_dec_and_test(&rd->refcount))
+		return;
+
+	call_rcu_sched(&rd->rcu, free_rootdomain);
+}
+
 static int init_rootdomain(struct root_domain *rd)
 {
 	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
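sched_get_rd()/sched_put_rd() pin the root domain across the IPI chain: a reference is taken before the push work is queued and dropped once no further CPU needs the push, with the final put deferring the free past an RCU-sched grace period. Here is a self-contained user-space sketch of the same get/put shape, using C11 atomics and an immediate free where the kernel would hand the object to call_rcu_sched():

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct root_domain {
        atomic_int refcount;
    };

    static void free_rootdomain(struct root_domain *rd)
    {
        printf("freeing root_domain\n");
        free(rd);
    }

    static void sched_get_rd(struct root_domain *rd)
    {
        atomic_fetch_add(&rd->refcount, 1);
    }

    static void sched_put_rd(struct root_domain *rd)
    {
        /* atomic_dec_and_test() equivalent: only the caller that drops the
         * count to zero performs the (in the kernel, RCU-deferred) free. */
        if (atomic_fetch_sub(&rd->refcount, 1) != 1)
            return;

        free_rootdomain(rd);
    }

    int main(void)
    {
        struct root_domain *rd = malloc(sizeof(*rd));

        atomic_init(&rd->refcount, 1);
        sched_get_rd(rd);   /* pin while an IPI chain is in flight */
        sched_put_rd(rd);   /* IPI chain finished */
        sched_put_rd(rd);   /* last reference: object is freed */
        return 0;
    }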
@@ -16,49 +16,210 @@ static int sys_membarrier(int cmd, int flags)
 static int test_membarrier_cmd_fail(void)
 {
 	int cmd = -1, flags = 0;
+	const char *test_name = "sys membarrier invalid command";
 
 	if (sys_membarrier(cmd, flags) != -1) {
 		ksft_exit_fail_msg(
-			"sys membarrier invalid command test: command = %d, flags = %d. Should fail, but passed\n",
-			cmd, flags);
+			"%s test: command = %d, flags = %d. Should fail, but passed\n",
+			test_name, cmd, flags);
+	}
+	if (errno != EINVAL) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EINVAL, strerror(EINVAL),
+			errno, strerror(errno));
 	}
 
 	ksft_test_result_pass(
-		"sys membarrier invalid command test: command = %d, flags = %d. Failed as expected\n",
-		cmd, flags);
+		"%s test: command = %d, flags = %d, errno = %d. Failed as expected\n",
+		test_name, cmd, flags, errno);
 	return 0;
 }
 
 static int test_membarrier_flags_fail(void)
 {
 	int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags";
 
 	if (sys_membarrier(cmd, flags) != -1) {
 		ksft_exit_fail_msg(
-			"sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Should fail, but passed\n",
-			flags);
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EINVAL) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EINVAL, strerror(EINVAL),
+			errno, strerror(errno));
 	}
 
 	ksft_test_result_pass(
-		"sys membarrier MEMBARRIER_CMD_QUERY invalid flags test: flags = %d. Failed as expected\n",
-		flags);
+		"%s test: flags = %d, errno = %d. Failed as expected\n",
+		test_name, flags, errno);
 	return 0;
 }
 
-static int test_membarrier_success(void)
+static int test_membarrier_global_success(void)
 {
-	int cmd = MEMBARRIER_CMD_SHARED, flags = 0;
-	const char *test_name = "sys membarrier MEMBARRIER_CMD_SHARED\n";
+	int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL";
 
 	if (sys_membarrier(cmd, flags) != 0) {
 		ksft_exit_fail_msg(
-			"sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n",
-			flags);
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
 	}
 
 	ksft_test_result_pass(
-		"sys membarrier MEMBARRIER_CMD_SHARED test: flags = %d\n",
-		flags);
+		"%s test: flags = %d\n", test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_fail(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EPERM) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EPERM, strerror(EPERM),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d, errno = %d\n",
+		test_name, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_register_private_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_fail(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EPERM) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EPERM, strerror(EPERM),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d, errno = %d\n",
+		test_name, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_register_private_expedited_sync_core_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_success(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_register_global_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_global_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
 	return 0;
 }
 
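membarrier(2) has no glibc wrapper, so the selftest's sys_membarrier() helper issues the raw syscall, and the new *_fail tests confirm that the private expedited commands return EPERM until the process has registered for them. A minimal standalone sketch of that register-then-use flow is shown below; it mirrors the selftest's helper rather than being part of the patch, and it assumes UAPI headers and a kernel new enough to provide the MEMBARRIER_CMD_*PRIVATE_EXPEDITED commands:

    #define _GNU_SOURCE
    #include <linux/membarrier.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>

    static int sys_membarrier(int cmd, int flags)
    {
        return syscall(__NR_membarrier, cmd, flags);
    }

    int main(void)
    {
        /* Private expedited commands fail with EPERM until the process
         * registers its intent to use them. */
        if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) {
            fprintf(stderr, "register failed: %s\n", strerror(errno));
            return 1;
        }
        if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
            fprintf(stderr, "membarrier failed: %s\n", strerror(errno));
            return 1;
        }
        puts("private expedited membarrier issued");
        return 0;
    }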
@@ -72,7 +233,45 @@ static int test_membarrier(void)
 	status = test_membarrier_flags_fail();
 	if (status)
 		return status;
-	status = test_membarrier_success();
+	status = test_membarrier_global_success();
+	if (status)
+		return status;
+	status = test_membarrier_private_expedited_fail();
+	if (status)
+		return status;
+	status = test_membarrier_register_private_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_private_expedited_success();
+	if (status)
+		return status;
+	status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+	if (status < 0) {
+		ksft_test_result_fail("sys_membarrier() failed\n");
+		return status;
+	}
+	if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+		status = test_membarrier_private_expedited_sync_core_fail();
+		if (status)
+			return status;
+		status = test_membarrier_register_private_expedited_sync_core_success();
+		if (status)
+			return status;
+		status = test_membarrier_private_expedited_sync_core_success();
+		if (status)
+			return status;
+	}
+	/*
+	 * It is valid to send a global membarrier from a non-registered
+	 * process.
+	 */
+	status = test_membarrier_global_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_register_global_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_global_expedited_success();
 	if (status)
 		return status;
 	return 0;
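The harness only runs the sync-core tests when the MEMBARRIER_CMD_QUERY bitmask advertises MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, since only architectures selecting ARCH_HAS_MEMBARRIER_SYNC_CORE (x86 and arm64 in this series) provide it. A short sketch of that runtime probe, again assuming sufficiently new UAPI headers and using the same raw-syscall wrapper as above:

    #define _GNU_SOURCE
    #include <linux/membarrier.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdio.h>

    static int sys_membarrier(int cmd, int flags)
    {
        return syscall(__NR_membarrier, cmd, flags);
    }

    int main(void)
    {
        /* CMD_QUERY returns a bitmask of the commands this kernel supports,
         * so optional commands can be probed at runtime. */
        int mask = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);

        if (mask < 0) {
            perror("MEMBARRIER_CMD_QUERY");
            return 1;
        }
        if (mask & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE)
            puts("sync-core membarrier supported");
        else
            puts("sync-core membarrier not supported");
        return 0;
    }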
@@ -94,8 +293,10 @@ static int test_membarrier_query(void)
 		}
 		ksft_exit_fail_msg("sys_membarrier() failed\n");
 	}
-	if (!(ret & MEMBARRIER_CMD_SHARED))
+	if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
+		ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n");
 		ksft_exit_fail_msg("sys_membarrier is not supported.\n");
+	}
 
 	ksft_test_result_pass("sys_membarrier available\n");
 	return 0;
@@ -108,5 +309,5 @@ int main(int argc, char **argv)
 	test_membarrier_query();
 	test_membarrier();
 
-	ksft_exit_pass();
+	return ksft_exit_pass();
 }