x86/fpu: Change fpu->fpregs_active from 'int' to 'char', add lazy switching comments
Improve the memory layout of 'struct fpu':

 - change ->fpregs_active from 'int' to 'char' - it's just a single
   flag and modern x86 CPUs can do efficient byte accesses.

 - pack related fields closer to each other: often 'fpu->state' will
   not be touched, while the other fields will - so pack them into a
   group.

Also add comments to each field, describing their purpose, and add
some background information about lazy restores.

Also fix an obsolete, lazy switching related comment in fpu_copy()'s
description.

Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent c47ada305d
commit aeb997b9f2
@@ -159,8 +159,44 @@ union fpregs_state {
 struct fpu {
 	/*
+	 * @state:
+	 *
+	 * In-memory copy of all FPU registers that we save/restore
+	 * over context switches. If the task is using the FPU then
+	 * the registers in the FPU are more recent than this state
+	 * copy. If the task context-switches away then they get
+	 * saved here and represent the FPU state.
+	 *
+	 * After context switches there may be a (short) time period
+	 * during which the in-FPU hardware registers are unchanged
+	 * and still perfectly match this state, if the tasks
+	 * scheduled afterwards are not using the FPU.
+	 *
+	 * This is the 'lazy restore' window of optimization, which
+	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+	 *
+	 * We detect whether a subsequent task uses the FPU via setting
+	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+	 *
+	 * During this window, if the task gets scheduled again, we
+	 * might be able to skip having to do a restore from this
+	 * memory buffer to the hardware registers - at the cost of
+	 * incurring the overhead of #NM fault traps.
+	 *
+	 * Note that on modern CPUs that support the XSAVEOPT (or other
+	 * optimized XSAVE instructions), we don't use #NM traps anymore,
+	 * as the hardware can track whether FPU registers need saving
+	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+	 * logic, which unconditionally saves/restores all FPU state
+	 * across context switches. (if FPU state exists.)
+	 */
+	union fpregs_state		state;
+
+	/*
 	 * @last_cpu:
 	 *
 	 * Records the last CPU on which this context was loaded into
-	 * FPU registers. (In the lazy-switching case we might be
+	 * FPU registers. (In the lazy-restore case we might be
 	 * able to reuse FPU registers across multiple context switches
 	 * this way, if no intermediate task used the FPU.)
 	 *
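For illustration, the lazy-restore tracking that the new @state comment describes can be sketched as a small helper. This is not part of the patch: the helper name fpu_lazy_restore_ok() is made up here, while 'fpu_fpregs_owner_ctx' (the per-CPU pointer to the context whose registers are currently loaded) and 'fpu->last_cpu' are the objects the comment refers to.

#include <linux/types.h>
#include <linux/percpu.h>

DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

/*
 * Illustrative sketch only, not code from this patch: the in-FPU
 * registers of 'fpu' can be reused without restoring from fpu->state
 * if this CPU still owns the context and the context was last loaded
 * on this very CPU (i.e. no other task used the FPU in between).
 */
static inline bool fpu_lazy_restore_ok(struct fpu *fpu, unsigned int cpu)
{
	return this_cpu_read(fpu_fpregs_owner_ctx) == fpu &&
	       fpu->last_cpu == cpu;
}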
@@ -170,23 +206,49 @@ struct fpu {
 	 */
 	unsigned int			last_cpu;
 
-	unsigned int			fpregs_active;
-	union fpregs_state		state;
 	/*
+	 * @fpstate_active:
+	 *
+	 * This flag indicates whether this context is active: if the task
+	 * is not running then we can restore from this context, if the task
+	 * is running then we should save into this context.
+	 */
+	unsigned char			fpstate_active;
+
+	/*
+	 * @fpregs_active:
+	 *
+	 * This flag determines whether a given context is actively
+	 * loaded into the FPU's registers and that those registers
+	 * represent the task's current FPU state.
+	 *
+	 * Note the interaction with fpstate_active:
+	 *
+	 *   # task does not use the FPU:
+	 *   fpstate_active == 0
+	 *
+	 *   # task uses the FPU and regs are active:
+	 *   fpstate_active == 1 && fpregs_active == 1
+	 *
+	 *   # the regs are inactive but still match fpstate:
+	 *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
+	 *
+	 * The third state is what we use for the lazy restore optimization
+	 * on lazy-switching CPUs.
+	 */
+	unsigned char			fpregs_active;
+
+	/*
 	 * @counter:
 	 *
 	 * This counter contains the number of consecutive context switches
 	 * during which the FPU stays used. If this is over a threshold, the
-	 * lazy fpu saving logic becomes unlazy, to save the trap overhead.
+	 * lazy FPU restore logic becomes eager, to save the trap overhead.
 	 * This is an unsigned char so that after 256 iterations the counter
 	 * wraps and the context switch behavior turns lazy again; this is to
 	 * deal with bursty apps that only use the FPU for a short time:
 	 */
 	unsigned char			counter;
-	/*
-	 * This flag indicates whether this context is fpstate_active: if the task is
-	 * not running then we can restore from this context, if the task
-	 * is running then we should save into this context.
-	 */
-	unsigned char			fpstate_active;
 };
 
 #endif /* _ASM_X86_FPU_H */
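As a side note, the @counter heuristic documented above can be illustrated with a rough sketch. The helper name and the threshold value below are invented for this example and are not part of the patch:

/*
 * Hypothetical sketch of the @counter behavior described above.
 * The counter is bumped on context switches across which the task kept
 * using the FPU; past a threshold the restore logic goes eager. Since
 * the field is an 'unsigned char', it wraps after 256 increments, which
 * drops bursty, briefly-FPU-heavy tasks back onto the lazy path.
 */
static bool fpu_prefer_eager_restore(struct fpu *fpu)
{
	fpu->counter++;			/* u8: wraps back to 0 after 255 */

	return fpu->counter > 5;	/* illustrative threshold only */
}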
@@ -227,10 +227,8 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 /*
  * Copy the current task's FPU state to a new task's FPU context.
  *
- * In the 'eager' case we just save to the destination context.
- *
- * In the 'lazy' case we save to the source context, mark the FPU lazy
- * via stts() and copy the source context into the destination context.
+ * In both the 'eager' and the 'lazy' case we save hardware registers
+ * directly to the destination buffer.
  */
 static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
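In other words, the updated description boils down to something like the sketch below. This is not the body of fpu_copy() from this patch; it only assumes a helper along the lines of copy_fpregs_to_fpstate(), which saves the live FPU registers into a given context's in-memory state:

/* Rough sketch of the behavior the new comment describes: */
static void fpu_copy_sketch(struct fpu *dst_fpu, struct fpu *src_fpu)
{
	preempt_disable();
	/*
	 * Eager or lazy, dump the current hardware registers straight
	 * into the destination context, with no intermediate
	 * memory-to-memory copy from src_fpu->state:
	 */
	copy_fpregs_to_fpstate(dst_fpu);
	preempt_enable();
}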
@@ -76,10 +76,11 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
 EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
 
 /*
- * When executing XSAVEOPT (optimized XSAVE), if a processor implementation
- * detects that an FPU state component is still (or is again) in its
- * initialized state, it may clear the corresponding bit in the header.xfeatures
- * field, and can skip the writeout of registers to the corresponding memory layout.
+ * When executing XSAVEOPT (or other optimized XSAVE instructions), if
+ * a processor implementation detects that an FPU state component is still
+ * (or is again) in its initialized state, it may clear the corresponding
+ * bit in the header.xfeatures field, and can skip the writeout of registers
+ * to the corresponding memory layout.
 *
 * This means that when the bit is zero, the state component might still contain
 * some previous - non-initialized register state.
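To make the consequence concrete: anyone reading a state component out of the XSAVE buffer has to consult header.xfeatures before trusting the in-memory bytes for that component. The helper below is a hypothetical illustration of that rule, not code from this patch:

/*
 * Illustrative only: if the bit for 'xfeature_nr' is clear, the component
 * is in its init state and the corresponding memory area may still hold
 * stale data from an earlier save.
 */
static bool xfeature_is_saved(struct xregs_state *xsave, int xfeature_nr)
{
	return (xsave->header.xfeatures >> xfeature_nr) & 1;
}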