linux/arch/powerpc/kernel/asm-offsets.c

511 lines
24 KiB
C
Raw Normal View History

/*
* This program is used to generate definitions needed by
* assembly language modules.
*
* We use the technique used in the OSF Mach kernel code:
* generate asm statements containing #defines,
* compile this file to assembler, and then extract the
* #defines from the assembly-language output.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/hrtimer.h>
#ifdef CONFIG_PPC64
#include <linux/time.h>
#include <linux/hardirq.h>
#endif
#include <linux/kbuild.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/rtas.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/cache.h>
#include <asm/compat.h>
#include <asm/mmu.h>
#include <asm/hvcall.h>
#endif
#ifdef CONFIG_PPC_ISERIES
#include <asm/iseries/alpaca.h>
#endif
#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST)
#include <linux/kvm_host.h>
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
#include <asm/kvm_book3s.h>
#endif
#ifdef CONFIG_PPC32
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
#include "head_booke.h"
#endif
#endif
#if defined(CONFIG_PPC_FSL_BOOK3E)
#include "../mm/mmu_decl.h"
#endif
int main(void)
{
DEFINE(THREAD, offsetof(struct task_struct, thread));
DEFINE(MM, offsetof(struct task_struct, mm));
DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id));
#ifdef CONFIG_PPC64
DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
powerpc: Allow perf_counters to access user memory at interrupt time This provides a mechanism to allow the perf_counters code to access user memory in a PMU interrupt routine. Such an access can cause various kinds of interrupt: SLB miss, MMU hash table miss, segment table miss, or TLB miss, depending on the processor. This commit only deals with 64-bit classic/server processors, which use an MMU hash table. 32-bit processors are already able to access user memory at interrupt time. Since we don't soft-disable on 32-bit, we avoid the possibility of reentering hash_page or the TLB miss handlers, since they run with interrupts disabled. On 64-bit processors, an SLB miss interrupt on a user address will update the slb_cache and slb_cache_ptr fields in the paca. This is OK except in the case where a PMU interrupt occurs in switch_slb, which also accesses those fields. To prevent this, we hard-disable interrupts in switch_slb. Interrupts are already soft-disabled at this point, and will get hard-enabled when they get soft-enabled later. This also reworks slb_flush_and_rebolt: to avoid hard-disabling twice, and to make sure that it clears the slb_cache_ptr when called from other callers than switch_slb, the existing routine is renamed to __slb_flush_and_rebolt, which is called by switch_slb and the new version of slb_flush_and_rebolt. Similarly, switch_stab (used on POWER3 and RS64 processors) gets a hard_irq_disable() to protect the per-cpu variables used there and in ste_allocate. If a MMU hashtable miss interrupt occurs, normally we would call hash_page to look up the Linux PTE for the address and create a HPTE. However, hash_page is fairly complex and takes some locks, so to avoid the possibility of deadlock, we check the preemption count to see if we are in a (pseudo-)NMI handler, and if so, we don't call hash_page but instead treat it like a bad access that will get reported up through the exception table mechanism. An interrupt whose handler runs even though the interrupt occurred when soft-disabled (such as the PMU interrupt) is considered a pseudo-NMI handler, which should use nmi_enter()/nmi_exit() rather than irq_enter()/irq_exit(). Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2009-08-17 13:17:54 +08:00
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
#else
rename thread_info to stack This finally renames the thread_info field in task structure to stack, so that the assumptions about this field are gone and archs have more freedom about placing the thread_info structure. Nonbroken archs which have a proper thread pointer can do the access to both current thread and task structure via a single pointer. It'll allow for a few more cleanups of the fork code, from which e.g. ia64 could benefit. Signed-off-by: Roman Zippel <zippel@linux-m68k.org> [akpm@linux-foundation.org: build fix] Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Haavard Skinnemoen <hskinnemoen@atmel.com> Cc: Mikael Starvik <starvik@axis.com> Cc: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: Hirokazu Takata <takata@linux-m32r.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Greg Ungerer <gerg@uclinux.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Andi Kleen <ak@muc.de> Cc: Chris Zankel <chris@zankel.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-09 17:35:17 +08:00
DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
#endif /* CONFIG_PPC64 */
DEFINE(KSP, offsetof(struct thread_struct, ksp));
DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0]));
DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr));
#ifdef CONFIG_ALTIVEC
DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0]));
DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
#endif /* CONFIG_ALTIVEC */
powerpc: Introduce VSX thread_struct and CONFIG_VSX The layout of the new VSR registers and how they overlap on top of the legacy FPR and VR registers is: VSR doubleword 0 VSR doubleword 1 ---------------------------------------------------------------- VSR[0] | FPR[0] | | ---------------------------------------------------------------- VSR[1] | FPR[1] | | ---------------------------------------------------------------- | ... | | | ... | | ---------------------------------------------------------------- VSR[30] | FPR[30] | | ---------------------------------------------------------------- VSR[31] | FPR[31] | | ---------------------------------------------------------------- VSR[32] | VR[0] | ---------------------------------------------------------------- VSR[33] | VR[1] | ---------------------------------------------------------------- | ... | | ... | ---------------------------------------------------------------- VSR[62] | VR[30] | ---------------------------------------------------------------- VSR[63] | VR[31] | ---------------------------------------------------------------- VSX has 64 128bit registers. The first 32 regs overlap with the FP registers and hence extend them with and additional 64 bits. The second 32 regs overlap with the VMX registers. This commit introduces the thread_struct changes required to reflect this register layout. Ptrace and signals code is updated so that the floating point registers are correctly accessed from the thread_struct when CONFIG_VSX is enabled. Signed-off-by: Michael Neuling <mikey@neuling.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-06-25 12:07:18 +08:00
#ifdef CONFIG_VSX
DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
#endif /* CONFIG_VSX */
#ifdef CONFIG_PPC64
DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
#else /* CONFIG_PPC64 */
DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0));
#endif
#ifdef CONFIG_SPE
DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr));
DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
#endif
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
DEFINE(TI_TASK, offsetof(struct thread_info, task));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
#ifdef CONFIG_PPC64
DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
/* paca */
DEFINE(PACA_SIZE, sizeof(struct paca_struct));
DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
powerpc: Make it possible to move the interrupt handlers away from the kernel This changes the way that the exception prologs transfer control to the handlers in 64-bit kernels with the aim of making it possible to have the prologs separate from the main body of the kernel. Now, instead of computing the address of the handler by taking the top 32 bits of the paca address (to get the 0xc0000000........ part) and ORing in something in the bottom 16 bits, we get the base address of the kernel by doing a load from the paca and add an offset. This also replaces an mfmsr and an ori to compute the MSR value for the handler with a load from the paca. That makes it unnecessary to have a separate version of EXCEPTION_PROLOG_PSERIES that forces 64-bit mode. We can no longer use a direct branches in the exception prolog code, which means that the SLB miss handlers can't branch directly to .slb_miss_realmode any more. Instead we have to compute the address and do an indirect branch. This is conditional on CONFIG_RELOCATABLE; for non-relocatable kernels we use a direct branch as before. (A later change will allow CONFIG_RELOCATABLE to be set on 64-bit powerpc.) Since the secondary CPUs on pSeries start execution in the first 0x100 bytes of real memory and then have to get to wherever the kernel is, we can't use a direct branch to get there. Instead this changes __secondary_hold_spinloop from a flag to a function pointer. When it is set to a non-NULL value, the secondary CPUs jump to the function pointed to by that value. Finally this eliminates one code difference between 32-bit and 64-bit by making __secondary_hold be the text address of the secondary CPU spinloop rather than a function descriptor for it. Signed-off-by: Paul Mackerras <paulus@samba.org>
2008-08-30 09:40:24 +08:00
DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
[POWERPC] Introduce address space "slices" The basic issue is to be able to do what hugetlbfs does but with different page sizes for some other special filesystems; more specifically, my need is: - Huge pages - SPE local store mappings using 64K pages on a 4K base page size kernel on Cell - Some special 4K segments in 64K-page kernels for mapping a dodgy type of powerpc-specific infiniband hardware that requires 4K MMU mappings for various reasons I won't explain here. The main issues are: - To maintain/keep track of the page size per "segment" (as we can only have one page size per segment on powerpc, which are 256MB divisions of the address space). - To make sure special mappings stay within their allotted "segments" (including MAP_FIXED crap) - To make sure everybody else doesn't mmap/brk/grow_stack into a "segment" that is used for a special mapping Some of the necessary mechanisms to handle that were present in the hugetlbfs code, but mostly in ways not suitable for anything else. The patch relies on some changes to the generic get_unmapped_area() that just got merged. It still hijacks hugetlb callbacks here or there as the generic code hasn't been entirely cleaned up yet but that shouldn't be a problem. So what is a slice ? Well, I re-used the mechanism used formerly by our hugetlbfs implementation which divides the address space in "meta-segments" which I called "slices". The division is done using 256MB slices below 4G, and 1T slices above. Thus the address space is divided currently into 16 "low" slices and 16 "high" slices. (Special case: high slice 0 is the area between 4G and 1T). Doing so simplifies significantly the tracking of segments and avoids having to keep track of all the 256MB segments in the address space. While I used the "concepts" of hugetlbfs, I mostly re-implemented everything in a more generic way and "ported" hugetlbfs to it. Slices can have an associated page size, which is encoded in the mmu context and used by the SLB miss handler to set the segment sizes. The hash code currently doesn't care, it has a specific check for hugepages, though I might add a mechanism to provide per-slice hash mapping functions in the future. The slice code provide a pair of "generic" get_unmapped_area() (bottomup and topdown) functions that should work with any slice size. There is some trickiness here so I would appreciate people to have a look at the implementation of these and let me know if I got something wrong. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-05-08 14:27:27 +08:00
#ifdef CONFIG_PPC_MM_SLICES
DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
context.low_slices_psize));
DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
context.high_slices_psize));
DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
#endif /* CONFIG_PPC_MM_SLICES */
#ifdef CONFIG_PPC_BOOK3E
DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd));
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb));
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit));
DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg));
DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
#endif /* CONFIG_PPC_BOOK3E */
#ifdef CONFIG_PPC_STD_MMU_64
DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
#ifdef CONFIG_PPC_MM_SLICES
[POWERPC] Introduce address space "slices" The basic issue is to be able to do what hugetlbfs does but with different page sizes for some other special filesystems; more specifically, my need is: - Huge pages - SPE local store mappings using 64K pages on a 4K base page size kernel on Cell - Some special 4K segments in 64K-page kernels for mapping a dodgy type of powerpc-specific infiniband hardware that requires 4K MMU mappings for various reasons I won't explain here. The main issues are: - To maintain/keep track of the page size per "segment" (as we can only have one page size per segment on powerpc, which are 256MB divisions of the address space). - To make sure special mappings stay within their allotted "segments" (including MAP_FIXED crap) - To make sure everybody else doesn't mmap/brk/grow_stack into a "segment" that is used for a special mapping Some of the necessary mechanisms to handle that were present in the hugetlbfs code, but mostly in ways not suitable for anything else. The patch relies on some changes to the generic get_unmapped_area() that just got merged. It still hijacks hugetlb callbacks here or there as the generic code hasn't been entirely cleaned up yet but that shouldn't be a problem. So what is a slice ? Well, I re-used the mechanism used formerly by our hugetlbfs implementation which divides the address space in "meta-segments" which I called "slices". The division is done using 256MB slices below 4G, and 1T slices above. Thus the address space is divided currently into 16 "low" slices and 16 "high" slices. (Special case: high slice 0 is the area between 4G and 1T). Doing so simplifies significantly the tracking of segments and avoids having to keep track of all the 256MB segments in the address space. While I used the "concepts" of hugetlbfs, I mostly re-implemented everything in a more generic way and "ported" hugetlbfs to it. Slices can have an associated page size, which is encoded in the mmu context and used by the SLB miss handler to set the segment sizes. The hash code currently doesn't care, it has a specific check for hugepages, though I might add a mechanism to provide per-slice hash mapping functions in the future. The slice code provide a pair of "generic" get_unmapped_area() (bottomup and topdown) functions that should work with any slice size. There is some trickiness here so I would appreciate people to have a look at the implementation of these and let me know if I got something wrong. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2007-05-08 14:27:27 +08:00
DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
#else
DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
#endif /* CONFIG_PPC_MM_SLICES */
DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
DEFINE(SLBSHADOW_STACKVSID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
DEFINE(SLBSHADOW_STACKESID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
powerpc: Account time using timebase rather than PURR Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 03:56:43 +08:00
DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
powerpc: Account time using timebase rather than PURR Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 03:56:43 +08:00
DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
#endif /* CONFIG_PPC_STD_MMU_64 */
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
powerpc: Account time using timebase rather than PURR Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-08-27 03:56:43 +08:00
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
DEFINE(PACA_KVM_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
DEFINE(SVCPU_SLB, offsetof(struct kvmppc_book3s_shadow_vcpu, slb));
DEFINE(SVCPU_SLB_MAX, offsetof(struct kvmppc_book3s_shadow_vcpu, slb_max));
#endif
#endif /* CONFIG_PPC64 */
/* RTAS */
DEFINE(RTASBASE, offsetof(struct rtas_t, base));
DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
#ifdef CONFIG_PPC64
/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
/* hcall statistics */
DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
#endif /* CONFIG_PPC64 */
DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2]));
DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3]));
DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4]));
DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5]));
DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6]));
DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7]));
DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8]));
DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9]));
DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10]));
DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11]));
DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12]));
DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13]));
#ifndef CONFIG_PPC64
DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14]));
DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15]));
DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16]));
DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17]));
DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18]));
DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19]));
DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20]));
DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21]));
DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22]));
DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23]));
DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24]));
DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25]));
DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26]));
DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27]));
DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28]));
DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29]));
DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30]));
DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31]));
#endif /* CONFIG_PPC64 */
/*
* Note: these symbols include _ because they overlap with special
* register names
*/
DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip));
DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr));
DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3));
DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result));
DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
#ifndef CONFIG_PPC64
DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq));
/*
* The PowerPC 400-class & Book-E processors have neither the DAR
* nor the DSISR SPRs. Hence, we overload them to hold the similar
* DEAR and ESR SPRs for such processors. For critical interrupts
* we use them to hold SRR0 and SRR1.
*/
DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
#else /* CONFIG_PPC64 */
DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe));
/* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_PPC32)
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
/* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
#endif
#endif
DEFINE(CLONE_VM, CLONE_VM);
DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
#ifndef CONFIG_PPC64
DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
#endif /* ! CONFIG_PPC64 */
/* About the CPU features table */
DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore));
DEFINE(pbe_address, offsetof(struct pbe, address));
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
DEFINE(pbe_next, offsetof(struct pbe, next));
#ifndef CONFIG_PPC64
DEFINE(TASK_SIZE, TASK_SIZE);
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
#endif /* ! CONFIG_PPC64 */
/* datapage offsets for use by vdso */
DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
powerpc: Rework VDSO gettimeofday to prevent time going backwards Currently it is possible for userspace to see the result of gettimeofday() going backwards by 1 microsecond, assuming that userspace is using the gettimeofday() in the VDSO. The VDSO gettimeofday() algorithm computes the time in "xsecs", which are units of 2^-20 seconds, or approximately 0.954 microseconds, using the algorithm now = (timebase - tb_orig_stamp) * tb_to_xs + stamp_xsec and then converts the time in xsecs to seconds and microseconds. The kernel updates the tb_orig_stamp and stamp_xsec values every tick in update_vsyscall(). If the length of the tick is not an integer number of xsecs, then some precision is lost in converting the current time to xsecs. For example, with CONFIG_HZ=1000, the tick is 1ms long, which is 1048.576 xsecs. That means that stamp_xsec will advance by either 1048 or 1049 on each tick. With the right conditions, it is possible for userspace to get (timebase - tb_orig_stamp) * tb_to_xs being 1049 if the kernel is slightly late in updating the vdso_datapage, and then for stamp_xsec to advance by 1048 when the kernel does update it, and for userspace to then see (timebase - tb_orig_stamp) * tb_to_xs being zero due to integer truncation. The result is that time appears to go backwards by 1 microsecond. To fix this we change the VDSO gettimeofday to use a new field in the VDSO datapage which stores the nanoseconds part of the time as a fractional number of seconds in a 0.32 binary fraction format. (Or put another way, as a 32-bit number in units of 0.23283 ns.) This is convenient because we can use the mulhwu instruction to convert it to either microseconds or nanoseconds. Since it turns out that computing the time of day using this new field is simpler than either using stamp_xsec (as gettimeofday does) or stamp_xtime.tv_nsec (as clock_gettime does), this converts both gettimeofday and clock_gettime to use the new field. The existing __do_get_tspec function is converted to use the new field and take a parameter in r7 that indicates the desired resolution, 1,000,000 for microseconds or 1,000,000,000 for nanoseconds. The __do_get_xsec function is then unused and is deleted. The new algorithm is now = ((timebase - tb_orig_stamp) << 12) * tb_to_xs + (stamp_xtime_seconds << 32) + stamp_sec_fraction with 'now' in units of 2^-32 seconds. That is then converted to seconds and either microseconds or nanoseconds with seconds = now >> 32 partseconds = ((now & 0xffffffff) * resolution) >> 32 The 32-bit VDSO code also makes a further simplification: it ignores the bottom 32 bits of the tb_to_xs value, which is a 0.64 format binary fraction. Doing so gets rid of 4 multiply instructions. Assuming a timebase frequency of 1GHz or less and an update interval of no more than 10ms, the upper 32 bits of tb_to_xs will be at least 4503599, so the error from ignoring the low 32 bits will be at most 2.2ns, which is more than an order of magnitude less than the time taken to do gettimeofday or clock_gettime on our fastest processors, so there is no possibility of seeing inconsistent values due to this. This also moves update_gtod() down next to its only caller, and makes update_vsyscall use the time passed in via the wall_time argument rather than accessing xtime directly. At present, wall_time always points to xtime, but that could change in future. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2010-06-21 03:03:08 +08:00
DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));
DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size));
#ifdef CONFIG_PPC64
DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
#else
DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
#endif
/* timeval/timezone offsets for use by vdso */
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
/* Other bits used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
#ifdef CONFIG_PPC_ISERIES
/* the assembler miscalculates the VSID values */
DEFINE(PAGE_OFFSET_ESID, GET_ESID(PAGE_OFFSET));
DEFINE(PAGE_OFFSET_VSID, KERNEL_VSID(PAGE_OFFSET));
DEFINE(VMALLOC_START_ESID, GET_ESID(VMALLOC_START));
DEFINE(VMALLOC_START_VSID, KERNEL_VSID(VMALLOC_START));
/* alpaca */
DEFINE(ALPACA_SIZE, sizeof(struct alpaca));
#endif
DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
DEFINE(PTE_SIZE, sizeof(pte_t));
#ifdef CONFIG_KVM
DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
/* book3s */
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip));
DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
offsetof(struct kvmppc_vcpu_book3s, vcpu));
DEFINE(SVCPU_CR, offsetof(struct kvmppc_book3s_shadow_vcpu, cr));
DEFINE(SVCPU_XER, offsetof(struct kvmppc_book3s_shadow_vcpu, xer));
DEFINE(SVCPU_CTR, offsetof(struct kvmppc_book3s_shadow_vcpu, ctr));
DEFINE(SVCPU_LR, offsetof(struct kvmppc_book3s_shadow_vcpu, lr));
DEFINE(SVCPU_PC, offsetof(struct kvmppc_book3s_shadow_vcpu, pc));
DEFINE(SVCPU_R0, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[0]));
DEFINE(SVCPU_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[1]));
DEFINE(SVCPU_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[2]));
DEFINE(SVCPU_R3, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[3]));
DEFINE(SVCPU_R4, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[4]));
DEFINE(SVCPU_R5, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[5]));
DEFINE(SVCPU_R6, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[6]));
DEFINE(SVCPU_R7, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[7]));
DEFINE(SVCPU_R8, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[8]));
DEFINE(SVCPU_R9, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[9]));
DEFINE(SVCPU_R10, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[10]));
DEFINE(SVCPU_R11, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[11]));
DEFINE(SVCPU_R12, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[12]));
DEFINE(SVCPU_R13, offsetof(struct kvmppc_book3s_shadow_vcpu, gpr[13]));
DEFINE(SVCPU_HOST_R1, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r1));
DEFINE(SVCPU_HOST_R2, offsetof(struct kvmppc_book3s_shadow_vcpu, host_r2));
DEFINE(SVCPU_VMHANDLER, offsetof(struct kvmppc_book3s_shadow_vcpu,
vmhandler));
DEFINE(SVCPU_SCRATCH0, offsetof(struct kvmppc_book3s_shadow_vcpu,
scratch0));
DEFINE(SVCPU_SCRATCH1, offsetof(struct kvmppc_book3s_shadow_vcpu,
scratch1));
DEFINE(SVCPU_IN_GUEST, offsetof(struct kvmppc_book3s_shadow_vcpu,
in_guest));
DEFINE(SVCPU_FAULT_DSISR, offsetof(struct kvmppc_book3s_shadow_vcpu,
fault_dsisr));
DEFINE(SVCPU_FAULT_DAR, offsetof(struct kvmppc_book3s_shadow_vcpu,
fault_dar));
DEFINE(SVCPU_LAST_INST, offsetof(struct kvmppc_book3s_shadow_vcpu,
last_inst));
DEFINE(SVCPU_SHADOW_SRR1, offsetof(struct kvmppc_book3s_shadow_vcpu,
shadow_srr1));
#ifdef CONFIG_PPC_BOOK3S_32
DEFINE(SVCPU_SR, offsetof(struct kvmppc_book3s_shadow_vcpu, sr));
#endif
#else
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
#endif /* CONFIG_PPC_BOOK3S */
#endif
#ifdef CONFIG_KVM_GUEST
DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared,
scratch1));
DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared,
scratch2));
DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared,
scratch3));
DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared,
int_pending));
DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared,
critical));
DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr));
#endif
#ifdef CONFIG_44x
DEFINE(PGD_T_LOG2, PGD_T_LOG2);
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
#endif
#ifdef CONFIG_PPC_FSL_BOOK3E
DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0));
DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1));
DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2));
DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
arch.timing_exit.tv32.tbu));
DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
arch.timing_exit.tv32.tbl));
DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
arch.timing_last_enter.tv32.tbu));
DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
arch.timing_last_enter.tv32.tbl));
#endif
return 0;
}