powerpc fixes for 4.13 #3

A handful of fixes, mostly for new code.
 
 Some reworking of the new STRICT_KERNEL_RWX support to make sure we also remove
 executable permission from __init memory before it's freed.
 
 A fix to some recent optimisations to the hypercall entry where we were
 clobbering r12, this was breaking nested guests (PR KVM).
 
 A fix for the recent patch to opal_configure_cores(). This could break booting
 on bare metal Power8 boxes if the kernel was built without
 CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG.
 
 And finally a workaround for spurious PMU interrupts on Power9 DD2.
 
 Thanks to:
   Nicholas Piggin, Anton Blanchard, Balbir Singh.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJZcctRAAoJEFHr6jzI4aWAji8P/RaG3/vGmhP4HGsAh7jHx/3v
 EIBsjXPozQ16AIs4235JcQ2Vg54LLms6IaFfw1w9oYZxi98sNC3b56Rtd3AGxLbq
 WHEGAD+mE9aXUOXkycLRkKZ+THiNzR0ZQKiMDqU+oJ9XbIAoof2gI4nc7UzUxGnt
 EqxERiL2RrYqRBvO3W2ErtL29yXhopUhv+HKdbuMlIcoH4uooSdn435MPleIJWxl
 WRFFv17DFw7PN6juwlff2YWvbTgrM1ND8czLfINzSZQDy0F+HEbKDFbtHlgAvOFN
 GjbODX9OuU/QRrPQv6MoY2UumZNtLSCUsw5BUg0y4Qaak8p0gAEb4D/gmvIZNp/r
 9ZqJDPfFKh/oA08apLTPJBKQmDUwCaYHyHVJNw10CU9GZ9CzW/2/B2/h2Egpgqao
 vt0TgR3FXYizhXGAEpmBZzadAg9VKZTXH/irN6X62wMxNfvRIDOS57829QFHr0ka
 wbsOK1gI1rE1g1GwsZafyURRSe95Sv84RDHW1fFQAtvFgpA3eHl26LUkpcvwOOFI
 53g9zMedYOyZXXIdBn05QojwxgXZ0ry1Wc93oDHOn3rgDL6XBeOXFfFI5jIwPJDL
 VZhOSvN1v+Ti3Q+I05zH+ll6BMhlU8RKBf+esq419DiLUeaNLHZR6GF0+UVJyJo+
 WcLsJ1x/GDa9pOBfMEJy
 =Oy4/
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-4.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "A handful of fixes, mostly for new code:

   - some reworking of the new STRICT_KERNEL_RWX support to make sure we
     also remove executable permission from __init memory before it's
     freed.

   - a fix to some recent optimisations to the hypercall entry where we
     were clobbering r12, this was breaking nested guests (PR KVM).

   - a fix for the recent patch to opal_configure_cores(). This could
     break booting on bare metal Power8 boxes if the kernel was built
     without CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG.

   - .. and finally a workaround for spurious PMU interrupts on Power9
     DD2.

  Thanks to: Nicholas Piggin, Anton Blanchard, Balbir Singh"

* tag 'powerpc-4.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/mm: Mark __init memory no-execute when STRICT_KERNEL_RWX=y
  powerpc/mm/hash: Refactor hash__mark_rodata_ro()
  powerpc/mm/radix: Refactor radix__mark_rodata_ro()
  powerpc/64s: Fix hypercall entry clobbering r12 input
  powerpc/perf: Avoid spurious PMU interrupts after idle
  powerpc/powernv: Fix boot on Power8 bare metal due to opal_configure_cores()
This commit is contained in:
Linus Torvalds 2017-07-21 13:54:37 -07:00
commit 10fc95547f
11 changed files with 102 additions and 34 deletions

View File

@ -91,6 +91,7 @@ static inline int hash__pgd_bad(pgd_t pgd)
} }
#ifdef CONFIG_STRICT_KERNEL_RWX #ifdef CONFIG_STRICT_KERNEL_RWX
extern void hash__mark_rodata_ro(void); extern void hash__mark_rodata_ro(void);
extern void hash__mark_initmem_nx(void);
#endif #endif
extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,

View File

@ -1192,5 +1192,6 @@ static inline const int pud_pfn(pud_t pud)
BUILD_BUG(); BUILD_BUG();
return 0; return 0;
} }
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */

View File

@ -118,6 +118,7 @@
#ifdef CONFIG_STRICT_KERNEL_RWX #ifdef CONFIG_STRICT_KERNEL_RWX
extern void radix__mark_rodata_ro(void); extern void radix__mark_rodata_ro(void);
extern void radix__mark_initmem_nx(void);
#endif #endif
static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr, static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,

View File

@ -80,6 +80,13 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr);
void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void); void pgtable_cache_init(void);
#ifdef CONFIG_STRICT_KERNEL_RWX
void mark_initmem_nx(void);
#else
static inline void mark_initmem_nx(void) { }
#endif
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_H */ #endif /* _ASM_POWERPC_PGTABLE_H */

View File

@ -824,7 +824,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
* r3 volatile parameter and return value for status * r3 volatile parameter and return value for status
* r4-r10 volatile input and output value * r4-r10 volatile input and output value
* r11 volatile hypercall number and output value * r11 volatile hypercall number and output value
* r12 volatile * r12 volatile input and output value
* r13-r31 nonvolatile * r13-r31 nonvolatile
* LR nonvolatile * LR nonvolatile
* CTR volatile * CTR volatile
@ -834,25 +834,26 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
* Other registers nonvolatile * Other registers nonvolatile
* *
* The intersection of volatile registers that don't contain possible * The intersection of volatile registers that don't contain possible
* inputs is: r12, cr0, xer, ctr. We may use these as scratch regs * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
* upon entry without saving. * without saving, though xer is not a good idea to use, as hardware may
* interpret some bits so it may be costly to change them.
*/ */
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/* /*
* There is a little bit of juggling to get syscall and hcall * There is a little bit of juggling to get syscall and hcall
* working well. Save r10 in ctr to be restored in case it is a * working well. Save r13 in ctr to avoid using SPRG scratch
* hcall. * register.
* *
* Userspace syscalls have already saved the PPR, hcalls must save * Userspace syscalls have already saved the PPR, hcalls must save
* it before setting HMT_MEDIUM. * it before setting HMT_MEDIUM.
*/ */
#define SYSCALL_KVMTEST \ #define SYSCALL_KVMTEST \
mr r12,r13; \ mtctr r13; \
GET_PACA(r13); \ GET_PACA(r13); \
mtctr r10; \ std r10,PACA_EXGEN+EX_R10(r13); \
KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
HMT_MEDIUM; \ HMT_MEDIUM; \
mr r9,r12; \ mfctr r9;
#else #else
#define SYSCALL_KVMTEST \ #define SYSCALL_KVMTEST \
@ -935,8 +936,8 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* This is a hcall, so register convention is as above, with these * This is a hcall, so register convention is as above, with these
* differences: * differences:
* r13 = PACA * r13 = PACA
* r12 = orig r13 * ctr = orig r13
* ctr = orig r10 * orig r10 saved in PACA
*/ */
TRAMP_KVM_BEGIN(do_kvm_0xc00) TRAMP_KVM_BEGIN(do_kvm_0xc00)
/* /*
@ -944,14 +945,13 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
* HMT_MEDIUM. That allows the KVM code to save that value into the * HMT_MEDIUM. That allows the KVM code to save that value into the
* guest state (it is the guest's PPR value). * guest state (it is the guest's PPR value).
*/ */
OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR) OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR)
HMT_MEDIUM HMT_MEDIUM
OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR) OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR)
mfctr r10 mfctr r10
SET_SCRATCH0(r12) SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13) std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9 mfcr r9
std r10,PACA_EXGEN+EX_R10(r13)
KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
#endif #endif

View File

@ -30,6 +30,7 @@
* Use unused space in the interrupt stack to save and restore * Use unused space in the interrupt stack to save and restore
* registers for winkle support. * registers for winkle support.
*/ */
#define _MMCR0 GPR0
#define _SDR1 GPR3 #define _SDR1 GPR3
#define _PTCR GPR3 #define _PTCR GPR3
#define _RPR GPR4 #define _RPR GPR4
@ -272,6 +273,14 @@ power_enter_stop:
b pnv_wakeup_noloss b pnv_wakeup_noloss
.Lhandle_esl_ec_set: .Lhandle_esl_ec_set:
/*
* POWER9 DD2 can incorrectly set PMAO when waking up after a
* state-loss idle. Saving and restoring MMCR0 over idle is a
* workaround.
*/
mfspr r4,SPRN_MMCR0
std r4,_MMCR0(r1)
/* /*
* Check if the requested state is a deep idle state. * Check if the requested state is a deep idle state.
*/ */
@ -450,10 +459,14 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
pnv_restore_hyp_resource_arch300: pnv_restore_hyp_resource_arch300:
/* /*
* Workaround for POWER9, if we lost resources, the ERAT * Workaround for POWER9, if we lost resources, the ERAT
* might have been mixed up and needs flushing. * might have been mixed up and needs flushing. We also need
* to reload MMCR0 (see comment above).
*/ */
blt cr3,1f blt cr3,1f
PPC_INVALIDATE_ERAT PPC_INVALIDATE_ERAT
ld r1,PACAR1(r13)
ld r4,_MMCR0(r1)
mtspr SPRN_MMCR0,r4
1: 1:
/* /*
* POWER ISA 3. Use PSSCR to determine if we * POWER ISA 3. Use PSSCR to determine if we

View File

@ -402,6 +402,7 @@ void __init mem_init(void)
void free_initmem(void) void free_initmem(void)
{ {
ppc_md.progress = ppc_printk_progress; ppc_md.progress = ppc_printk_progress;
mark_initmem_nx();
free_initmem_default(POISON_FREE_INITMEM); free_initmem_default(POISON_FREE_INITMEM);
} }

View File

@ -425,33 +425,51 @@ int hash__has_transparent_hugepage(void)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#ifdef CONFIG_STRICT_KERNEL_RWX #ifdef CONFIG_STRICT_KERNEL_RWX
void hash__mark_rodata_ro(void) static bool hash__change_memory_range(unsigned long start, unsigned long end,
unsigned long newpp)
{ {
unsigned long start = (unsigned long)_stext;
unsigned long end = (unsigned long)__init_begin;
unsigned long idx; unsigned long idx;
unsigned int step, shift; unsigned int step, shift;
unsigned long newpp = PP_RXXX;
shift = mmu_psize_defs[mmu_linear_psize].shift; shift = mmu_psize_defs[mmu_linear_psize].shift;
step = 1 << shift; step = 1 << shift;
start = ((start + step - 1) >> shift) << shift; start = ALIGN_DOWN(start, step);
end = (end >> shift) << shift; end = ALIGN(end, step); // aligns up
pr_devel("marking ro start %lx, end %lx, step %x\n", if (start >= end)
start, end, step); return false;
if (start == end) { pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
pr_warn("could not set rodata ro, relocate the start" start, end, newpp, step);
" of the kernel to a 0x%x boundary\n", step);
return;
}
for (idx = start; idx < end; idx += step) for (idx = start; idx < end; idx += step)
/* Not sure if we can do much with the return value */ /* Not sure if we can do much with the return value */
mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
mmu_kernel_ssize); mmu_kernel_ssize);
return true;
}
void hash__mark_rodata_ro(void)
{
unsigned long start, end;
start = (unsigned long)_stext;
end = (unsigned long)__init_begin;
WARN_ON(!hash__change_memory_range(start, end, PP_RXXX));
}
void hash__mark_initmem_nx(void)
{
unsigned long start, end, pp;
start = (unsigned long)__init_begin;
end = (unsigned long)__init_end;
pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL));
WARN_ON(!hash__change_memory_range(start, end, pp));
} }
#endif #endif

View File

@ -112,10 +112,9 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
} }
#ifdef CONFIG_STRICT_KERNEL_RWX #ifdef CONFIG_STRICT_KERNEL_RWX
void radix__mark_rodata_ro(void) void radix__change_memory_range(unsigned long start, unsigned long end,
unsigned long clear)
{ {
unsigned long start = (unsigned long)_stext;
unsigned long end = (unsigned long)__init_begin;
unsigned long idx; unsigned long idx;
pgd_t *pgdp; pgd_t *pgdp;
pud_t *pudp; pud_t *pudp;
@ -125,7 +124,8 @@ void radix__mark_rodata_ro(void)
start = ALIGN_DOWN(start, PAGE_SIZE); start = ALIGN_DOWN(start, PAGE_SIZE);
end = PAGE_ALIGN(end); // aligns up end = PAGE_ALIGN(end); // aligns up
pr_devel("marking ro start %lx, end %lx\n", start, end); pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
start, end, clear);
for (idx = start; idx < end; idx += PAGE_SIZE) { for (idx = start; idx < end; idx += PAGE_SIZE) {
pgdp = pgd_offset_k(idx); pgdp = pgd_offset_k(idx);
@ -147,11 +147,29 @@ void radix__mark_rodata_ro(void)
if (!ptep) if (!ptep)
continue; continue;
update_the_pte: update_the_pte:
radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0); radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
} }
radix__flush_tlb_kernel_range(start, end); radix__flush_tlb_kernel_range(start, end);
} }
void radix__mark_rodata_ro(void)
{
unsigned long start, end;
start = (unsigned long)_stext;
end = (unsigned long)__init_begin;
radix__change_memory_range(start, end, _PAGE_WRITE);
}
void radix__mark_initmem_nx(void)
{
unsigned long start = (unsigned long)__init_begin;
unsigned long end = (unsigned long)__init_end;
radix__change_memory_range(start, end, _PAGE_EXEC);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */ #endif /* CONFIG_STRICT_KERNEL_RWX */
static inline void __meminit print_mapping(unsigned long start, static inline void __meminit print_mapping(unsigned long start,

View File

@ -505,4 +505,12 @@ void mark_rodata_ro(void)
else else
hash__mark_rodata_ro(); hash__mark_rodata_ro();
} }
void mark_initmem_nx(void)
{
if (radix_enabled())
radix__mark_initmem_nx();
else
hash__mark_initmem_nx();
}
#endif #endif

View File

@ -78,7 +78,7 @@ void opal_configure_cores(void)
* ie. Host hash supports hash guests * ie. Host hash supports hash guests
* Host radix supports hash/radix guests * Host radix supports hash/radix guests
*/ */
if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH; reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH;
if (early_radix_enabled()) if (early_radix_enabled())
reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX; reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;