powerpc fixes for 5.8 #8

Fix a bug introduced by the changes we made to lockless page table walking this cycle. When using the hash MMU, and perf with callchain recording, we can deadlock if the PMI interrupts a hash fault, and the callchain recording then takes a hash fault on the same page. Thanks to: Nicholas Piggin, Aneesh Kumar K.V, Anton Blanchard, Athira Rajeev. -----BEGIN PGP SIGNATURE----- iQJHBAABCAAxFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl8kE4ATHG1wZUBlbGxl cm1hbi5pZC5hdQAKCRBR6+o8yOGlgA+CD/9wDQ86dfw3Bu/v2gVeQK/2b5bYhCuo F6Ns1udWYz6Nmbf8QoCxBWlmbzfHSapV58ZLo84eLRSRh5ulNubBolO6JNB2ZJop j2nyClDNfWu3Eup2u4H9zhPLKZFSfAqHxGS4HIgrkAqkzIGkdF3rCn+OE5HvSPen Zy3JV2aUVhyMyJquia71q0q88G7q2TmxfGggmMHJxes93baYodof3jzK0f/5AVkp FWDs6irLcd+Zt+Ix+M6No7v/7e4ZvAp/K5mpJ91tLCEq1Z26Ubpq/SQbSGfr1P9c 1XfUypLTbdiHjNf2c/MkhOMTcl1TG0BETscjAC8Ku6UgO+GpSrUgv/lCuSVDXU2p uk6uF4JXw46ohrQY50yBTC91I1/8TG4yCi1PtT/Z8kFejNXsGgS9O4KaNLsa1Rg8 f9qH4bXxmL7jbSInJjG2uHTGsG4xw/ze5zD1jg9uI5F4JsY4OAWD3rLOkbGZhqhI tpM9lYFL8weDOv+aa/+TmgfNBuEd0V9YgjOcJFw1zz9ssrCpz0WrGwVy7N2Vq2pX UT52y09LyUopFBFuvfQpzUr8C8FAkSnfzdKSFt4fc88NSluf2vt+DwOfNeI6NzbU dPhJD0gPcJvmJKjKHlVwFB1dgQKlXiHSKjlQejS7Ig/zJCUXbi5MzBgMWvVMIHM2 7EAE50Ok0aeCVg== =/dwh -----END PGP SIGNATURE----- Merge tag 'powerpc-5.8-8' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux Pull powerpc fix from Michael Ellerman: "Fix a bug introduced by the changes we made to lockless page table walking this cycle. When using the hash MMU, and perf with callchain recording, we can deadlock if the PMI interrupts a hash fault, and the callchain recording then takes a hash fault on the same page. Thanks to Nicholas Piggin, Aneesh Kumar K.V, Anton Blanchard, and Athira Rajeev" * tag 'powerpc-5.8-8' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: powerpc/64s/hash: Fix hash_preload running with interrupts enabled
2020-07-31 09:38:39 -07:00 · 2020-07-31 09:38:39 -07:00 · deacdb3e39
parent 14aab7eeb9 909adfc66b
commit deacdb3e39
3 changed files with 42 additions and 3 deletions
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@ -3072,10 +3072,18 @@ do_hash_page:
 	ori	r0,r0,DSISR_BAD_FAULT_64S@l
 	and.	r0,r5,r0		/* weird error? */
 	bne-	handle_page_fault	/* if not, try to insert a HPTE */
+
+	/*
+	 * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
+	 * don't call hash_page, just fail the fault. This is required to
+	 * prevent re-entrancy problems in the hash code, namely perf
+	 * interrupts hitting while something holds H_PAGE_BUSY, and taking a
+	 * hash fault. See the comment in hash_preload().
+	 */
 	ld	r11, PACA_THREAD_INFO(r13)
-	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
-	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
-	bne	77f			/* then don't call hash_page now */
+	lwz	r0,TI_PREEMPT(r11)
+	andis.	r0,r0,NMI_MASK@h
+	bne	77f

 	/*
 	 * r3 contains the trap number
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@ -1559,6 +1559,7 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
 	pgd_t *pgdir;
 	int rc, ssize, update_flags = 0;
 	unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
+	unsigned long flags;

 	BUG_ON(get_region_id(ea) != USER_REGION_ID);

@ -1592,6 +1593,28 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
 		return;
 #endif /* CONFIG_PPC_64K_PAGES */

+	/*
+	 * __hash_page_* must run with interrupts off, as it sets the
+	 * H_PAGE_BUSY bit. It's possible for perf interrupts to hit at any
+	 * time and may take a hash fault reading the user stack, see
+	 * read_user_stack_slow() in the powerpc/perf code.
+	 *
+	 * If that takes a hash fault on the same page as we lock here, it
+	 * will bail out when seeing H_PAGE_BUSY set, and retry the access
+	 * leading to an infinite loop.
+	 *
+	 * Disabling interrupts here does not prevent perf interrupts, but it
+	 * will prevent them taking hash faults (see the NMI test in
+	 * do_hash_page), then read_user_stack's copy_from_user_nofault will
+	 * fail and perf will fall back to read_user_stack_slow(), which
+	 * walks the Linux page tables.
+	 *
+	 * Interrupts must also be off for the duration of the
+	 * mm_is_thread_local test and update, to prevent preempt running the
+	 * mm on another CPU (XXX: this may be racy vs kthread_use_mm).
+	 */
+	local_irq_save(flags);
+
 	/* Is that local to this CPU ? */
 	if (mm_is_thread_local(mm))
 		update_flags |= HPTE_LOCAL_UPDATE;
@ -1614,6 +1637,8 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
 				   mm_ctx_user_psize(&mm->context),
 				   mm_ctx_user_psize(&mm->context),
 				   pte_val(*ptep));
+
+	local_irq_restore(flags);
 }

 /*
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@ -2179,6 +2179,12 @@ static void __perf_event_interrupt(struct pt_regs *regs)

 	perf_read_regs(regs);

+	/*
+	 * If perf interrupts hit in a local_irq_disable (soft-masked) region,
+	 * we consider them as NMIs. This is required to prevent hash faults on
+	 * user addresses when reading callchains. See the NMI test in
+	 * do_hash_page.
+	 */
 	nmi = perf_intr_is_nmi(regs);
 	if (nmi)
 		nmi_enter();