powerpc/64s/hash: improve context tracking of hash faults

This moves the 64s/hash context tracking from hash_page_mm() to __do_hash_fault(), so it's no longer called by OCXL / SPU accelerators, which was certainly the wrong thing to be doing, because those callers are not low level interrupt handlers, so should have entered a kernel context tracking already. Then remain in kernel context for the duration of the fault, rather than enter/exit for the hash fault then enter/exit for the page fault, which is pointless. Even still, calling exception_enter/exit in __do_hash_fault seems questionable because that's touching per-cpu variables, tracing, etc., which might have been interrupted by this hash fault or themselves cause hash faults. But maybe I miss something because hash_page_mm very deliberately calls trace_hash_fault too, for example. So for now go with it, it's no worse than before, in this regard. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210130130852.2952424-32-npiggin@gmail.com

powerpc/64s/hash: improve context tracking of hash faults
This moves the 64s/hash context tracking from hash_page_mm() to __do_hash_fault(), so it's no longer called by OCXL / SPU accelerators, which was certainly the wrong thing to be doing, because those callers are not low level interrupt handlers, so should have entered a kernel context tracking already. Then remain in kernel context for the duration of the fault, rather than enter/exit for the hash fault then enter/exit for the page fault, which is pointless. Even still, calling exception_enter/exit in __do_hash_fault seems questionable because that's touching per-cpu variables, tracing, etc., which might have been interrupted by this hash fault or themselves cause hash faults. But maybe I miss something because hash_page_mm very deliberately calls trace_hash_fault too, for example. So for now go with it, it's no worse than before, in this regard. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210130130852.2952424-32-npiggin@gmail.com
a008f8f9 · Nicholas Piggin · Michael Ellerman · 2a06bf3e · a008f8f9 · a008f8f9
Commit a008f8f9 authored Jan 30, 2021 by Nicholas Piggin Committed by Michael Ellerman Feb 09, 2021
Showing with 33 additions and 14 deletions

arch/powerpc/include/asm/bug.h arch/powerpc/include/asm/bug.h +1 -0

arch/powerpc/mm/book3s64/hash_utils.c arch/powerpc/mm/book3s64/hash_utils.c +4 -3

arch/powerpc/mm/fault.c arch/powerpc/mm/fault.c +28 -11

No files found.
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -112,6 +112,7 @@
 struct pt_regs;
 long do_page_fault(struct pt_regs *);
+long hash__do_page_fault(struct pt_regs *);
 void bad_page_fault(struct pt_regs *, int);
 void __bad_page_fault(struct pt_regs *regs, int sig);
 void do_bad_page_fault_segv(struct pt_regs *regs);

--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1289,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 		 unsigned long flags)
 {
 	bool is_thp;
-	enum ctx_state prev_state = exception_enter();
 	pgd_t *pgdir;
 	unsigned long vsid;
 	pte_t *ptep;
@@ -1491,7 +1490,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 	DBG_LOW(" -> rc=%d\n", rc);
 bail:
-	exception_exit(prev_state);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(hash_page_mm);
@@ -1516,6 +1514,7 @@ EXPORT_SYMBOL_GPL(hash_page);
 DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
 DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 {
+	enum ctx_state prev_state = exception_enter();
 	unsigned long ea = regs->dar;
 	unsigned long dsisr = regs->dsisr;
 	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
@@ -1564,6 +1563,8 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
 		err = 0;
 	}
+	exception_exit(prev_state);
 	return err;
 }
@@ -1600,7 +1601,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
 	err = __do_hash_fault(regs);
 	if (err) {
 page_fault:
-		err = do_page_fault(regs);
+		err = hash__do_page_fault(regs);
 	}
 	return err;

--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -387,7 +387,7 @@ static void sanity_check_fault(bool is_write, bool is_user,
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that can't be handled here.
 */
-static int __do_page_fault(struct pt_regs *regs, unsigned long address,
+static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 			   unsigned long error_code)
 {
 	struct vm_area_struct * vma;
@@ -537,36 +537,53 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 	return 0;
 }
-NOKPROBE_SYMBOL(__do_page_fault);
+NOKPROBE_SYMBOL(___do_page_fault);
-DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
+static long __do_page_fault(struct pt_regs *regs)
 {
 	const struct exception_table_entry *entry;
-	enum ctx_state prev_state;
 	long err;
-	prev_state = exception_enter();
+	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
-	err = __do_page_fault(regs, regs->dar, regs->dsisr);
 	if (likely(!err))
-		goto out;
+		return err;
 	entry = search_exception_tables(regs->nip);
 	if (likely(entry)) {
 		instruction_pointer_set(regs, extable_fixup(entry));
-		err = 0;
+		return 0;
 	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
-		/* 32 and 64e handle this in asm */
 		__bad_page_fault(regs, err);
-		err = 0;
+		return 0;
+	} else {
+		/* 32 and 64e handle the bad page fault in asm */
+		return err;
 	}
+}
+NOKPROBE_SYMBOL(__do_page_fault);
+DEFINE_INTERRUPT_HANDLER_RET(do_page_fault)
+{
+	enum ctx_state prev_state = exception_enter();
+	long err;
+	err = __do_page_fault(regs);
-out:
 	exception_exit(prev_state);
 	return err;
 }
 NOKPROBE_SYMBOL(do_page_fault);
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
+long hash__do_page_fault(struct pt_regs *regs)
+{
+	return __do_page_fault(regs);
+}
+NOKPROBE_SYMBOL(hash__do_page_fault);
+#endif
 /*
 * bad_page_fault is called when we have a bad access from the kernel.
 * It is called from the DSI and ISI handlers in head.S and from some