Commit 4bd1d80e authored by Sergey Matyukevich, committed by Palmer Dabbelt

riscv: mm: notify remote harts about mmu cache updates

The current implementation of the update_mmu_cache function performs a
local TLB flush. It does not take ASID information into account. Nor
does it account for other harts currently running the same mm context,
or for possible migration of the running context to other harts.
Meanwhile, a TLB flush is not performed on every context switch when
ASID support is enabled.
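
For context, a condensed sketch of the pre-patch code path (taken from
the pgtable.h hunk below; the surrounding function is abridged):

    static inline void update_mmu_cache(struct vm_area_struct *vma,
            unsigned long address, pte_t *ptep)
    {
        /*
         * Pre-patch: fence only the local hart, with no ASID.
         * A hart concurrently running the same mm context, or a
         * hart this context later migrates to, can keep serving
         * a stale translation.
         */
        local_flush_tlb_page(address);
    }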

Patch [1] proposed adding ASID support to update_mmu_cache to avoid
flushing the local TLB entirely. This patch additionally takes into
account other harts currently running the same mm context, as well as
possible migration of this context to other harts.

For this purpose, the approach from flush_icache_mm is reused. Remote
harts currently running the same mm context are informed via SBI calls
that they need to flush their local TLBs. All other harts are marked as
needing a deferred TLB flush the next time this mm context runs on them.
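
Condensed from the hunks below, the two halves of that scheme look
roughly like this (kernel identifiers as in the diff; a sketch, not a
standalone program):

    /* __sbi_tlb_flush_range(): mark harts for a deferred flush. */
    cpumask_setall(pmask);               /* assume every hart is stale...    */
    cpumask_clear_cpu(cpuid, pmask);     /* ...except the local hart...      */
    cpumask_andnot(pmask, pmask, cmask); /* ...and harts reached via SBI     */

    /* set_mm_asid(): flush a stale hart when the context switches in. */
    if (cpumask_test_cpu(cpu, mask)) {
        cpumask_clear_cpu(cpu, mask);
        local_flush_tlb_all_asid(cntx & asid_mask);
    }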

[1] https://lore.kernel.org/linux-riscv/20220821013926.8968-1-tjytimi@163.com/

Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Fixes: 65d4b9c5 ("RISC-V: Implement ASID allocator")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/#t
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parent 7ecdadf7
@@ -19,6 +19,8 @@ typedef struct {
 #ifdef CONFIG_SMP
         /* A local icache flush is needed before user execution can resume. */
         cpumask_t icache_stale_mask;
+        /* A local tlb flush is needed before user execution can resume. */
+        cpumask_t tlb_stale_mask;
 #endif
 } mm_context_t;
...
@@ -415,7 +415,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
          * Relying on flush_tlb_fix_spurious_fault would suffice, but
          * the extra traps reduce performance. So, eagerly SFENCE.VMA.
          */
-        local_flush_tlb_page(address);
+        flush_tlb_page(vma, address);
 }
 
 #define __HAVE_ARCH_UPDATE_MMU_TLB
...
@@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr)
 {
         ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
 }
+
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+        __asm__ __volatile__ ("sfence.vma x0, %0"
+                        :
+                        : "r" (asid)
+                        : "memory");
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+                unsigned long asid)
+{
+        __asm__ __volatile__ ("sfence.vma %0, %1"
+                        :
+                        : "r" (addr), "r" (asid)
+                        : "memory");
+}
+
 #else /* CONFIG_MMU */
 #define local_flush_tlb_all()                   do { } while (0)
 #define local_flush_tlb_page(addr)              do { } while (0)
...
@@ -196,6 +196,16 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
 
         if (need_flush_tlb)
                 local_flush_tlb_all();
+#ifdef CONFIG_SMP
+        else {
+                cpumask_t *mask = &mm->context.tlb_stale_mask;
+
+                if (cpumask_test_cpu(cpu, mask)) {
+                        cpumask_clear_cpu(cpu, mask);
+                        local_flush_tlb_all_asid(cntx & asid_mask);
+                }
+        }
+#endif
 }
 
 static void set_mm_noasid(struct mm_struct *mm)
...
@@ -5,23 +5,7 @@
 #include <linux/sched.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
-
-static inline void local_flush_tlb_all_asid(unsigned long asid)
-{
-        __asm__ __volatile__ ("sfence.vma x0, %0"
-                        :
-                        : "r" (asid)
-                        : "memory");
-}
-
-static inline void local_flush_tlb_page_asid(unsigned long addr,
-                unsigned long asid)
-{
-        __asm__ __volatile__ ("sfence.vma %0, %1"
-                        :
-                        : "r" (addr), "r" (asid)
-                        : "memory");
-}
+#include <asm/tlbflush.h>
 
 void flush_tlb_all(void)
 {
@@ -31,6 +15,7 @@ void flush_tlb_all(void)
 static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
                                   unsigned long size, unsigned long stride)
 {
+        struct cpumask *pmask = &mm->context.tlb_stale_mask;
         struct cpumask *cmask = mm_cpumask(mm);
         unsigned int cpuid;
         bool broadcast;
@@ -44,6 +29,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
         if (static_branch_unlikely(&use_asid_allocator)) {
                 unsigned long asid = atomic_long_read(&mm->context.id);
 
+                /*
+                 * TLB will be immediately flushed on harts concurrently
+                 * executing this MM context. TLB flush on other harts
+                 * is deferred until this MM context migrates there.
+                 */
+                cpumask_setall(pmask);
+                cpumask_clear_cpu(cpuid, pmask);
+                cpumask_andnot(pmask, pmask, cmask);
+
                 if (broadcast) {
                         sbi_remote_sfence_vma_asid(cmask, start, size, asid);
                 } else if (size <= stride) {
...