Commit 118bd31b authored by Martin Schwidefsky

s390/mm: add no-dat TLB flush optimization

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent c9b5ad54
...@@ -952,15 +952,27 @@ static inline pte_t pte_mkhuge(pte_t pte) ...@@ -952,15 +952,27 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_GLOBAL 0 #define IPTE_GLOBAL 0
#define IPTE_LOCAL 1 #define IPTE_LOCAL 1
static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local) #define IPTE_NODAT 0x400
static inline void __ptep_ipte(unsigned long address, pte_t *ptep,
unsigned long opt, int local)
{ {
unsigned long pto = (unsigned long) ptep; unsigned long pto = (unsigned long) ptep;
if (__builtin_constant_p(opt) && opt == 0) {
/* Invalidation + TLB flush for the pte */ /* Invalidation + TLB flush for the pte */
asm volatile( asm volatile(
" .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]" " .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address), : "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
[m4] "i" (local)); [m4] "i" (local));
return;
}
/* Invalidate ptes with options + TLB flush of the ptes */
asm volatile(
" .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
: [r2] "+a" (address), [r3] "+a" (opt)
: [r1] "a" (pto), [m4] "i" (local) : "memory");
} }
static inline void __ptep_ipte_range(unsigned long address, int nr, static inline void __ptep_ipte_range(unsigned long address, int nr,
...@@ -1341,31 +1353,36 @@ static inline void __pmdp_csp(pmd_t *pmdp) ...@@ -1341,31 +1353,36 @@ static inline void __pmdp_csp(pmd_t *pmdp)
#define IDTE_GLOBAL 0 #define IDTE_GLOBAL 0
#define IDTE_LOCAL 1 #define IDTE_LOCAL 1
static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local) #define IDTE_PTOA 0x0800
#define IDTE_NODAT 0x1000
static inline void __pmdp_idte(unsigned long addr, pmd_t *pmdp,
unsigned long opt, int local)
{ {
unsigned long sto; unsigned long sto;
sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); sto = (unsigned long) pmdp - pmd_index(addr) * sizeof(pmd_t);
asm volatile( asm volatile(
" .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
: "+m" (*pmdp) : "+m" (*pmdp)
: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)), : [r1] "a" (sto), [r2] "a" ((addr & HPAGE_MASK) | opt),
[m4] "i" (local) [m4] "i" (local)
: "cc" ); : "cc" );
} }
static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local) static inline void __pudp_idte(unsigned long addr, pud_t *pudp,
unsigned long opt, int local)
{ {
unsigned long r3o; unsigned long r3o;
r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t); r3o = (unsigned long) pudp - pud_index(addr) * sizeof(pud_t);
r3o |= _ASCE_TYPE_REGION3; r3o |= _ASCE_TYPE_REGION3;
asm volatile( asm volatile(
" .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]" " .insn rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
: "+m" (*pudp) : "+m" (*pudp)
: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)), : [r1] "a" (r3o), [r2] "a" ((addr & PUD_MASK) | opt),
[m4] "i" (local) [m4] "i" (local)
: "cc"); : "cc" );
} }
pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t);
......
...@@ -29,8 +29,9 @@ ...@@ -29,8 +29,9 @@
#define MACHINE_FLAG_TE _BITUL(11) #define MACHINE_FLAG_TE _BITUL(11)
#define MACHINE_FLAG_TLB_LC _BITUL(12) #define MACHINE_FLAG_TLB_LC _BITUL(12)
#define MACHINE_FLAG_VX _BITUL(13) #define MACHINE_FLAG_VX _BITUL(13)
#define MACHINE_FLAG_NX _BITUL(14) #define MACHINE_FLAG_TLB_GUEST _BITUL(14)
#define MACHINE_FLAG_GS _BITUL(15) #define MACHINE_FLAG_NX _BITUL(15)
#define MACHINE_FLAG_GS _BITUL(16)
#define LPP_MAGIC _BITUL(31) #define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) #define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
...@@ -68,6 +69,7 @@ extern void detect_memory_memblock(void); ...@@ -68,6 +69,7 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
#define MACHINE_HAS_TLB_GUEST (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_GUEST)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX) #define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS) #define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
......
...@@ -20,10 +20,13 @@ static inline void __tlb_flush_local(void) ...@@ -20,10 +20,13 @@ static inline void __tlb_flush_local(void)
*/ */
static inline void __tlb_flush_idte(unsigned long asce) static inline void __tlb_flush_idte(unsigned long asce)
{ {
unsigned long opt;
opt = IDTE_PTOA;
/* Global TLB flush for the mm */ /* Global TLB flush for the mm */
asm volatile( asm volatile(
" .insn rrf,0xb98e0000,0,%0,%1,0" " .insn rrf,0xb98e0000,0,%0,%1,0"
: : "a" (2048), "a" (asce) : "cc"); : : "a" (opt), "a" (asce) : "cc");
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
...@@ -328,7 +328,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr) ...@@ -328,7 +328,7 @@ static void ipte_range(pte_t *pte, unsigned long address, int nr)
return; return;
} }
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
__ptep_ipte(address, pte, IPTE_GLOBAL); __ptep_ipte(address, pte, 0, IPTE_GLOBAL);
address += PAGE_SIZE; address += PAGE_SIZE;
pte++; pte++;
} }
......
...@@ -25,6 +25,38 @@ ...@@ -25,6 +25,38 @@
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/page-states.h> #include <asm/page-states.h>
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long opt, asce;
if (MACHINE_HAS_TLB_GUEST) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL)
opt |= IPTE_NODAT;
__ptep_ipte(addr, ptep, opt, IPTE_LOCAL);
} else {
__ptep_ipte(addr, ptep, 0, IPTE_LOCAL);
}
}
static inline void ptep_ipte_global(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
unsigned long opt, asce;
if (MACHINE_HAS_TLB_GUEST) {
opt = 0;
asce = READ_ONCE(mm->context.gmap_asce);
if (asce == 0UL)
opt |= IPTE_NODAT;
__ptep_ipte(addr, ptep, opt, IPTE_GLOBAL);
} else {
__ptep_ipte(addr, ptep, 0, IPTE_GLOBAL);
}
}
static inline pte_t ptep_flush_direct(struct mm_struct *mm, static inline pte_t ptep_flush_direct(struct mm_struct *mm,
unsigned long addr, pte_t *ptep) unsigned long addr, pte_t *ptep)
{ {
...@@ -36,9 +68,9 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm, ...@@ -36,9 +68,9 @@ static inline pte_t ptep_flush_direct(struct mm_struct *mm,
atomic_inc(&mm->context.flush_count); atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC && if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__ptep_ipte(addr, ptep, IPTE_LOCAL); ptep_ipte_local(mm, addr, ptep);
else else
__ptep_ipte(addr, ptep, IPTE_GLOBAL); ptep_ipte_global(mm, addr, ptep);
atomic_dec(&mm->context.flush_count); atomic_dec(&mm->context.flush_count);
return old; return old;
} }
...@@ -57,7 +89,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, ...@@ -57,7 +89,7 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
pte_val(*ptep) |= _PAGE_INVALID; pte_val(*ptep) |= _PAGE_INVALID;
mm->context.flush_mm = 1; mm->context.flush_mm = 1;
} else } else
__ptep_ipte(addr, ptep, IPTE_GLOBAL); ptep_ipte_global(mm, addr, ptep);
atomic_dec(&mm->context.flush_count); atomic_dec(&mm->context.flush_count);
return old; return old;
} }
...@@ -290,6 +322,26 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, ...@@ -290,6 +322,26 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
} }
EXPORT_SYMBOL(ptep_modify_prot_commit); EXPORT_SYMBOL(ptep_modify_prot_commit);
static inline void pmdp_idte_local(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_LOCAL);
else
__pmdp_idte(addr, pmdp, 0, IDTE_LOCAL);
}
static inline void pmdp_idte_global(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
if (MACHINE_HAS_TLB_GUEST)
__pmdp_idte(addr, pmdp, IDTE_NODAT, IDTE_GLOBAL);
else if (MACHINE_HAS_IDTE)
__pmdp_idte(addr, pmdp, 0, IDTE_GLOBAL);
else
__pmdp_csp(pmdp);
}
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp) unsigned long addr, pmd_t *pmdp)
{ {
...@@ -298,16 +350,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, ...@@ -298,16 +350,12 @@ static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
old = *pmdp; old = *pmdp;
if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
return old; return old;
if (!MACHINE_HAS_IDTE) {
__pmdp_csp(pmdp);
return old;
}
atomic_inc(&mm->context.flush_count); atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC && if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__pmdp_idte(addr, pmdp, IDTE_LOCAL); pmdp_idte_local(mm, addr, pmdp);
else else
__pmdp_idte(addr, pmdp, IDTE_GLOBAL); pmdp_idte_global(mm, addr, pmdp);
atomic_dec(&mm->context.flush_count); atomic_dec(&mm->context.flush_count);
return old; return old;
} }
...@@ -325,10 +373,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, ...@@ -325,10 +373,9 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) { cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1; mm->context.flush_mm = 1;
} else if (MACHINE_HAS_IDTE) } else {
__pmdp_idte(addr, pmdp, IDTE_GLOBAL); pmdp_idte_global(mm, addr, pmdp);
else }
__pmdp_csp(pmdp);
atomic_dec(&mm->context.flush_count); atomic_dec(&mm->context.flush_count);
return old; return old;
} }
...@@ -359,28 +406,44 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, ...@@ -359,28 +406,44 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
} }
EXPORT_SYMBOL(pmdp_xchg_lazy); EXPORT_SYMBOL(pmdp_xchg_lazy);
static inline pud_t pudp_flush_direct(struct mm_struct *mm, static inline void pudp_idte_local(struct mm_struct *mm,
unsigned long addr, pud_t *pudp) unsigned long addr, pud_t *pudp)
{ {
pud_t old; if (MACHINE_HAS_TLB_GUEST)
__pudp_idte(addr, pudp, IDTE_NODAT, IDTE_LOCAL);
else
__pudp_idte(addr, pudp, 0, IDTE_LOCAL);
}
old = *pudp; static inline void pudp_idte_global(struct mm_struct *mm,
if (pud_val(old) & _REGION_ENTRY_INVALID) unsigned long addr, pud_t *pudp)
return old; {
if (!MACHINE_HAS_IDTE) { if (MACHINE_HAS_TLB_GUEST)
__pudp_idte(addr, pudp, IDTE_NODAT, IDTE_GLOBAL);
else if (MACHINE_HAS_IDTE)
__pudp_idte(addr, pudp, 0, IDTE_GLOBAL);
else
/* /*
* Invalid bit position is the same for pmd and pud, so we can * Invalid bit position is the same for pmd and pud, so we can
* re-use _pmd_csp() here * re-use _pmd_csp() here
*/ */
__pmdp_csp((pmd_t *) pudp); __pmdp_csp((pmd_t *) pudp);
}
static inline pud_t pudp_flush_direct(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
pud_t old;
old = *pudp;
if (pud_val(old) & _REGION_ENTRY_INVALID)
return old; return old;
}
atomic_inc(&mm->context.flush_count); atomic_inc(&mm->context.flush_count);
if (MACHINE_HAS_TLB_LC && if (MACHINE_HAS_TLB_LC &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
__pudp_idte(addr, pudp, IDTE_LOCAL); pudp_idte_local(mm, addr, pudp);
else else
__pudp_idte(addr, pudp, IDTE_GLOBAL); pudp_idte_global(mm, addr, pudp);
atomic_dec(&mm->context.flush_count); atomic_dec(&mm->context.flush_count);
return old; return old;
} }
...@@ -645,7 +708,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) ...@@ -645,7 +708,7 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pte = *ptep; pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_pte_notify(mm, addr, ptep, pgste); pgste = pgste_pte_notify(mm, addr, ptep, pgste);
__ptep_ipte(addr, ptep, IPTE_GLOBAL); ptep_ipte_global(mm, addr, ptep);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT; pte_val(pte) |= _PAGE_PROTECT;
else else
......
...@@ -39,7 +39,7 @@ struct read_info_sccb { ...@@ -39,7 +39,7 @@ struct read_info_sccb {
u8 fac84; /* 84 */ u8 fac84; /* 84 */
u8 fac85; /* 85 */ u8 fac85; /* 85 */
u8 _pad_86[91 - 86]; /* 86-90 */ u8 _pad_86[91 - 86]; /* 86-90 */
u8 flags; /* 91 */ u8 fac91; /* 91 */
u8 _pad_92[98 - 92]; /* 92-97 */ u8 _pad_92[98 - 92]; /* 92-97 */
u8 fac98; /* 98 */ u8 fac98; /* 98 */
u8 hamaxpow; /* 99 */ u8 hamaxpow; /* 99 */
...@@ -103,6 +103,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) ...@@ -103,6 +103,8 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
sclp.has_kss = !!(sccb->fac98 & 0x01); sclp.has_kss = !!(sccb->fac98 & 0x01);
if (sccb->fac85 & 0x02) if (sccb->fac85 & 0x02)
S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP;
if (sccb->fac91 & 0x40)
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST;
sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
sclp.rzm <<= 20; sclp.rzm <<= 20;
...@@ -139,7 +141,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb) ...@@ -139,7 +141,7 @@ static void __init sclp_early_facilities_detect(struct read_info_sccb *sccb)
/* Save IPL information */ /* Save IPL information */
sclp_ipl_info.is_valid = 1; sclp_ipl_info.is_valid = 1;
if (sccb->flags & 0x2) if (sccb->fac91 & 0x2)
sclp_ipl_info.has_dump = 1; sclp_ipl_info.has_dump = 1;
memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN); memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment