Commit 0807b856 authored by Gerald Schaefer, committed by Heiko Carstens

s390/mm: add support for RDP (Reset DAT-Protection)

The RDP instruction allows resetting the DAT-protection bit in a PTE with
less CPU synchronization overhead than the IPTE instruction. In particular,
IPTE can cause machine-wide synchronization, and excessive IPTE usage can
negatively impact machine performance.

RDP can be used instead of IPTE if the new PTE only differs in SW bits and
the _PAGE_PROTECT HW bit, i.e. for PTE protection changes from RO to RW.
SW PTE bit changes are allowed, e.g. for dirty and young tracking, but no
other HW-defined part of the PTE may change. This is because the
architecture forbids such changes to an active and valid PTE, which is why
invalidation with IPTE is always used first before writing a new entry.
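
As a minimal standalone illustration of this rule (not part of the patch;
the real implementation is pte_allow_rdp() in the diff below, from which the
mask values are taken, except the _PAGE_PROTECT value of 0x200, which is
assumed here to match the s390 definition):

        #define _PAGE_PROTECT   0x200UL         /* HW read-only bit, value assumed */
        #define _PAGE_SW_BITS   0xffUL          /* all SW bits, as in the diff below */
        #define _PAGE_RDP_MASK  ~(_PAGE_PROTECT | _PAGE_SW_BITS)

        /* RO -> RW transition, with no HW bit other than _PAGE_PROTECT changing */
        static int rdp_allowed(unsigned long old, unsigned long new)
        {
                if (!(old & _PAGE_PROTECT) || (new & _PAGE_PROTECT))
                        return 0;
                return (old & _PAGE_RDP_MASK) == (new & _PAGE_RDP_MASK);
        }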

The RDP optimization helps mainly with fault-driven SW dirty-bit tracking.
Writable PTEs are initially always mapped with the HW _PAGE_PROTECT bit set,
to allow SW dirty-bit accounting on the first write protection fault, where
the DAT-protection is then reset. The reset is now done with RDP instead of
IPTE, if the RDP instruction is available.
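
Condensed, the new fault-path dispatch (see the ptep_set_access_flags() hunk
below) is:

        if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) &&
            pte_allow_rdp(*ptep, entry))
                ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
        else
                ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);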

RDP cannot always guarantee that the DAT-protection reset is propagated to
all CPUs immediately, so spurious TLB protection faults can now occur on
other CPUs. For this, common code provides a flush_tlb_fix_spurious_fault()
handler, which is now used to do a CPU-local TLB flush. However, this clears
the whole TLB of a CPU, not just the affected entry. For more fine-grained
flushing, by simply doing a (local) RDP again, flush_tlb_fix_spurious_fault()
would need to also provide the PTE pointer.
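
A hypothetical sketch of such a finer-grained variant, assuming the common
code interface were extended to pass the PTE pointer (the extra ptep
parameter does not exist today):

        static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
                                                        unsigned long address,
                                                        pte_t *ptep)
        {
                if (MACHINE_HAS_RDP)
                        /* redo a local RDP on just this entry instead of "ptlb" */
                        __ptep_rdp(address, ptep, 0, 0, 1);
        }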

Note that spurious TLB protection faults cannot really be distinguished from
racing pagetable updates, where another thread has already installed the
correct PTE. In such a case the local TLB flush is unnecessary overhead, but
the overall reduction in CPU synchronization overhead from avoiding IPTE is
still expected to be beneficial.

Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
parent d939474b
arch/s390/include/asm/pgtable.h
@@ -182,12 +182,20 @@ static inline int is_module_addr(void *addr)
#define _PAGE_SOFT_DIRTY	0x000
#endif
#define _PAGE_SW_BITS		0xffUL		/* All SW bits */
#define _PAGE_SWP_EXCLUSIVE	_PAGE_LARGE	/* SW pte exclusive swap bit */

/* Set of bits not changed in pte_modify */
#define _PAGE_CHG_MASK		(PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
				 _PAGE_YOUNG | _PAGE_SOFT_DIRTY)

/*
 * Mask of bits that must not be changed with RDP. Allow only _PAGE_PROTECT
 * HW bit and all SW bits.
 */
#define _PAGE_RDP_MASK		~(_PAGE_PROTECT | _PAGE_SW_BITS)
/*
 * handle_pte_fault uses pte_present and pte_none to find out the pte type
 * WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
@@ -1052,6 +1060,19 @@ static inline pte_t pte_mkhuge(pte_t pte)
#define IPTE_NODAT	0x400
#define IPTE_GUEST_ASCE	0x800

static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
				       unsigned long opt, unsigned long asce,
				       int local)
{
	unsigned long pto;

	/* Page-table origin: PTE pointer rounded down to the page table start */
	pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
	/* RDP (Reset DAT-Protection), RRF-format instruction, opcode 0xb98b */
	asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
		     : "+m" (*ptep)
		     : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
		       [asce] "a" (asce), [m4] "i" (local));
}
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
unsigned long opt, unsigned long asce,
int local)
@@ -1202,6 +1223,42 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
	ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
}

/*
 * Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
 * bits in the comparison. Those might change e.g. because of dirty and young
 * tracking.
 */
static inline int pte_allow_rdp(pte_t old, pte_t new)
{
	/*
	 * Only allow changes from RO to RW
	 */
	if (!(pte_val(old) & _PAGE_PROTECT) || pte_val(new) & _PAGE_PROTECT)
		return 0;
	return (pte_val(old) & _PAGE_RDP_MASK) == (pte_val(new) & _PAGE_RDP_MASK);
}
static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
						unsigned long address)
{
	/*
	 * RDP might not have propagated the PTE protection reset to all CPUs,
	 * so there could be spurious TLB protection faults.
	 * NOTE: This will also be called when a racing pagetable update on
	 * another thread already installed the correct PTE. Both cases cannot
	 * really be distinguished.
	 * Therefore, only do the local TLB flush when RDP can be used, to avoid
	 * unnecessary overhead.
	 */
	if (MACHINE_HAS_RDP)
		asm volatile("ptlb" : : : "memory");
}
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new);

#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep,
@@ -1209,7 +1266,10 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
{
	if (pte_same(*ptep, entry))
		return 0;
	if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
		ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
	else
		ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
	return 1;
}
arch/s390/include/asm/setup.h
@@ -34,6 +34,7 @@
#define MACHINE_FLAG_GS		BIT(16)
#define MACHINE_FLAG_SCC	BIT(17)
#define MACHINE_FLAG_PCI_MIO	BIT(18)
#define MACHINE_FLAG_RDP	BIT(19)

#define LPP_MAGIC		BIT(31)
#define LPP_PID_MASK		_AC(0xffffffff, UL)
@@ -99,6 +100,7 @@ extern unsigned long mio_wb_bit_mask;
#define MACHINE_HAS_GS		(S390_lowcore.machine_flags & MACHINE_FLAG_GS)
#define MACHINE_HAS_SCC		(S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
#define MACHINE_HAS_PCI_MIO	(S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
#define MACHINE_HAS_RDP		(S390_lowcore.machine_flags & MACHINE_FLAG_RDP)
/*
 * Console mode. Override with conmode=
arch/s390/kernel/early.c
@@ -226,6 +226,8 @@ static __init void detect_machine_facilities(void)
		S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
		/* the control bit is set during PCI initialization */
	}
	if (test_facility(194))
		S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
}

static inline void save_vector_registers(void)
arch/s390/mm/pgtable.c
@@ -302,6 +302,31 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(ptep_xchg_direct);
/*
 * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
 * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
 */
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new)
{
	preempt_disable();
	atomic_inc(&mm->context.flush_count);
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_rdp(addr, ptep, 0, 0, 1);	/* mm is local: local-clearing RDP */
	else
		__ptep_rdp(addr, ptep, 0, 0, 0);	/* broadcast to all CPUs */
	/*
	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
	 * means it is still valid and active, and must not be changed according
	 * to the architecture. But writing a new value that only differs in SW
	 * bits is allowed.
	 */
	set_pte(ptep, new);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}
EXPORT_SYMBOL(ptep_reset_dat_prot);
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t new)
{