Commit a2953204 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'powerpc-5.4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "An assortment of fixes that were either missed by me, or didn't arrive
  quite in time for the first v5.4 pull.

   - Most notable is a fix for an issue with tlbie (broadcast TLB
     invalidation) on Power9, when using the Radix MMU. The tlbie can
     race with an mtpid (move to PID register, essentially MMU context
     switch) on another thread of the core, which can cause stores to
     continue to go to a page after it's unmapped.

   - A fix in our KVM code to add a missing barrier, the lack of which
     has been observed to cause missed IPIs and subsequently stuck CPUs
     in the host.

   - A change to the way we initialise PCR (Processor Compatibility
     Register) to make it forward compatible with future CPUs.

   - On some older PowerVM systems our H_BLOCK_REMOVE support could
     oops, fix it to detect such systems and fallback to the old
     invalidation method.

   - A fix for an oops seen on some machines when using KASAN on 32-bit.

   - A handful of other minor fixes, and two new selftests.

  Thanks to: Alistair Popple, Aneesh Kumar K.V, Christophe Leroy,
  Gustavo Romero, Joel Stanley, Jordan Niethe, Laurent Dufour, Michael
  Roth, Oliver O'Halloran"

* tag 'powerpc-5.4-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/eeh: Fix eeh eeh_debugfs_break_device() with SRIOV devices
  powerpc/nvdimm: use H_SCM_QUERY hcall on H_OVERLAP error
  powerpc/nvdimm: Use HCALL error as the return value
  selftests/powerpc: Add test case for tlbie vs mtpidr ordering issue
  powerpc/mm: Fixup tlbie vs mtpidr/mtlpidr ordering issue on POWER9
  powerpc/book3s64/radix: Rename CPU_FTR_P9_TLBIE_BUG feature flag
  powerpc/book3s64/mm: Don't do tlbie fixup for some hardware revisions
  powerpc/pseries: Call H_BLOCK_REMOVE when supported
  powerpc/pseries: Read TLB Block Invalidate Characteristics
  KVM: PPC: Book3S HV: use smp_mb() when setting/clearing host_ipi flag
  powerpc/mm: Fix an Oops in kasan_mmu_init()
  powerpc/mm: Add a helper to select PAGE_KERNEL_RO or PAGE_READONLY
  powerpc/64s: Set reserved PCR bits
  powerpc: Fix definition of PCR bits to work with old binutils
  powerpc/book3s64/radix: Remove WARN_ON in destroy_context()
  powerpc/tm: Add tm-poison test
parents f19e00ee 253c8921
......@@ -209,8 +209,9 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
#define CPU_FTR_P9_TLBIE_BUG LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
#ifndef __ASSEMBLY__
......@@ -457,7 +458,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
CPU_FTR_P9_TLBIE_BUG | CPU_FTR_P9_TIDR)
CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
......
......@@ -452,9 +452,100 @@ static inline u32 kvmppc_get_xics_latch(void)
return xirr;
}
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
/*
* To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
* a CPU thread that's running/napping inside of a guest is by default regarded
* as a request to wake the CPU (if needed) and continue execution within the
* guest, potentially to process new state like externally-generated
* interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
*
* To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
* prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
* target CPU's PACA. To avoid unnecessary exits to the host, this flag should
* be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
* the receiving side prior to processing the IPI work.
*
* NOTE:
*
* We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
* This is to guard against sequences such as the following:
*
* CPU
* X: smp_muxed_ipi_set_message():
* X: smp_mb()
* X: message[RESCHEDULE] = 1
* X: doorbell_global_ipi(42):
* X: kvmppc_set_host_ipi(42)
* X: ppc_msgsnd_sync()/smp_mb()
* X: ppc_msgsnd() -> 42
* 42: doorbell_exception(): // from CPU X
* 42: ppc_msgsync()
* 105: smp_muxed_ipi_set_message():
* 105: smp_mb()
* // STORE DEFERRED DUE TO RE-ORDERING
* --105: message[CALL_FUNCTION] = 1
* | 105: doorbell_global_ipi(42):
* | 105: kvmppc_set_host_ipi(42)
* | 42: kvmppc_clear_host_ipi(42)
* | 42: smp_ipi_demux_relaxed()
* | 42: // returns to executing guest
* | // RE-ORDERED STORE COMPLETES
* ->105: message[CALL_FUNCTION] = 1
* 105: ppc_msgsnd_sync()/smp_mb()
* 105: ppc_msgsnd() -> 42
* 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
* 105: // hangs waiting on 42 to process messages/call_single_queue
*
* We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
* to guard against sequences such as the following (as well as to create
* a read-side pairing with the barrier in kvmppc_set_host_ipi()):
*
* CPU
* X: smp_muxed_ipi_set_message():
* X: smp_mb()
* X: message[RESCHEDULE] = 1
* X: doorbell_global_ipi(42):
* X: kvmppc_set_host_ipi(42)
* X: ppc_msgsnd_sync()/smp_mb()
* X: ppc_msgsnd() -> 42
* 42: doorbell_exception(): // from CPU X
* 42: ppc_msgsync()
* // STORE DEFERRED DUE TO RE-ORDERING
* -- 42: kvmppc_clear_host_ipi(42)
* | 42: smp_ipi_demux_relaxed()
* | 105: smp_muxed_ipi_set_message():
* | 105: smp_mb()
* | 105: message[CALL_FUNCTION] = 1
* | 105: doorbell_global_ipi(42):
* | 105: kvmppc_set_host_ipi(42)
* | // RE-ORDERED STORE COMPLETES
* -> 42: kvmppc_clear_host_ipi(42)
* 42: // returns to executing guest
* 105: ppc_msgsnd_sync()/smp_mb()
* 105: ppc_msgsnd() -> 42
* 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
* 105: // hangs waiting on 42 to process messages/call_single_queue
*/
static inline void kvmppc_set_host_ipi(int cpu)
{
paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
/*
* order stores of IPI messages vs. setting of host_ipi flag
*
* pairs with the barrier in kvmppc_clear_host_ipi()
*/
smp_mb();
paca_ptrs[cpu]->kvm_hstate.host_ipi = 1;
}
/*
* Clear the 'host_ipi' flag in the PACA of @cpu, then issue a full memory
* barrier. The barrier must stay after the store: it is what orders the
* cleared flag ahead of the IPI message processing done by the caller.
*/
static inline void kvmppc_clear_host_ipi(int cpu)
{
paca_ptrs[cpu]->kvm_hstate.host_ipi = 0;
/*
* order clearing of host_ipi flag vs. processing of IPI messages
*
* pairs with the barrier in kvmppc_set_host_ipi()
*/
smp_mb();
}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
......@@ -486,7 +577,10 @@ static inline u32 kvmppc_get_xics_latch(void)
return 0;
}
static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
static inline void kvmppc_set_host_ipi(int cpu)
{}
static inline void kvmppc_clear_host_ipi(int cpu)
{}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
......
......@@ -475,9 +475,10 @@
#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
#define PCR_VEC_DIS (__MASK(63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (__MASK(63-1)) /* VSX disable (bit NA since POWER8) */
#define PCR_TM_DIS (__MASK(63-2)) /* Trans. memory disable (POWER8) */
#define PCR_HIGH_BITS (PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
/*
* These bits are used in the function kvmppc_set_arch_compat() to specify and
* determine both the compatibility level which we want to emulate and the
......@@ -486,6 +487,8 @@
#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205)
#define PCR_MASK ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */
#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */
......
......@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
......@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
......@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
......@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
......@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
......@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
......
......@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
{
u32 tag = get_hard_smp_processor_id(cpu);
kvmppc_set_host_ipi(cpu, 1);
kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
......@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
{
u32 tag = cpu_thread_in_core(cpu);
kvmppc_set_host_ipi(cpu, 1);
kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
......@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
kvmppc_set_host_ipi(smp_processor_id(), 0);
kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux_relaxed(); /* already performed the barrier */
......
......@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
mtspr(SPRN_PCR, 0);
mtspr(SPRN_PCR, PCR_MASK);
}
mtspr(SPRN_FSCR, system_registers.fscr);
......@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
mtspr(SPRN_HFSCR, 0);
}
mtspr(SPRN_FSCR, 0);
mtspr(SPRN_PCR, PCR_MASK);
/*
* LPCR does not get cleared, to match behaviour with secondaries
......@@ -691,9 +692,37 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
return true;
}
/*
 * Work out which POWER9 broadcast-tlbie workarounds this CPU needs from its
 * PVR and record them as cpu feature flags. Non-POWER9 CPUs are left alone.
 *
 * @pvr: raw Processor Version Register value
 */
static __init void update_tlbie_feature_flag(unsigned long pvr)
{
	/* Every POWER9 gets the ERAT workaround */
	unsigned long bugs = CPU_FTR_P9_TLBIE_ERAT_BUG;

	if (PVR_VER(pvr) != PVR_POWER9)
		return;

	/*
	 * The STQ workaround is needed for anything below
	 * Nimbus DD 2.3 and Cumulus DD 1.3
	 */
	if (!(pvr & 0xe000)) {
		/* Nimbus */
		if ((pvr & 0xfff) < 0x203)
			bugs |= CPU_FTR_P9_TLBIE_STQ_BUG;
	} else if (!(pvr & 0xc000)) {
		/* Cumulus */
		if ((pvr & 0xfff) < 0x103)
			bugs |= CPU_FTR_P9_TLBIE_STQ_BUG;
	} else {
		/* Unrecognised chip type: warn and assume it is affected */
		WARN_ONCE(1, "Unknown PVR");
		bugs |= CPU_FTR_P9_TLBIE_STQ_BUG;
	}

	cur_cpu_spec->cpu_features |= bugs;
}
static __init void cpufeatures_cpu_quirks(void)
{
int version = mfspr(SPRN_PVR);
unsigned long version = mfspr(SPRN_PVR);
/*
* Not all quirks can be derived from the cpufeatures device tree.
......@@ -712,10 +741,10 @@ static __init void cpufeatures_cpu_quirks(void)
if ((version & 0xffff0000) == 0x004e0000) {
cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
}
update_tlbie_feature_flag(version);
/*
* PKEY was not in the initial base or feature node
* specification, but it should become optional in the next
......
......@@ -1960,7 +1960,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
pci_err(pdev, "Going to break: %pR\n", bar);
if (pdev->is_virtfn) {
#ifndef CONFIG_IOV
#ifndef CONFIG_PCI_IOV
return -ENXIO;
#else
/*
......@@ -1980,7 +1980,7 @@ static int eeh_debugfs_break_device(struct pci_dev *pdev)
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
pos += PCI_SRIOV_CTRL;
bit = PCI_SRIOV_CTRL_MSE;
#endif /* !CONFIG_IOV */
#endif /* !CONFIG_PCI_IOV */
} else {
bit = PCI_COMMAND_MEMORY;
pos = PCI_COMMAND;
......
......@@ -401,8 +401,11 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
/* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
vc->pcr = host_pcr_bit - guest_pcr_bit;
/*
* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
* Also set all reserved PCR bits
*/
vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
spin_unlock(&vc->lock);
return 0;
......@@ -3410,7 +3413,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
}
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr);
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
......@@ -3490,7 +3493,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
mtspr(SPRN_PCR, 0);
mtspr(SPRN_PCR, PCR_MASK);
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
......
......@@ -29,7 +29,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
hr->pcr = vc->pcr;
hr->pcr = vc->pcr | PCR_MASK;
hr->dpdes = vc->dpdes;
hr->hfscr = vcpu->arch.hfscr;
hr->tb_offset = vc->tb_offset;
......@@ -65,7 +65,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
hr->lpid = swab32(hr->lpid);
hr->vcpu_token = swab32(hr->vcpu_token);
hr->lpcr = swab64(hr->lpcr);
hr->pcr = swab64(hr->pcr);
hr->pcr = swab64(hr->pcr) | PCR_MASK;
hr->amor = swab64(hr->amor);
hr->dpdes = swab64(hr->dpdes);
hr->hfscr = swab64(hr->hfscr);
......@@ -148,7 +148,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
vc->pcr = hr->pcr;
vc->pcr = hr->pcr | PCR_MASK;
vc->dpdes = hr->dpdes;
vcpu->arch.hfscr = hr->hfscr;
vcpu->arch.dawr = hr->dawr0;
......
......@@ -433,6 +433,37 @@ static inline int is_mmio_hpte(unsigned long v, unsigned long r)
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}
/*
* Issue the extra tlbie(s) required on CPUs that have the POWER9 tlbie
* feature bits set; does nothing when neither bit is set.
*
* @rb_value: first (RB-position) operand of the tlbie issued for the STQ fixup
* @lpid: second (RS-position) operand of the tlbie issued for the STQ fixup
*/
static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* Radix flush for a hash guest */
unsigned long rb,rs,prs,r,ric;
rb = PPC_BIT(52); /* IS = 2 */
rs = 0; /* lpid = 0 */
prs = 0; /* partition scoped */
r = 1; /* radix format */
ric = 0; /* RIC_FLUSH_TLB */
/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
/* Operands are passed to PPC_TLBIE_5 in the order rb, rs, ric, prs, r */
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs),
"i"(ric), "r"(rs) : "memory");
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* ptesync first, then one more tlbie using the caller's rb_value/lpid */
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rb_value), "r" (lpid));
}
}
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
......@@ -451,16 +482,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[0]), "r" (kvm->arch.lpid));
}
fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
} else {
if (need_sync)
......
......@@ -58,7 +58,7 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
kvmppc_set_host_ipi(hcpu, 1);
kvmppc_set_host_ipi(hcpu);
smp_mb();
kvmhv_rm_send_ipi(hcpu);
}
......
......@@ -644,8 +644,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Load guest PCR value to select appropriate compat mode */
37: ld r7, VCORE_PCR(r5)
cmpdi r7, 0
LOAD_REG_IMMEDIATE(r6, PCR_MASK)
cmpld r7, r6
beq 38f
or r7, r7, r6
mtspr SPRN_PCR, r7
38:
......@@ -1913,10 +1915,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Reset PCR */
ld r0, VCORE_PCR(r5)
cmpdi r0, 0
LOAD_REG_IMMEDIATE(r6, PCR_MASK)
cmpld r0, r6
beq 18f
li r0, 0
mtspr SPRN_PCR, r0
mtspr SPRN_PCR, r6
18:
/* Signal secondary CPUs to continue */
stb r0,VCORE_IN_GUEST(r5)
......
......@@ -197,9 +197,32 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize,
return va;
}
static inline void fixup_tlbie(unsigned long vpn, int psize, int apsize, int ssize)
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
int apsize, int ssize)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
/* Radix flush for a hash guest */
unsigned long rb,rs,prs,r,ric;
rb = PPC_BIT(52); /* IS = 2 */
rs = 0; /* lpid = 0 */
prs = 0; /* partition scoped */
r = 1; /* radix format */
ric = 0; /* RIC_FLSUH_TLB */
/*
* Need the extra ptesync to make sure we don't
* re-order the tlbie
*/
asm volatile("ptesync": : :"memory");
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs),
"i"(ric), "r"(rs) : "memory");
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
/* Need the extra ptesync to ensure we don't reorder tlbie*/
asm volatile("ptesync": : :"memory");
___tlbie(vpn, psize, apsize, ssize);
......@@ -283,7 +306,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
asm volatile("ptesync": : :"memory");
} else {
__tlbie(vpn, psize, apsize, ssize);
fixup_tlbie(vpn, psize, apsize, ssize);
fixup_tlbie_vpn(vpn, psize, apsize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
......@@ -856,7 +879,7 @@ static void native_flush_hash_range(unsigned long number, int local)
/*
* Just do one more with the last used values.
*/
fixup_tlbie(vpn, psize, psize, ssize);
fixup_tlbie_vpn(vpn, psize, psize, ssize);
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
......
......@@ -256,8 +256,21 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_SPAPR_TCE_IOMMU
WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
/*
* For tasks which were successfully initialized we end up calling
* arch_exit_mmap() which clears the process table entry. And
* arch_exit_mmap() is called before the required fullmm TLB flush
* which does a RIC=2 flush. Hence for an initialized task, we do clear
* any cached process table entries.
*
* The condition below handles the error case during task init. We have
* set the process table entry early and if we fail a task
* initialization, we need to ensure the process table entry is zeroed.
* We need not worry about process table entry caches because the task
* never ran with the PID value.
*/
if (radix_enabled())
WARN_ON(process_tb[mm->context.id].prtb0 != 0);
process_tb[mm->context.id].prtb0 = 0;
else
subpage_prot_free(mm);
destroy_contexts(&mm->context);
......
......@@ -196,22 +196,83 @@ static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid
trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
static inline void fixup_tlbie(void)
/*
* Follow-up invalidations for a single-address tlbie on CPUs that have the
* POWER9 tlbie feature bits set. Each enabled fixup is a ptesync followed by
* one more __tlbie_va() with RIC_FLUSH_TLB; nothing is done when neither
* feature bit is set.
*/
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
unsigned long ap)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
/* ERAT fixup: same va/ap, but PID 0 instead of the caller's pid */
__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
/* STQ fixup: one more tlbie for the caller's own va/pid */
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
}
}
/*
* Follow-up invalidations for a range of tlbie operations on CPUs that have
* the POWER9 tlbie feature bits set. @va, @pid and @ap are only used by the
* STQ fixup; the ERAT fixup is a PID-wide flush of PID 0.
*/
static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
unsigned long ap)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
/* ERAT fixup: flush by PID (0) rather than by address */
__tlbie_pid(0, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
/* STQ fixup: one more tlbie for the va/pid supplied by the caller */
__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
}
}
static inline void fixup_tlbie_pid(unsigned long pid)
{
unsigned long pid = 0;
/*
* We can use any address for the invalidation, pick one which is
* probably unused as an optimisation.
*/
unsigned long va = ((1UL << 52) - 1);
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_pid(0, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
}
/*
* LPID-scoped counterpart of the single-address fixup: on CPUs that have the
* POWER9 tlbie feature bits set, issue a ptesync followed by one more
* __tlbie_lpid_va() with RIC_FLUSH_TLB for each enabled fixup.
*/
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
{
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
/* ERAT fixup: same va/ap, but LPID 0 instead of the caller's lpid */
__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
/* STQ fixup: one more tlbie for the caller's own va/lpid */
__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
}
}
static inline void fixup_tlbie_lpid(unsigned long lpid)
{
/*
* We can use any address for the invalidation, pick one which is
* probably unused as an optimisation.
*/
unsigned long va = ((1UL << 52) - 1);
if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) {
if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid(0, RIC_FLUSH_TLB);
}
if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
}
......@@ -258,6 +319,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_pid(pid, RIC_FLUSH_TLB);
fixup_tlbie_pid(pid);
break;
case RIC_FLUSH_PWC:
__tlbie_pid(pid, RIC_FLUSH_PWC);
......@@ -265,8 +327,8 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_pid(pid, RIC_FLUSH_ALL);
fixup_tlbie_pid(pid);
}
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
......@@ -315,6 +377,7 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
switch (ric) {
case RIC_FLUSH_TLB:
__tlbie_lpid(lpid, RIC_FLUSH_TLB);
fixup_tlbie_lpid(lpid);
break;
case RIC_FLUSH_PWC:
__tlbie_lpid(lpid, RIC_FLUSH_PWC);
......@@ -322,8 +385,8 @@ static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
case RIC_FLUSH_ALL:
default:
__tlbie_lpid(lpid, RIC_FLUSH_ALL);
fixup_tlbie_lpid(lpid);
}
fixup_tlbie_lpid(lpid);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
......@@ -390,6 +453,8 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
for (addr = start; addr < end; addr += page_size)
__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
......@@ -399,7 +464,7 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile("ptesync": : :"memory");
__tlbie_va(va, pid, ap, ric);
fixup_tlbie();
fixup_tlbie_va(va, pid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
......@@ -457,7 +522,7 @@ static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
asm volatile("ptesync": : :"memory");
__tlbie_lpid_va(va, lpid, ap, ric);
fixup_tlbie_lpid(lpid);
fixup_tlbie_lpid_va(va, lpid, ap);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
......@@ -469,7 +534,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
if (also_pwc)
__tlbie_pid(pid, RIC_FLUSH_PWC);
__tlbie_va_range(start, end, pid, page_size, psize);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
......@@ -856,7 +920,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
if (gflush)
__tlbie_va_range(gstart, gend, pid,
PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
} else {
_tlbiel_va_range_multicast(mm,
......
......@@ -12,6 +12,14 @@
#include <asm/code-patching.h>
#include <mm/mmu_decl.h>
/*
 * Pick the read-only protection used for the KASAN shadow: PAGE_READONLY
 * when the MMU has the HPTE table feature, PAGE_KERNEL_RO otherwise.
 */
static pgprot_t kasan_prot_ro(void)
{
	return early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? PAGE_READONLY
							  : PAGE_KERNEL_RO;
}
static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
{
unsigned long va = (unsigned long)kasan_early_shadow_page;
......@@ -26,6 +34,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
{
pmd_t *pmd;
unsigned long k_cur, k_next;
pgprot_t prot = slab_is_available() ? kasan_prot_ro() : PAGE_KERNEL;
pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
......@@ -43,10 +52,7 @@ static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned l
if (!new)
return -ENOMEM;
if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
kasan_populate_pte(new, PAGE_READONLY);
else
kasan_populate_pte(new, PAGE_KERNEL_RO);
kasan_populate_pte(new, prot);
smp_wmb(); /* See comment in __pte_alloc */
......@@ -103,11 +109,23 @@ static int __ref kasan_init_region(void *start, size_t size)
static void __init kasan_remap_early_shadow_ro(void)
{
if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
else
kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
pgprot_t prot = kasan_prot_ro();
unsigned long k_start = KASAN_SHADOW_START;
unsigned long k_end = KASAN_SHADOW_END;
unsigned long k_cur;
phys_addr_t pa = __pa(kasan_early_shadow_page);
kasan_populate_pte(kasan_early_shadow_pte, prot);
for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
pte_t *ptep = pte_offset_kernel(pmd, k_cur);
if ((pte_val(*ptep) & PTE_RPN_MASK) != pa)
continue;
__set_pte_at(&init_mm, k_cur, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
}
flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
}
......
......@@ -193,7 +193,7 @@ static void pnv_smp_cpu_kill_self(void)
* for coming online, which are handled via
* generic_check_cpu_restart() calls.
*/
kvmppc_set_host_ipi(cpu, 0);
kvmppc_clear_host_ipi(cpu);
srr1 = pnv_cpu_offline(cpu);
......
......@@ -56,6 +56,22 @@ EXPORT_SYMBOL(plpar_hcall);
EXPORT_SYMBOL(plpar_hcall9);
EXPORT_SYMBOL(plpar_hcall_norets);
/*
* H_BLOCK_REMOVE supported block size for this page size in a segment whose
* base page size is that page size.
*
* The first index is the segment base page size, the second one is the actual
* page size.
*/
static int hblkrm_size[MMU_PAGE_COUNT][MMU_PAGE_COUNT] __ro_after_init;
/*
* Due to the involved complexity, and that the current hypervisor is only
* returning this value or 0, we are limiting the support of the H_BLOCK_REMOVE
* buffer size to 8 size block.
*/
#define HBLKRM_SUPPORTED_BLOCK_SIZE 8
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static u8 dtl_mask = DTL_LOG_PREEMPT;
#else
......@@ -984,6 +1000,17 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
/*
 * Return true when H_BLOCK_REMOVE can be used for the given segment base
 * page size (@bpsize) and actual page size (@psize).
 *
 * Only a block size of HBLKRM_SUPPORTED_BLOCK_SIZE is supported for now.
 */
static inline bool is_supported_hlbkrm(int bpsize, int psize)
{
	int block_size = hblkrm_size[bpsize][psize];

	return block_size == HBLKRM_SUPPORTED_BLOCK_SIZE;
}
/**
* H_BLOCK_REMOVE caller.
* @idx should point to the latest @param entry set with a PTEX.
......@@ -1143,7 +1170,8 @@ static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
/* Assuming THP size is 16M */
if (is_supported_hlbkrm(psize, MMU_PAGE_16M))
hugepage_block_invalidate(slot, vpn, count, psize, ssize);
else
hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
......@@ -1311,6 +1339,137 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
(void)call_block_remove(pix, param, true);
}
/*
* TLB Block Invalidate Characteristics
*
* These characteristics define the size of the block the hcall H_BLOCK_REMOVE
* is able to process for each couple segment base page size, actual page size.
*
* The ibm,get-system-parameter RTAS call returns a buffer with the
* following layout:
*
* [ 2 bytes size of the RTAS buffer (excluding these 2 bytes) ]
* -----------------
* TLB Block Invalidate Specifiers:
* [ 1 byte LOG base 2 of the TLB invalidate block size being specified ]
* [ 1 byte Number of page sizes (N) that are supported for the specified
* TLB invalidate block size ]
* [ 1 byte Encoded segment base page size and actual page size
* MSB=0 means 4k segment base page size and actual page size
* MSB=1 the penc value in mmu_psize_def ]
* ...
* -----------------
* Next TLB Block Invalidate Specifiers...
* -----------------
* [ 0 ]
*/
/*
 * Record @block_size for the (@bpsize, @psize) pair, keeping whichever value
 * is larger if a block size was already recorded for that pair.
 */
static inline void set_hblkrm_bloc_size(int bpsize, int psize,
					unsigned int block_size)
{
	int *slot = &hblkrm_size[bpsize][psize];

	if (*slot < block_size)
		*slot = block_size;
}
/*
* Decode the Encoded segment base page size and actual page size.
* PAPR specifies:
* - bit 7 is the L bit
* - bits 0-5 are the penc value
* If the L bit is 0, this means 4K segment base page size and actual page size
* otherwise the penc value should be read.
*/
#define HBLKRM_L_MASK 0x80
#define HBLKRM_PENC_MASK 0x3f
/*
 * Decode one encoded page-size byte (@lp) and record @block_size for the
 * (segment base page size, actual page size) pair it designates. An encoding
 * that matches no entry of mmu_psize_defs[] is silently ignored.
 */
static inline void __init check_lp_set_hblkrm(unsigned int lp,
					      unsigned int block_size)
{
	unsigned int base, actual;

	/* L bit clear: 4K base and actual page size, no penc lookup needed */
	if (!(lp & HBLKRM_L_MASK)) {
		set_hblkrm_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
		return;
	}

	/* L bit set: find the first (base, actual) pair with this penc value */
	lp &= HBLKRM_PENC_MASK;
	for (base = 0; base < MMU_PAGE_COUNT; base++) {
		for (actual = 0; actual < MMU_PAGE_COUNT; actual++) {
			if (mmu_psize_defs[base].penc[actual] != lp)
				continue;

			set_hblkrm_bloc_size(base, actual, block_size);
			return;
		}
	}
}
#define SPLPAR_TLB_BIC_TOKEN 50
/*
* The size of the TLB Block Invalidate Characteristics is variable. But at the
* maximum it will be the number of possible page sizes *2 + 10 bytes.
* Currently MMU_PAGE_COUNT is 16, which means 42 bytes. Use a cache line size
* (128 bytes) for the buffer to get plenty of space.
*/
#define SPLPAR_TLB_BIC_MAXLENGTH 128
/*
 * Query the TLB Block Invalidate Characteristics system parameter through
 * RTAS and record in hblkrm_size[][] the largest H_BLOCK_REMOVE block size
 * reported for each (segment base page size, actual page size) pair.
 *
 * If the RTAS call fails, or the reply claims to be longer than
 * SPLPAR_TLB_BIC_MAXLENGTH, a warning is logged and hblkrm_size[][] is left
 * untouched.
 */
void __init pseries_lpar_read_hblkrm_characteristics(void)
{
	unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
	int call_status, len, idx, bpsize;

	/* Copy the reply out of rtas_data_buf while its lock is still held */
	spin_lock(&rtas_data_buf_lock);
	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
				NULL,
				SPLPAR_TLB_BIC_TOKEN,
				__pa(rtas_data_buf),
				RTAS_DATA_BUF_SIZE);
	memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
	/* Force a zero last byte so the parser below always meets a terminator */
	local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
	spin_unlock(&rtas_data_buf_lock);

	if (call_status != 0) {
		pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
			__FILE__, __func__, call_status);
		return;
	}

	/*
	 * The first two (2) bytes of the data in the buffer are the length of
	 * the returned data, not counting these first two (2) bytes.
	 */
	len = be16_to_cpu(*((u16 *)local_buffer)) + 2;
	if (len > SPLPAR_TLB_BIC_MAXLENGTH) {
		pr_warn("%s too large returned buffer %d\n", __func__, len);
		return;
	}

	/* Walk the specifiers: [log2 block size] [N] [N encoded page sizes] */
	idx = 2;
	while (idx < len) {
		u8 block_shift = local_buffer[idx++];
		u32 block_size;
		unsigned int npsize;

		/* A zero block shift marks the end of the list */
		if (!block_shift)
			break;

		/*
		 * Shifting a 32-bit int by 31 or more is undefined, and the
		 * shift comes straight from firmware. Treat such a specifier
		 * as "no block size" (0 is never stored by
		 * set_hblkrm_bloc_size()) but still walk its page-size entries
		 * so that idx stays in step with the buffer layout.
		 */
		block_size = block_shift < 31 ? 1U << block_shift : 0;

		for (npsize = local_buffer[idx++];
		     npsize > 0 && idx < len; npsize--)
			check_lp_set_hblkrm((unsigned int) local_buffer[idx++],
					    block_size);
	}

	/* Log every (base page size, page size) pair that got a block size */
	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
			if (hblkrm_size[bpsize][idx])
				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d\n",
					bpsize, idx, hblkrm_size[bpsize][idx]);
}
/*
* Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
* lock.
......@@ -1330,7 +1489,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
if (is_supported_hlbkrm(batch->psize, batch->psize)) {
do_block_remove(number, batch, param);
goto out;
}
......
......@@ -65,29 +65,21 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
cond_resched();
} while (rc == H_BUSY);
if (rc) {
/* H_OVERLAP needs a separate error path */
if (rc == H_OVERLAP)
return -EBUSY;
dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
return -ENXIO;
}
if (rc)
return rc;
p->bound_addr = saved;
dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
return 0;
dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
return rc;
}
static int drc_pmem_unbind(struct papr_scm_priv *p)
static void drc_pmem_unbind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
uint64_t token = 0;
int64_t rc;
dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
dev_dbg(&p->pdev->dev, "unbind drc 0x%x\n", p->drc_index);
/* NB: unbind has the same retry requirements as drc_pmem_bind() */
do {
......@@ -110,12 +102,48 @@ static int drc_pmem_unbind(struct papr_scm_priv *p)
if (rc)
dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
else
dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
dev_dbg(&p->pdev->dev, "unbind drc 0x%x complete\n",
p->drc_index);
return rc == H_SUCCESS ? 0 : -ENXIO;
return;
}
/*
 * Check whether the whole DRC region is already bound and, if it is, adopt
 * the existing binding instead of creating a new one.
 *
 * The address of block 0 and of the last block (p->blocks - 1) are queried
 * with H_SCM_QUERY_BLOCK_MEM_BINDING. When both queries succeed and the two
 * addresses are exactly (p->blocks - 1) * p->block_size apart, the start
 * address is stored in p->bound_addr and the hcall status is returned.
 *
 * If either query fails or the distance does not match, the region is
 * unbound and bound again, and the result of drc_pmem_bind() is returned.
 */
static int drc_pmem_query_n_bind(struct papr_scm_priv *p)
{
	unsigned long start_addr;
	unsigned long end_addr;
	unsigned long ret[PLPAR_HCALL_BUFSIZE];
	int64_t rc;

	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, 0);
	if (rc)
		goto err_out;
	start_addr = ret[0];

	/* Make sure the full region is bound. */
	rc = plpar_hcall(H_SCM_QUERY_BLOCK_MEM_BINDING, ret,
			 p->drc_index, p->blocks - 1);
	if (rc)
		goto err_out;
	end_addr = ret[0];

	/* First and last block must span exactly the expected distance. */
	if ((end_addr - start_addr) != ((p->blocks - 1) * p->block_size))
		goto err_out;

	p->bound_addr = start_addr;
	dev_dbg(&p->pdev->dev, "bound drc 0x%x to %pR\n", p->drc_index, &p->res);
	return rc;

err_out:
	/* Terminate the message with a newline like the other log calls. */
	dev_info(&p->pdev->dev,
		 "Failed to query, trying an unbind followed by bind\n");
	drc_pmem_unbind(p);
	return drc_pmem_bind(p);
}
static int papr_scm_meta_get(struct papr_scm_priv *p,
struct nd_cmd_get_config_data_hdr *hdr)
{
......@@ -436,14 +464,14 @@ static int papr_scm_probe(struct platform_device *pdev)
rc = drc_pmem_bind(p);
/* If phyp says drc memory still bound then force unbound and retry */
if (rc == -EBUSY) {
dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
drc_pmem_unbind(p);
rc = drc_pmem_bind(p);
}
if (rc == H_OVERLAP)
rc = drc_pmem_query_n_bind(p);
if (rc)
if (rc != H_SUCCESS) {
dev_err(&p->pdev->dev, "bind err: %d\n", rc);
rc = -ENXIO;
goto err;
}
/* setup the resource for the newly bound range */
p->res.start = p->bound_addr;
......
......@@ -112,5 +112,6 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
void pseries_setup_rfi_flush(void);
void pseries_lpar_read_hblkrm_characteristics(void);
#endif /* _PSERIES_PSERIES_H */
......@@ -744,6 +744,7 @@ static void __init pSeries_setup_arch(void)
pseries_setup_rfi_flush();
setup_stf_barrier();
pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
......
......@@ -140,7 +140,7 @@ static unsigned int icp_native_get_irq(void)
static void icp_native_cause_ipi(int cpu)
{
kvmppc_set_host_ipi(cpu, 1);
kvmppc_set_host_ipi(cpu);
icp_native_set_qirr(cpu, IPI_PRIORITY);
}
......@@ -179,7 +179,7 @@ void icp_native_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
kvmppc_set_host_ipi(cpu, 0);
kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, disabling\n",
......@@ -200,7 +200,7 @@ static irqreturn_t icp_native_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
kvmppc_set_host_ipi(cpu, 0);
kvmppc_clear_host_ipi(cpu);
icp_native_set_qirr(cpu, 0xff);
return smp_ipi_demux();
......
......@@ -126,7 +126,7 @@ static void icp_opal_cause_ipi(int cpu)
{
int hw_cpu = get_hard_smp_processor_id(cpu);
kvmppc_set_host_ipi(cpu, 1);
kvmppc_set_host_ipi(cpu);
opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
}
......@@ -134,7 +134,7 @@ static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
{
int cpu = smp_processor_id();
kvmppc_set_host_ipi(cpu, 0);
kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
return smp_ipi_demux();
......@@ -157,7 +157,7 @@ void icp_opal_flush_interrupt(void)
if (vec == XICS_IPI) {
/* Clear pending IPI */
int cpu = smp_processor_id();
kvmppc_set_host_ipi(cpu, 0);
kvmppc_clear_host_ipi(cpu);
opal_int_set_mfrr(get_hard_smp_processor_id(cpu), 0xff);
} else {
pr_err("XICS: hw interrupt 0x%x to offline cpu, "
......
......@@ -4,6 +4,7 @@ noarg:
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
large_vm_fork_separation
TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
......@@ -19,3 +20,4 @@ $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
This diff is collapsed.
......@@ -17,3 +17,4 @@ tm-vmx-unavail
tm-unavailable
tm-trap
tm-sigreturn
tm-poison
......@@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \
tm-signal-context-force-tm
tm-signal-context-force-tm tm-poison
top_srcdir = ../../../../..
include ../../lib.mk
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2019, Gustavo Romero, Michael Neuling, IBM Corp.
*
* This test will spawn two processes. Both will be attached to the same
* CPU (CPU 0). The child will be in a loop writing to FP register f31 and
* VMX/VEC/Altivec register vr31 a known value, called poison, calling
* sched_yield syscall after to allow the parent to switch on the CPU.
* Parent will set f31 and vr31 to 1 and in a loop will check if f31 and
* vr31 remain 1 as expected until a given timeout (2m). If the issue is
* present child's poison will leak into parent's f31 or vr31 registers,
* otherwise, poison will never leak into parent's f31 and vr31 registers.
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <sched.h>
#include <sys/types.h>
#include <signal.h>
#include <inttypes.h>
#include "tm.h"
/*
 * Run the poison-leak check for f31 and then for vr31.
 *
 * A child process pinned to CPU 0 keeps writing a poison value to f31 and
 * vr31. The parent, pinned to the same CPU, sets the register under test
 * to 1 and re-reads it inside transactions until a timeout expires.
 *
 * Returns 0 when both registers kept the value 1, and non-zero when a
 * foreign value was observed or when the test could not be set up
 * (CPU pinning or fork() failed).
 */
int tm_poison_test(void)
{
	int pid;
	cpu_set_t cpuset;
	uint64_t poison = 0xdeadbeefc0dec0fe;
	uint64_t unknown = 0;
	bool fail_fp = false;
	bool fail_vr = false;

	SKIP_IF(!have_htm());

	/* Attach both Child and Parent to CPU 0 */
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	/*
	 * The test relies on parent and child sharing one CPU; without the
	 * pinning it would run for minutes and pass without testing anything.
	 */
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
		perror("sched_setaffinity");
		return 1;
	}

	pid = fork();
	if (pid < 0) {
		/*
		 * Bail out here: passing pid == -1 to kill() at the end of
		 * the test would signal every process we are allowed to
		 * signal.
		 */
		perror("fork");
		return 1;
	}

	if (!pid) {
		/**
		 * child
		 */
		while (1) {
			sched_yield();
			asm (
				"mtvsrd 31, %[poison];" // f31 = poison
				"mtvsrd 63, %[poison];" // vr31 = poison
				: : [poison] "r" (poison) : );
		}
	}

	/**
	 * parent
	 */
	asm (
		/*
		 * Set r3, r4, and f31 to known value 1 before entering
		 * in transaction. They won't be written after that.
		 */
		" li 3, 0x1 ;"
		" li 4, 0x1 ;"
		" mtvsrd 31, 4 ;"
		/*
		 * The Time Base (TB) is a 64-bit counter register that is
		 * independent of the CPU clock and which is incremented
		 * at a frequency of 512000000 Hz, so every 1.953125ns.
		 * A 2 minute timeout therefore takes
		 * 120s/0.000000001953125s = 61440000000 increments. Below we
		 * set that value in r5 and then use r6 to track initial TB
		 * value, updating TB values in r7 at every iteration and
		 * comparing it to r6. When r7 (current) - r6 (initial) >
		 * 61440000000 we bail out since for sure we spent already
		 * 2 minutes in the loop.
		 * SPR 268 is the TB register.
		 */
		" lis 5, 14 ;"
		" ori 5, 5, 19996 ;"
		" sldi 5, 5, 16 ;" // r5 = 61440000000
		" mfspr 6, 268 ;" // r6 (TB initial)
		"1: mfspr 7, 268 ;" // r7 (TB current)
		" subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
		" cmpd 7, 5 ;"
		" bgt 3f ;" // yes, exit
		/*
		 * Main loop to check f31
		 */
		" tbegin. ;" // no, try again
		" beq 1b ;" // restart if no timeout
		" mfvsrd 3, 31 ;" // read f31
		" cmpd 3, 4 ;" // f31 == 1 ?
		" bne 2f ;" // broken :-(
		" tabort. 3 ;" // try another transaction
		"2: tend. ;" // commit transaction
		"3: mr %[unknown], 3 ;" // record r3
		: [unknown] "=r" (unknown)
		:
		: "cr0", "r3", "r4", "r5", "r6", "r7", "vs31"
	);

	/*
	 * On leak 'unknown' will contain 'poison' value from child,
	 * otherwise (no leak) 'unknown' will contain the same value
	 * as r3 before entering in transactional mode, i.e. 0x1.
	 */
	fail_fp = unknown != 0x1;
	if (fail_fp)
		printf("Unknown value %#"PRIx64" leaked into f31!\n", unknown);
	else
		printf("Good, no poison or leaked value into FP registers\n");

	asm (
		/*
		 * Set r3, r4, and vr31 to known value 1 before entering
		 * in transaction. They won't be written after that.
		 */
		" li 3, 0x1 ;"
		" li 4, 0x1 ;"
		" mtvsrd 63, 4 ;"
		" lis 5, 14 ;"
		" ori 5, 5, 19996 ;"
		" sldi 5, 5, 16 ;" // r5 = 61440000000
		" mfspr 6, 268 ;" // r6 (TB initial)
		"1: mfspr 7, 268 ;" // r7 (TB current)
		" subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ?
		" cmpd 7, 5 ;"
		" bgt 3f ;" // yes, exit
		/*
		 * Main loop to check vr31
		 */
		" tbegin. ;" // no, try again
		" beq 1b ;" // restart if no timeout
		" mfvsrd 3, 63 ;" // read vr31
		" cmpd 3, 4 ;" // vr31 == 1 ?
		" bne 2f ;" // broken :-(
		" tabort. 3 ;" // try another transaction
		"2: tend. ;" // commit transaction
		"3: mr %[unknown], 3 ;" // record r3
		: [unknown] "=r" (unknown)
		:
		: "cr0", "r3", "r4", "r5", "r6", "r7", "vs63"
	);

	/*
	 * On leak 'unknown' will contain 'poison' value from child,
	 * otherwise (no leak) 'unknown' will contain the same value
	 * as r3 before entering in transactional mode, i.e. 0x1.
	 */
	fail_vr = unknown != 0x1;
	if (fail_vr)
		printf("Unknown value %#"PRIx64" leaked into vr31!\n", unknown);
	else
		printf("Good, no poison or leaked value into VEC registers\n");

	/* pid is a real child here (fork() failure returned early). */
	kill(pid, SIGKILL);

	return (fail_fp | fail_vr);
}
/*
 * Entry point: run tm_poison_test under the selftest harness and hand its
 * result back as the process exit status.
 */
int main(int argc, char *argv[])
{
	int status;

	/* The test takes about 4 minutes, so set the harness timeout to 250. */
	test_harness_set_timeout(250);

	status = test_harness(tm_poison_test, "tm_poison_test");

	return status;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment