Commit 611c0202 authored by Michael Ellerman

Merge branch 'fixes' into next

Merge our fixes branch to bring in some changes that are prerequisites
for work in next.
parents 9a04b0fe 2e7ec190
......@@ -147,6 +147,7 @@ config PPC
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
select ARCH_SPLIT_ARG64 if PPC32
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x
......@@ -285,7 +286,7 @@ config PPC
#
config PPC_LONG_DOUBLE_128
depends on PPC64
depends on PPC64 && ALTIVEC
def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
config PPC_BARRIER_NOSPEC
......
......@@ -32,6 +32,11 @@ static inline void arch_enter_lazy_mmu_mode(void)
if (radix_enabled())
return;
/*
* apply_to_page_range can call us with preempt enabled when
* operating on kernel page tables.
*/
preempt_disable();
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
......@@ -47,6 +52,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
if (batch->index)
__flush_tlb_pending(batch);
batch->active = 0;
preempt_enable();
}
#define arch_flush_lazy_mmu_mode() do {} while (0)
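A minimal sketch of the pattern the hunk above adopts, using hypothetical demo_* names rather than the kernel's ppc64_tlb_batch: because arch_enter_lazy_mmu_mode() can now be reached with preemption enabled, the per-cpu batch is pinned with preempt_disable() and released again when leaving lazy mode.

#include <linux/percpu.h>
#include <linux/preempt.h>

struct demo_tlb_batch {
        int active;
        int index;
};
static DEFINE_PER_CPU(struct demo_tlb_batch, demo_batch);

static inline void demo_enter_lazy_mmu_mode(void)
{
        struct demo_tlb_batch *batch;

        preempt_disable();                      /* pin this CPU's batch */
        batch = this_cpu_ptr(&demo_batch);
        batch->active = 1;
}

static inline void demo_leave_lazy_mmu_mode(void)
{
        struct demo_tlb_batch *batch = this_cpu_ptr(&demo_batch);

        if (batch->index)
                batch->index = 0;               /* stand-in for __flush_tlb_pending() */
        batch->active = 0;
        preempt_enable();                       /* matches demo_enter_lazy_mmu_mode() */
}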
......
......@@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs)
/* kernel/traps.c */
DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
#ifdef CONFIG_PPC_BOOK3S_64
DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
#endif
DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
......
......@@ -104,6 +104,13 @@ long sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
unsigned long len1, unsigned long len2);
long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
size_t len, int advice);
long sys_ppc_sync_file_range2(int fd, unsigned int flags,
unsigned int offset1,
unsigned int offset2,
unsigned int nbytes1,
unsigned int nbytes2);
long sys_ppc_fallocate(int fd, int mode, u32 offset1, u32 offset2,
u32 len1, u32 len2);
#endif
#ifdef CONFIG_COMPAT
long compat_sys_mmap2(unsigned long addr, size_t len,
......
......@@ -813,6 +813,13 @@ kernel_dbg_exc:
EXCEPTION_COMMON(0x260)
CHECK_NAPPING()
addi r3,r1,STACK_FRAME_OVERHEAD
/*
* XXX: Returning from performance_monitor_exception taken as a
* soft-NMI (Linux irqs disabled) may be risky to use interrupt_return
* and could cause bugs in return or elsewhere. That case should just
* restore registers and return. There is a workaround for one known
* problem in interrupt_exit_kernel_prepare().
*/
bl performance_monitor_exception
b interrupt_return
......
......@@ -2357,9 +2357,21 @@ EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
EXC_COMMON_BEGIN(performance_monitor_common)
GEN_COMMON performance_monitor
addi r3,r1,STACK_FRAME_OVERHEAD
bl performance_monitor_exception
lbz r4,PACAIRQSOFTMASK(r13)
cmpdi r4,IRQS_ENABLED
bne 1f
bl performance_monitor_exception_async
b interrupt_return_srr
1:
bl performance_monitor_exception_nmi
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
mtmsrd r9,1
kuap_kernel_restore r9, r10
EXCEPTION_RESTORE_REGS hsrr=0
RFI_TO_KERNEL
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.
......
......@@ -374,10 +374,18 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs);
/*
* CT_WARN_ON comes here via program_check_exception,
* so avoid recursion.
* CT_WARN_ON comes here via program_check_exception, so avoid
* recursion.
*
* Skip the assertion on PMIs on 64e to work around a problem caused
* by NMI PMIs incorrectly taking this interrupt return path, it's
* possible for this to hit after interrupt exit to user switches
* context to user. See also the comment in the performance monitor
* handler in exceptions-64e.S
*/
if (TRAP(regs) != INTERRUPT_PROGRAM)
if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
TRAP(regs) != INTERRUPT_PROGRAM &&
TRAP(regs) != INTERRUPT_PERFMON)
CT_WARN_ON(ct_state() == CONTEXT_USER);
kuap = kuap_get_and_assert_locked();
......
......@@ -532,15 +532,24 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
* Returning to soft-disabled context.
* Check if a MUST_HARD_MASK interrupt has become pending, in which
* case we need to disable MSR[EE] in the return context.
*
* The MSR[EE] check catches among other things the short incoherency
* in hard_irq_disable() between clearing MSR[EE] and setting
* PACA_IRQ_HARD_DIS.
*/
ld r12,_MSR(r1)
andi. r10,r12,MSR_EE
beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
lbz r11,PACAIRQHAPPENED(r13)
andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
beq .Lfast_kernel_interrupt_return_\srr\() // No HARD_MASK pending
bne 1f // HARD_MASK is pending
// No HARD_MASK pending, clear possible HARD_DIS set by interrupt
andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
stb r11,PACAIRQHAPPENED(r13)
b .Lfast_kernel_interrupt_return_\srr\()
/* Must clear MSR_EE from _MSR */
1: /* Must clear MSR_EE from _MSR */
#ifdef CONFIG_PPC_BOOK3S
li r10,0
/* Clear valid before changing _MSR */
......
......@@ -112,7 +112,7 @@ PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
advice);
}
COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2,
int, fd, unsigned int, flags,
unsigned int, offset1, unsigned int, offset2,
unsigned int, nbytes1, unsigned int, nbytes2)
......@@ -122,3 +122,14 @@ COMPAT_SYSCALL_DEFINE6(ppc_sync_file_range2,
return ksys_sync_file_range(fd, offset, nbytes, flags);
}
#ifdef CONFIG_PPC32
SYSCALL_DEFINE6(ppc_fallocate,
int, fd, int, mode,
u32, offset1, u32, offset2, u32, len1, u32, len2)
{
return ksys_fallocate(fd, mode,
merge_64(offset1, offset2),
merge_64(len1, len2));
}
#endif
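A hedged illustration of what sys_ppc_fallocate() above relies on: with ARCH_SPLIT_ARG64 selected for PPC32 (see the Kconfig hunk at the top of this commit), a 64-bit argument from a 32-bit caller arrives as two 32-bit halves. The demo_merge_64() helper below is a stand-in for the kernel's merge_64() macro, whose definition is not part of this diff.

#include <linux/types.h>

/* Assumed behaviour of merge_64(): recombine high and low 32-bit halves. */
static inline u64 demo_merge_64(u32 high, u32 low)
{
        return ((u64)high << 32) | low;
}

/* e.g. reassembling an offset, assuming offset1 carries the high half: */
static inline u64 demo_offset(u32 offset1, u32 offset2)
{
        return demo_merge_64(offset1, offset2);
}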
......@@ -394,8 +394,11 @@
305 common signalfd sys_signalfd compat_sys_signalfd
306 common timerfd_create sys_timerfd_create
307 common eventfd sys_eventfd
308 common sync_file_range2 sys_sync_file_range2 compat_sys_ppc_sync_file_range2
309 nospu fallocate sys_fallocate compat_sys_fallocate
308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2
308 64 sync_file_range2 sys_sync_file_range2
308 spu sync_file_range2 sys_sync_file_range2
309 32 fallocate sys_ppc_fallocate compat_sys_fallocate
309 64 fallocate sys_fallocate
310 nospu subpage_prot sys_subpage_prot
311 32 timerfd_settime sys_timerfd_settime32
311 64 timerfd_settime sys_timerfd_settime
......
......@@ -142,7 +142,7 @@ SECTIONS
#endif
.data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) {
*(.data.rel.ro*)
*(.data.rel.ro .data.rel.ro.*)
}
.branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) {
......
......@@ -51,6 +51,7 @@ config KVM_BOOK3S_HV_POSSIBLE
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE
......@@ -105,6 +106,7 @@ config KVM_BOOK3S_64_HV
config KVM_BOOK3S_64_PR
tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64
depends on !CONTEXT_TRACKING_USER
select KVM_BOOK3S_PR_POSSIBLE
help
Support running guest kernels in virtual machines on processors
......@@ -190,6 +192,7 @@ config KVM_EXIT_TIMING
config KVM_E500V2
bool "KVM support for PowerPC E500v2 processors"
depends on PPC_E500 && !PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select MMU_NOTIFIER
......@@ -205,6 +208,7 @@ config KVM_E500V2
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
......
......@@ -36,7 +36,17 @@ int exit_vmx_usercopy(void)
{
disable_kernel_altivec();
pagefault_enable();
preempt_enable();
preempt_enable_no_resched();
/*
* Must never explicitly call schedule (including preempt_enable())
* while in a kuap-unlocked user copy, because the AMR register will
* not be saved and restored across context switch. However preempt
* kernels need to be preempted as soon as possible if need_resched is
* set and we are preemptible. The hack here is to schedule a
* decrementer to fire here and reschedule for us if necessary.
*/
if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
set_dec(1);
return 0;
}
......
......@@ -43,6 +43,29 @@
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
#ifdef CONFIG_LOCKDEP
static struct lockdep_map hpte_lock_map =
STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
static void acquire_hpte_lock(void)
{
lock_map_acquire(&hpte_lock_map);
}
static void release_hpte_lock(void)
{
lock_map_release(&hpte_lock_map);
}
#else
static void acquire_hpte_lock(void)
{
}
static void release_hpte_lock(void)
{
}
#endif
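The acquire/release helpers above only feed lockdep; the actual serialization remains the HPTE_LOCK_BIT bit-spinlock taken in native_lock_hpte(). A rough sketch of the same annotation pattern with hypothetical demo_* names:

#include <linux/lockdep.h>
#include <linux/bitops.h>
#include <linux/processor.h>

static struct lockdep_map demo_bitlock_map =
        STATIC_LOCKDEP_MAP_INIT("demo_bitlock", &demo_bitlock_map);

static unsigned long demo_word;

static void demo_bit_lock(void)
{
        lock_map_acquire(&demo_bitlock_map);    /* tell lockdep the lock is held */
        while (test_and_set_bit_lock(0, &demo_word))
                cpu_relax();
}

static void demo_bit_unlock(void)
{
        lock_map_release(&demo_bitlock_map);    /* drop the annotation first */
        clear_bit_unlock(0, &demo_word);
}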
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
int apsize, int ssize)
{
......@@ -220,6 +243,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
acquire_hpte_lock();
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
......@@ -234,6 +258,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
......@@ -243,8 +268,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
unsigned long flags;
int i;
local_irq_save(flags);
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
......@@ -263,8 +291,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++;
}
if (i == HPTES_PER_GROUP)
if (i == HPTES_PER_GROUP) {
local_irq_restore(flags);
return -1;
}
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
......@@ -286,10 +316,13 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory");
local_irq_restore(flags);
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
......@@ -327,6 +360,7 @@ static long native_hpte_remove(unsigned long hpte_group)
return -1;
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
return i;
......@@ -339,6 +373,9 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
int ret = 0, local = 0;
unsigned long irqflags;
local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
......@@ -382,6 +419,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
if (!(flags & HPTE_NOHPTE_UPDATE))
tlbie(vpn, bpsize, apsize, ssize, local);
local_irq_restore(irqflags);
return ret;
}
......@@ -445,6 +484,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid;
long slot;
struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
......@@ -463,6 +505,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
* actual page size will be same.
*/
tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
}
/*
......@@ -476,6 +520,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
unsigned long vsid;
long slot;
struct hash_pte *hptep;
unsigned long flags;
local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
......@@ -493,6 +540,9 @@ static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
/* Invalidate the TLB */
tlbie(vpn, psize, psize, ssize, 0);
local_irq_restore(flags);
return 0;
}
......@@ -517,10 +567,11 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
/* recheck with locks held */
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
else
} else
native_unlock_hpte(hptep);
}
/*
......@@ -580,10 +631,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
/*
* Invalidate the hpte. NOTE: this also unlocks it
*/
/* Invalidate the hpte. NOTE: this also unlocks it */
release_hpte_lock();
hptep->v = 0;
} else
native_unlock_hpte(hptep);
......@@ -765,8 +814,10 @@ static void native_flush_hash_range(unsigned long number, int local)
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
else
else {
release_hpte_lock();
hptep->v = 0;
}
} pte_iterate_hashed_end();
}
......
......@@ -404,7 +404,8 @@ EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
struct change_memory_parms {
unsigned long start, end, newpp;
unsigned int step, nr_cpus, master_cpu;
unsigned int step, nr_cpus;
atomic_t master_cpu;
atomic_t cpu_counter;
};
......@@ -478,7 +479,8 @@ static int change_memory_range_fn(void *data)
{
struct change_memory_parms *parms = data;
if (parms->master_cpu != smp_processor_id())
// First CPU goes through, all others wait.
if (atomic_xchg(&parms->master_cpu, 1) == 1)
return chmem_secondary_loop(parms);
// Wait for all but one CPU (this one) to call-in
......@@ -516,7 +518,7 @@ static bool hash__change_memory_range(unsigned long start, unsigned long end,
chmem_parms.end = end;
chmem_parms.step = step;
chmem_parms.newpp = newpp;
chmem_parms.master_cpu = smp_processor_id();
atomic_set(&chmem_parms.master_cpu, 0);
cpus_read_lock();
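A minimal sketch of the election the rewritten change_memory_range_fn() performs, with hypothetical demo_* names: atomic_xchg() returns the previous value, so exactly one caller sees 0 and takes the master path, while every later caller sees 1.

#include <linux/atomic.h>

static atomic_t demo_master_claimed = ATOMIC_INIT(0);

static int demo_change_range_fn(void *data)
{
        if (atomic_xchg(&demo_master_claimed, 1) == 1) {
                /* Master already claimed: behave as a secondary CPU. */
                return 0;
        }

        /* Master path: perform the one-time work here. */
        return 0;
}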
......
......@@ -1981,7 +1981,7 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
}
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static DEFINE_SPINLOCK(linear_map_hash_lock);
static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
{
......@@ -2005,10 +2005,10 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
mmu_linear_psize, mmu_kernel_ssize);
BUG_ON (ret < 0);
spin_lock(&linear_map_hash_lock);
raw_spin_lock(&linear_map_hash_lock);
BUG_ON(linear_map_hash_slots[lmi] & 0x80);
linear_map_hash_slots[lmi] = ret | 0x80;
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
}
static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
......@@ -2018,14 +2018,14 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
spin_lock(&linear_map_hash_lock);
raw_spin_lock(&linear_map_hash_lock);
if (!(linear_map_hash_slots[lmi] & 0x80)) {
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
return;
}
hidx = linear_map_hash_slots[lmi] & 0x7f;
linear_map_hash_slots[lmi] = 0;
spin_unlock(&linear_map_hash_lock);
raw_spin_unlock(&linear_map_hash_lock);
if (hidx & _PTEIDX_SECONDARY)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
......
......@@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
{
int i;
/* First arg comes in as a 32 bits pointer. */
EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
/* Initialize tail_call_cnt, to be skipped if we do tail calls. */
EMIT(PPC_RAW_LI(_R4, 0));
#define BPF_TAILCALL_PROLOGUE_SIZE 4
EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
/*
* Initialize tail_call_cnt in stack frame if we do tail calls.
* Otherwise, put in NOPs so that it can be skipped when we are
* invoked through a tail call.
*/
if (ctx->seen & SEEN_TAILCALL)
EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1,
bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
else
EMIT(PPC_RAW_NOP());
EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
#define BPF_TAILCALL_PROLOGUE_SIZE 16
/* First arg comes in as a 32 bits pointer. */
EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
/*
* We need a stack frame, but we don't necessarily need to
......@@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx
for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
if (bpf_is_seen_register(ctx, i))
EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
}
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
bpf_jit_emit_common_epilogue(image, ctx);
/* Tear down our stack frame */
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
/* Tear down our stack frame */
EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
}
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
{
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
bpf_jit_emit_common_epilogue(image, ctx);
EMIT(PPC_RAW_BLR());
}
......@@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
/*
* if (prog == NULL)
......@@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
/* goto *(prog->bpf_func + prologue_size); */
EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
if (ctx->seen & SEEN_FUNC)
EMIT(PPC_RAW_MTLR(_R0));
EMIT(PPC_RAW_MTCTR(_R3));
EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
/* Put tail_call_cnt in r4 */
EMIT(PPC_RAW_MR(_R4, _R0));
/* tear restore NVRs, ... */
bpf_jit_emit_common_epilogue(image, ctx);
......
......@@ -35,6 +35,7 @@
#include <asm/drmem.h>
#include "pseries.h"
#include "vas.h" /* pseries_vas_dlpar_cpu() */
/*
* This isn't a module but we expose that to userspace
......@@ -748,6 +749,16 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
return -EINVAL;
retval = update_ppp(new_entitled_ptr, NULL);
if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
/*
* The hypervisor assigns VAS resources based
* on entitled capacity for shared mode.
* Reconfig VAS windows based on DLPAR CPU events.
*/
if (pseries_vas_dlpar_cpu() != 0)
retval = H_HARDWARE;
}
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
......
......@@ -200,16 +200,41 @@ static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
struct vas_user_win_ref *tsk_ref;
int rc;
rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
if (!rc) {
tsk_ref = &txwin->vas_win.task_ref;
vas_dump_crb(&crb);
vas_update_csb(&crb, tsk_ref);
while (atomic_read(&txwin->pending_faults)) {
rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
if (!rc) {
tsk_ref = &txwin->vas_win.task_ref;
vas_dump_crb(&crb);
vas_update_csb(&crb, tsk_ref);
}
atomic_dec(&txwin->pending_faults);
}
return IRQ_HANDLED;
}
/*
* irq_default_primary_handler() can be used only with IRQF_ONESHOT
* which disables IRQ before executing the thread handler and enables
* it after. But this disabling interrupt sets the VAS IRQ OFF
* state in the hypervisor. If the NX generates fault interrupt
* during this window, the hypervisor will not deliver this
* interrupt to the LPAR. So use VAS specific IRQ handler instead
* of calling the default primary handler.
*/
static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
{
struct pseries_vas_window *txwin = data;
/*
* The thread handler will process this interrupt if it is
* already running.
*/
atomic_inc(&txwin->pending_faults);
return IRQ_WAKE_THREAD;
}
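A self-contained sketch of the primary/threaded handler pairing described in the comment above, with hypothetical demo_* names: the primary handler only records the event and wakes the thread, so the interrupt line never has to be masked with IRQF_ONESHOT.

#include <linux/interrupt.h>
#include <linux/atomic.h>

struct demo_win {
        atomic_t pending;
};

static irqreturn_t demo_primary_handler(int irq, void *data)
{
        struct demo_win *w = data;

        atomic_inc(&w->pending);        /* record the fault, do no real work */
        return IRQ_WAKE_THREAD;
}

static irqreturn_t demo_thread_fn(int irq, void *data)
{
        struct demo_win *w = data;

        while (atomic_read(&w->pending)) {
                /* drain one queued fault here */
                atomic_dec(&w->pending);
        }
        return IRQ_HANDLED;
}

static int demo_register_irq(int virq, struct demo_win *w)
{
        /* No IRQF_ONESHOT: the IRQ stays enabled while the thread runs. */
        return request_threaded_irq(virq, demo_primary_handler,
                                    demo_thread_fn, 0, "demo-vas", w);
}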
/*
* Allocate window and setup IRQ mapping.
*/
......@@ -240,8 +265,9 @@ static int allocate_setup_window(struct pseries_vas_window *txwin,
goto out_irq;
}
rc = request_threaded_irq(txwin->fault_virq, NULL,
pseries_vas_fault_thread_fn, IRQF_ONESHOT,
rc = request_threaded_irq(txwin->fault_virq,
pseries_vas_irq_handler,
pseries_vas_fault_thread_fn, 0,
txwin->name, txwin);
if (rc) {
pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
......@@ -826,6 +852,25 @@ int vas_reconfig_capabilties(u8 type, int new_nr_creds)
mutex_unlock(&vas_pseries_mutex);
return rc;
}
int pseries_vas_dlpar_cpu(void)
{
int new_nr_creds, rc;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
if (!rc) {
new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
}
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
return rc;
}
/*
* Total number of default credits available (target_credits)
* in LPAR depends on number of cores configured. It varies based on
......@@ -840,7 +885,15 @@ static int pseries_vas_notifier(struct notifier_block *nb,
struct of_reconfig_data *rd = data;
struct device_node *dn = rd->dn;
const __be32 *intserv = NULL;
int new_nr_creds, len, rc = 0;
int len;
/*
* For shared CPU partition, the hypervisor assigns total credits
* based on entitled core capacity. So updating VAS windows will
* be called from lparcfg_write().
*/
if (is_shared_processor())
return NOTIFY_OK;
if ((action == OF_RECONFIG_ATTACH_NODE) ||
(action == OF_RECONFIG_DETACH_NODE))
......@@ -852,19 +905,7 @@ static int pseries_vas_notifier(struct notifier_block *nb,
if (!intserv)
return NOTIFY_OK;
rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
(u64)virt_to_phys(&hv_cop_caps));
if (!rc) {
new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE,
new_nr_creds);
}
if (rc)
pr_err("Failed reconfig VAS capabilities with DLPAR\n");
return rc;
return pseries_vas_dlpar_cpu();
}
static struct notifier_block pseries_vas_nb = {
......
......@@ -132,6 +132,7 @@ struct pseries_vas_window {
u64 flags;
char *name;
int fault_virq;
atomic_t pending_faults; /* Number of pending faults */
};
int sysfs_add_vas_caps(struct vas_cop_feat_caps *caps);
......@@ -140,10 +141,15 @@ int __init sysfs_pseries_vas_init(struct vas_all_caps *vas_caps);
#ifdef CONFIG_PPC_VAS
int vas_migration_handler(int action);
int pseries_vas_dlpar_cpu(void);
#else
static inline int vas_migration_handler(int action)
{
return 0;
}
static inline int pseries_vas_dlpar_cpu(void)
{
return 0;
}
#endif
#endif /* _VAS_H */
......@@ -15,7 +15,7 @@
#endif
#ifndef compat_arg_u64
#ifdef CONFIG_CPU_BIG_ENDIAN
#ifndef CONFIG_CPU_BIG_ENDIAN
#define compat_arg_u64(name) u32 name##_lo, u32 name##_hi
#define compat_arg_u64_dual(name) u32, name##_lo, u32, name##_hi
#else
......
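A small user-space illustration, not kernel code, of what the corrected #ifndef above encodes: a 64-bit argument reaches a compat syscall as two 32-bit halves, and which parameter carries the high half depends on byte order, so the macro has to name them accordingly.

#include <stdint.h>
#include <stdio.h>

/* Reassemble a 64-bit value from its high and low 32-bit halves. */
static uint64_t join64(uint32_t hi, uint32_t lo)
{
        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        uint32_t first = 0x00000001, second = 0x00000002;

        /* A big-endian 32-bit ABI passes (hi, lo); little-endian passes (lo, hi). */
        printf("big-endian order:    0x%llx\n",
               (unsigned long long)join64(first, second));
        printf("little-endian order: 0x%llx\n",
               (unsigned long long)join64(second, first));
        return 0;
}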