Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
da0e7e62
Commit
da0e7e62
authored
Feb 14, 2017
by
Michael Ellerman
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'topic/ppc-kvm' into next
Merge the topic branch we're sharing with the kvm-ppc tree.
parents
a05ef161
ab9bad0e
Changes
37
Hide whitespace changes
Inline
Side-by-side
Showing
37 changed files
with
1619 additions
and
249 deletions
+1619
-249
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/api.txt
+83
-0
arch/powerpc/include/asm/book3s/64/mmu.h
arch/powerpc/include/asm/book3s/64/mmu.h
+17
-1
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/exception-64s.h
+63
-20
arch/powerpc/include/asm/head-64.h
arch/powerpc/include/asm/head-64.h
+1
-1
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/hvcall.h
+11
-0
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s.h
+25
-1
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_book3s_64.h
+6
-0
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_host.h
+6
-0
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/kvm_ppc.h
+2
-0
arch/powerpc/include/asm/opal.h
arch/powerpc/include/asm/opal.h
+0
-7
arch/powerpc/include/asm/prom.h
arch/powerpc/include/asm/prom.h
+13
-4
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/reg.h
+4
-0
arch/powerpc/include/uapi/asm/kvm.h
arch/powerpc/include/uapi/asm/kvm.h
+20
-0
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/asm-offsets.c
+2
-0
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/exceptions-64s.S
+33
-30
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/kernel/idle_book3s.S
+3
-3
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/prom_init.c
+17
-1
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/Makefile
+2
-1
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s.c
+1
-0
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
+67
-43
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/kvm/book3s_64_mmu_radix.c
+716
-0
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv.c
+181
-24
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_builtin.c
+14
-16
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
+23
-2
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/kvm/book3s_hv_rm_xics.c
+8
-10
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_hv_rmhandlers.S
+127
-27
arch/powerpc/kvm/book3s_segment.S
arch/powerpc/kvm/book3s_segment.S
+25
-7
arch/powerpc/kvm/powerpc.c
arch/powerpc/kvm/powerpc.c
+32
-0
arch/powerpc/mm/init-common.c
arch/powerpc/mm/init-common.c
+2
-1
arch/powerpc/mm/init_64.c
arch/powerpc/mm/init_64.c
+35
-0
arch/powerpc/mm/pgtable-radix.c
arch/powerpc/mm/pgtable-radix.c
+2
-0
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/pgtable_64.c
+13
-3
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/powernv/opal-wrappers.S
+29
-41
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci-ioda.c
+0
-5
arch/powerpc/platforms/pseries/firmware.c
arch/powerpc/platforms/pseries/firmware.c
+1
-1
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/lpar.c
+29
-0
include/uapi/linux/kvm.h
include/uapi/linux/kvm.h
+6
-0
No files found.
Documentation/virtual/kvm/api.txt
View file @
da0e7e62
...
...
@@ -3201,6 +3201,71 @@ struct kvm_reinject_control {
pit_reinject = 0 (!reinject mode) is recommended, unless running an old
operating system that uses the PIT for timing (e.g. Linux 2.4.x).
4.99 KVM_PPC_CONFIGURE_V3_MMU
Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
Architectures: ppc
Type: vm ioctl
Parameters: struct kvm_ppc_mmuv3_cfg (in)
Returns: 0 on success,
-EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
-EINVAL if the configuration is invalid
This ioctl controls whether the guest will use radix or HPT (hashed
page table) translation, and sets the pointer to the process table for
the guest.
struct kvm_ppc_mmuv3_cfg {
__u64 flags;
__u64 process_table;
};
There are two bits that can be set in flags: KVM_PPC_MMUV3_RADIX and
KVM_PPC_MMUV3_GTSE. KVM_PPC_MMUV3_RADIX, if set, configures the guest
to use radix tree translation, and if clear, to use HPT translation.
KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest
to be able to use the global TLB and SLB invalidation instructions;
if clear, the guest may not use these instructions.
The process_table field specifies the address and size of the guest
process table, which is in the guest's space. This field is formatted
as the second doubleword of the partition table entry, as defined in
the Power ISA V3.00, Book III section 5.7.6.1.
4.100 KVM_PPC_GET_RMMU_INFO
Capability: KVM_CAP_PPC_RADIX_MMU
Architectures: ppc
Type: vm ioctl
Parameters: struct kvm_ppc_rmmu_info (out)
Returns: 0 on success,
-EFAULT if struct kvm_ppc_rmmu_info cannot be written,
-EINVAL if no useful information can be returned
This ioctl returns a structure containing two things: (a) a list
containing supported radix tree geometries, and (b) a list that maps
page sizes to the values to put in the "AP" (actual page size) field
for the tlbie (TLB invalidate entry) instruction.
struct kvm_ppc_rmmu_info {
struct kvm_ppc_radix_geom {
__u8 page_shift;
__u8 level_bits[4];
__u8 pad[3];
} geometries[8];
__u32 ap_encodings[8];
};
The geometries[] field gives up to 8 supported geometries for the
radix page table, in terms of the log base 2 of the smallest page
size, and the number of bits indexed at each level of the tree, from
the PTE level up to the PGD level in that order. Any unused entries
will have 0 in the page_shift field.
The ap_encodings[] field gives the supported page sizes and their AP
field encodings; each entry is encoded with the AP value in the top 3
bits and the log base 2 of the page size in the bottom 6 bits.
5. The kvm_run structure
------------------------
...
...
@@ -3942,3 +4007,21 @@ In order to use SynIC, it has to be activated by setting this
capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
will disable the use of APIC hardware virtualization even if supported
by the CPU, as it's incompatible with SynIC auto-EOI behavior.
8.3 KVM_CAP_PPC_RADIX_MMU
Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that the kernel can support guests using the
radix MMU defined in Power ISA V3.00 (as implemented in the POWER9
processor).
8.4 KVM_CAP_PPC_HASH_MMU_V3
Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that the kernel can support guests using the
hashed page table MMU defined in Power ISA V3.00 (as implemented in
the POWER9 processor), including in-memory segment tables.
arch/powerpc/include/asm/book3s/64/mmu.h
View file @
da0e7e62
...
...
@@ -44,10 +44,20 @@ struct patb_entry {
};
extern
struct
patb_entry
*
partition_tb
;
/* Bits in patb0 field */
#define PATB_HR (1UL << 63)
#define PATB_GR (1UL << 63)
#define RPDB_MASK 0x0ffffffffffff00fUL
#define RPDB_SHIFT (1UL << 8)
#define RTS1_SHIFT 61
/* top 2 bits of radix tree size */
#define RTS1_MASK (3UL << RTS1_SHIFT)
#define RTS2_SHIFT 5
/* bottom 3 bits of radix tree size */
#define RTS2_MASK (7UL << RTS2_SHIFT)
#define RPDS_MASK 0x1f
/* root page dir. size field */
/* Bits in patb1 field */
#define PATB_GR (1UL << 63)
/* guest uses radix; must match HR */
#define PRTS_MASK 0x1f
/* process table size field */
/*
* Limit process table to PAGE_SIZE table. This
* also limits the max pid we can support.
...
...
@@ -138,5 +148,11 @@ static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
extern
int
(
*
register_process_table
)(
unsigned
long
base
,
unsigned
long
page_size
,
unsigned
long
tbl_size
);
#ifdef CONFIG_PPC_PSERIES
extern
void
radix_init_pseries
(
void
);
#else
static
inline
void
radix_init_pseries
(
void
)
{
};
#endif
#endif
/* __ASSEMBLY__ */
#endif
/* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
arch/powerpc/include/asm/exception-64s.h
View file @
da0e7e62
...
...
@@ -97,6 +97,15 @@
ld reg,PACAKBASE(r13); \
ori reg,reg,(ABS_ADDR(label))@l;
/*
* Branches from unrelocated code (e.g., interrupts) to labels outside
* head-y require >64K offsets.
*/
#define __LOAD_FAR_HANDLER(reg, label) \
ld reg,PACAKBASE(r13); \
ori reg,reg,(ABS_ADDR(label))@l; \
addis reg,reg,(ABS_ADDR(label))@h;
/* Exception register prefixes */
#define EXC_HV H
#define EXC_STD
...
...
@@ -227,13 +236,49 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
#define BRANCH_LINK_TO_FAR(reg, label) \
__LOAD_FAR_HANDLER(reg, label); \
mtctr reg; \
bctrl
/*
* KVM requires __LOAD_FAR_HANDLER.
*
* __BRANCH_TO_KVM_EXIT branches are also a special case because they
* explicitly use r9 then reload it from PACA before branching. Hence
* the double-underscore.
*/
#define __BRANCH_TO_KVM_EXIT(area, label) \
mfctr r9; \
std r9,HSTATE_SCRATCH1(r13); \
__LOAD_FAR_HANDLER(r9, label); \
mtctr r9; \
ld r9,area+EX_R9(r13); \
bctr
#define BRANCH_TO_KVM(reg, label) \
__LOAD_FAR_HANDLER(reg, label); \
mtctr reg; \
bctr
#else
#define BRANCH_TO_COMMON(reg, label) \
b label
#define BRANCH_LINK_TO_FAR(reg, label) \
bl label
#define BRANCH_TO_KVM(reg, label) \
b label
#define __BRANCH_TO_KVM_EXIT(area, label) \
ld r9,area+EX_R9(r13); \
b label
#endif
#define __KVM_HANDLER_PROLOG(area, n) \
#define __KVM_HANDLER(area, h, n) \
BEGIN_FTR_SECTION_NESTED(947) \
ld r10,area+EX_CFAR(r13); \
std r10,HSTATE_CFAR(r13); \
...
...
@@ -243,30 +288,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,HSTATE_PPR(r13); \
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
ld r10,area+EX_R10(r13); \
stw r9,HSTATE_SCRATCH1(r13); \
ld r9,area+EX_R9(r13); \
std r12,HSTATE_SCRATCH0(r13); \
#define __KVM_HANDLER(area, h, n) \
__KVM_HANDLER_PROLOG(area, n) \
li r12,n; \
b kvmppc_interrupt
sldi r12,r9,32; \
ori r12,r12,(n); \
/* This reloads r9 before branching to kvmppc_interrupt */
\
__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt)
#define __KVM_HANDLER_SKIP(area, h, n) \
cmpwi r10,KVM_GUEST_MODE_SKIP; \
ld r10,area+EX_R10(r13); \
beq 89f; \
stw r9,HSTATE_SCRATCH1(r13); \
BEGIN_FTR_SECTION_NESTED(948) \
ld r
9
,area+EX_PPR(r13); \
std r
9
,HSTATE_PPR(r13); \
ld r
10
,area+EX_PPR(r13); \
std r
10
,HSTATE_PPR(r13); \
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
ld r
9,area+EX_R9
(r13); \
ld r
10,area+EX_R10
(r13); \
std r12,HSTATE_SCRATCH0(r13); \
li r12,n; \
b kvmppc_interrupt; \
sldi r12,r9,32; \
ori r12,r12,(n); \
/* This reloads r9 before branching to kvmppc_interrupt */
\
__BRANCH_TO_KVM_EXIT(area, kvmppc_interrupt); \
89: mtocrf 0x80,r9; \
ld r9,area+EX_R9(r13); \
ld r10,area+EX_R10(r13); \
b kvmppc_skip_##h##interrupt
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
...
...
@@ -393,12 +436,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_STD)
#define STD_RELON_EXCEPTION_HV(loc, vec, label) \
/* No guest interrupts come through here */
\
SET_SCRATCH0(r13);
/* save r13 */
\
EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, EXC_HV, NOTEST, vec);
EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label, \
EXC_HV, KVMTEST_HV, vec);
#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
EXCEPTION_PROLOG_1(PACA_EXGEN,
NOTEST, vec);
\
EXCEPTION_PROLOG_1(PACA_EXGEN,
KVMTEST_HV, vec);
\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/* This associates vector numbers with bits in paca->irq_happened */
...
...
@@ -475,10 +518,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
EXC_HV, SOFTEN_
NO
TEST_HV)
EXC_HV, SOFTEN_TEST_HV)
#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_
NO
TEST_HV, vec); \
EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
/*
...
...
arch/powerpc/include/asm/head-64.h
View file @
da0e7e62
...
...
@@ -224,7 +224,7 @@ end_##sname:
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#define TRAMP_KVM_BEGIN(name) \
TRAMP_
REAL
_BEGIN(name)
TRAMP_
VIRT
_BEGIN(name)
#else
#define TRAMP_KVM_BEGIN(name)
#endif
...
...
arch/powerpc/include/asm/hvcall.h
View file @
da0e7e62
...
...
@@ -278,6 +278,7 @@
#define H_CLEAR_HPT 0x358
#define H_RESIZE_HPT_PREPARE 0x36C
#define H_RESIZE_HPT_COMMIT 0x370
#define H_REGISTER_PROC_TBL 0x37C
#define H_SIGNAL_SYS_RESET 0x380
#define MAX_HCALL_OPCODE H_SIGNAL_SYS_RESET
...
...
@@ -315,6 +316,16 @@
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
/* >= 0 values are CPU number */
/* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18
#define PROC_TABLE_DEREG 0x10
#define PROC_TABLE_NEW 0x18
#define PROC_TABLE_TYPE_MASK 0x06
#define PROC_TABLE_HPT_SLB 0x00
#define PROC_TABLE_HPT_PT 0x02
#define PROC_TABLE_RADIX 0x04
#define PROC_TABLE_GTSE 0x01
#ifndef __ASSEMBLY__
/**
...
...
arch/powerpc/include/asm/kvm_book3s.h
View file @
da0e7e62
...
...
@@ -170,6 +170,8 @@ extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
unsigned
long
status
);
extern
long
kvmppc_hv_find_lock_hpte
(
struct
kvm
*
kvm
,
gva_t
eaddr
,
unsigned
long
slb_v
,
unsigned
long
valid
);
extern
int
kvmppc_hv_emulate_mmio
(
struct
kvm_run
*
run
,
struct
kvm_vcpu
*
vcpu
,
unsigned
long
gpa
,
gva_t
ea
,
int
is_store
);
extern
void
kvmppc_mmu_hpte_cache_map
(
struct
kvm_vcpu
*
vcpu
,
struct
hpte_cache
*
pte
);
extern
struct
hpte_cache
*
kvmppc_mmu_hpte_cache_next
(
struct
kvm_vcpu
*
vcpu
);
...
...
@@ -182,6 +184,25 @@ extern void kvmppc_mmu_hpte_sysexit(void);
extern
int
kvmppc_mmu_hv_init
(
void
);
extern
int
kvmppc_book3s_hcall_implemented
(
struct
kvm
*
kvm
,
unsigned
long
hc
);
extern
int
kvmppc_book3s_radix_page_fault
(
struct
kvm_run
*
run
,
struct
kvm_vcpu
*
vcpu
,
unsigned
long
ea
,
unsigned
long
dsisr
);
extern
int
kvmppc_mmu_radix_xlate
(
struct
kvm_vcpu
*
vcpu
,
gva_t
eaddr
,
struct
kvmppc_pte
*
gpte
,
bool
data
,
bool
iswrite
);
extern
int
kvmppc_init_vm_radix
(
struct
kvm
*
kvm
);
extern
void
kvmppc_free_radix
(
struct
kvm
*
kvm
);
extern
int
kvmppc_radix_init
(
void
);
extern
void
kvmppc_radix_exit
(
void
);
extern
int
kvm_unmap_radix
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
gfn
);
extern
int
kvm_age_radix
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
gfn
);
extern
int
kvm_test_age_radix
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
gfn
);
extern
long
kvmppc_hv_get_dirty_log_radix
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
*
map
);
extern
int
kvmhv_get_rmmu_info
(
struct
kvm
*
kvm
,
struct
kvm_ppc_rmmu_info
*
info
);
/* XXX remove this export when load_last_inst() is generic */
extern
int
kvmppc_ld
(
struct
kvm_vcpu
*
vcpu
,
ulong
*
eaddr
,
int
size
,
void
*
ptr
,
bool
data
);
extern
void
kvmppc_book3s_queue_irqprio
(
struct
kvm_vcpu
*
vcpu
,
unsigned
int
vec
);
...
...
@@ -211,8 +232,11 @@ extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
extern
long
kvmppc_do_h_remove
(
struct
kvm
*
kvm
,
unsigned
long
flags
,
unsigned
long
pte_index
,
unsigned
long
avpn
,
unsigned
long
*
hpret
);
extern
long
kvmppc_hv_get_dirty_log
(
struct
kvm
*
kvm
,
extern
long
kvmppc_hv_get_dirty_log
_hpt
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
*
map
);
extern
void
kvmppc_harvest_vpa_dirty
(
struct
kvmppc_vpa
*
vpa
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
*
map
);
extern
void
kvmppc_update_lpcr
(
struct
kvm
*
kvm
,
unsigned
long
lpcr
,
unsigned
long
mask
);
extern
void
kvmppc_set_fscr
(
struct
kvm_vcpu
*
vcpu
,
u64
fscr
);
...
...
arch/powerpc/include/asm/kvm_book3s_64.h
View file @
da0e7e62
...
...
@@ -36,6 +36,12 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static
inline
bool
kvm_is_radix
(
struct
kvm
*
kvm
)
{
return
kvm
->
arch
.
radix
;
}
#define KVM_DEFAULT_HPT_ORDER 24
/* 16MB HPT by default */
#endif
...
...
arch/powerpc/include/asm/kvm_host.h
View file @
da0e7e62
...
...
@@ -263,7 +263,11 @@ struct kvm_arch {
unsigned
long
hpt_mask
;
atomic_t
hpte_mod_interest
;
cpumask_t
need_tlb_flush
;
cpumask_t
cpu_in_guest
;
int
hpt_cma_alloc
;
u8
radix
;
pgd_t
*
pgtable
;
u64
process_table
;
struct
dentry
*
debugfs_dir
;
struct
dentry
*
htab_dentry
;
#endif
/* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
...
...
@@ -603,6 +607,7 @@ struct kvm_vcpu_arch {
ulong
fault_dar
;
u32
fault_dsisr
;
unsigned
long
intr_msr
;
ulong
fault_gpa
;
/* guest real address of page fault (POWER9) */
#endif
#ifdef CONFIG_BOOKE
...
...
@@ -657,6 +662,7 @@ struct kvm_vcpu_arch {
int
state
;
int
ptid
;
int
thread_cpu
;
int
prev_cpu
;
bool
timer_running
;
wait_queue_head_t
cpu_run
;
...
...
arch/powerpc/include/asm/kvm_ppc.h
View file @
da0e7e62
...
...
@@ -291,6 +291,8 @@ struct kvmppc_ops {
struct
irq_bypass_producer
*
);
void
(
*
irq_bypass_del_producer
)(
struct
irq_bypass_consumer
*
,
struct
irq_bypass_producer
*
);
int
(
*
configure_mmu
)(
struct
kvm
*
kvm
,
struct
kvm_ppc_mmuv3_cfg
*
cfg
);
int
(
*
get_rmmu_info
)(
struct
kvm
*
kvm
,
struct
kvm_ppc_rmmu_info
*
info
);
};
extern
struct
kvmppc_ops
*
kvmppc_hv_ops
;
...
...
arch/powerpc/include/asm/opal.h
View file @
da0e7e62
...
...
@@ -67,7 +67,6 @@ int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func,
int64_t
opal_pci_config_write_word
(
uint64_t
phb_id
,
uint64_t
bus_dev_func
,
uint64_t
offset
,
uint32_t
data
);
int64_t
opal_set_xive
(
uint32_t
isn
,
uint16_t
server
,
uint8_t
priority
);
int64_t
opal_rm_set_xive
(
uint32_t
isn
,
uint16_t
server
,
uint8_t
priority
);
int64_t
opal_get_xive
(
uint32_t
isn
,
__be16
*
server
,
uint8_t
*
priority
);
int64_t
opal_register_exception_handler
(
uint64_t
opal_exception
,
uint64_t
handler_address
,
...
...
@@ -220,18 +219,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
int64_t
opal_pci_poll2
(
uint64_t
id
,
uint64_t
data
);
int64_t
opal_int_get_xirr
(
uint32_t
*
out_xirr
,
bool
just_poll
);
int64_t
opal_rm_int_get_xirr
(
__be32
*
out_xirr
,
bool
just_poll
);
int64_t
opal_int_set_cppr
(
uint8_t
cppr
);
int64_t
opal_int_eoi
(
uint32_t
xirr
);
int64_t
opal_rm_int_eoi
(
uint32_t
xirr
);
int64_t
opal_int_set_mfrr
(
uint32_t
cpu
,
uint8_t
mfrr
);
int64_t
opal_rm_int_set_mfrr
(
uint32_t
cpu
,
uint8_t
mfrr
);
int64_t
opal_pci_tce_kill
(
uint64_t
phb_id
,
uint32_t
kill_type
,
uint32_t
pe_num
,
uint32_t
tce_size
,
uint64_t
dma_addr
,
uint32_t
npages
);
int64_t
opal_rm_pci_tce_kill
(
uint64_t
phb_id
,
uint32_t
kill_type
,
uint32_t
pe_num
,
uint32_t
tce_size
,
uint64_t
dma_addr
,
uint32_t
npages
);
int64_t
opal_nmmu_set_ptcr
(
uint64_t
chip_id
,
uint64_t
ptcr
);
/* Internal functions */
...
...
arch/powerpc/include/asm/prom.h
View file @
da0e7e62
...
...
@@ -121,6 +121,8 @@ struct of_drconf_cell {
#define OV1_PPC_2_06 0x02
/* set if we support PowerPC 2.06 */
#define OV1_PPC_2_07 0x01
/* set if we support PowerPC 2.07 */
#define OV1_PPC_3_00 0x80
/* set if we support PowerPC 3.00 */
/* Option vector 2: Open Firmware options supported */
#define OV2_REAL_MODE 0x20
/* set if we want OF in real mode */
...
...
@@ -152,10 +154,17 @@ struct of_drconf_cell {
#define OV5_TYPE1_AFFINITY 0x0580
/* Type 1 NUMA affinity */
#define OV5_PRRN 0x0540
/* Platform Resource Reassignment */
#define OV5_RESIZE_HPT 0x0601
/* Hash Page Table resizing */
#define OV5_PFO_HW_RNG 0x0E80
/* PFO Random Number Generator */
#define OV5_PFO_HW_842 0x0E40
/* PFO Compression Accelerator */
#define OV5_PFO_HW_ENCR 0x0E20
/* PFO Encryption Accelerator */
#define OV5_SUB_PROCESSORS 0x0F01
/* 1,2,or 4 Sub-Processors supported */
#define OV5_PFO_HW_RNG 0x1180
/* PFO Random Number Generator */
#define OV5_PFO_HW_842 0x1140
/* PFO Compression Accelerator */
#define OV5_PFO_HW_ENCR 0x1120
/* PFO Encryption Accelerator */
#define OV5_SUB_PROCESSORS 0x1501
/* 1,2,or 4 Sub-Processors supported */
#define OV5_XIVE_EXPLOIT 0x1701
/* XIVE exploitation supported */
#define OV5_MMU_RADIX_300 0x1880
/* ISA v3.00 radix MMU supported */
#define OV5_MMU_HASH_300 0x1840
/* ISA v3.00 hash MMU supported */
#define OV5_MMU_SEGM_RADIX 0x1820
/* radix mode (no segmentation) */
#define OV5_MMU_PROC_TBL 0x1810
/* hcall selects SLB or proc table */
#define OV5_MMU_SLB 0x1800
/* always use SLB */
#define OV5_MMU_GTSE 0x1808
/* Guest translation shootdown */
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02
/* Linux is our OS */
...
...
arch/powerpc/include/asm/reg.h
View file @
da0e7e62
...
...
@@ -274,10 +274,14 @@
#define SPRN_DSISR 0x012
/* Data Storage Interrupt Status Register */
#define DSISR_NOHPTE 0x40000000
/* no translation found */
#define DSISR_PROTFAULT 0x08000000
/* protection fault */
#define DSISR_BADACCESS 0x04000000
/* bad access to CI or G */
#define DSISR_ISSTORE 0x02000000
/* access was a store */
#define DSISR_DABRMATCH 0x00400000
/* hit data breakpoint */
#define DSISR_NOSEGMENT 0x00200000
/* SLB miss */
#define DSISR_KEYFAULT 0x00200000
/* Key fault */
#define DSISR_UNSUPP_MMU 0x00080000
/* Unsupported MMU config */
#define DSISR_SET_RC 0x00040000
/* Failed setting of R/C bits */
#define DSISR_PGDIRFAULT 0x00020000
/* Fault on page directory */
#define SPRN_TBRL 0x10C
/* Time Base Read Lower Register (user, R/O) */
#define SPRN_TBRU 0x10D
/* Time Base Read Upper Register (user, R/O) */
#define SPRN_CIR 0x11B
/* Chip Information Register (hyper, R/O) */
...
...
arch/powerpc/include/uapi/asm/kvm.h
View file @
da0e7e62
...
...
@@ -413,6 +413,26 @@ struct kvm_get_htab_header {
__u16
n_invalid
;
};
/* For KVM_PPC_CONFIGURE_V3_MMU */
struct
kvm_ppc_mmuv3_cfg
{
__u64
flags
;
__u64
process_table
;
/* second doubleword of partition table entry */
};
/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
#define KVM_PPC_MMUV3_RADIX 1
/* 1 = radix mode, 0 = HPT */
#define KVM_PPC_MMUV3_GTSE 2
/* global translation shootdown enb. */
/* For KVM_PPC_GET_RMMU_INFO */
struct
kvm_ppc_rmmu_info
{
struct
kvm_ppc_radix_geom
{
__u8
page_shift
;
__u8
level_bits
[
4
];
__u8
pad
[
3
];
}
geometries
[
8
];
__u32
ap_encodings
[
8
];
};
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
...
...
arch/powerpc/kernel/asm-offsets.c
View file @
da0e7e62
...
...
@@ -498,6 +498,7 @@ int main(void)
DEFINE
(
KVM_NEED_FLUSH
,
offsetof
(
struct
kvm
,
arch
.
need_tlb_flush
.
bits
));
DEFINE
(
KVM_ENABLED_HCALLS
,
offsetof
(
struct
kvm
,
arch
.
enabled_hcalls
));
DEFINE
(
KVM_VRMA_SLB_V
,
offsetof
(
struct
kvm
,
arch
.
vrma_slb_v
));
DEFINE
(
KVM_RADIX
,
offsetof
(
struct
kvm
,
arch
.
radix
));
DEFINE
(
VCPU_DSISR
,
offsetof
(
struct
kvm_vcpu
,
arch
.
shregs
.
dsisr
));
DEFINE
(
VCPU_DAR
,
offsetof
(
struct
kvm_vcpu
,
arch
.
shregs
.
dar
));
DEFINE
(
VCPU_VPA
,
offsetof
(
struct
kvm_vcpu
,
arch
.
vpa
.
pinned_addr
));
...
...
@@ -537,6 +538,7 @@ int main(void)
DEFINE
(
VCPU_SLB_NR
,
offsetof
(
struct
kvm_vcpu
,
arch
.
slb_nr
));
DEFINE
(
VCPU_FAULT_DSISR
,
offsetof
(
struct
kvm_vcpu
,
arch
.
fault_dsisr
));
DEFINE
(
VCPU_FAULT_DAR
,
offsetof
(
struct
kvm_vcpu
,
arch
.
fault_dar
));
DEFINE
(
VCPU_FAULT_GPA
,
offsetof
(
struct
kvm_vcpu
,
arch
.
fault_gpa
));
DEFINE
(
VCPU_INTR_MSR
,
offsetof
(
struct
kvm_vcpu
,
arch
.
intr_msr
));
DEFINE
(
VCPU_LAST_INST
,
offsetof
(
struct
kvm_vcpu
,
arch
.
last_inst
));
DEFINE
(
VCPU_TRAP
,
offsetof
(
struct
kvm_vcpu
,
arch
.
trap
));
...
...
arch/powerpc/kernel/exceptions-64s.S
View file @
da0e7e62
...
...
@@ -142,7 +142,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
lbz
r0
,
HSTATE_HWTHREAD_REQ
(
r13
)
cmpwi
r0
,
0
beq
1
f
b
kvm_start_guest
BRANCH_TO_KVM
(
r10
,
kvm_start_guest
)
1
:
#endif
...
...
@@ -717,13 +717,9 @@ hardware_interrupt_hv:
BEGIN_FTR_SECTION
_MASKABLE_EXCEPTION_PSERIES
(0
x500
,
hardware_interrupt_common
,
EXC_HV
,
SOFTEN_TEST_HV
)
do_kvm_H0x500
:
KVM_HANDLER
(
PACA_EXGEN
,
EXC_HV
,
0x502
)
FTR_SECTION_ELSE
_MASKABLE_EXCEPTION_PSERIES
(0
x500
,
hardware_interrupt_common
,
EXC_STD
,
SOFTEN_TEST_PR
)
do_kvm_0x500
:
KVM_HANDLER
(
PACA_EXGEN
,
EXC_STD
,
0x500
)
ALT_FTR_SECTION_END_IFSET
(
CPU_FTR_HVMODE
|
CPU_FTR_ARCH_206
)
EXC_REAL_END
(
hardware_interrupt
,
0
x500
,
0x100
)
...
...
@@ -737,6 +733,8 @@ hardware_interrupt_relon_hv:
ALT_FTR_SECTION_END_IFSET
(
CPU_FTR_HVMODE
)
EXC_VIRT_END
(
hardware_interrupt
,
0
x4500
,
0x100
)
TRAMP_KVM
(
PACA_EXGEN
,
0
x500
)
TRAMP_KVM_HV
(
PACA_EXGEN
,
0
x500
)
EXC_COMMON_ASYNC
(
hardware_interrupt_common
,
0
x500
,
do_IRQ
)
...
...
@@ -832,6 +830,31 @@ EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
TRAMP_KVM
(
PACA_EXGEN
,
0
xb00
)
EXC_COMMON
(
trap_0b_common
,
0
xb00
,
unknown_exception
)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/
*
*
If
CONFIG_KVM_BOOK3S_64_HANDLER
is
set
,
save
the
PPR
(
on
systems
*
that
support
it
)
before
changing
to
HMT_MEDIUM
.
That
allows
the
KVM
*
code
to
save
that
value
into
the
guest
state
(
it
is
the
guest
's PPR
*
value
)
.
Otherwise
just
change
to
HMT_MEDIUM
as
userspace
has
*
already
saved
the
PPR
.
*/
#define SYSCALL_KVMTEST \
SET_SCRATCH0
(
r13
)
; \
GET_PACA
(
r13
)
; \
std
r9
,
PACA_EXGEN
+
EX_R9
(
r13
)
; \
OPT_GET_SPR
(
r9
,
SPRN_PPR
,
CPU_FTR_HAS_PPR
)
; \
HMT_MEDIUM
; \
std
r10
,
PACA_EXGEN
+
EX_R10
(
r13
)
; \
OPT_SAVE_REG_TO_PACA
(
PACA_EXGEN
+
EX_PPR
,
r9
,
CPU_FTR_HAS_PPR
)
; \
mfcr
r9
; \
KVMTEST_PR
(0
xc00
)
; \
GET_SCRATCH0
(
r13
)
#else
#define SYSCALL_KVMTEST \
HMT_MEDIUM
#endif
#define LOAD_SYSCALL_HANDLER(reg) \
__LOAD_HANDLER
(
reg
,
system_call_common
)
...
...
@@ -885,34 +908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
#endif
EXC_REAL_BEGIN
(
system_call
,
0
xc00
,
0x100
)
/
*
*
If
CONFIG_KVM_BOOK3S_64_HANDLER
is
set
,
save
the
PPR
(
on
systems
*
that
support
it
)
before
changing
to
HMT_MEDIUM
.
That
allows
the
KVM
*
code
to
save
that
value
into
the
guest
state
(
it
is
the
guest
's PPR
*
value
)
.
Otherwise
just
change
to
HMT_MEDIUM
as
userspace
has
*
already
saved
the
PPR
.
*/
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
SET_SCRATCH0
(
r13
)
GET_PACA
(
r13
)
std
r9
,
PACA_EXGEN
+
EX_R9
(
r13
)
OPT_GET_SPR
(
r9
,
SPRN_PPR
,
CPU_FTR_HAS_PPR
)
;
HMT_MEDIUM
;
std
r10
,
PACA_EXGEN
+
EX_R10
(
r13
)
OPT_SAVE_REG_TO_PACA
(
PACA_EXGEN
+
EX_PPR
,
r9
,
CPU_FTR_HAS_PPR
)
;
mfcr
r9
KVMTEST_PR
(0
xc00
)
GET_SCRATCH0
(
r13
)
#else
HMT_MEDIUM
;
#endif
SYSCALL_KVMTEST
SYSCALL_PSERIES_1
SYSCALL_PSERIES_2_RFID
SYSCALL_PSERIES_3
EXC_REAL_END
(
system_call
,
0
xc00
,
0x100
)
EXC_VIRT_BEGIN
(
system_call
,
0
x4c00
,
0x100
)
HMT_MEDIUM
SYSCALL_KVMTEST
SYSCALL_PSERIES_1
SYSCALL_PSERIES_2_DIRECT
SYSCALL_PSERIES_3
...
...
@@ -927,7 +930,7 @@ TRAMP_KVM(PACA_EXGEN, 0xd00)
EXC_COMMON
(
single_step_common
,
0
xd00
,
single_step_exception
)
EXC_REAL_OOL_HV
(
h_data_storage
,
0
xe00
,
0x20
)
EXC_VIRT_
NONE
(
0x4e00
,
0
x2
0
)
EXC_VIRT_
OOL_HV
(
h_data_storage
,
0
x4e00
,
0x20
,
0xe0
0
)
TRAMP_KVM_HV_SKIP
(
PACA_EXGEN
,
0
xe00
)
EXC_COMMON_BEGIN
(
h_data_storage_common
)
mfspr
r10
,
SPRN_HDAR
...
...
@@ -943,7 +946,7 @@ EXC_COMMON_BEGIN(h_data_storage_common)
EXC_REAL_OOL_HV
(
h_instr_storage
,
0
xe20
,
0x20
)
EXC_VIRT_
NONE
(
0x4e20
,
0
x
20
)
EXC_VIRT_
OOL_HV
(
h_instr_storage
,
0
x4e20
,
0x20
,
0xe
20
)
TRAMP_KVM_HV
(
PACA_EXGEN
,
0
xe20
)
EXC_COMMON
(
h_instr_storage_common
,
0
xe20
,
unknown_exception
)
...
...
@@ -979,7 +982,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
EXCEPTION_PROLOG_COMMON_2
(
PACA_EXGEN
)
EXCEPTION_PROLOG_COMMON_3
(0
xe60
)
addi
r3
,
r1
,
STACK_FRAME_OVERHEAD
bl
hmi_exception_realmode
BRANCH_LINK_TO_FAR
(
r4
,
hmi_exception_realmode
)
/
*
Windup
the
stack
.
*/
/
*
Move
original
HSRR0
and
HSRR1
into
the
respective
regs
*/
ld
r9
,
_MSR
(
r1
)
...
...
arch/powerpc/kernel/idle_book3s.S
View file @
da0e7e62
...
...
@@ -248,7 +248,7 @@ fastsleep_workaround_at_entry:
/
*
Fast
sleep
workaround
*/
li
r3
,
1
li
r4
,
1
bl
opal_
rm_
config_cpu_idle_state
bl
opal_config_cpu_idle_state
/
*
Clear
Lock
bit
*/
li
r0
,
0
...
...
@@ -552,7 +552,7 @@ timebase_resync:
*/
ble
cr3
,
clear_lock
/
*
Time
base
re
-
sync
*/
bl
opal_r
m_r
esync_timebase
;
bl
opal_resync_timebase
;
/
*
*
If
waking
up
from
sleep
,
per
core
state
is
not
lost
,
skip
to
*
clear_lock
.
...
...
@@ -641,7 +641,7 @@ hypervisor_state_restored:
fastsleep_workaround_at_exit
:
li
r3
,
1
li
r4
,
0
bl
opal_
rm_
config_cpu_idle_state
bl
opal_config_cpu_idle_state
b
timebase_resync
/*
...
...
arch/powerpc/kernel/prom_init.c
View file @
da0e7e62
...
...
@@ -649,6 +649,7 @@ static void __init early_cmdline_parse(void)
struct
option_vector1
{
u8
byte1
;
u8
arch_versions
;
u8
arch_versions3
;
}
__packed
;
struct
option_vector2
{
...
...
@@ -691,6 +692,9 @@ struct option_vector5 {
u8
reserved2
;
__be16
reserved3
;
u8
subprocessors
;
u8
byte22
;
u8
intarch
;
u8
mmu
;
}
__packed
;
struct
option_vector6
{
...
...
@@ -700,7 +704,7 @@ struct option_vector6 {
}
__packed
;
struct
ibm_arch_vec
{
struct
{
u32
mask
,
val
;
}
pvrs
[
1
0
];
struct
{
u32
mask
,
val
;
}
pvrs
[
1
2
];
u8
num_vectors
;
...
...
@@ -749,6 +753,14 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.
mask
=
cpu_to_be32
(
0xffff0000
),
/* POWER8 */
.
val
=
cpu_to_be32
(
0x004d0000
),
},
{
.
mask
=
cpu_to_be32
(
0xffff0000
),
/* POWER9 */
.
val
=
cpu_to_be32
(
0x004e0000
),
},
{
.
mask
=
cpu_to_be32
(
0xffffffff
),
/* all 3.00-compliant */
.
val
=
cpu_to_be32
(
0x0f000005
),
},
{
.
mask
=
cpu_to_be32
(
0xffffffff
),
/* all 2.07-compliant */
.
val
=
cpu_to_be32
(
0x0f000004
),
...
...
@@ -774,6 +786,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.
byte1
=
0
,
.
arch_versions
=
OV1_PPC_2_00
|
OV1_PPC_2_01
|
OV1_PPC_2_02
|
OV1_PPC_2_03
|
OV1_PPC_2_04
|
OV1_PPC_2_05
|
OV1_PPC_2_06
|
OV1_PPC_2_07
,
.
arch_versions3
=
OV1_PPC_3_00
,
},
.
vec2_len
=
VECTOR_LENGTH
(
sizeof
(
struct
option_vector2
)),
...
...
@@ -836,6 +849,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.
reserved2
=
0
,
.
reserved3
=
0
,
.
subprocessors
=
1
,
.
intarch
=
0
,
.
mmu
=
OV5_FEAT
(
OV5_MMU_RADIX_300
)
|
OV5_FEAT
(
OV5_MMU_HASH_300
)
|
OV5_FEAT
(
OV5_MMU_PROC_TBL
)
|
OV5_FEAT
(
OV5_MMU_GTSE
),
},
/* option vector 6: IBM PAPR hints */
...
...
arch/powerpc/kvm/Makefile
View file @
da0e7e62
...
...
@@ -70,7 +70,8 @@ endif
kvm-hv-y
+=
\
book3s_hv.o
\
book3s_hv_interrupts.o
\
book3s_64_mmu_hv.o
book3s_64_mmu_hv.o
\
book3s_64_mmu_radix.o
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS)
:=
\
book3s_hv_rm_xics.o
...
...
arch/powerpc/kvm/book3s.c
View file @
da0e7e62
...
...
@@ -239,6 +239,7 @@ void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
kvmppc_set_dsisr
(
vcpu
,
flags
);
kvmppc_book3s_queue_irqprio
(
vcpu
,
BOOK3S_INTERRUPT_DATA_STORAGE
);
}
EXPORT_SYMBOL_GPL
(
kvmppc_core_queue_data_storage
);
/* used by kvm_hv */
void
kvmppc_core_queue_inst_storage
(
struct
kvm_vcpu
*
vcpu
,
ulong
flags
)
{
...
...
arch/powerpc/kvm/book3s_64_mmu_hv.c
View file @
da0e7e62
...
...
@@ -119,6 +119,9 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
long
err
=
-
EBUSY
;
long
order
;
if
(
kvm_is_radix
(
kvm
))
return
-
EINVAL
;
mutex_lock
(
&
kvm
->
lock
);
if
(
kvm
->
arch
.
hpte_setup_done
)
{
kvm
->
arch
.
hpte_setup_done
=
0
;
...
...
@@ -152,12 +155,11 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
void
kvmppc_free_hpt
(
struct
kvm
*
kvm
)
{
kvmppc_free_lpid
(
kvm
->
arch
.
lpid
);
vfree
(
kvm
->
arch
.
revmap
);
if
(
kvm
->
arch
.
hpt_cma_alloc
)
kvm_release_hpt
(
virt_to_page
(
kvm
->
arch
.
hpt_virt
),
1
<<
(
kvm
->
arch
.
hpt_order
-
PAGE_SHIFT
));
else
else
if
(
kvm
->
arch
.
hpt_virt
)
free_pages
(
kvm
->
arch
.
hpt_virt
,
kvm
->
arch
.
hpt_order
-
PAGE_SHIFT
);
}
...
...
@@ -392,8 +394,8 @@ static int instruction_is_store(unsigned int instr)
return
(
instr
&
mask
)
!=
0
;
}
static
int
kvmppc_hv_emulate_mmio
(
struct
kvm_run
*
run
,
struct
kvm_vcpu
*
vcpu
,
unsigned
long
gpa
,
gva_t
ea
,
int
is_store
)
int
kvmppc_hv_emulate_mmio
(
struct
kvm_run
*
run
,
struct
kvm_vcpu
*
vcpu
,
unsigned
long
gpa
,
gva_t
ea
,
int
is_store
)
{
u32
last_inst
;
...
...
@@ -458,6 +460,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned
long
rcbits
;
long
mmio_update
;
if
(
kvm_is_radix
(
kvm
))
return
kvmppc_book3s_radix_page_fault
(
run
,
vcpu
,
ea
,
dsisr
);
/*
* Real-mode code has already searched the HPT and found the
* entry we're interested in. Lock the entry and check that
...
...
@@ -695,12 +700,13 @@ static void kvmppc_rmap_reset(struct kvm *kvm)
srcu_read_unlock
(
&
kvm
->
srcu
,
srcu_idx
);
}
typedef
int
(
*
hva_handler_fn
)(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
gfn
);
static
int
kvm_handle_hva_range
(
struct
kvm
*
kvm
,
unsigned
long
start
,
unsigned
long
end
,
int
(
*
handler
)(
struct
kvm
*
kvm
,
unsigned
long
*
rmapp
,
unsigned
long
gfn
))
hva_handler_fn
handler
)
{
int
ret
;
int
retval
=
0
;
...
...
@@ -725,9 +731,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
gfn_end
=
hva_to_gfn_memslot
(
hva_end
+
PAGE_SIZE
-
1
,
memslot
);
for
(;
gfn
<
gfn_end
;
++
gfn
)
{
gfn_t
gfn_offset
=
gfn
-
memslot
->
base_gfn
;
ret
=
handler
(
kvm
,
&
memslot
->
arch
.
rmap
[
gfn_offset
],
gfn
);
ret
=
handler
(
kvm
,
memslot
,
gfn
);
retval
|=
ret
;
}
}
...
...
@@ -736,20 +740,21 @@ static int kvm_handle_hva_range(struct kvm *kvm,
}
static
int
kvm_handle_hva
(
struct
kvm
*
kvm
,
unsigned
long
hva
,
int
(
*
handler
)(
struct
kvm
*
kvm
,
unsigned
long
*
rmapp
,
unsigned
long
gfn
))
hva_handler_fn
handler
)
{
return
kvm_handle_hva_range
(
kvm
,
hva
,
hva
+
1
,
handler
);
}
static
int
kvm_unmap_rmapp
(
struct
kvm
*
kvm
,
unsigned
long
*
rmapp
,
static
int
kvm_unmap_rmapp
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
gfn
)
{
struct
revmap_entry
*
rev
=
kvm
->
arch
.
revmap
;
unsigned
long
h
,
i
,
j
;
__be64
*
hptep
;
unsigned
long
ptel
,
psize
,
rcbits
;
unsigned
long
*
rmapp
;
rmapp
=
&
memslot
->
arch
.
rmap
[
gfn
-
memslot
->
base_gfn
];
for
(;;)
{
lock_rmap
(
rmapp
);
if
(
!
(
*
rmapp
&
KVMPPC_RMAP_PRESENT
))
{
...
...
@@ -810,26 +815,36 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
int
kvm_unmap_hva_hv
(
struct
kvm
*
kvm
,
unsigned
long
hva
)
{
kvm_handle_hva
(
kvm
,
hva
,
kvm_unmap_rmapp
);
hva_handler_fn
handler
;
handler
=
kvm_is_radix
(
kvm
)
?
kvm_unmap_radix
:
kvm_unmap_rmapp
;
kvm_handle_hva
(
kvm
,
hva
,
handler
);
return
0
;
}
int
kvm_unmap_hva_range_hv
(
struct
kvm
*
kvm
,
unsigned
long
start
,
unsigned
long
end
)
{
kvm_handle_hva_range
(
kvm
,
start
,
end
,
kvm_unmap_rmapp
);
hva_handler_fn
handler
;
handler
=
kvm_is_radix
(
kvm
)
?
kvm_unmap_radix
:
kvm_unmap_rmapp
;
kvm_handle_hva_range
(
kvm
,
start
,
end
,
handler
);
return
0
;
}
void
kvmppc_core_flush_memslot_hv
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
)
{
unsigned
long
*
rmapp
;
unsigned
long
gfn
;
unsigned
long
n
;
unsigned
long
*
rmapp
;
rmapp
=
memslot
->
arch
.
rmap
;
gfn
=
memslot
->
base_gfn
;
for
(
n
=
memslot
->
npages
;
n
;
--
n
)
{
rmapp
=
memslot
->
arch
.
rmap
;
for
(
n
=
memslot
->
npages
;
n
;
--
n
,
++
gfn
)
{
if
(
kvm_is_radix
(
kvm
))
{
kvm_unmap_radix
(
kvm
,
memslot
,
gfn
);
continue
;
}
/*
* Testing the present bit without locking is OK because
* the memslot has been marked invalid already, and hence
...
...
@@ -837,20 +852,21 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
* thus the present bit can't go from 0 to 1.
*/
if
(
*
rmapp
&
KVMPPC_RMAP_PRESENT
)
kvm_unmap_rmapp
(
kvm
,
rmapp
,
gfn
);
kvm_unmap_rmapp
(
kvm
,
memslot
,
gfn
);
++
rmapp
;
++
gfn
;
}
}
static
int
kvm_age_rmapp
(
struct
kvm
*
kvm
,
unsigned
long
*
rmapp
,
static
int
kvm_age_rmapp
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
gfn
)
{
struct
revmap_entry
*
rev
=
kvm
->
arch
.
revmap
;
unsigned
long
head
,
i
,
j
;
__be64
*
hptep
;
int
ret
=
0
;
unsigned
long
*
rmapp
;
rmapp
=
&
memslot
->
arch
.
rmap
[
gfn
-
memslot
->
base_gfn
];
retry:
lock_rmap
(
rmapp
);
if
(
*
rmapp
&
KVMPPC_RMAP_REFERENCED
)
{
...
...
@@ -898,17 +914,22 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
int
kvm_age_hva_hv
(
struct
kvm
*
kvm
,
unsigned
long
start
,
unsigned
long
end
)
{
return
kvm_handle_hva_range
(
kvm
,
start
,
end
,
kvm_age_rmapp
);
hva_handler_fn
handler
;
handler
=
kvm_is_radix
(
kvm
)
?
kvm_age_radix
:
kvm_age_rmapp
;
return
kvm_handle_hva_range
(
kvm
,
start
,
end
,
handler
);
}
static
int
kvm_test_age_rmapp
(
struct
kvm
*
kvm
,
unsigned
long
*
rmapp
,
static
int
kvm_test_age_rmapp
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
gfn
)
{
struct
revmap_entry
*
rev
=
kvm
->
arch
.
revmap
;
unsigned
long
head
,
i
,
j
;
unsigned
long
*
hp
;
int
ret
=
1
;
unsigned
long
*
rmapp
;
rmapp
=
&
memslot
->
arch
.
rmap
[
gfn
-
memslot
->
base_gfn
];
if
(
*
rmapp
&
KVMPPC_RMAP_REFERENCED
)
return
1
;
...
...
@@ -934,12 +955,18 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
int
kvm_test_age_hva_hv
(
struct
kvm
*
kvm
,
unsigned
long
hva
)
{
return
kvm_handle_hva
(
kvm
,
hva
,
kvm_test_age_rmapp
);
hva_handler_fn
handler
;
handler
=
kvm_is_radix
(
kvm
)
?
kvm_test_age_radix
:
kvm_test_age_rmapp
;
return
kvm_handle_hva
(
kvm
,
hva
,
handler
);
}
void
kvm_set_spte_hva_hv
(
struct
kvm
*
kvm
,
unsigned
long
hva
,
pte_t
pte
)
{
kvm_handle_hva
(
kvm
,
hva
,
kvm_unmap_rmapp
);
hva_handler_fn
handler
;
handler
=
kvm_is_radix
(
kvm
)
?
kvm_unmap_radix
:
kvm_unmap_rmapp
;
kvm_handle_hva
(
kvm
,
hva
,
handler
);
}
static
int
vcpus_running
(
struct
kvm
*
kvm
)
...
...
@@ -1040,7 +1067,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
return
npages_dirty
;
}
static
void
harvest_vpa_dirty
(
struct
kvmppc_vpa
*
vpa
,
void
kvmppc_
harvest_vpa_dirty
(
struct
kvmppc_vpa
*
vpa
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
*
map
)
{
...
...
@@ -1058,12 +1085,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
__set_bit_le
(
gfn
-
memslot
->
base_gfn
,
map
);
}
long
kvmppc_hv_get_dirty_log
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
*
map
)
long
kvmppc_hv_get_dirty_log
_hpt
(
struct
kvm
*
kvm
,
struct
kvm_memory_slot
*
memslot
,
unsigned
long
*
map
)
{
unsigned
long
i
,
j
;
unsigned
long
*
rmapp
;
struct
kvm_vcpu
*
vcpu
;
preempt_disable
();
rmapp
=
memslot
->
arch
.
rmap
;
...
...
@@ -1079,15 +1105,6 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
__set_bit_le
(
j
,
map
);
++
rmapp
;
}
/* Harvest dirty bits from VPA and DTL updates */
/* Note: we never modify the SLB shadow buffer areas */
kvm_for_each_vcpu
(
i
,
vcpu
,
kvm
)
{
spin_lock
(
&
vcpu
->
arch
.
vpa_update_lock
);
harvest_vpa_dirty
(
&
vcpu
->
arch
.
vpa
,
memslot
,
map
);
harvest_vpa_dirty
(
&
vcpu
->
arch
.
dtl
,
memslot
,
map
);
spin_unlock
(
&
vcpu
->
arch
.
vpa_update_lock
);
}
preempt_enable
();
return
0
;
}
...
...
@@ -1142,10 +1159,14 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
srcu_idx
=
srcu_read_lock
(
&
kvm
->
srcu
);
memslot
=
gfn_to_memslot
(
kvm
,
gfn
);
if
(
memslot
)
{
rmap
=
&
memslot
->
arch
.
rmap
[
gfn
-
memslot
->
base_gfn
];
lock_rmap
(
rmap
);
*
rmap
|=
KVMPPC_RMAP_CHANGED
;
unlock_rmap
(
rmap
);
if
(
!
kvm_is_radix
(
kvm
))
{
rmap
=
&
memslot
->
arch
.
rmap
[
gfn
-
memslot
->
base_gfn
];
lock_rmap
(
rmap
);
*
rmap
|=
KVMPPC_RMAP_CHANGED
;
unlock_rmap
(
rmap
);
}
else
if
(
memslot
->
dirty_bitmap
)
{
mark_page_dirty
(
kvm
,
gfn
);
}
}
srcu_read_unlock
(
&
kvm
->
srcu
,
srcu_idx
);
}
...
...
@@ -1675,7 +1696,10 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
vcpu
->
arch
.
slb_nr
=
32
;
/* POWER7/POWER8 */
mmu
->
xlate
=
kvmppc_mmu_book3s_64_hv_xlate
;
if
(
kvm_is_radix
(
vcpu
->
kvm
))
mmu
->
xlate
=
kvmppc_mmu_radix_xlate
;
else
mmu
->
xlate
=
kvmppc_mmu_book3s_64_hv_xlate
;
mmu
->
reset_msr
=
kvmppc_mmu_book3s_64_hv_reset_msr
;
vcpu
->
arch
.
hflags
|=
BOOK3S_HFLAG_SLB
;
...
...
arch/powerpc/kvm/book3s_64_mmu_radix.c
0 → 100644
View file @
da0e7e62
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
/*
* Supported radix tree geometry.
* Like p9, we support either 5 or 9 bits at the first (lowest) level,
* for a page size of 64k or 4k.
*/
/*
 * Index width (in bits) accepted at each radix tree level, indexed by
 * level number: entry 3 is the root, entry 0 the lowest level.  Entry 0
 * holds the 64k-page value; the software table walk additionally accepts
 * 9 bits at level 0 for 4k pages.  The table is only ever read.
 */
static const int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
/*
 * Translate a guest effective address by walking the guest's radix tree
 * in software.
 *
 * @vcpu:    vcpu whose PID, MSR, AMR and IAMR are consulted
 * @eaddr:   guest effective address to translate
 * @gpte:    filled in with page size, addresses and permissions on success
 * @data:    not used by this implementation
 * @iswrite: not used by this implementation
 *
 * Returns 0 on success, -EINVAL if the address quadrant, the process
 * table index or the tree geometry is not supported, -ENOENT if a
 * non-present entry is reached, or the error from kvm_read_guest().
 */
int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
			   struct kvmppc_pte *gpte, bool data, bool iswrite)
{
	struct kvm *kvm = vcpu->kvm;
	u32 pid;
	int ret, level, ps;
	__be64 prte, rpte;
	unsigned long root, pte, index;
	unsigned long rts, bits, offset;
	unsigned long gpa;
	unsigned long proc_tbl_size;
	unsigned long proc_tbl_base;
	unsigned long prte_offset;

	/* Work out effective PID */
	switch (eaddr >> 62) {
	case 0:
		pid = vcpu->arch.pid;
		break;
	case 3:
		pid = 0;
		break;
	default:
		return -EINVAL;
	}

	/*
	 * The size field can be up to 31, so the shift must be done in
	 * unsigned long; a plain int "1 <<" would overflow.
	 */
	proc_tbl_size = 1ul << ((kvm->arch.process_table & PRTS_MASK) + 12);

	/* Widen before scaling so a large PID cannot wrap in 32 bits */
	prte_offset = (unsigned long)pid * 16;
	if (prte_offset >= proc_tbl_size)
		return -EINVAL;

	/*
	 * The low bits of process_table carry the size field read above;
	 * strip them so they do not end up in the address of the entry.
	 * NOTE(review): this assumes the remaining bits are the table
	 * base with nothing else encoded in them - confirm against the
	 * process_table layout.
	 */
	proc_tbl_base = kvm->arch.process_table & ~(unsigned long)PRTS_MASK;

	/* Read process table to find root of tree for effective PID */
	ret = kvm_read_guest(kvm, proc_tbl_base + prte_offset,
			     &prte, sizeof(prte));
	if (ret)
		return ret;

	root = be64_to_cpu(prte);
	rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
		((root & RTS2_MASK) >> RTS2_SHIFT);
	bits = root & RPDS_MASK;
	root = root & RPDB_MASK;

	/* P9 DD1 interprets RTS (radix tree size) differently */
	offset = rts + 31;
	if (cpu_has_feature(CPU_FTR_POWER9_DD1))
		offset -= 3;

	/* current implementations only support 52-bit space */
	if (offset != 52)
		return -EINVAL;

	/* Walk from the root (level 3) down until a leaf PTE is found */
	for (level = 3; level >= 0; --level) {
		if (level && bits != p9_supported_radix_bits[level])
			return -EINVAL;
		if (level == 0 && !(bits == 5 || bits == 9))
			return -EINVAL;
		offset -= bits;
		index = (eaddr >> offset) & ((1UL << bits) - 1);
		/* check that low bits of page table base are zero */
		if (root & ((1UL << (bits + 3)) - 1))
			return -EINVAL;
		ret = kvm_read_guest(kvm, root + index * 8,
				     &rpte, sizeof(rpte));
		if (ret)
			return ret;
		pte = __be64_to_cpu(rpte);
		if (!(pte & _PAGE_PRESENT))
			return -ENOENT;
		if (pte & _PAGE_PTE)
			break;
		/* Directory entry: pick up next level's size and base */
		bits = pte & 0x1f;
		root = pte & 0x0fffffffffffff00ul;
	}
	/* need a leaf at lowest level; 512GB pages not supported */
	if (level < 0 || level == 3)
		return -EINVAL;

	/* offset is now log base 2 of the page size */
	gpa = pte & 0x01fffffffffff000ul;
	if (gpa & ((1ul << offset) - 1))
		return -EINVAL;
	gpa += eaddr & ((1ul << offset) - 1);

	/*
	 * Map the page shift to an MMU page size index; ps is left at
	 * MMU_PAGE_COUNT when no entry of mmu_psize_defs[] matches.
	 */
	for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
		if (offset == mmu_psize_defs[ps].shift)
			break;
	gpte->page_size = ps;

	gpte->eaddr = eaddr;
	gpte->raddr = gpa;

	/* Work out permissions */
	gpte->may_read = !!(pte & _PAGE_READ);
	gpte->may_write = !!(pte & _PAGE_WRITE);
	gpte->may_execute = !!(pte & _PAGE_EXEC);
	if (kvmppc_get_msr(vcpu) & MSR_PR) {
		/* Problem state has no access to privileged pages */
		if (pte & _PAGE_PRIVILEGED) {
			gpte->may_read = 0;
			gpte->may_write = 0;
			gpte->may_execute = 0;
		}
	} else {
		if (!(pte & _PAGE_PRIVILEGED)) {
			/* Check AMR/IAMR to see if strict mode is in force */
			if (vcpu->arch.amr & (1ul << 62))
				gpte->may_read = 0;
			if (vcpu->arch.amr & (1ul << 63))
				gpte->may_write = 0;
			if (vcpu->arch.iamr & (1ul << 62))
				gpte->may_execute = 0;
		}
	}

	return 0;
}
/* Base page size index: 64k when CONFIG_PPC_64K_PAGES is set, else 4k */
#ifdef CONFIG_PPC_64K_PAGES
#define MMU_BASE_PSIZE MMU_PAGE_64K
#else
#define MMU_BASE_PSIZE MMU_PAGE_4K
#endif

/*
 * Issue a tlbie for one page, passing this VM's LPID as the second
 * operand.
 *
 * @kvm:    VM whose arch.lpid is used
 * @addr:   address of the page; the low 12 bits are discarded
 * @pshift: page shift of the mapping; anything >= PMD_SHIFT is flushed
 *          with the 2M page-size encoding, otherwise the base page size
 *          encoding is used
 */
static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
				    unsigned int pshift)
{
	int psize = MMU_BASE_PSIZE;

	if (pshift >= PMD_SHIFT)
		psize = MMU_PAGE_2M;
	addr &= ~0xfffUL;
	/* Place the page-size (AP) encoding into the operand at bit 5 */
	addr |= mmu_psize_defs[psize].ap << 5;
	/* The tlbie is bracketed by a ptesync on each side */
	asm volatile("ptesync": : :"memory");
	asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1)
		     : : "r" (addr), "r" (kvm->arch.lpid) : "memory");
	asm volatile("ptesync": : :"memory");
}
/*
 * Update a PTE by clearing the bits in @clr and setting the bits in @set
 * through __radix_pte_update().
 *
 * @kvm:   VM the PTE belongs to (used for the TLB invalidation)
 * @ptep:  PTE to modify
 * @clr:   bits to clear
 * @set:   bits to set
 * @addr:  address mapped by the PTE, passed to the TLB invalidation
 * @shift: page shift of the mapping, passed to the TLB invalidation
 *
 * Returns the result of the final __radix_pte_update() call (presumably
 * the previous PTE value - confirm against its definition), OR'ed with
 * _PAGE_PRESENT if the DD1 path below had to clear that bit first.
 */
unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
				      unsigned long clr, unsigned long set,
				      unsigned long addr, unsigned int shift)
{
	unsigned long old = 0;

	/*
	 * On POWER9 DD1, a PTE that is present and is meant to stay
	 * present is first made non-present and flushed from the TLB;
	 * _PAGE_PRESENT is then added to the bits set by the real update.
	 */
	if (!(clr & _PAGE_PRESENT) && cpu_has_feature(CPU_FTR_POWER9_DD1) &&
	    pte_present(*ptep)) {
		/* have to invalidate it first */
		old = __radix_pte_update(ptep, _PAGE_PRESENT, 0);
		kvmppc_radix_tlbie_page(kvm, addr, shift);
		set |= _PAGE_PRESENT;
		/* Keep only the present bit from the intermediate result */
		old &= _PAGE_PRESENT;
	}
	return __radix_pte_update(ptep, clr, set) | old;
}
/*
 * Install @pte at @ptep for address @addr.  Thin wrapper that forwards
 * to radix__set_pte_at() with kvm->mm and a final argument of 0.
 */
void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
			     pte_t *ptep, pte_t pte)
{
	radix__set_pte_at(kvm->mm, addr, ptep, pte, 0);
}
/* Slab cache for PTE-level tables; set up by kvmppc_radix_init() */
static struct kmem_cache *kvm_pte_cache;

/*
 * Allocate one PTE table from kvm_pte_cache using GFP_KERNEL.
 * Returns whatever kmem_cache_alloc() returns; callers check for NULL.
 */
static pte_t *kvmppc_pte_alloc(void)
{
	return kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
}
/* Return a PTE table obtained from kvmppc_pte_alloc() to kvm_pte_cache */
static void kvmppc_pte_free(pte_t *ptep)
{
	kmem_cache_free(kvm_pte_cache, ptep);
}
/*
 * Insert @pte for guest real address @gpa into the tree rooted at
 * kvm->arch.pgtable, allocating intermediate levels as needed.
 *
 * @kvm:     VM whose tree is modified
 * @pte:     PTE value to install
 * @gpa:     guest real address being mapped
 * @level:   0 to install at the PTE level, otherwise the PTE is written
 *           into the PMD entry itself (large page)
 * @mmu_seq: MMU notifier sequence sampled by the caller, checked under
 *           kvm->mmu_lock
 *
 * Returns 0 on success; -EAGAIN if an MMU notifier invalidation raced
 * with us or a large page is already present at the PMD; -EBUSY if a
 * large page was requested but the PMD already holds something else;
 * -ENOMEM if a needed table could not be allocated.
 */
static int kvmppc_create_pte(struct kvm *kvm, pte_t pte, unsigned long gpa,
			     unsigned int level, unsigned long mmu_seq)
{
	pgd_t *pgd;
	pud_t *pud, *new_pud = NULL;
	pmd_t *pmd, *new_pmd = NULL;
	pte_t *ptep, *new_ptep = NULL;
	unsigned long old;
	int ret;

	/* Traverse the guest's 2nd-level tree, allocate new levels needed */
	/* This first pass runs before mmu_lock is taken */
	pgd = kvm->arch.pgtable + pgd_index(gpa);
	pud = NULL;
	if (pgd_present(*pgd))
		pud = pud_offset(pgd, gpa);
	else
		new_pud = pud_alloc_one(kvm->mm, gpa);

	pmd = NULL;
	if (pud && pud_present(*pud))
		pmd = pmd_offset(pud, gpa);
	else
		new_pmd = pmd_alloc_one(kvm->mm, gpa);

	/* A PTE table is only needed when installing at the lowest level */
	if (level == 0 && !(pmd && pmd_present(*pmd)))
		new_ptep = kvmppc_pte_alloc();

	/* Check if we might have been invalidated; let the guest retry if so */
	spin_lock(&kvm->mmu_lock);
	ret = -EAGAIN;
	if (mmu_notifier_retry(kvm, mmu_seq))
		goto out_unlock;

	/* Now traverse again under the lock and change the tree */
	/* A missing pre-allocated table from here on means -ENOMEM */
	ret = -ENOMEM;
	if (pgd_none(*pgd)) {
		if (!new_pud)
			goto out_unlock;
		pgd_populate(kvm->mm, pgd, new_pud);
		/* Ownership moved into the tree; don't free it below */
		new_pud = NULL;
	}
	pud = pud_offset(pgd, gpa);
	if (pud_none(*pud)) {
		if (!new_pmd)
			goto out_unlock;
		pud_populate(kvm->mm, pud, new_pmd);
		new_pmd = NULL;
	}
	pmd = pmd_offset(pud, gpa);
	if (pmd_large(*pmd)) {
		/* Someone else has instantiated a large page here; retry */
		ret = -EAGAIN;
		goto out_unlock;
	}
	if (level == 1 && !pmd_none(*pmd)) {
		/*
		 * There's a page table page here, but we wanted
		 * to install a large page.  Tell the caller and let
		 * it try installing a normal page if it wants.
		 */
		ret = -EBUSY;
		goto out_unlock;
	}
	if (level == 0) {
		if (pmd_none(*pmd)) {
			if (!new_ptep)
				goto out_unlock;
			pmd_populate(kvm->mm, pmd, new_ptep);
			new_ptep = NULL;
		}
		ptep = pte_offset_kernel(pmd, gpa);
		if (pte_present(*ptep)) {
			/* PTE was previously valid, so invalidate it */
			old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT,
						      0, gpa, 0);
			kvmppc_radix_tlbie_page(kvm, gpa, 0);
			/* Don't lose a dirty bit held by the old PTE */
			if (old & _PAGE_DIRTY)
				mark_page_dirty(kvm, gpa >> PAGE_SHIFT);
		}
		kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
	} else {
		/* Large page: the PMD entry itself holds the PTE */
		kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
	}
	ret = 0;

 out_unlock:
	spin_unlock(&kvm->mmu_lock);
	/* Free any pre-allocated tables that were not linked into the tree */
	if (new_pud)
		pud_free(kvm->mm, new_pud);
	if (new_pmd)
		pmd_free(kvm->mm, new_pmd);
	if (new_ptep)
		kvmppc_pte_free(new_ptep);
	return ret;
}
int
kvmppc_book3s_radix_page_fault
(
struct
kvm_run
*
run
,
struct
kvm_vcpu
*
vcpu
,
unsigned
long
ea
,
unsigned
long
dsisr
)
{
struct
kvm
*
kvm
=
vcpu
->
kvm
;
unsigned
long
mmu_seq
,
pte_size
;
unsigned
long
gpa
,
gfn
,
hva
,
pfn
;
struct
kvm_memory_slot
*
memslot
;
struct
page
*
page
=
NULL
,
*
pages
[
1
];
long
ret
,
npages
,
ok
;
unsigned
int
writing
;
struct
vm_area_struct
*
vma
;
unsigned
long
flags
;
pte_t
pte
,
*
ptep
;
unsigned
long
pgflags
;
unsigned
int
shift
,
level
;
/* Check for unusual errors */
if
(
dsisr
&
DSISR_UNSUPP_MMU
)
{
pr_err
(
"KVM: Got unsupported MMU fault
\n
"
);
return
-
EFAULT
;
}
if
(
dsisr
&
DSISR_BADACCESS
)
{
/* Reflect to the guest as DSI */
pr_err
(
"KVM: Got radix HV page fault with DSISR=%lx
\n
"
,
dsisr
);
kvmppc_core_queue_data_storage
(
vcpu
,
ea
,
dsisr
);
return
RESUME_GUEST
;
}
/* Translate the logical address and get the page */
gpa
=
vcpu
->
arch
.
fault_gpa
&
~
0xfffUL
;
gpa
&=
~
0xF000000000000000ul
;
gfn
=
gpa
>>
PAGE_SHIFT
;
if
(
!
(
dsisr
&
DSISR_PGDIRFAULT
))
gpa
|=
ea
&
0xfff
;
memslot
=
gfn_to_memslot
(
kvm
,
gfn
);
/* No memslot means it's an emulated MMIO region */
if
(
!
memslot
||
(
memslot
->
flags
&
KVM_MEMSLOT_INVALID
))
{
if
(
dsisr
&
(
DSISR_PGDIRFAULT
|
DSISR_BADACCESS
|
DSISR_SET_RC
))
{
/*
* Bad address in guest page table tree, or other
* unusual error - reflect it to the guest as DSI.
*/
kvmppc_core_queue_data_storage
(
vcpu
,
ea
,
dsisr
);
return
RESUME_GUEST
;
}
return
kvmppc_hv_emulate_mmio
(
run
,
vcpu
,
gpa
,
ea
,
dsisr
&
DSISR_ISSTORE
);
}
/* used to check for invalidations in progress */
mmu_seq
=
kvm
->
mmu_notifier_seq
;
smp_rmb
();
writing
=
(
dsisr
&
DSISR_ISSTORE
)
!=
0
;
hva
=
gfn_to_hva_memslot
(
memslot
,
gfn
);
if
(
dsisr
&
DSISR_SET_RC
)
{
/*
* Need to set an R or C bit in the 2nd-level tables;
* if the relevant bits aren't already set in the linux
* page tables, fall through to do the gup_fast to
* set them in the linux page tables too.
*/
ok
=
0
;
pgflags
=
_PAGE_ACCESSED
;
if
(
writing
)
pgflags
|=
_PAGE_DIRTY
;
local_irq_save
(
flags
);
ptep
=
__find_linux_pte_or_hugepte
(
current
->
mm
->
pgd
,
hva
,
NULL
,
NULL
);
if
(
ptep
)
{
pte
=
READ_ONCE
(
*
ptep
);
if
(
pte_present
(
pte
)
&&
(
pte_val
(
pte
)
&
pgflags
)
==
pgflags
)
ok
=
1
;
}
local_irq_restore
(
flags
);
if
(
ok
)
{
spin_lock
(
&
kvm
->
mmu_lock
);
if
(
mmu_notifier_retry
(
vcpu
->
kvm
,
mmu_seq
))
{
spin_unlock
(
&
kvm
->
mmu_lock
);
return
RESUME_GUEST
;
}
ptep
=
__find_linux_pte_or_hugepte
(
kvm
->
arch
.
pgtable
,
gpa
,
NULL
,
&
shift
);
if
(
ptep
&&
pte_present
(
*
ptep
))
{
kvmppc_radix_update_pte
(
kvm
,
ptep
,
0
,
pgflags
,
gpa
,
shift
);
spin_unlock
(
&
kvm
->
mmu_lock
);
return
RESUME_GUEST
;
}
spin_unlock
(
&
kvm
->
mmu_lock
);
}
}
ret
=
-
EFAULT
;
pfn
=
0
;
pte_size
=
PAGE_SIZE
;
pgflags
=
_PAGE_READ
|
_PAGE_EXEC
;
level
=
0
;
npages
=
get_user_pages_fast
(
hva
,
1
,
writing
,
pages
);
if
(
npages
<
1
)
{
/* Check if it's an I/O mapping */
down_read
(
&
current
->
mm
->
mmap_sem
);
vma
=
find_vma
(
current
->
mm
,
hva
);
if
(
vma
&&
vma
->
vm_start
<=
hva
&&
hva
<
vma
->
vm_end
&&
(
vma
->
vm_flags
&
VM_PFNMAP
))
{
pfn
=
vma
->
vm_pgoff
+
((
hva
-
vma
->
vm_start
)
>>
PAGE_SHIFT
);
pgflags
=
pgprot_val
(
vma
->
vm_page_prot
);
}
up_read
(
&
current
->
mm
->
mmap_sem
);
if
(
!
pfn
)
return
-
EFAULT
;
}
else
{
page
=
pages
[
0
];
pfn
=
page_to_pfn
(
page
);
if
(
PageHuge
(
page
))
{
page
=
compound_head
(
page
);
pte_size
<<=
compound_order
(
page
);
/* See if we can insert a 2MB large-page PTE here */
if
(
pte_size
>=
PMD_SIZE
&&
(
gpa
&
PMD_MASK
&
PAGE_MASK
)
==
(
hva
&
PMD_MASK
&
PAGE_MASK
))
{
level
=
1
;
pfn
&=
~
((
PMD_SIZE
>>
PAGE_SHIFT
)
-
1
);
}
}
/* See if we can provide write access */
if
(
writing
)
{
/*
* We assume gup_fast has set dirty on the host PTE.
*/
pgflags
|=
_PAGE_WRITE
;
}
else
{
local_irq_save
(
flags
);
ptep
=
__find_linux_pte_or_hugepte
(
current
->
mm
->
pgd
,
hva
,
NULL
,
NULL
);
if
(
ptep
&&
pte_write
(
*
ptep
)
&&
pte_dirty
(
*
ptep
))
pgflags
|=
_PAGE_WRITE
;
local_irq_restore
(
flags
);
}
}
/*
* Compute the PTE value that we need to insert.
*/
pgflags
|=
_PAGE_PRESENT
|
_PAGE_PTE
|
_PAGE_ACCESSED
;
if
(
pgflags
&
_PAGE_WRITE
)
pgflags
|=
_PAGE_DIRTY
;
pte
=
pfn_pte
(
pfn
,
__pgprot
(
pgflags
));
/* Allocate space in the tree and write the PTE */
ret
=
kvmppc_create_pte
(
kvm
,
pte
,
gpa
,
level
,
mmu_seq
);
if
(
ret
==
-
EBUSY
)
{
/*
* There's already a PMD where wanted to install a large page;
* for now, fall back to installing a small page.
*/
level
=
0
;
pfn
|=
gfn
&
((
PMD_SIZE
>>
PAGE_SHIFT
)
-
1
);
pte
=
pfn_pte
(
pfn
,
__pgprot
(
pgflags
));
ret
=
kvmppc_create_pte
(
kvm
,
pte
,
gpa
,
level
,
mmu_seq
);
}
if
(
ret
==
0
||
ret
==
-
EAGAIN
)
ret
=
RESUME_GUEST
;
if
(
page
)
{
/*
* We drop pages[0] here, not page because page might
* have been set to the head page of a compound, but
* we have to drop the reference on the correct tail
* page to match the get inside gup()
*/
put_page
(
pages
[
0
]);
}
return
ret
;
}
/*
 * Mark 2^@order consecutive pages, starting at @gfn, dirty in @memslot.
 *
 * Does nothing when the memslot has no dirty bitmap.  Runs shorter than
 * one bitmap word are marked page by page with mark_page_dirty(); longer
 * runs are written a whole word at a time.
 *
 * NOTE(review): the word-at-a-time path assumes gfn - base_gfn is a
 * multiple of BITS_PER_LONG (i.e. @gfn is the first page of the large
 * mapping); otherwise the range is rounded down to a word boundary -
 * confirm that callers pass the start of the mapping.
 */
static void mark_pages_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot,
			     unsigned long gfn, unsigned int order)
{
	unsigned long i, limit;
	unsigned long *dp;

	if (!memslot->dirty_bitmap)
		return;
	limit = 1ul << order;
	if (limit < BITS_PER_LONG) {
		for (i = 0; i < limit; ++i)
			mark_page_dirty(kvm, gfn + i);
		return;
	}
	/*
	 * dirty_bitmap holds one bit per page, so the word containing
	 * this page is at index (page offset / BITS_PER_LONG).  Adding
	 * the page offset directly would step that many words, not bits,
	 * and write far beyond the intended range.
	 */
	dp = memslot->dirty_bitmap +
		(gfn - memslot->base_gfn) / BITS_PER_LONG;
	limit /= BITS_PER_LONG;
	for (i = 0; i < limit; ++i)
		*dp++ = ~0ul;
}
/*
 * Invalidate the mapping, if any, for @gfn in the VM's radix tree and
 * carry a dirty bit held by the old PTE over to the dirty log.
 * Always returns 0.
 *
 * Called with kvm->lock held
 */
int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
		    unsigned long gfn)
{
	unsigned long gpa = gfn << PAGE_SHIFT;
	unsigned long prev;
	unsigned int shift;
	pte_t *ptep;

	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
					   NULL, &shift);
	/* Nothing mapped here: nothing to do */
	if (!ptep || !pte_present(*ptep))
		return 0;

	prev = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0,
				       gpa, shift);
	kvmppc_radix_tlbie_page(kvm, gpa, shift);

	if (!(prev & _PAGE_DIRTY))
		return 0;

	/* A non-zero shift means the PTE covered a large page */
	if (shift)
		mark_pages_dirty(kvm, memslot, gfn, shift - PAGE_SHIFT);
	else
		mark_page_dirty(kvm, gfn);
	return 0;
}
/*
 * Clear the accessed bit of the PTE mapping @gfn, if there is a present
 * PTE that has it set.  Returns 1 if the bit was cleared, 0 otherwise.
 *
 * Called with kvm->lock held
 */
int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
		  unsigned long gfn)
{
	unsigned long gpa = gfn << PAGE_SHIFT;
	unsigned int shift;
	pte_t *ptep;

	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
					   NULL, &shift);
	if (!ptep || !pte_present(*ptep) || !pte_young(*ptep))
		return 0;

	kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift);
	/* XXX need to flush tlb here? */
	return 1;
}
/*
 * Report whether @gfn is mapped by a present PTE with the accessed bit
 * set, without modifying it.  Returns 1 if so, 0 otherwise.
 *
 * Called with kvm->lock held
 */
int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
		       unsigned long gfn)
{
	unsigned long gpa = gfn << PAGE_SHIFT;
	unsigned int shift;
	pte_t *ptep;

	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
					   NULL, &shift);
	if (!ptep)
		return 0;
	if (!pte_present(*ptep))
		return 0;
	return pte_young(*ptep) ? 1 : 0;
}
/*
 * Test and clear the dirty bit of the PTE mapping page @pagenum of
 * @memslot, flushing the TLB entry when the bit was set.
 *
 * Returns the number of PAGE_SIZE pages that are dirty: 0 if the PTE is
 * absent or clean, 1 for a dirty normal page, or the number of base
 * pages covered when the dirty PTE maps a large page.
 */
static int kvm_radix_test_clear_dirty(struct kvm *kvm,
				struct kvm_memory_slot *memslot, int pagenum)
{
	unsigned long gfn = memslot->base_gfn + pagenum;
	unsigned long gpa = gfn << PAGE_SHIFT;
	unsigned int shift;
	pte_t *ptep;
	int ndirty;

	ptep = __find_linux_pte_or_hugepte(kvm->arch.pgtable, gpa,
					   NULL, &shift);
	if (!ptep || !pte_present(*ptep) || !pte_dirty(*ptep))
		return 0;

	/* shift is non-zero only for a large-page mapping */
	ndirty = shift ? 1 << (shift - PAGE_SHIFT) : 1;

	kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, gpa, shift);
	kvmppc_radix_tlbie_page(kvm, gpa, shift);
	return ndirty;
}
/*
 * Collect the dirty log for @memslot of a radix guest into @map.
 *
 * First drains the bits accumulated in memslot->dirty_bitmap into @map,
 * then harvests (and clears) the dirty bits held in the radix PTEs.
 * Always returns 0.
 */
long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
			struct kvm_memory_slot *memslot, unsigned long *map)
{
	unsigned long *accum = memslot->dirty_bitmap;
	unsigned long word, nwords;
	unsigned long page;
	int ndirty;

	/*
	 * Radix accumulates dirty bits in the first half of the
	 * memslot's dirty_bitmap area, for when pages are paged
	 * out or modified by the host directly.  Pick up these
	 * bits and add them to the map.
	 */
	nwords = kvm_dirty_bitmap_bytes(memslot) / sizeof(long);
	for (word = 0; word < nwords; ++word)
		map[word] |= xchg(&accum[word], 0);

	page = 0;
	while (page < memslot->npages) {
		ndirty = kvm_radix_test_clear_dirty(kvm, memslot, page);
		if (!ndirty) {
			++page;
			continue;
		}
		/*
		 * Note that if ndirty > 0 then page must be a multiple of
		 * ndirty, since huge pages are only used to back the guest
		 * at guest real addresses that are a multiple of their size.
		 * Since we have at most one PTE covering any given guest
		 * real address, every page the PTE covers is marked here
		 * and the scan resumes just past them.
		 */
		for (; ndirty; --ndirty) {
			__set_bit_le(page, map);
			++page;
		}
	}
	return 0;
}
/*
 * Append the AP encoding for page size @psize to info->ap_encodings[]
 * at position *@indexp and advance *@indexp.  Page sizes whose
 * mmu_psize_defs[] shift is zero are skipped.  The entry holds the page
 * shift in its low bits and the AP value shifted left by 29.
 */
static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
				 int psize, int *indexp)
{
	int slot = *indexp;

	if (mmu_psize_defs[psize].shift) {
		info->ap_encodings[slot] = mmu_psize_defs[psize].shift |
			(mmu_psize_defs[psize].ap << 29);
		*indexp = slot + 1;
	}
}
/*
 * Fill in @info with the radix tree geometries and AP encodings that
 * this implementation reports.
 *
 * Returns -EINVAL when radix_enabled() is false, 0 otherwise.
 */
int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info)
{
	int level;
	int nr_ap = 0;

	if (!radix_enabled())
		return -EINVAL;
	memset(info, 0, sizeof(*info));

	/* geometries[0] describes 4k pages, geometries[1] 64k pages */
	info->geometries[0].page_shift = 12;
	info->geometries[1].page_shift = 16;
	for (level = 0; level < 4; ++level) {
		int bits = p9_supported_radix_bits[level];

		info->geometries[1].level_bits[level] = bits;
		/* The 4k geometry differs only at the lowest level: 9 bits */
		info->geometries[0].level_bits[level] = level ? bits : 9;
	}

	add_rmmu_ap_encoding(info, MMU_PAGE_4K, &nr_ap);
	add_rmmu_ap_encoding(info, MMU_PAGE_64K, &nr_ap);
	add_rmmu_ap_encoding(info, MMU_PAGE_2M, &nr_ap);
	add_rmmu_ap_encoding(info, MMU_PAGE_1G, &nr_ap);

	return 0;
}
/*
 * Allocate the top-level directory of the VM's radix tree and store it
 * in kvm->arch.pgtable.  Returns 0 on success, -ENOMEM if pgd_alloc()
 * fails.
 */
int kvmppc_init_vm_radix(struct kvm *kvm)
{
	kvm->arch.pgtable = pgd_alloc(kvm->mm);
	return kvm->arch.pgtable ? 0 : -ENOMEM;
}
/*
 * Tear down the VM's radix tree: free every PTE table, PMD table and
 * PUD table reachable from kvm->arch.pgtable, then the top-level
 * directory itself.  Does nothing if no tree was allocated.
 *
 * kvm->arch.pgtable is reset to NULL afterwards so that the guard at
 * the top makes a repeated call harmless instead of walking and freeing
 * already-freed memory.
 */
void kvmppc_free_radix(struct kvm *kvm)
{
	unsigned long ig, iu, im;
	pte_t *pte;
	pmd_t *pmd;
	pud_t *pud;
	pgd_t *pgd;

	if (!kvm->arch.pgtable)
		return;
	pgd = kvm->arch.pgtable;
	for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
		if (!pgd_present(*pgd))
			continue;
		pud = pud_offset(pgd, 0);
		for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++pud) {
			if (!pud_present(*pud))
				continue;
			pmd = pmd_offset(pud, 0);
			for (im = 0; im < PTRS_PER_PMD; ++im, ++pmd) {
				/* Large page: no PTE table hangs off it */
				if (pmd_huge(*pmd)) {
					pmd_clear(pmd);
					continue;
				}
				if (!pmd_present(*pmd))
					continue;
				pte = pte_offset_map(pmd, 0);
				/*
				 * Zero the table before freeing it,
				 * presumably so it returns to the cache in
				 * the state pte_ctor() establishes.
				 */
				memset(pte, 0, sizeof(long) << PTE_INDEX_SIZE);
				kvmppc_pte_free(pte);
				pmd_clear(pmd);
			}
			/* pmd has walked off the end; free from its base */
			pmd_free(kvm->mm, pmd_offset(pud, 0));
			pud_clear(pud);
		}
		pud_free(kvm->mm, pud_offset(pgd, 0));
		pgd_clear(pgd);
	}
	pgd_free(kvm->mm, kvm->arch.pgtable);
	kvm->arch.pgtable = NULL;
}
/*
 * Constructor passed to kmem_cache_create() for kvm_pte_cache: zeroes
 * PTE_TABLE_SIZE bytes at @addr.
 */
static void pte_ctor(void *addr)
{
	memset(addr, 0, PTE_TABLE_SIZE);
}
/*
 * Create the "kvm-pte" slab cache used for PTE-level tables.  Objects
 * are sized and aligned to (sizeof(void *) << PTE_INDEX_SIZE) bytes and
 * initialised by pte_ctor().
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int kvmppc_radix_init(void)
{
	const unsigned long tbl_bytes = sizeof(void *) << PTE_INDEX_SIZE;

	kvm_pte_cache = kmem_cache_create("kvm-pte", tbl_bytes, tbl_bytes,
					  0, pte_ctor);
	return kvm_pte_cache ? 0 : -ENOMEM;
}
/* Destroy the PTE table cache created by kvmppc_radix_init() */
void kvmppc_radix_exit(void)
{
	kmem_cache_destroy(kvm_pte_cache);
}
arch/powerpc/kvm/book3s_hv.c
View file @
da0e7e62
...
...
@@ -1135,7 +1135,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
/*
* Userspace can only modify DPFD (default prefetch depth),
* ILE (interrupt little-endian) and TC (translation control).
* On POWER8
userspace can also modify AIL (alt. interrupt loc.)
* On POWER8
and POWER9 userspace can also modify AIL (alt. interrupt loc.).
*/
mask
=
LPCR_DPFD
|
LPCR_ILE
|
LPCR_TC
;
if
(
cpu_has_feature
(
CPU_FTR_ARCH_207S
))
...
...
@@ -1821,6 +1821,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu
->
arch
.
vcore
=
vcore
;
vcpu
->
arch
.
ptid
=
vcpu
->
vcpu_id
-
vcore
->
first_vcpuid
;
vcpu
->
arch
.
thread_cpu
=
-
1
;
vcpu
->
arch
.
prev_cpu
=
-
1
;
vcpu
->
arch
.
cpu_type
=
KVM_CPU_3S_64
;
kvmppc_sanity_check
(
vcpu
);
...
...
@@ -1950,11 +1951,33 @@ static void kvmppc_release_hwthread(int cpu)
tpaca
->
kvm_hstate
.
kvm_split_mode
=
NULL
;
}
static
void
do_nothing
(
void
*
x
)
{
}
/*
 * Request a TLB flush on the core containing @cpu.
 *
 * Sets the bit for the core's first thread in kvm->arch.need_tlb_flush,
 * then sends a synchronous no-op cross-call to every thread of that
 * core that is currently marked in kvm->arch.cpu_in_guest.
 *
 * @kvm:  VM whose need_tlb_flush / cpu_in_guest masks are used
 * @cpu:  any thread of the core to flush
 * @vcpu: not used by this function
 */
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
	int i;

	/* One need_tlb_flush bit per core, kept on its first thread */
	cpu = cpu_first_thread_sibling(cpu);
	cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
	/*
	 * Make sure setting of bit in need_tlb_flush precedes
	 * testing of cpu_in_guest bits.  The matching barrier on
	 * the other side is the first smp_mb() in kvmppc_run_core().
	 */
	smp_mb();
	for (i = 0; i < threads_per_core; ++i)
		if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest))
			/* Final argument 1: wait for the call to finish */
			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
}
static
void
kvmppc_start_thread
(
struct
kvm_vcpu
*
vcpu
,
struct
kvmppc_vcore
*
vc
)
{
int
cpu
;
struct
paca_struct
*
tpaca
;
struct
kvmppc_vcore
*
mvc
=
vc
->
master_vcore
;
struct
kvm
*
kvm
=
vc
->
kvm
;
cpu
=
vc
->
pcpu
;
if
(
vcpu
)
{
...
...
@@ -1965,6 +1988,27 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
cpu
+=
vcpu
->
arch
.
ptid
;
vcpu
->
cpu
=
mvc
->
pcpu
;
vcpu
->
arch
.
thread_cpu
=
cpu
;
/*
* With radix, the guest can do TLB invalidations itself,
* and it could choose to use the local form (tlbiel) if
* it is invalidating a translation that has only ever been
* used on one vcpu. However, that doesn't mean it has
* only ever been used on one physical cpu, since vcpus
* can move around between pcpus. To cope with this, when
* a vcpu moves from one pcpu to another, we need to tell
* any vcpus running on the same core as this vcpu previously
* ran to flush the TLB. The TLB is shared between threads,
* so we use a single bit in .need_tlb_flush for all 4 threads.
*/
if
(
kvm_is_radix
(
kvm
)
&&
vcpu
->
arch
.
prev_cpu
!=
cpu
)
{
if
(
vcpu
->
arch
.
prev_cpu
>=
0
&&
cpu_first_thread_sibling
(
vcpu
->
arch
.
prev_cpu
)
!=
cpu_first_thread_sibling
(
cpu
))
radix_flush_cpu
(
kvm
,
vcpu
->
arch
.
prev_cpu
,
vcpu
);
vcpu
->
arch
.
prev_cpu
=
cpu
;
}
cpumask_set_cpu
(
cpu
,
&
kvm
->
arch
.
cpu_in_guest
);
}
tpaca
=
&
paca
[
cpu
];
tpaca
->
kvm_hstate
.
kvm_vcpu
=
vcpu
;
...
...
@@ -2552,6 +2596,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_release_hwthread
(
pcpu
+
i
);
if
(
sip
&&
sip
->
napped
[
i
])
kvmppc_ipi_thread
(
pcpu
+
i
);
cpumask_clear_cpu
(
pcpu
+
i
,
&
vc
->
kvm
->
arch
.
cpu_in_guest
);
}
kvmppc_set_host_core
(
pcpu
);
...
...
@@ -2877,7 +2922,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
smp_mb
();
/* On the first time here, set up HTAB and VRMA */
if
(
!
vcpu
->
kvm
->
arch
.
hpte_setup_done
)
{
if
(
!
kvm_is_radix
(
vcpu
->
kvm
)
&&
!
vcpu
->
kvm
->
arch
.
hpte_setup_done
)
{
r
=
kvmppc_hv_setup_htab_rma
(
vcpu
);
if
(
r
)
goto
out
;
...
...
@@ -2939,6 +2984,13 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
{
struct
kvm_ppc_one_seg_page_size
*
sps
;
/*
* Since we don't yet support HPT guests on a radix host,
* return an error if the host uses radix.
*/
if
(
radix_enabled
())
return
-
EINVAL
;
info
->
flags
=
KVM_PPC_PAGE_SIZES_REAL
;
if
(
mmu_has_feature
(
MMU_FTR_1T_SEGMENT
))
info
->
flags
|=
KVM_PPC_1T_SEGMENTS
;
...
...
@@ -2961,8 +3013,10 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
{
struct
kvm_memslots
*
slots
;
struct
kvm_memory_slot
*
memslot
;
int
r
;
int
i
,
r
;
unsigned
long
n
;
unsigned
long
*
buf
;
struct
kvm_vcpu
*
vcpu
;
mutex_lock
(
&
kvm
->
slots_lock
);
...
...
@@ -2976,15 +3030,32 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
if
(
!
memslot
->
dirty_bitmap
)
goto
out
;
/*
* Use second half of bitmap area because radix accumulates
* bits in the first half.
*/
n
=
kvm_dirty_bitmap_bytes
(
memslot
);
memset
(
memslot
->
dirty_bitmap
,
0
,
n
);
buf
=
memslot
->
dirty_bitmap
+
n
/
sizeof
(
long
);
memset
(
buf
,
0
,
n
);
r
=
kvmppc_hv_get_dirty_log
(
kvm
,
memslot
,
memslot
->
dirty_bitmap
);
if
(
kvm_is_radix
(
kvm
))
r
=
kvmppc_hv_get_dirty_log_radix
(
kvm
,
memslot
,
buf
);
else
r
=
kvmppc_hv_get_dirty_log_hpt
(
kvm
,
memslot
,
buf
);
if
(
r
)
goto
out
;
/* Harvest dirty bits from VPA and DTL updates */
/* Note: we never modify the SLB shadow buffer areas */
kvm_for_each_vcpu
(
i
,
vcpu
,
kvm
)
{
spin_lock
(
&
vcpu
->
arch
.
vpa_update_lock
);
kvmppc_harvest_vpa_dirty
(
&
vcpu
->
arch
.
vpa
,
memslot
,
buf
);
kvmppc_harvest_vpa_dirty
(
&
vcpu
->
arch
.
dtl
,
memslot
,
buf
);
spin_unlock
(
&
vcpu
->
arch
.
vpa_update_lock
);
}
r
=
-
EFAULT
;
if
(
copy_to_user
(
log
->
dirty_bitmap
,
memslot
->
dirty_bitmap
,
n
))
if
(
copy_to_user
(
log
->
dirty_bitmap
,
buf
,
n
))
goto
out
;
r
=
0
;
...
...
@@ -3005,6 +3076,15 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
static
int
kvmppc_core_create_memslot_hv
(
struct
kvm_memory_slot
*
slot
,
unsigned
long
npages
)
{
/*
* For now, if radix_enabled() then we only support radix guests,
* and in that case we don't need the rmap array.
*/
if
(
radix_enabled
())
{
slot
->
arch
.
rmap
=
NULL
;
return
0
;
}
slot
->
arch
.
rmap
=
vzalloc
(
npages
*
sizeof
(
*
slot
->
arch
.
rmap
));
if
(
!
slot
->
arch
.
rmap
)
return
-
ENOMEM
;
...
...
@@ -3037,7 +3117,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
if
(
npages
)
atomic64_inc
(
&
kvm
->
arch
.
mmio_update
);
if
(
npages
&&
old
->
npages
)
{
if
(
npages
&&
old
->
npages
&&
!
kvm_is_radix
(
kvm
)
)
{
/*
* If modifying a memslot, reset all the rmap dirty bits.
* If this is a new memslot, we don't need to do anything
...
...
@@ -3046,7 +3126,7 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
*/
slots
=
kvm_memslots
(
kvm
);
memslot
=
id_to_memslot
(
slots
,
mem
->
slot
);
kvmppc_hv_get_dirty_log
(
kvm
,
memslot
,
NULL
);
kvmppc_hv_get_dirty_log
_hpt
(
kvm
,
memslot
,
NULL
);
}
}
...
...
@@ -3085,14 +3165,20 @@ static void kvmppc_setup_partition_table(struct kvm *kvm)
{
unsigned
long
dw0
,
dw1
;
/* PS field - page size for VRMA */
dw0
=
((
kvm
->
arch
.
vrma_slb_v
&
SLB_VSID_L
)
>>
1
)
|
((
kvm
->
arch
.
vrma_slb_v
&
SLB_VSID_LP
)
<<
1
);
/* HTABSIZE and HTABORG fields */
dw0
|=
kvm
->
arch
.
sdr1
;
if
(
!
kvm_is_radix
(
kvm
))
{
/* PS field - page size for VRMA */
dw0
=
((
kvm
->
arch
.
vrma_slb_v
&
SLB_VSID_L
)
>>
1
)
|
((
kvm
->
arch
.
vrma_slb_v
&
SLB_VSID_LP
)
<<
1
);
/* HTABSIZE and HTABORG fields */
dw0
|=
kvm
->
arch
.
sdr1
;
/* Second dword has GR=0; other fields are unused since UPRT=0 */
dw1
=
0
;
/* Second dword as set by userspace */
dw1
=
kvm
->
arch
.
process_table
;
}
else
{
dw0
=
PATB_HR
|
radix__get_tree_size
()
|
__pa
(
kvm
->
arch
.
pgtable
)
|
RADIX_PGD_INDEX_SIZE
;
dw1
=
PATB_GR
|
kvm
->
arch
.
process_table
;
}
mmu_partition_table_set_entry
(
kvm
->
arch
.
lpid
,
dw0
,
dw1
);
}
...
...
@@ -3262,6 +3348,7 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned
long
lpcr
,
lpid
;
char
buf
[
32
];
int
ret
;
/* Allocate the guest's logical partition ID */
...
...
@@ -3309,13 +3396,30 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
lpcr
|=
LPCR_HVICE
;
}
/*
* For now, if the host uses radix, the guest must be radix.
*/
if
(
radix_enabled
())
{
kvm
->
arch
.
radix
=
1
;
lpcr
&=
~
LPCR_VPM1
;
lpcr
|=
LPCR_UPRT
|
LPCR_GTSE
|
LPCR_HR
;
ret
=
kvmppc_init_vm_radix
(
kvm
);
if
(
ret
)
{
kvmppc_free_lpid
(
kvm
->
arch
.
lpid
);
return
ret
;
}
kvmppc_setup_partition_table
(
kvm
);
}
kvm
->
arch
.
lpcr
=
lpcr
;
/*
* Work out how many sets the TLB has, for the use of
* the TLB invalidation loop in book3s_hv_rmhandlers.S.
*/
if
(
cpu_has_feature
(
CPU_FTR_ARCH_300
))
if
(
kvm_is_radix
(
kvm
))
kvm
->
arch
.
tlb_sets
=
POWER9_TLB_SETS_RADIX
;
/* 128 */
else
if
(
cpu_has_feature
(
CPU_FTR_ARCH_300
))
kvm
->
arch
.
tlb_sets
=
POWER9_TLB_SETS_HASH
;
/* 256 */
else
if
(
cpu_has_feature
(
CPU_FTR_ARCH_207S
))
kvm
->
arch
.
tlb_sets
=
POWER8_TLB_SETS
;
/* 512 */
...
...
@@ -3325,8 +3429,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
* On POWER9, we only need to do this for HPT guests on a radix
* host, which is not yet supported.
*/
kvm_hv_vm_activated
();
if
(
!
cpu_has_feature
(
CPU_FTR_ARCH_300
))
kvm_hv_vm_activated
();
/*
* Create a debugfs directory for the VM
...
...
@@ -3352,11 +3459,17 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
debugfs_remove_recursive
(
kvm
->
arch
.
debugfs_dir
);
kvm_hv_vm_deactivated
();
if
(
!
cpu_has_feature
(
CPU_FTR_ARCH_300
))
kvm_hv_vm_deactivated
();
kvmppc_free_vcores
(
kvm
);
kvmppc_free_hpt
(
kvm
);
kvmppc_free_lpid
(
kvm
->
arch
.
lpid
);
if
(
kvm_is_radix
(
kvm
))
kvmppc_free_radix
(
kvm
);
else
kvmppc_free_hpt
(
kvm
);
kvmppc_free_pimap
(
kvm
);
}
...
...
@@ -3385,11 +3498,6 @@ static int kvmppc_core_check_processor_compat_hv(void)
if
(
!
cpu_has_feature
(
CPU_FTR_HVMODE
)
||
!
cpu_has_feature
(
CPU_FTR_ARCH_206
))
return
-
EIO
;
/*
* Disable KVM for Power9 in radix mode.
*/
if
(
cpu_has_feature
(
CPU_FTR_ARCH_300
)
&&
radix_enabled
())
return
-
EIO
;
return
0
;
}
...
...
@@ -3657,6 +3765,41 @@ static void init_default_hcalls(void)
}
}
/*
 * Apply a userspace-supplied ISA v3.00 MMU configuration to a guest.
 *
 * After validating @cfg, this stores cfg->process_table in
 * kvm->arch.process_table, calls kvmppc_setup_partition_table() and then
 * calls kvmppc_update_lpcr() with LPCR_GTSE set or clear according to the
 * KVM_PPC_MMUV3_GTSE flag.
 *
 * Returns 0 on success, -ENODEV if the CPU lacks CPU_FTR_ARCH_300, or
 * -EINVAL if @cfg is rejected by any of the checks below.
 */
static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
{
	unsigned long gtse_bits;
	int want_radix;

	/* Not a POWER9: nothing to configure */
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return -ENODEV;

	/* Refuse any flag other than RADIX and GTSE */
	if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
		return -EINVAL;

	/* Changing a guest to or from radix is not supported yet */
	want_radix = (cfg->flags & KVM_PPC_MMUV3_RADIX) ? 1 : 0;
	if (want_radix != kvm_is_radix(kvm))
		return -EINVAL;

	/* The GR (guest radix) bit in process_table must agree with the flag */
	if (((cfg->process_table & PATB_GR) ? 1 : 0) != want_radix)
		return -EINVAL;

	/* The process table size field must be reasonable, i.e. <= 24 */
	if ((cfg->process_table & PRTS_MASK) > 24)
		return -EINVAL;

	kvm->arch.process_table = cfg->process_table;
	kvmppc_setup_partition_table(kvm);

	if (cfg->flags & KVM_PPC_MMUV3_GTSE)
		gtse_bits = LPCR_GTSE;
	else
		gtse_bits = 0;
	kvmppc_update_lpcr(kvm, gtse_bits, LPCR_GTSE);

	return 0;
}
static
struct
kvmppc_ops
kvm_ops_hv
=
{
.
get_sregs
=
kvm_arch_vcpu_ioctl_get_sregs_hv
,
.
set_sregs
=
kvm_arch_vcpu_ioctl_set_sregs_hv
,
...
...
@@ -3694,6 +3837,8 @@ static struct kvmppc_ops kvm_ops_hv = {
.
irq_bypass_add_producer
=
kvmppc_irq_bypass_add_producer_hv
,
.
irq_bypass_del_producer
=
kvmppc_irq_bypass_del_producer_hv
,
#endif
.
configure_mmu
=
kvmhv_configure_mmu
,
.
get_rmmu_info
=
kvmhv_get_rmmu_info
,
};
static
int
kvm_init_subcore_bitmap
(
void
)
...
...
@@ -3728,6 +3873,11 @@ static int kvm_init_subcore_bitmap(void)
return
0
;
}
/*
 * Returns 1 when the CPU has CPU_FTR_ARCH_300 and radix_enabled() is
 * true, 0 otherwise.  radix_enabled() is only consulted when the CPU
 * feature is present.
 */
static int kvmppc_radix_possible(void)
{
	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return 0;

	return radix_enabled() ? 1 : 0;
}
static
int
kvmppc_book3s_init_hv
(
void
)
{
int
r
;
...
...
@@ -3767,12 +3917,19 @@ static int kvmppc_book3s_init_hv(void)
init_vcore_lists
();
r
=
kvmppc_mmu_hv_init
();
if
(
r
)
return
r
;
if
(
kvmppc_radix_possible
())
r
=
kvmppc_radix_init
();
return
r
;
}
/*
 * Exit path for the HV backend: frees the host real-mode ops, calls
 * kvmppc_radix_exit() when kvmppc_radix_possible() reports true, and
 * finally clears the kvmppc_hv_ops pointer.
 */
static void kvmppc_book3s_exit_hv(void)
{
	kvmppc_free_host_rm_ops();

	if (kvmppc_radix_possible()) {
		kvmppc_radix_exit();
	}

	kvmppc_hv_ops = NULL;
}
...
...
arch/powerpc/kvm/book3s_hv_builtin.c
View file @
da0e7e62
...
...
@@ -200,7 +200,6 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
/*
* Send an interrupt or message to another CPU.
* This can only be called in real mode.
* The caller needs to include any barrier needed to order writes
* to memory vs. the IPI/message.
*/
...
...
@@ -229,8 +228,7 @@ void kvmhv_rm_send_ipi(int cpu)
if
(
xics_phys
)
rm_writeb
(
xics_phys
+
XICS_MFRR
,
IPI_PRIORITY
);
else
opal_rm_int_set_mfrr
(
get_hard_smp_processor_id
(
cpu
),
IPI_PRIORITY
);
opal_int_set_mfrr
(
get_hard_smp_processor_id
(
cpu
),
IPI_PRIORITY
);
}
/*
...
...
@@ -412,14 +410,13 @@ static long kvmppc_read_one_intr(bool *again)
/* Now read the interrupt from the ICP */
xics_phys
=
local_paca
->
kvm_hstate
.
xics_phys
;
if
(
!
xics_phys
)
{
/* Use OPAL to read the XIRR */
rc
=
opal_rm_int_get_xirr
(
&
xirr
,
false
);
if
(
rc
<
0
)
return
1
;
}
else
{
rc
=
0
;
if
(
!
xics_phys
)
rc
=
opal_int_get_xirr
(
&
xirr
,
false
);
else
xirr
=
_lwzcix
(
xics_phys
+
XICS_XIRR
);
}
if
(
rc
<
0
)
return
1
;
/*
* Save XIRR for later. Since we get control in reverse endian
...
...
@@ -445,15 +442,16 @@ static long kvmppc_read_one_intr(bool *again)
* If it is an IPI, clear the MFRR and EOI it.
*/
if
(
xisr
==
XICS_IPI
)
{
rc
=
0
;
if
(
xics_phys
)
{
_stbcix
(
xics_phys
+
XICS_MFRR
,
0xff
);
_stwcix
(
xics_phys
+
XICS_XIRR
,
xirr
);
}
else
{
opal_rm_int_set_mfrr
(
hard_smp_processor_id
(),
0xff
);
rc
=
opal_rm_int_eoi
(
h_xirr
);
/* If rc > 0, there is another interrupt pending */
*
again
=
rc
>
0
;
opal_int_set_mfrr
(
hard_smp_processor_id
(),
0xff
);
rc
=
opal_int_eoi
(
h_xirr
);
}
/* If rc > 0, there is another interrupt pending */
*
again
=
rc
>
0
;
/*
* Need to ensure side effects of above stores
...
...
@@ -474,8 +472,8 @@ static long kvmppc_read_one_intr(bool *again)
if
(
xics_phys
)
_stbcix
(
xics_phys
+
XICS_MFRR
,
IPI_PRIORITY
);
else
opal_
rm_
int_set_mfrr
(
hard_smp_processor_id
(),
IPI_PRIORITY
);
opal_int_set_mfrr
(
hard_smp_processor_id
(),
IPI_PRIORITY
);
/* Let side effects complete */
smp_mb
();
return
1
;
...
...
arch/powerpc/kvm/book3s_hv_rm_mmu.c
View file @
da0e7e62
...
...
@@ -43,6 +43,7 @@ static void *real_vmalloc_addr(void *x)
static
int
global_invalidates
(
struct
kvm
*
kvm
,
unsigned
long
flags
)
{
int
global
;
int
cpu
;
/*
* If there is only one vcore, and it's currently running,
...
...
@@ -60,8 +61,14 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
/* any other core might now have stale TLB entries... */
smp_wmb
();
cpumask_setall
(
&
kvm
->
arch
.
need_tlb_flush
);
cpumask_clear_cpu
(
local_paca
->
kvm_hstate
.
kvm_vcore
->
pcpu
,
&
kvm
->
arch
.
need_tlb_flush
);
cpu
=
local_paca
->
kvm_hstate
.
kvm_vcore
->
pcpu
;
/*
* On POWER9, threads are independent but the TLB is shared,
* so use the bit for the first thread to represent the core.
*/
if
(
cpu_has_feature
(
CPU_FTR_ARCH_300
))
cpu
=
cpu_first_thread_sibling
(
cpu
);
cpumask_clear_cpu
(
cpu
,
&
kvm
->
arch
.
need_tlb_flush
);
}
return
global
;
...
...
@@ -182,6 +189,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
unsigned
long
mmu_seq
;
unsigned
long
rcbits
,
irq_flags
=
0
;
if
(
kvm_is_radix
(
kvm
))
return
H_FUNCTION
;
psize
=
hpte_page_size
(
pteh
,
ptel
);
if
(
!
psize
)
return
H_PARAMETER
;
...
...
@@ -458,6 +467,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
struct
revmap_entry
*
rev
;
u64
pte
,
orig_pte
,
pte_r
;
if
(
kvm_is_radix
(
kvm
))
return
H_FUNCTION
;
if
(
pte_index
>=
kvm
->
arch
.
hpt_npte
)
return
H_PARAMETER
;
hpte
=
(
__be64
*
)(
kvm
->
arch
.
hpt_virt
+
(
pte_index
<<
4
));
...
...
@@ -529,6 +540,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
struct
revmap_entry
*
rev
,
*
revs
[
4
];
u64
hp0
,
hp1
;
if
(
kvm_is_radix
(
kvm
))
return
H_FUNCTION
;
global
=
global_invalidates
(
kvm
,
0
);
for
(
i
=
0
;
i
<
4
&&
ret
==
H_SUCCESS
;
)
{
n
=
0
;
...
...
@@ -642,6 +655,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned
long
v
,
r
,
rb
,
mask
,
bits
;
u64
pte_v
,
pte_r
;
if
(
kvm_is_radix
(
kvm
))
return
H_FUNCTION
;
if
(
pte_index
>=
kvm
->
arch
.
hpt_npte
)
return
H_PARAMETER
;
...
...
@@ -711,6 +726,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
int
i
,
n
=
1
;
struct
revmap_entry
*
rev
=
NULL
;
if
(
kvm_is_radix
(
kvm
))
return
H_FUNCTION
;
if
(
pte_index
>=
kvm
->
arch
.
hpt_npte
)
return
H_PARAMETER
;
if
(
flags
&
H_READ_4
)
{
...
...
@@ -750,6 +767,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned
long
*
rmap
;
long
ret
=
H_NOT_FOUND
;
if
(
kvm_is_radix
(
kvm
))
return
H_FUNCTION
;
if
(
pte_index
>=
kvm
->
arch
.
hpt_npte
)
return
H_PARAMETER
;
...
...
@@ -796,6 +815,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned
long
*
rmap
;
long
ret
=
H_NOT_FOUND
;
if
(
kvm_is_radix
(
kvm
))
return
H_FUNCTION
;
if
(
pte_index
>=
kvm
->
arch
.
hpt_npte
)
return
H_PARAMETER
;
...
...
arch/powerpc/kvm/book3s_hv_rm_xics.c
View file @
da0e7e62
...
...
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(kvm_irq_bypass);
static
void
icp_rm_deliver_irq
(
struct
kvmppc_xics
*
xics
,
struct
kvmppc_icp
*
icp
,
u32
new_irq
);
static
int
xics_opal_
rm_
set_server
(
unsigned
int
hw_irq
,
int
server_cpu
);
static
int
xics_opal_set_server
(
unsigned
int
hw_irq
,
int
server_cpu
);
/* -- ICS routines -- */
static
void
ics_rm_check_resend
(
struct
kvmppc_xics
*
xics
,
...
...
@@ -70,11 +70,9 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu
=
hcore
<<
threads_shift
;
kvmppc_host_rm_ops_hv
->
rm_core
[
hcore
].
rm_data
=
vcpu
;
smp_muxed_ipi_set_message
(
hcpu
,
PPC_MSG_RM_HOST_ACTION
);
if
(
paca
[
hcpu
].
kvm_hstate
.
xics_phys
)
icp_native_cause_ipi_rm
(
hcpu
);
else
opal_rm_int_set_mfrr
(
get_hard_smp_processor_id
(
hcpu
),
IPI_PRIORITY
);
kvmppc_set_host_ipi
(
hcpu
,
1
);
smp_mb
();
kvmhv_rm_send_ipi
(
hcpu
);
}
#else
/* Empty variant of icp_send_hcore_msg() for the #else branch: does nothing. */
static
inline
void
icp_send_hcore_msg
(
int
hcore
,
struct
kvm_vcpu
*
vcpu
)
{
}
...
...
@@ -730,7 +728,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
++
vcpu
->
stat
.
pthru_host
;
if
(
state
->
intr_cpu
!=
pcpu
)
{
++
vcpu
->
stat
.
pthru_bad_aff
;
xics_opal_
rm_
set_server
(
state
->
host_irq
,
pcpu
);
xics_opal_set_server
(
state
->
host_irq
,
pcpu
);
}
state
->
intr_cpu
=
-
1
;
}
...
...
@@ -758,16 +756,16 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
if
(
xics_phys
)
{
_stwcix
(
xics_phys
+
XICS_XIRR
,
xirr
);
}
else
{
rc
=
opal_
rm_
int_eoi
(
be32_to_cpu
(
xirr
));
rc
=
opal_int_eoi
(
be32_to_cpu
(
xirr
));
*
again
=
rc
>
0
;
}
}
static
int
xics_opal_
rm_
set_server
(
unsigned
int
hw_irq
,
int
server_cpu
)
static
int
xics_opal_set_server
(
unsigned
int
hw_irq
,
int
server_cpu
)
{
unsigned
int
mangle_cpu
=
get_hard_smp_processor_id
(
server_cpu
)
<<
2
;
return
opal_
rm_
set_xive
(
hw_irq
,
mangle_cpu
,
DEFAULT_PRIORITY
);
return
opal_set_xive
(
hw_irq
,
mangle_cpu
,
DEFAULT_PRIORITY
);
}
/*
...
...
arch/powerpc/kvm/book3s_hv_rmhandlers.S
View file @
da0e7e62
...
...
@@ -148,6 +148,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
addi
r1
,
r1
,
112
ld
r7
,
HSTATE_HOST_MSR
(
r13
)
/
*
*
If
we
came
back
from
the
guest
via
a
relocation
-
on
interrupt
,
*
we
will
be
in
virtual
mode
at
this
point
,
which
makes
it
a
*
little
easier
to
get
back
to
the
caller
.
*/
mfmsr
r0
andi
.
r0
,
r0
,
MSR_IR
/*
in
real
mode
?
*/
bne
.
Lvirt_return
cmpwi
cr1
,
r12
,
BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi
r12
,
BOOK3S_INTERRUPT_EXTERNAL
beq
11
f
...
...
@@ -181,6 +190,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr
SPRN_HSRR1
,
r7
ba
0xe80
/
*
Virtual
-
mode
return
-
can
't get here for HMI or machine check */
.
Lvirt_return
:
cmpwi
r12
,
BOOK3S_INTERRUPT_EXTERNAL
beq
16
f
cmpwi
r12
,
BOOK3S_INTERRUPT_H_DOORBELL
beq
17
f
andi
.
r0
,
r7
,
MSR_EE
/*
were
interrupts
hard
-
enabled
?
*/
beq
18
f
mtmsrd
r7
,
1
/*
if
so
then
re
-
enable
them
*/
18
:
mtlr
r8
blr
16
:
mtspr
SPRN_HSRR0
,
r8
/*
jump
to
reloc
-
on
external
vector
*/
mtspr
SPRN_HSRR1
,
r7
b
exc_virt_0x4500_hardware_interrupt
17
:
mtspr
SPRN_HSRR0
,
r8
mtspr
SPRN_HSRR1
,
r7
b
exc_virt_0x4e80_h_doorbell
kvmppc_primary_no_guest
:
/
*
We
handle
this
much
like
a
ceded
vcpu
*/
/
*
put
the
HDEC
into
the
DEC
,
since
HDEC
interrupts
don
't wake us */
...
...
@@ -518,6 +547,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/*
Stack
frame
offsets
*/
#define STACK_SLOT_TID (112-16)
#define STACK_SLOT_PSSCR (112-24)
#define STACK_SLOT_PID (112-32)
.
global
kvmppc_hv_entry
kvmppc_hv_entry
:
...
...
@@ -530,6 +560,7 @@ kvmppc_hv_entry:
*
R1
=
host
R1
*
R2
=
TOC
*
all
other
volatile
GPRS
=
free
*
Does
not
preserve
non
-
volatile
GPRs
or
CR
fields
*/
mflr
r0
std
r0
,
PPC_LR_STKOFF
(
r1
)
...
...
@@ -549,32 +580,38 @@ kvmppc_hv_entry:
bl
kvmhv_start_timing
1
:
#endif
/
*
Clear
out
SLB
*/
/
*
Use
cr7
as
an
indication
of
radix
mode
*/
ld
r5
,
HSTATE_KVM_VCORE
(
r13
)
ld
r9
,
VCORE_KVM
(
r5
)
/*
pointer
to
struct
kvm
*/
lbz
r0
,
KVM_RADIX
(
r9
)
cmpwi
cr7
,
r0
,
0
/
*
Clear
out
SLB
if
hash
*/
bne
cr7
,
2
f
li
r6
,
0
slbmte
r6
,
r6
slbia
ptesync
2
:
/
*
*
POWER7
/
POWER8
host
->
guest
partition
switch
code
.
*
We
don
't have to lock against concurrent tlbies,
*
but
we
do
have
to
coordinate
across
hardware
threads
.
*/
/
*
Set
bit
in
entry
map
iff
exit
map
is
zero
.
*/
ld
r5
,
HSTATE_KVM_VCORE
(
r13
)
li
r7
,
1
lbz
r6
,
HSTATE_PTID
(
r13
)
sld
r7
,
r7
,
r6
addi
r
9
,
r5
,
VCORE_ENTRY_EXIT
21
:
lwarx
r3
,
0
,
r
9
addi
r
8
,
r5
,
VCORE_ENTRY_EXIT
21
:
lwarx
r3
,
0
,
r
8
cmpwi
r3
,
0x100
/*
any
threads
starting
to
exit
?
*/
bge
secondary_too_late
/*
if
so
we
're too late to the party */
or
r3
,
r3
,
r7
stwcx
.
r3
,
0
,
r
9
stwcx
.
r3
,
0
,
r
8
bne
21
b
/
*
Primary
thread
switches
to
guest
partition
.
*/
ld
r9
,
VCORE_KVM
(
r5
)
/*
pointer
to
struct
kvm
*/
cmpwi
r6
,
0
bne
10
f
lwz
r7
,
KVM_LPID
(
r9
)
...
...
@@ -590,30 +627,44 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/
*
See
if
we
need
to
flush
the
TLB
*/
lhz
r6
,
PACAPACAINDEX
(
r13
)
/*
test_bit
(
cpu
,
need_tlb_flush
)
*/
BEGIN_FTR_SECTION
/
*
*
On
POWER9
,
individual
threads
can
come
in
here
,
but
the
*
TLB
is
shared
between
the
4
threads
in
a
core
,
hence
*
invalidating
on
one
thread
invalidates
for
all
.
*
Thus
we
make
all
4
threads
use
the
same
bit
here
.
*/
clrrdi
r6
,
r6
,
2
END_FTR_SECTION_IFSET
(
CPU_FTR_ARCH_300
)
clrldi
r7
,
r6
,
64
-
6
/*
extract
bit
number
(
6
bits
)
*/
srdi
r6
,
r6
,
6
/*
doubleword
number
*/
sldi
r6
,
r6
,
3
/*
address
offset
*/
add
r6
,
r6
,
r9
addi
r6
,
r6
,
KVM_NEED_FLUSH
/*
dword
in
kvm
->
arch
.
need_tlb_flush
*/
li
r
0
,
1
sld
r
0
,
r0
,
r7
li
r
8
,
1
sld
r
8
,
r8
,
r7
ld
r7
,
0
(
r6
)
and
.
r7
,
r7
,
r
0
and
.
r7
,
r7
,
r
8
beq
22
f
23
:
ldarx
r7
,
0
,
r6
/*
if
set
,
clear
the
bit
*/
andc
r7
,
r7
,
r0
stdcx
.
r7
,
0
,
r6
bne
23
b
/
*
Flush
the
TLB
of
any
entries
for
this
LPID
*/
lwz
r6
,
KVM_TLB_SETS
(
r9
)
li
r0
,
0
/*
RS
for
P9
version
of
tlbiel
*/
mtctr
r6
lwz
r0
,
KVM_TLB_SETS
(
r9
)
mtctr
r0
li
r7
,
0x800
/*
IS
field
=
0
b10
*/
ptesync
28
:
tlbiel
r7
li
r0
,
0
/*
RS
for
P9
version
of
tlbiel
*/
bne
cr7
,
29
f
28
:
tlbiel
r7
/*
On
P9
,
rs
=
0
,
RIC
=
0
,
PRS
=
0
,
R
=
0
*/
addi
r7
,
r7
,
0x1000
bdnz
28
b
ptesync
b
30
f
29
:
PPC_TLBIEL
(7,0,2,1,1)
/*
for
radix
,
RIC
=
2
,
PRS
=
1
,
R
=
1
*/
addi
r7
,
r7
,
0x1000
bdnz
29
b
30
:
ptesync
23
:
ldarx
r7
,
0
,
r6
/*
clear
the
bit
after
TLB
flushed
*/
andc
r7
,
r7
,
r8
stdcx
.
r7
,
0
,
r6
bne
23
b
/
*
Add
timebase
offset
onto
timebase
*/
22
:
ld
r8
,
VCORE_TB_OFFSET
(
r5
)
...
...
@@ -658,7 +709,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
beq
kvmppc_primary_no_guest
kvmppc_got_guest
:
/
*
Load
up
guest
SLB
entries
*/
/
*
Load
up
guest
SLB
entries
(
N
.
B
.
slb_max
will
be
0
for
radix
)
*/
lwz
r5
,
VCPU_SLB_MAX
(
r4
)
cmpwi
r5
,
0
beq
9
f
...
...
@@ -696,8 +747,10 @@ kvmppc_got_guest:
BEGIN_FTR_SECTION
mfspr
r5
,
SPRN_TIDR
mfspr
r6
,
SPRN_PSSCR
mfspr
r7
,
SPRN_PID
std
r5
,
STACK_SLOT_TID
(
r1
)
std
r6
,
STACK_SLOT_PSSCR
(
r1
)
std
r7
,
STACK_SLOT_PID
(
r1
)
END_FTR_SECTION_IFSET
(
CPU_FTR_ARCH_300
)
BEGIN_FTR_SECTION
...
...
@@ -823,6 +876,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr
SPRN_BESCR
,
r6
mtspr
SPRN_PID
,
r7
mtspr
SPRN_WORT
,
r8
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET
(
CPU_FTR_POWER9_DD1
)
BEGIN_FTR_SECTION
/
*
POWER8
-
only
registers
*/
ld
r5
,
VCPU_TCSCR
(
r4
)
...
...
@@ -1057,13 +1113,13 @@ hdec_soon:
kvmppc_interrupt_hv
:
/
*
*
Register
contents
:
*
R12
=
interrupt
vector
*
R12
=
(
guest
CR
<<
32
)
|
interrupt
vector
*
R13
=
PACA
*
guest
CR
,
R12
saved
in
shadow
VCPU
SCRATCH1
/
0
*
guest
R12
saved
in
shadow
VCPU
SCRATCH0
*
guest
CTR
saved
in
shadow
VCPU
SCRATCH1
if
RELOCATABLE
*
guest
R13
saved
in
SPRN_SCRATCH0
*/
std
r9
,
HSTATE_SCRATCH2
(
r13
)
lbz
r9
,
HSTATE_IN_GUEST
(
r13
)
cmpwi
r9
,
KVM_GUEST_MODE_HOST_HV
beq
kvmppc_bad_host_intr
...
...
@@ -1094,8 +1150,9 @@ kvmppc_interrupt_hv:
std
r10
,
VCPU_GPR
(
R10
)(
r9
)
std
r11
,
VCPU_GPR
(
R11
)(
r9
)
ld
r3
,
HSTATE_SCRATCH0
(
r13
)
lwz
r4
,
HSTATE_SCRATCH1
(
r13
)
std
r3
,
VCPU_GPR
(
R12
)(
r9
)
/
*
CR
is
in
the
high
half
of
r12
*/
srdi
r4
,
r12
,
32
stw
r4
,
VCPU_CR
(
r9
)
BEGIN_FTR_SECTION
ld
r3
,
HSTATE_CFAR
(
r13
)
...
...
@@ -1114,6 +1171,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mfspr
r11
,
SPRN_SRR1
std
r10
,
VCPU_SRR0
(
r9
)
std
r11
,
VCPU_SRR1
(
r9
)
/
*
trap
is
in
the
low
half
of
r12
,
clear
CR
from
the
high
half
*/
clrldi
r12
,
r12
,
32
andi
.
r0
,
r12
,
2
/*
need
to
read
HSRR0
/
1
?
*/
beq
1
f
mfspr
r10
,
SPRN_HSRR0
...
...
@@ -1149,7 +1208,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
11
:
stw
r3
,
VCPU_HEIR
(
r9
)
/
*
these
are
volatile
across
C
function
calls
*/
#ifdef CONFIG_RELOCATABLE
ld
r3
,
HSTATE_SCRATCH1
(
r13
)
mtctr
r3
#else
mfctr
r3
#endif
mfxer
r4
std
r3
,
VCPU_CTR
(
r9
)
std
r4
,
VCPU_XER
(
r9
)
...
...
@@ -1285,11 +1349,15 @@ mc_cont:
mtspr
SPRN_CTRLT
,
r6
4
:
/
*
Read
the
guest
SLB
and
save
it
away
*/
ld
r5
,
VCPU_KVM
(
r9
)
lbz
r0
,
KVM_RADIX
(
r5
)
cmpwi
r0
,
0
li
r5
,
0
bne
3
f
/*
for
radix
,
save
0
entries
*/
lwz
r0
,
VCPU_SLB_NR
(
r9
)
/*
number
of
entries
in
SLB
*/
mtctr
r0
li
r6
,
0
addi
r7
,
r9
,
VCPU_SLB
li
r5
,
0
1
:
slbmfee
r8
,
r6
andis
.
r0
,
r8
,
SLB_ESID_V
@
h
beq
2
f
...
...
@@ -1301,7 +1369,7 @@ mc_cont:
addi
r5
,
r5
,
1
2
:
addi
r6
,
r6
,
1
bdnz
1
b
stw
r5
,
VCPU_SLB_MAX
(
r9
)
3
:
stw
r5
,
VCPU_SLB_MAX
(
r9
)
/
*
*
Save
the
guest
PURR
/
SPURR
...
...
@@ -1550,9 +1618,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
ld
r5
,
STACK_SLOT_TID
(
r1
)
ld
r6
,
STACK_SLOT_PSSCR
(
r1
)
ld
r7
,
STACK_SLOT_PID
(
r1
)
mtspr
SPRN_TIDR
,
r5
mtspr
SPRN_PSSCR
,
r6
mtspr
SPRN_PID
,
r7
END_FTR_SECTION_IFSET
(
CPU_FTR_ARCH_300
)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
END_FTR_SECTION_IFSET
(
CPU_FTR_POWER9_DD1
)
/
*
*
POWER7
/
POWER8
guest
->
host
partition
switch
code
.
...
...
@@ -1663,6 +1736,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
isync
/
*
load
host
SLB
entries
*/
BEGIN_MMU_FTR_SECTION
b
0
f
END_MMU_FTR_SECTION_IFSET
(
MMU_FTR_TYPE_RADIX
)
ld
r8
,
PACA_SLBSHADOWPTR
(
r13
)
.
rept
SLB_NUM_BOLTED
...
...
@@ -1675,7 +1751,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
slbmte
r6
,
r5
1
:
addi
r8
,
r8
,
16
.
endr
0
:
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
/
*
Finish
timing
,
if
we
have
a
vcpu
*/
ld
r4
,
HSTATE_KVM_VCPU
(
r13
)
...
...
@@ -1702,11 +1778,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
*
reflect
the
HDSI
to
the
guest
as
a
DSI
.
*/
kvmppc_hdsi
:
ld
r3
,
VCPU_KVM
(
r9
)
lbz
r0
,
KVM_RADIX
(
r3
)
cmpwi
r0
,
0
mfspr
r4
,
SPRN_HDAR
mfspr
r6
,
SPRN_HDSISR
bne
.
Lradix_hdsi
/*
on
radix
,
just
save
DAR
/
DSISR
/
ASDR
*/
/
*
HPTE
not
found
fault
or
protection
fault
?
*/
andis
.
r0
,
r6
,
(
DSISR_NOHPTE
|
DSISR_PROTFAULT
)
@
h
beq
1
f
/*
if
not
,
send
it
to
the
guest
*/
BEGIN_FTR_SECTION
mfspr
r5
,
SPRN_ASDR
/*
on
POWER9
,
use
ASDR
to
get
VSID
*/
b
4
f
END_FTR_SECTION_IFSET
(
CPU_FTR_ARCH_300
)
andi
.
r0
,
r11
,
MSR_DR
/*
data
relocation
enabled
?
*/
beq
3
f
clrrdi
r0
,
r4
,
28
...
...
@@ -1776,13 +1860,29 @@ fast_interrupt_c_return:
stb
r0
,
HSTATE_IN_GUEST
(
r13
)
b
guest_exit_cont
.
Lradix_hdsi
:
std
r4
,
VCPU_FAULT_DAR
(
r9
)
stw
r6
,
VCPU_FAULT_DSISR
(
r9
)
.
Lradix_hisi
:
mfspr
r5
,
SPRN_ASDR
std
r5
,
VCPU_FAULT_GPA
(
r9
)
b
guest_exit_cont
/*
*
Similarly
for
an
HISI
,
reflect
it
to
the
guest
as
an
ISI
unless
*
it
is
an
HPTE
not
found
fault
for
a
page
that
we
have
paged
out
.
*/
kvmppc_hisi
:
ld
r3
,
VCPU_KVM
(
r9
)
lbz
r0
,
KVM_RADIX
(
r3
)
cmpwi
r0
,
0
bne
.
Lradix_hisi
/*
for
radix
,
just
save
ASDR
*/
andis
.
r0
,
r11
,
SRR1_ISI_NOPT
@
h
beq
1
f
BEGIN_FTR_SECTION
mfspr
r5
,
SPRN_ASDR
/*
on
POWER9
,
use
ASDR
to
get
VSID
*/
b
4
f
END_FTR_SECTION_IFSET
(
CPU_FTR_ARCH_300
)
andi
.
r0
,
r11
,
MSR_IR
/*
instruction
relocation
enabled
?
*/
beq
3
f
clrrdi
r0
,
r10
,
28
...
...
arch/powerpc/kvm/book3s_segment.S
View file @
da0e7e62
...
...
@@ -167,20 +167,38 @@ kvmppc_handler_trampoline_enter_end:
*
*
*****************************************************************************/
.
global
kvmppc_handler_trampoline_exit
kvmppc_handler_trampoline_exit
:
.
global
kvmppc_interrupt_pr
kvmppc_interrupt_pr
:
/
*
64
-
bit
entry
.
Register
usage
at
this
point
:
*
*
SPRG_SCRATCH0
=
guest
R13
*
R12
=
(
guest
CR
<<
32
)
|
exit
handler
id
*
R13
=
PACA
*
HSTATE
.
SCRATCH0
=
guest
R12
*
HSTATE
.
SCRATCH1
=
guest
CTR
if
RELOCATABLE
*/
#ifdef CONFIG_PPC64
/
*
Match
32
-
bit
entry
*/
#ifdef CONFIG_RELOCATABLE
std
r9
,
HSTATE_SCRATCH2
(
r13
)
ld
r9
,
HSTATE_SCRATCH1
(
r13
)
mtctr
r9
ld
r9
,
HSTATE_SCRATCH2
(
r13
)
#endif
rotldi
r12
,
r12
,
32
/*
Flip
R12
halves
for
stw
*/
stw
r12
,
HSTATE_SCRATCH1
(
r13
)
/*
CR
is
now
in
the
low
half
*/
srdi
r12
,
r12
,
32
/*
shift
trap
into
low
half
*/
#endif
.
global
kvmppc_handler_trampoline_exit
kvmppc_handler_trampoline_exit
:
/
*
Register
usage
at
this
point
:
*
*
SPRG_SCRATCH0
=
guest
R13
*
R12
=
exit
handler
id
*
R13
=
shadow
vcpu
(
32
-
bit
)
or
PACA
(
64
-
bit
)
*
SPRG_SCRATCH0
=
guest
R13
*
R12
=
exit
handler
id
*
R13
=
shadow
vcpu
(
32
-
bit
)
or
PACA
(
64
-
bit
)
*
HSTATE
.
SCRATCH0
=
guest
R12
*
HSTATE
.
SCRATCH1
=
guest
CR
*
*/
/
*
Save
registers
*/
...
...
arch/powerpc/kvm/powerpc.c
View file @
da0e7e62
...
...
@@ -565,6 +565,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case
KVM_CAP_PPC_HWRNG
:
r
=
kvmppc_hwrng_present
();
break
;
case
KVM_CAP_PPC_MMU_RADIX
:
r
=
!!
(
hv_enabled
&&
radix_enabled
());
break
;
case
KVM_CAP_PPC_MMU_HASH_V3
:
r
=
!!
(
hv_enabled
&&
!
radix_enabled
()
&&
cpu_has_feature
(
CPU_FTR_ARCH_300
));
break
;
#endif
case
KVM_CAP_SYNC_MMU
:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
...
...
@@ -1468,6 +1475,31 @@ long kvm_arch_vm_ioctl(struct file *filp,
r
=
kvm_vm_ioctl_rtas_define_token
(
kvm
,
argp
);
break
;
}
case
KVM_PPC_CONFIGURE_V3_MMU
:
{
struct
kvm
*
kvm
=
filp
->
private_data
;
struct
kvm_ppc_mmuv3_cfg
cfg
;
r
=
-
EINVAL
;
if
(
!
kvm
->
arch
.
kvm_ops
->
configure_mmu
)
goto
out
;
r
=
-
EFAULT
;
if
(
copy_from_user
(
&
cfg
,
argp
,
sizeof
(
cfg
)))
goto
out
;
r
=
kvm
->
arch
.
kvm_ops
->
configure_mmu
(
kvm
,
&
cfg
);
break
;
}
case
KVM_PPC_GET_RMMU_INFO
:
{
struct
kvm
*
kvm
=
filp
->
private_data
;
struct
kvm_ppc_rmmu_info
info
;
r
=
-
EINVAL
;
if
(
!
kvm
->
arch
.
kvm_ops
->
get_rmmu_info
)
goto
out
;
r
=
kvm
->
arch
.
kvm_ops
->
get_rmmu_info
(
kvm
,
&
info
);
if
(
r
>=
0
&&
copy_to_user
(
argp
,
&
info
,
sizeof
(
info
)))
r
=
-
EFAULT
;
break
;
}
default:
{
struct
kvm
*
kvm
=
filp
->
private_data
;
r
=
kvm
->
arch
.
kvm_ops
->
arch_vm_ioctl
(
filp
,
ioctl
,
arg
);
...
...
arch/powerpc/mm/init-common.c
View file @
da0e7e62
...
...
@@ -41,6 +41,7 @@ static void pmd_ctor(void *addr)
}
struct
kmem_cache
*
pgtable_cache
[
MAX_PGTABLE_INDEX_SIZE
];
EXPORT_SYMBOL_GPL
(
pgtable_cache
);
/* used by kvm_hv module */
/*
* Create a kmem_cache() for pagetables. This is not used for PTE
...
...
@@ -86,7 +87,7 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
pr_debug
(
"Allocated pgtable cache for order %d
\n
"
,
shift
);
}
EXPORT_SYMBOL_GPL
(
pgtable_cache_add
);
/* used by kvm_hv module */
void
pgtable_cache_init
(
void
)
{
...
...
arch/powerpc/mm/init_64.c
View file @
da0e7e62
...
...
@@ -42,6 +42,8 @@
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
...
...
@@ -344,12 +346,45 @@ static int __init parse_disable_radix(char *p)
}
early_param
(
"disable_radix"
,
parse_disable_radix
);
/*
 * If we're running under a hypervisor, radix may only be used when the
 * hypervisor is willing to do it, which it signals through the contents
 * of /chosen/ibm,architecture-vec-5.  When the property is present but
 * too short to hold the radix byte, or the radix bit in it is clear,
 * drop the radix feature bit so we fall back to hash.  If the chosen
 * node is reported as not found, or the property is missing, the
 * feature bits are left untouched.
 */
static void early_check_vec5(void)
{
	unsigned long dt_root, chosen_node;
	const u8 *ov5;
	int ov5_len;

	dt_root = of_get_flat_dt_root();
	chosen_node = of_get_flat_dt_subnode_by_name(dt_root, "chosen");
	if (chosen_node == -FDT_ERR_NOTFOUND)
		return;

	ov5 = of_get_flat_dt_prop(chosen_node, "ibm,architecture-vec-5",
				  &ov5_len);
	if (!ov5)
		return;

	/* Property reaches the radix byte and the radix bit is set: keep radix */
	if (ov5_len > OV5_INDX(OV5_MMU_RADIX_300) &&
	    (ov5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300)))
		return;

	/* Hypervisor doesn't support radix */
	cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
}
void
__init
mmu_early_init_devtree
(
void
)
{
/* Disable radix mode based on kernel command line. */
if
(
disable_radix
)
cur_cpu_spec
->
mmu_features
&=
~
MMU_FTR_TYPE_RADIX
;
/*
* Check /chosen/ibm,architecture-vec-5 if running as a guest.
* When running bare-metal, we can use radix if we like
* even though the ibm,architecture-vec-5 property created by
* skiboot doesn't have the necessary bits set.
*/
if
(
early_radix_enabled
()
&&
!
(
mfmsr
()
&
MSR_HV
))
early_check_vec5
();
if
(
early_radix_enabled
())
radix__early_init_devtree
();
else
...
...
arch/powerpc/mm/pgtable-radix.c
View file @
da0e7e62
...
...
@@ -414,6 +414,8 @@ void __init radix__early_init_mmu(void)
mtspr
(
SPRN_LPCR
,
lpcr
|
LPCR_UPRT
|
LPCR_HR
);
radix_init_partition_table
();
radix_init_amor
();
}
else
{
radix_init_pseries
();
}
memblock_set_current_limit
(
MEMBLOCK_ALLOC_ANYWHERE
);
...
...
arch/powerpc/mm/pgtable_64.c
View file @
da0e7e62
...
...
@@ -458,13 +458,23 @@ void __init mmu_partition_table_init(void)
void
mmu_partition_table_set_entry
(
unsigned
int
lpid
,
unsigned
long
dw0
,
unsigned
long
dw1
)
{
unsigned
long
old
=
be64_to_cpu
(
partition_tb
[
lpid
].
patb0
);
partition_tb
[
lpid
].
patb0
=
cpu_to_be64
(
dw0
);
partition_tb
[
lpid
].
patb1
=
cpu_to_be64
(
dw1
);
/* Global flush of TLBs and partition table caches for this lpid */
/*
* Global flush of TLBs and partition table caches for this lpid.
* The type of flush (hash or radix) depends on what the previous
* use of this partition ID was, not the new use.
*/
asm
volatile
(
"ptesync"
:
:
:
"memory"
);
asm
volatile
(
PPC_TLBIE_5
(
%
0
,
%
1
,
2
,
0
,
0
)
:
:
"r"
(
TLBIEL_INVAL_SET_LPID
),
"r"
(
lpid
));
if
(
old
&
PATB_HR
)
asm
volatile
(
PPC_TLBIE_5
(
%
0
,
%
1
,
2
,
0
,
1
)
:
:
"r"
(
TLBIEL_INVAL_SET_LPID
),
"r"
(
lpid
));
else
asm
volatile
(
PPC_TLBIE_5
(
%
0
,
%
1
,
2
,
0
,
0
)
:
:
"r"
(
TLBIEL_INVAL_SET_LPID
),
"r"
(
lpid
));
asm
volatile
(
"eieio; tlbsync; ptesync"
:
:
:
"memory"
);
}
EXPORT_SYMBOL_GPL
(
mmu_partition_table_set_entry
);
...
...
arch/powerpc/platforms/powernv/opal-wrappers.S
View file @
da0e7e62
...
...
@@ -58,14 +58,16 @@ END_FTR_SECTION(0, 1); \
#define OPAL_CALL(name, token) \
_GLOBAL_TOC
(
name
)
; \
mfmsr
r12
; \
mflr
r0
; \
andi
.
r11
,
r12
,
MSR_IR
|
MSR_DR
; \
std
r0
,
PPC_LR_STKOFF
(
r1
)
; \
li
r0
,
token
; \
beq
opal_real_call
; \
OPAL_BRANCH
(
opal_tracepoint_entry
)
\
mfcr
r1
2
; \
stw
r1
2
,
8
(
r1
)
; \
mfcr
r1
1
; \
stw
r1
1
,
8
(
r1
)
; \
li
r11
,
0
; \
mfmsr
r12
; \
ori
r11
,
r11
,
MSR_EE
; \
std
r12
,
PACASAVEDMSR
(
r13
)
; \
andc
r12
,
r12
,
r11
; \
...
...
@@ -98,6 +100,30 @@ opal_return:
mtcr
r4
;
rfid
opal_real_call
:
mfcr
r11
stw
r11
,
8
(
r1
)
/
*
Set
opal
return
address
*/
LOAD_REG_ADDR
(
r11
,
opal_return_realmode
)
mtlr
r11
li
r11
,
MSR_LE
andc
r12
,
r12
,
r11
mtspr
SPRN_HSRR1
,
r12
LOAD_REG_ADDR
(
r11
,
opal
)
ld
r12
,
8
(
r11
)
ld
r2
,
0
(
r11
)
mtspr
SPRN_HSRR0
,
r12
hrfid
opal_return_realmode
:
FIXUP_ENDIAN
ld
r2
,
PACATOC
(
r13
)
;
lwz
r11
,
8
(
r1
)
;
ld
r12
,
PPC_LR_STKOFF
(
r1
)
mtcr
r11
;
mtlr
r12
blr
#ifdef CONFIG_TRACEPOINTS
opal_tracepoint_entry
:
stdu
r1
,-
STACKFRAMESIZE
(
r1
)
...
...
@@ -155,36 +181,6 @@ opal_tracepoint_return:
blr
#endif
#define OPAL_CALL_REAL(name, token) \
_GLOBAL_TOC
(
name
)
; \
mflr
r0
; \
std
r0
,
PPC_LR_STKOFF
(
r1
)
; \
li
r0
,
token
; \
mfcr
r12
; \
stw
r12
,
8
(
r1
)
; \
\
/
*
Set
opal
return
address
*/
\
LOAD_REG_ADDR
(
r11
,
opal_return_realmode
)
; \
mtlr
r11
; \
mfmsr
r12
; \
li
r11
,
MSR_LE
; \
andc
r12
,
r12
,
r11
; \
mtspr
SPRN_HSRR1
,
r12
; \
LOAD_REG_ADDR
(
r11
,
opal
)
; \
ld
r12
,
8
(
r11
)
; \
ld
r2
,
0
(
r11
)
; \
mtspr
SPRN_HSRR0
,
r12
; \
hrfid
opal_return_realmode
:
FIXUP_ENDIAN
ld
r2
,
PACATOC
(
r13
)
;
lwz
r11
,
8
(
r1
)
;
ld
r12
,
PPC_LR_STKOFF
(
r1
)
mtcr
r11
;
mtlr
r12
blr
OPAL_CALL
(
opal_invalid_call
,
OPAL_INVALID_CALL
)
;
OPAL_CALL
(
opal_console_write
,
OPAL_CONSOLE_WRITE
)
;
...
...
@@ -208,7 +204,6 @@ OPAL_CALL(opal_pci_config_write_byte, OPAL_PCI_CONFIG_WRITE_BYTE);
OPAL_CALL
(
opal_pci_config_write_half_word
,
OPAL_PCI_CONFIG_WRITE_HALF_WORD
)
;
OPAL_CALL
(
opal_pci_config_write_word
,
OPAL_PCI_CONFIG_WRITE_WORD
)
;
OPAL_CALL
(
opal_set_xive
,
OPAL_SET_XIVE
)
;
OPAL_CALL_REAL
(
opal_rm_set_xive
,
OPAL_SET_XIVE
)
;
OPAL_CALL
(
opal_get_xive
,
OPAL_GET_XIVE
)
;
OPAL_CALL
(
opal_register_exception_handler
,
OPAL_REGISTER_OPAL_EXCEPTION_HANDLER
)
;
OPAL_CALL
(
opal_pci_eeh_freeze_status
,
OPAL_PCI_EEH_FREEZE_STATUS
)
;
...
...
@@ -264,7 +259,6 @@ OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE);
OPAL_CALL
(
opal_manage_flash
,
OPAL_FLASH_MANAGE
)
;
OPAL_CALL
(
opal_update_flash
,
OPAL_FLASH_UPDATE
)
;
OPAL_CALL
(
opal_resync_timebase
,
OPAL_RESYNC_TIMEBASE
)
;
OPAL_CALL_REAL
(
opal_rm_resync_timebase
,
OPAL_RESYNC_TIMEBASE
)
;
OPAL_CALL
(
opal_check_token
,
OPAL_CHECK_TOKEN
)
;
OPAL_CALL
(
opal_dump_init
,
OPAL_DUMP_INIT
)
;
OPAL_CALL
(
opal_dump_info
,
OPAL_DUMP_INFO
)
;
...
...
@@ -280,9 +274,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL
(
opal_get_param
,
OPAL_GET_PARAM
)
;
OPAL_CALL
(
opal_set_param
,
OPAL_SET_PARAM
)
;
OPAL_CALL
(
opal_handle_hmi
,
OPAL_HANDLE_HMI
)
;
OPAL_CALL_REAL
(
opal_rm_handle_hmi
,
OPAL_HANDLE_HMI
)
;
OPAL_CALL
(
opal_config_cpu_idle_state
,
OPAL_CONFIG_CPU_IDLE_STATE
)
;
OPAL_CALL_REAL
(
opal_rm_config_cpu_idle_state
,
OPAL_CONFIG_CPU_IDLE_STATE
)
;
OPAL_CALL
(
opal_slw_set_reg
,
OPAL_SLW_SET_REG
)
;
OPAL_CALL
(
opal_register_dump_region
,
OPAL_REGISTER_DUMP_REGION
)
;
OPAL_CALL
(
opal_unregister_dump_region
,
OPAL_UNREGISTER_DUMP_REGION
)
;
...
...
@@ -304,12 +296,8 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
OPAL_CALL
(
opal_pci_get_power_state
,
OPAL_PCI_GET_POWER_STATE
)
;
OPAL_CALL
(
opal_pci_set_power_state
,
OPAL_PCI_SET_POWER_STATE
)
;
OPAL_CALL
(
opal_int_get_xirr
,
OPAL_INT_GET_XIRR
)
;
OPAL_CALL_REAL
(
opal_rm_int_get_xirr
,
OPAL_INT_GET_XIRR
)
;
OPAL_CALL
(
opal_int_set_cppr
,
OPAL_INT_SET_CPPR
)
;
OPAL_CALL
(
opal_int_eoi
,
OPAL_INT_EOI
)
;
OPAL_CALL_REAL
(
opal_rm_int_eoi
,
OPAL_INT_EOI
)
;
OPAL_CALL
(
opal_int_set_mfrr
,
OPAL_INT_SET_MFRR
)
;
OPAL_CALL_REAL
(
opal_rm_int_set_mfrr
,
OPAL_INT_SET_MFRR
)
;
OPAL_CALL
(
opal_pci_tce_kill
,
OPAL_PCI_TCE_KILL
)
;
OPAL_CALL
(
opal_nmmu_set_ptcr
,
OPAL_NMMU_SET_PTCR
)
;
OPAL_CALL_REAL
(
opal_rm_pci_tce_kill
,
OPAL_PCI_TCE_KILL
)
;
arch/powerpc/platforms/powernv/pci-ioda.c
View file @
da0e7e62
...
...
@@ -1970,11 +1970,6 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
if
(
phb
->
model
==
PNV_PHB_MODEL_PHB3
&&
phb
->
regs
)
pnv_pci_phb3_tce_invalidate
(
pe
,
rm
,
shift
,
index
,
npages
);
else
if
(
rm
)
opal_rm_pci_tce_kill
(
phb
->
opal_id
,
OPAL_PCI_TCE_KILL_PAGES
,
pe
->
pe_number
,
1u
<<
shift
,
index
<<
shift
,
npages
);
else
opal_pci_tce_kill
(
phb
->
opal_id
,
OPAL_PCI_TCE_KILL_PAGES
,
...
...
arch/powerpc/platforms/pseries/firmware.c
View file @
da0e7e62
...
...
@@ -127,7 +127,7 @@ static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
index
=
OV5_INDX
(
vec5_fw_features_table
[
i
].
feature
);
feat
=
OV5_FEAT
(
vec5_fw_features_table
[
i
].
feature
);
if
(
vec5
[
index
]
&
feat
)
if
(
index
<
len
&&
(
vec5
[
index
]
&
feat
)
)
powerpc_firmware_features
|=
vec5_fw_features_table
[
i
].
val
;
}
...
...
arch/powerpc/platforms/pseries/lpar.c
View file @
da0e7e62
...
...
@@ -717,6 +717,29 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
return
0
;
}
/*
 * Register the process table with the hypervisor.
 * Actually only used for radix, so far.
 *
 * Issues the H_REGISTER_PROC_TBL hcall with PROC_TABLE_NEW, adding
 * PROC_TABLE_RADIX | PROC_TABLE_GTSE when radix_enabled() is true;
 * base, page_size and table_size are passed through unchanged.
 *
 * The hcall is reissued for as long as it reports a long-busy status,
 * delaying for get_longbusy_msecs(rc) between attempts.  Any final
 * status other than H_SUCCESS is logged and hits BUG().
 *
 * Returns the final hcall status.
 */
static int pseries_lpar_register_process_table(unsigned long base,
			unsigned long page_size, unsigned long table_size)
{
	unsigned long flags = PROC_TABLE_NEW;
	long rc;

	if (radix_enabled())
		flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;

	while (1) {
		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
					page_size, table_size);
		if (H_IS_LONG_BUSY(rc)) {
			/* Hypervisor asked us to back off and retry. */
			mdelay(get_longbusy_msecs(rc));
			continue;
		}
		break;
	}

	if (rc == H_SUCCESS)
		return rc;

	pr_err("Failed to register process table (rc=%ld)\n", rc);
	BUG();
	return rc;
}
void
__init
hpte_init_pseries
(
void
)
{
mmu_hash_ops
.
hpte_invalidate
=
pSeries_lpar_hpte_invalidate
;
...
...
@@ -731,6 +754,12 @@ void __init hpte_init_pseries(void)
mmu_hash_ops
.
resize_hpt
=
pseries_lpar_resize_hpt
;
}
/*
 * Install the pseries implementation of the register_process_table hook
 * and log that the radix MMU is being used under a hypervisor.
 */
void radix_init_pseries(void)
{
	register_process_table = pseries_lpar_register_process_table;
	pr_info("Using radix MMU under hypervisor\n");
}
#ifdef CONFIG_PPC_SMLPAR
#define CMO_FREE_HINT_DEFAULT 1
static
int
cmo_free_hint_flag
=
CMO_FREE_HINT_DEFAULT
;
...
...
include/uapi/linux/kvm.h
View file @
da0e7e62
...
...
@@ -871,6 +871,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_USER_INSTR0 130
#define KVM_CAP_MSI_DEVID 131
#define KVM_CAP_PPC_HTM 132
#define KVM_CAP_PPC_MMU_RADIX 134
#define KVM_CAP_PPC_MMU_HASH_V3 135
#ifdef KVM_CAP_IRQ_ROUTING
...
...
@@ -1187,6 +1189,10 @@ struct kvm_s390_ucas_mapping {
#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
/* Available with KVM_CAP_PPC_RTAS */
#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
/* Available with KVM_CAP_PPC_MMU_RADIX */
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment