Commit 63b5cf04 authored by Marcelo Tosatti

Merge tag 'kvm-s390-20140422' of...

Merge tag 'kvm-s390-20140422' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into queue

Lazy storage key handling
-------------------------
Linux does not use the ACC and F bits of the storage key. Newer Linux
versions also do not use the storage keys for dirty and reference
tracking. We can optimize the guest handling for those guests for faults
as well as page-in and page-out by simply not caring about the guest
visible storage key. We trap guest storage key instruction to enable
those keys only on demand.

Migration bitmap
----------------

Until now s390 never provided a proper dirty bitmap.  Let's provide a
proper migration bitmap for s390. We also change the user dirty tracking
to a fault based mechanism. This makes the host completely independent
from the storage keys. Long term this will allow us to back guest memory
with large pages.

per-VM device attributes
------------------------
To avoid the introduction of new ioctls, let's provide the
attribute semantics also on the VM-"device".

Userspace controlled CMMA
-------------------------
The CMMA assist is changed from "always on" to "on if requested" via
per-VM device attributes. In addition a callback to reset all usage
states is provided.

Proper guest DAT handling for intercepts
----------------------------------------
While instructions handled by SIE take care of all addressing aspects,
KVM/s390 currently does not care about guest address translation of
intercepts. This worked out fine, because
- the s390 Linux kernel has a 1:1 mapping between kernel virtual<->real
 for all pages up to memory size
- intercepts happen only for a small amount of cases
- all of these intercepts happen to be in the kernel text for current
  distros

Of course we need to be better for other intercepts, kernel modules etc.
We provide the infrastructure and rework all in-kernel intercepts to work
on logical addresses (paging etc) instead of real ones. The code has
been running internally for several months now, so it is time for going
public.

GDB support
-----------
We provide breakpoints, single stepping and watchpoints.

Fixes/Cleanups
--------------
- Improve program check delivery
- Factor out the handling of transactional memory on program checks
- Use the existing define __LC_PGM_TDB
- Several cleanups in the lowcore structure
- Documentation

NOTES
-----
- All patches touching base s390 are either ACKed or written by the s390
  maintainers
- One base KVM patch "KVM: add kvm_is_error_gpa() helper"
- One patch introduces the notion of VM device attributes
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Conflicts:
	include/uapi/linux/kvm.h
parents 5c7411e2 e325fe69
......@@ -2314,8 +2314,8 @@ struct kvm_create_device {
4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
Type: device ioctl, vm ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
......@@ -2340,8 +2340,8 @@ struct kvm_device_attr {
4.81 KVM_HAS_DEVICE_ATTR
Capability: KVM_CAP_DEVICE_CTRL
Type: device ioctl
Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
Type: device ioctl, vm ioctl
Parameters: struct kvm_device_attr
Returns: 0 on success, -1 on error
Errors:
......
Generic vm interface
====================================
The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
struct kvm_device_attr as other devices, but targets VM-wide settings
and controls.
The groups and attributes per virtual machine, if any, are architecture
specific.
1. GROUP: KVM_S390_VM_MEM_CTRL
Architectures: s390
1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
Parameters: none
Returns: -EBUSY if a vcpu is already defined, otherwise 0
Enables CMMA for the virtual machine
1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
Parameters: none
Returns: 0
Clear the CMMA status for all guest pages, so any pages the guest marked
as unused are again used and may not be reclaimed by the host.
......@@ -78,3 +78,5 @@ DIAGNOSE function code 'X'501 - KVM breakpoint
If the function code specifies 0x501, breakpoint functions may be performed.
This function code is handled by userspace.
This diagnose function code has no subfunctions and uses no parameters.
......@@ -57,6 +57,20 @@ static inline void __ctl_clear_bit(unsigned int cr, unsigned int bit)
void smp_ctl_set_bit(int cr, int bit);
void smp_ctl_clear_bit(int cr, int bit);
/*
 * Bit-field view of a control register value (control register 0, going by
 * the name), overlaid on the raw unsigned long.
 * Only two control bits are named; the unnamed bit-fields are padding that
 * keeps lap and edat at their positions within the word.
 */
union ctlreg0 {
unsigned long val; /* raw register value */
struct {
#ifdef CONFIG_64BIT
unsigned long : 32; /* extra leading padding, only with CONFIG_64BIT */
#endif
unsigned long : 3;
unsigned long lap : 1; /* Low-address-protection control */
unsigned long : 4;
unsigned long edat : 1; /* Enhanced-DAT-enablement control */
unsigned long : 23;
};
};
#ifdef CONFIG_SMP
# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
......
......@@ -39,9 +39,17 @@ struct sca_entry {
__u64 reserved2[2];
} __attribute__((packed));
/*
 * The ipte_control word, accessible either as one raw value or as three
 * bit-fields of 1 + 31 + 32 bits.
 * NOTE(review): the meaning of k/kh/kg is not visible in this header --
 * presumably a lock bit plus two counters; confirm against the code that
 * uses this union.
 */
union ipte_control {
unsigned long val; /* whole word */
struct {
unsigned long k : 1;
unsigned long kh : 31;
unsigned long kg : 32;
};
};
struct sca_block {
__u64 ipte_control;
union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
__u64 reserved2;
......@@ -85,12 +93,26 @@ struct kvm_s390_sie_block {
__u8 reserved40[4]; /* 0x0040 */
#define LCTL_CR0 0x8000
#define LCTL_CR6 0x0200
#define LCTL_CR9 0x0040
#define LCTL_CR10 0x0020
#define LCTL_CR11 0x0010
#define LCTL_CR14 0x0002
__u16 lctl; /* 0x0044 */
__s16 icpua; /* 0x0046 */
#define ICTL_LPSW 0x00400000
#define ICTL_PINT 0x20000000
#define ICTL_LPSW 0x00400000
#define ICTL_STCTL 0x00040000
#define ICTL_ISKE 0x00004000
#define ICTL_SSKE 0x00002000
#define ICTL_RRBE 0x00001000
__u32 ictl; /* 0x0048 */
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
#define ICPT_PROGI 0x08
#define ICPT_INSTPROGI 0x0C
#define ICPT_OPEREXC 0x2C
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
__u8 icptcode; /* 0x0050 */
__u8 reserved51; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
......@@ -109,9 +131,21 @@ struct kvm_s390_sie_block {
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
__u8 reservedb0[30]; /* 0x00b0 */
__u16 iprcc; /* 0x00ce */
__u8 reservedd0[48]; /* 0x00d0 */
__u8 reservedb0[28]; /* 0x00b0 */
__u16 pgmilc; /* 0x00cc */
__u16 iprcc; /* 0x00ce */
__u32 dxc; /* 0x00d0 */
__u16 mcn; /* 0x00d4 */
__u8 perc; /* 0x00d6 */
__u8 peratmid; /* 0x00d7 */
__u64 peraddr; /* 0x00d8 */
__u8 eai; /* 0x00e0 */
__u8 peraid; /* 0x00e1 */
__u8 oai; /* 0x00e2 */
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
__u8 reservedf0[16]; /* 0x00f0 */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
__u8 reserved188[24]; /* 0x0188 */
......@@ -146,6 +180,8 @@ struct kvm_vcpu_stat {
u32 exit_instruction;
u32 instruction_lctl;
u32 instruction_lctlg;
u32 instruction_stctl;
u32 instruction_stctg;
u32 exit_program_interruption;
u32 exit_instr_and_program;
u32 deliver_external_call;
......@@ -164,6 +200,7 @@ struct kvm_vcpu_stat {
u32 instruction_stpx;
u32 instruction_stap;
u32 instruction_storage_key;
u32 instruction_ipte_interlock;
u32 instruction_stsch;
u32 instruction_chsc;
u32 instruction_stsi;
......@@ -183,13 +220,58 @@ struct kvm_vcpu_stat {
u32 diagnose_9c;
};
#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
#define PGM_PROTECTION 0x04
#define PGM_ADDRESSING 0x05
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
#define PGM_PROTECTION 0x04
#define PGM_ADDRESSING 0x05
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
#define PGM_FIXED_POINT_OVERFLOW 0x08
#define PGM_FIXED_POINT_DIVIDE 0x09
#define PGM_DECIMAL_OVERFLOW 0x0a
#define PGM_DECIMAL_DIVIDE 0x0b
#define PGM_HFP_EXPONENT_OVERFLOW 0x0c
#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
#define PGM_HFP_SIGNIFICANCE 0x0e
#define PGM_HFP_DIVIDE 0x0f
#define PGM_SEGMENT_TRANSLATION 0x10
#define PGM_PAGE_TRANSLATION 0x11
#define PGM_TRANSLATION_SPEC 0x12
#define PGM_SPECIAL_OPERATION 0x13
#define PGM_OPERAND 0x15
#define PGM_TRACE_TABEL 0x16
#define PGM_SPACE_SWITCH 0x1c
#define PGM_HFP_SQUARE_ROOT 0x1d
#define PGM_PC_TRANSLATION_SPEC 0x1f
#define PGM_AFX_TRANSLATION 0x20
#define PGM_ASX_TRANSLATION 0x21
#define PGM_LX_TRANSLATION 0x22
#define PGM_EX_TRANSLATION 0x23
#define PGM_PRIMARY_AUTHORITY 0x24
#define PGM_SECONDARY_AUTHORITY 0x25
#define PGM_LFX_TRANSLATION 0x26
#define PGM_LSX_TRANSLATION 0x27
#define PGM_ALET_SPECIFICATION 0x28
#define PGM_ALEN_TRANSLATION 0x29
#define PGM_ALE_SEQUENCE 0x2a
#define PGM_ASTE_VALIDITY 0x2b
#define PGM_ASTE_SEQUENCE 0x2c
#define PGM_EXTENDED_AUTHORITY 0x2d
#define PGM_LSTE_SEQUENCE 0x2e
#define PGM_ASTE_INSTANCE 0x2f
#define PGM_STACK_FULL 0x30
#define PGM_STACK_EMPTY 0x31
#define PGM_STACK_SPECIFICATION 0x32
#define PGM_STACK_TYPE 0x33
#define PGM_STACK_OPERATION 0x34
#define PGM_ASCE_TYPE 0x38
#define PGM_REGION_FIRST_TRANS 0x39
#define PGM_REGION_SECOND_TRANS 0x3a
#define PGM_REGION_THIRD_TRANS 0x3b
#define PGM_MONITOR 0x40
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
struct kvm_s390_interrupt_info {
struct list_head list;
......@@ -229,6 +311,45 @@ struct kvm_s390_float_interrupt {
unsigned int irq_count;
};
/*
 * Bookkeeping for one hardware watchpoint: an address, a second
 * (phys_addr) address, a length and a pointer to saved data.
 * NOTE(review): presumably old_data is a snapshot of the watched memory
 * used to detect modifications -- confirm in the guest-debug code.
 */
struct kvm_hw_wp_info_arch {
unsigned long addr;
unsigned long phys_addr;
int len;
char *old_data;
};
/*
 * Bookkeeping for one hardware breakpoint: an address and a length.
 * NOTE(review): presumably addr is a guest address and len the size of the
 * breakpoint range in bytes -- confirm in the guest-debug code.
 */
struct kvm_hw_bp_info_arch {
unsigned long addr;
int len;
};
/*
 * Only the upper 16 bits of kvm_guest_debug->control are arch specific.
 * Further KVM_GUESTDBG flags which can be used from userspace can be found in
 * arch/s390/include/uapi/asm/kvm.h
 */
#define KVM_GUESTDBG_EXIT_PENDING 0x10000000

/*
 * Guest-debug predicates. Each macro takes a pointer expression to an object
 * with a guest_debug flags field and yields non-zero when the tested flag is
 * set. The argument is parenthesized so that any pointer expression (for
 * example "base + idx") expands correctly.
 *
 * guestdbg_exit_pending() evaluates its argument twice; do not pass an
 * expression with side effects.
 */
#define guestdbg_enabled(vcpu) \
	((vcpu)->guest_debug & KVM_GUESTDBG_ENABLE)
#define guestdbg_sstep_enabled(vcpu) \
	((vcpu)->guest_debug & KVM_GUESTDBG_SINGLESTEP)
#define guestdbg_hw_bp_enabled(vcpu) \
	((vcpu)->guest_debug & KVM_GUESTDBG_USE_HW_BP)
/* An exit is pending only while guest debugging is enabled at all. */
#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
	((vcpu)->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
/*
 * Per-vcpu guest-debug state (embedded in struct kvm_vcpu_arch as
 * "guestdbg").
 * NOTE(review): cr0/cr9/cr10/cr11 presumably hold saved control register
 * values, and nr_hw_bp/nr_hw_wp presumably give the number of entries behind
 * hw_bp_info/hw_wp_info -- confirm in the guest-debug code.
 */
struct kvm_guestdbg_info_arch {
unsigned long cr0;
unsigned long cr9;
unsigned long cr10;
unsigned long cr11;
struct kvm_hw_bp_info_arch *hw_bp_info; /* hardware breakpoint records */
struct kvm_hw_wp_info_arch *hw_wp_info; /* hardware watchpoint records */
int nr_hw_bp;
int nr_hw_wp;
unsigned long last_bp;
};
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
......@@ -238,11 +359,13 @@ struct kvm_vcpu_arch {
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct tasklet_struct tasklet;
struct kvm_s390_pgm_info pgm;
union {
struct cpuid cpu_id;
u64 stidp_data;
};
struct gmap *gmap;
struct kvm_guestdbg_info_arch guestdbg;
#define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
unsigned long pfault_token;
unsigned long pfault_select;
......@@ -285,7 +408,9 @@ struct kvm_arch{
struct gmap *gmap;
int css_support;
int use_irqchip;
int use_cmma;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
};
#define KVM_HVA_ERR_BAD (-1UL)
......
......@@ -56,13 +56,14 @@ struct _lowcore {
__u16 pgm_code; /* 0x008e */
__u32 trans_exc_code; /* 0x0090 */
__u16 mon_class_num; /* 0x0094 */
__u16 per_perc_atmid; /* 0x0096 */
__u8 per_code; /* 0x0096 */
__u8 per_atmid; /* 0x0097 */
__u32 per_address; /* 0x0098 */
__u32 monitor_code; /* 0x009c */
__u8 exc_access_id; /* 0x00a0 */
__u8 per_access_id; /* 0x00a1 */
__u8 op_access_id; /* 0x00a2 */
__u8 ar_access_id; /* 0x00a3 */
__u8 ar_mode_id; /* 0x00a3 */
__u8 pad_0x00a4[0x00b8-0x00a4]; /* 0x00a4 */
__u16 subchannel_id; /* 0x00b8 */
__u16 subchannel_nr; /* 0x00ba */
......@@ -196,12 +197,13 @@ struct _lowcore {
__u16 pgm_code; /* 0x008e */
__u32 data_exc_code; /* 0x0090 */
__u16 mon_class_num; /* 0x0094 */
__u16 per_perc_atmid; /* 0x0096 */
__u8 per_code; /* 0x0096 */
__u8 per_atmid; /* 0x0097 */
__u64 per_address; /* 0x0098 */
__u8 exc_access_id; /* 0x00a0 */
__u8 per_access_id; /* 0x00a1 */
__u8 op_access_id; /* 0x00a2 */
__u8 ar_access_id; /* 0x00a3 */
__u8 ar_mode_id; /* 0x00a3 */
__u8 pad_0x00a4[0x00a8-0x00a4]; /* 0x00a4 */
__u64 trans_exc_code; /* 0x00a8 */
__u64 monitor_code; /* 0x00b0 */
......
......@@ -16,6 +16,8 @@ typedef struct {
unsigned long vdso_base;
/* The mmu context has extended page tables. */
unsigned int has_pgste:1;
/* The mmu context uses storage keys. */
unsigned int use_skey:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
......
......@@ -23,6 +23,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
mm->context.has_pgste = 0;
mm->context.use_skey = 0;
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
......
......@@ -22,7 +22,8 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mmu_gather *, unsigned long *);
void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long);
void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
bool init_skey);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq);
......
......@@ -309,7 +309,8 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x00200000UL
#define PGSTE_GR_BIT 0x00040000UL
#define PGSTE_GC_BIT 0x00020000UL
#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */
#define PGSTE_UC_BIT 0x00008000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x00004000UL /* IPTE notify bit */
#else /* CONFIG_64BIT */
......@@ -391,7 +392,8 @@ extern unsigned long MODULES_END;
#define PGSTE_HC_BIT 0x0020000000000000UL
#define PGSTE_GR_BIT 0x0004000000000000UL
#define PGSTE_GC_BIT 0x0002000000000000UL
#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */
#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */
#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */
#endif /* CONFIG_64BIT */
......@@ -466,6 +468,16 @@ static inline int mm_has_pgste(struct mm_struct *mm)
#endif
return 0;
}
/*
 * Report whether the mmu context of @mm has storage key usage enabled.
 * Returns 1 if context.use_skey is set, otherwise 0; always 0 when the
 * kernel is built without CONFIG_PGSTE.
 */
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
	return mm->context.use_skey ? 1 : 0;
#else
	return 0;
#endif
}
/*
* pgd/pmd/pte query functions
*/
......@@ -699,26 +711,17 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste)
#endif
}
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste,
struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
unsigned long address, bits, skey;
if (pte_val(*ptep) & _PAGE_INVALID)
if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID)
return pgste;
address = pte_val(*ptep) & PAGE_MASK;
skey = (unsigned long) page_get_storage_key(address);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) {
/* Transfer dirty + referenced bit to host bits in pgste */
pgste_val(pgste) |= bits << 52;
page_set_storage_key(address, skey ^ bits, 0);
} else if (!(pgste_val(pgste) & PGSTE_HR_BIT) &&
(bits & _PAGE_REFERENCED)) {
/* Transfer referenced bit to host bit in pgste */
pgste_val(pgste) |= PGSTE_HR_BIT;
page_reset_referenced(address);
}
/* Transfer page changed & referenced bit to guest bits in pgste */
pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
/* Copy page access key and fetch protection bit to pgste */
......@@ -729,25 +732,14 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste)
}
/*
 * Fold the referenced state of the page mapped by @ptep into @pgste.
 * For a valid pte the referenced bit is reset via page_reset_referenced();
 * if it was set, both PGSTE_HR_BIT and PGSTE_GR_BIT are set in the returned
 * pgste. Invalid ptes, and builds without CONFIG_PGSTE, return @pgste
 * unchanged.
 */
static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
	if (!(pte_val(*ptep) & _PAGE_INVALID) &&
	    page_reset_referenced(pte_val(*ptep) & PAGE_MASK))
		pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT;
#endif
	return pgste;
}
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
unsigned long address;
unsigned long nkey;
if (pte_val(entry) & _PAGE_INVALID)
if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
return;
VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
address = pte_val(entry) & PAGE_MASK;
......@@ -757,23 +749,30 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry)
* key C/R to 0.
*/
nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
page_set_storage_key(address, nkey, 0);
#endif
}
static inline void pgste_set_pte(pte_t *ptep, pte_t entry)
static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
{
if (!MACHINE_HAS_ESOP &&
(pte_val(entry) & _PAGE_PRESENT) &&
(pte_val(entry) & _PAGE_WRITE)) {
/*
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
*/
pte_val(entry) |= _PAGE_DIRTY;
pte_val(entry) &= ~_PAGE_PROTECT;
if ((pte_val(entry) & _PAGE_PRESENT) &&
(pte_val(entry) & _PAGE_WRITE) &&
!(pte_val(entry) & _PAGE_INVALID)) {
if (!MACHINE_HAS_ESOP) {
/*
* Without enhanced suppression-on-protection force
* the dirty bit on for all writable ptes.
*/
pte_val(entry) |= _PAGE_DIRTY;
pte_val(entry) &= ~_PAGE_PROTECT;
}
if (!(pte_val(entry) & _PAGE_PROTECT))
/* This pte allows write access, set user-dirty */
pgste_val(pgste) |= PGSTE_UC_BIT;
}
*ptep = entry;
return pgste;
}
/**
......@@ -839,6 +838,8 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *);
unsigned long gmap_fault(unsigned long address, struct gmap *);
void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
void __gmap_zap(unsigned long address, struct gmap *);
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
void gmap_register_ipte_notifier(struct gmap_notifier *);
void gmap_unregister_ipte_notifier(struct gmap_notifier *);
......@@ -870,8 +871,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
pgste_set_key(ptep, pgste, entry);
pgste_set_pte(ptep, entry);
pgste_set_key(ptep, pgste, entry, mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else {
if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
......@@ -1017,45 +1018,6 @@ static inline pte_t pte_mkhuge(pte_t pte)
}
#endif
/*
* Get (and clear) the user dirty bit for a pte.
*/
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
						 pte_t *ptep)
{
	pgste_t pgste;
	int was_dirty;

	/* Nothing to report for an mm without pgstes. */
	if (!mm_has_pgste(mm))
		return 0;

	/* Refresh the pgste under its lock, then read and clear PGSTE_HC_BIT. */
	pgste = pgste_get_lock(ptep);
	pgste = pgste_update_all(ptep, pgste);
	was_dirty = (pgste_val(pgste) & PGSTE_HC_BIT) ? 1 : 0;
	pgste_val(pgste) &= ~PGSTE_HC_BIT;
	pgste_set_unlock(ptep, pgste);

	return was_dirty;
}
/*
* Get (and clear) the user referenced bit for a pte.
*/
static inline int ptep_test_and_clear_user_young(struct mm_struct *mm,
						 pte_t *ptep)
{
	pgste_t pgste;
	int was_young;

	/* Nothing to report for an mm without pgstes. */
	if (!mm_has_pgste(mm))
		return 0;

	/* Refresh the referenced state under the pgste lock, then read and clear PGSTE_HR_BIT. */
	pgste = pgste_get_lock(ptep);
	pgste = pgste_update_young(ptep, pgste);
	was_young = (pgste_val(pgste) & PGSTE_HR_BIT) ? 1 : 0;
	pgste_val(pgste) &= ~PGSTE_HR_BIT;
	pgste_set_unlock(ptep, pgste);

	return was_young;
}
static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
{
unsigned long pto = (unsigned long) ptep;
......@@ -1118,6 +1080,36 @@ static inline void ptep_flush_lazy(struct mm_struct *mm,
atomic_sub(0x10000, &mm->context.attach_count);
}
/*
* Get (and clear) the user dirty bit for a pte.
*/
/*
 * Returns 1 if PGSTE_UC_BIT was set for @ptep (and clears it), 0 otherwise.
 * Always returns 0 for an mm without pgstes.
 */
static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
unsigned long addr,
pte_t *ptep)
{
pgste_t pgste;
pte_t pte;
int dirty;
/* Without pgstes there is no user-dirty bit to test. */
if (!mm_has_pgste(mm))
return 0;
/* Read and clear the user-dirty bit while holding the pgste lock. */
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT;
pte = *ptep;
/*
 * For a dirty, present pte: run the IPTE notification, flush the entry
 * with __ptep_ipte() and write back a pte that no longer allows writes.
 * NOTE(review): presumably this makes the next write fault so that
 * pgste_set_pte() can set PGSTE_UC_BIT again -- confirm.
 */
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
pgste = pgste_ipte_notify(mm, ptep, pgste);
__ptep_ipte(addr, ptep);
/*
 * Write-protect when ESOP is available or the pte is not writable
 * anyway; otherwise mark the pte invalid.
 */
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
else
pte_val(pte) |= _PAGE_INVALID;
*ptep = pte;
}
pgste_set_unlock(ptep, pgste);
return dirty;
}
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
......@@ -1137,7 +1129,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
pte = pte_mkold(pte);
if (mm_has_pgste(vma->vm_mm)) {
pgste_set_pte(ptep, pte);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
*ptep = pte;
......@@ -1182,7 +1174,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
......@@ -1205,7 +1197,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
ptep_flush_lazy(mm, address, ptep);
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, mm);
pgste_set(ptep, pgste);
}
return pte;
......@@ -1219,8 +1211,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get(ptep);
pgste_set_key(ptep, pgste, pte);
pgste_set_pte(ptep, pte);
pgste_set_key(ptep, pgste, pte, mm);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
*ptep = pte;
......@@ -1246,7 +1238,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
_PGSTE_GPS_USAGE_UNUSED)
pte_val(pte) |= _PAGE_UNUSED;
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, vma->vm_mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
......@@ -1278,7 +1270,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
pte_val(*ptep) = _PAGE_INVALID;
if (!full && mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
pgste = pgste_update_all(&pte, pgste, mm);
pgste_set_unlock(ptep, pgste);
}
return pte;
......@@ -1301,7 +1293,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
pte = pte_wrprotect(pte);
if (mm_has_pgste(mm)) {
pgste_set_pte(ptep, pte);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
*ptep = pte;
......@@ -1326,7 +1318,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
ptep_flush_direct(vma->vm_mm, address, ptep);
if (mm_has_pgste(vma->vm_mm)) {
pgste_set_pte(ptep, entry);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else
*ptep = entry;
......@@ -1734,6 +1726,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
extern int vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern void s390_enable_skey(void);
/*
* No page table caches to initialise
......
......@@ -16,6 +16,50 @@
PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
/*
 * Bit-field layout used to access the individual fields of a psw_t; the
 * psw_bits() macro casts a psw_t lvalue to this structure.
 * Unnamed bit-fields are padding. The width of the instruction address
 * field depends on CONFIG_64BIT.
 */
struct psw_bits {
unsigned long long : 1;
unsigned long long r : 1; /* PER-Mask */
unsigned long long : 3;
unsigned long long t : 1; /* DAT Mode */
unsigned long long i : 1; /* Input/Output Mask */
unsigned long long e : 1; /* External Mask */
unsigned long long key : 4; /* PSW Key */
unsigned long long : 1;
unsigned long long m : 1; /* Machine-Check Mask */
unsigned long long w : 1; /* Wait State */
unsigned long long p : 1; /* Problem State */
unsigned long long as : 2; /* Address Space Control */
unsigned long long cc : 2; /* Condition Code */
unsigned long long pm : 4; /* Program Mask */
unsigned long long ri : 1; /* Runtime Instrumentation */
unsigned long long : 6;
unsigned long long eaba : 2; /* Addressing Mode */
#ifdef CONFIG_64BIT
unsigned long long : 31;
unsigned long long ia : 64;/* Instruction Address */
#else
unsigned long long ia : 31;/* Instruction Address */
#endif
};
/*
 * Addressing-mode encodings.
 * NOTE(review): presumably the values of the two-bit eaba field of
 * struct psw_bits -- confirm.
 */
enum {
PSW_AMODE_24BIT = 0,
PSW_AMODE_31BIT = 1,
PSW_AMODE_64BIT = 3
};
/*
 * Address-space encodings.
 * NOTE(review): presumably the values of the two-bit "as" (Address Space
 * Control) field of struct psw_bits -- confirm.
 */
enum {
PSW_AS_PRIMARY = 0,
PSW_AS_ACCREG = 1,
PSW_AS_SECONDARY = 2,
PSW_AS_HOME = 3
};
/*
 * psw_bits(psw) - access a psw_t through the struct psw_bits bit-field view.
 * typecheck() verifies that the argument has type psw_t; the result is an
 * lvalue, so individual fields can be read and written, e.g.
 * psw_bits(psw).t.
 */
#define psw_bits(__psw) (*({ \
typecheck(psw_t, __psw); \
&(*(struct psw_bits *)(&(__psw))); \
}))
/*
* The pt_regs struct defines the way the registers are stored on
* the stack during a system call.
......
......@@ -28,7 +28,11 @@ struct sclp_ipl_info {
struct sclp_cpu_entry {
u8 address;
u8 reserved0[13];
u8 reserved0[2];
u8 : 3;
u8 siif : 1;
u8 : 4;
u8 reserved2[10];
u8 type;
u8 reserved1;
} __attribute__((packed));
......@@ -61,5 +65,6 @@ int sclp_pci_deconfigure(u32 fid);
int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
unsigned long sclp_get_hsa_size(void);
void sclp_early_detect(void);
int sclp_has_siif(void);
#endif /* _ASM_S390_SCLP_H */
......@@ -15,6 +15,7 @@
#include <linux/types.h>
#define __KVM_S390
#define __KVM_HAVE_GUEST_DEBUG
/* Device control API: s390-specific devices */
#define KVM_DEV_FLIC_GET_ALL_IRQS 1
......@@ -54,6 +55,13 @@ struct kvm_s390_io_adapter_req {
__u64 addr;
};
/* kvm attr_group on vm fd */
#define KVM_S390_VM_MEM_CTRL 0
/* kvm attributes for mem_ctrl */
#define KVM_S390_VM_MEM_ENABLE_CMMA 0
#define KVM_S390_VM_MEM_CLR_CMMA 1
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* general purpose regs for s390 */
......@@ -72,11 +80,31 @@ struct kvm_fpu {
__u64 fprs[16];
};
#define KVM_GUESTDBG_USE_HW_BP 0x00010000
#define KVM_HW_BP 1
#define KVM_HW_WP_WRITE 2
#define KVM_SINGLESTEP 4
/*
 * Arch-specific part of a debug exit: an address and a one-byte type,
 * padded to 16 bytes.
 * NOTE(review): type is presumably one of KVM_HW_BP, KVM_HW_WP_WRITE or
 * KVM_SINGLESTEP -- confirm.
 */
struct kvm_debug_exit_arch {
__u64 addr;
__u8 type;
__u8 pad[7]; /* Should be set to 0 */
};
/*
 * Description of one hardware breakpoint/watchpoint; an array of these is
 * referenced by struct kvm_guest_debug_arch.
 * NOTE(review): type is presumably KVM_HW_BP or KVM_HW_WP_WRITE, and len
 * the size of the watched range in bytes -- confirm.
 */
struct kvm_hw_breakpoint {
__u64 addr;
__u64 phys_addr;
__u64 len;
__u8 type;
__u8 pad[7]; /* Should be set to 0 */
};
/*
 * for KVM_SET_GUEST_DEBUG
 *
 * Arch-specific argument: a userspace pointer to hardware breakpoint
 * descriptions together with a count.
 * NOTE(review): presumably hw_bp points to nr_hw_bp entries -- confirm.
 */
struct kvm_guest_debug_arch {
__u32 nr_hw_bp;
__u32 pad; /* Should be set to 0 */
struct kvm_hw_breakpoint __user *hw_bp;
};
#define KVM_SYNC_PREFIX (1UL << 0)
......
......@@ -89,16 +89,22 @@ int main(void)
DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
DEFINE(__LC_MON_CLASS_NR, offsetof(struct _lowcore, mon_class_num));
DEFINE(__LC_PER_CODE, offsetof(struct _lowcore, per_code));
DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_atmid));
DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
DEFINE(__LC_EXC_ACCESS_ID, offsetof(struct _lowcore, exc_access_id));
DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
DEFINE(__LC_OP_ACCESS_ID, offsetof(struct _lowcore, op_access_id));
DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_mode_id));
DEFINE(__LC_MON_CODE, offsetof(struct _lowcore, monitor_code));
DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
DEFINE(__LC_IO_INT_PARM, offsetof(struct _lowcore, io_int_parm));
DEFINE(__LC_IO_INT_WORD, offsetof(struct _lowcore, io_int_word));
DEFINE(__LC_STFL_FAC_LIST, offsetof(struct _lowcore, stfl_fac_list));
DEFINE(__LC_MCCK_CODE, offsetof(struct _lowcore, mcck_interruption_code));
DEFINE(__LC_MCCK_EXT_DAM_CODE, offsetof(struct _lowcore, external_damage_code));
DEFINE(__LC_RST_OLD_PSW, offsetof(struct _lowcore, restart_old_psw));
DEFINE(__LC_EXT_OLD_PSW, offsetof(struct _lowcore, external_old_psw));
DEFINE(__LC_SVC_OLD_PSW, offsetof(struct _lowcore, svc_old_psw));
......@@ -156,6 +162,8 @@ int main(void)
#ifdef CONFIG_32BIT
DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, extended_save_area_addr));
#else /* CONFIG_32BIT */
DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
......
......@@ -391,8 +391,8 @@ ENTRY(pgm_check_handler)
jz pgm_kprobe
oi __TI_flags+3(%r12),_TIF_PER_TRAP
mvc __THREAD_per_address(4,%r1),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID
mvc __THREAD_per_cause(2,%r1),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r1),__LC_PER_ACCESS_ID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
l %r1,BASED(.Ljump_table)
......
......@@ -423,8 +423,8 @@ ENTRY(pgm_check_handler)
jz pgm_kprobe
oi __TI_flags+7(%r12),_TIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE
mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
......
......@@ -11,5 +11,7 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
kvm-objs += diag.o gaccess.o guestdbg.o
obj-$(CONFIG_KVM) += kvm.o
......@@ -64,12 +64,12 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
int rc;
u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
unsigned long hva_token = KVM_HVA_ERR_BAD;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
......@@ -89,8 +89,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
if (kvm_is_error_hva(hva_token))
if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.pfault_token = parm.token_addr;
......@@ -167,17 +166,11 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
switch (subcode) {
case 0:
case 1:
page_table_reset_pgste(current->mm, 0, TASK_SIZE);
return -EOPNOTSUPP;
case 3:
vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
case 4:
vcpu->run->s390_reset_flags = 0;
page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
default:
return -EOPNOTSUPP;
......
/*
* guest access functions
*
* Copyright IBM Corp. 2014
*
*/
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <asm/pgtable.h>
#include "kvm-s390.h"
#include "gaccess.h"
/*
 * Address-space-control element, as the name suggests, accessible as a raw
 * value or through its bit-fields (which add up to 64 bits). Unnamed
 * bit-fields are padding.
 */
union asce {
unsigned long val; /* raw value */
struct {
unsigned long origin : 52; /* Region- or Segment-Table Origin */
unsigned long : 2;
unsigned long g : 1; /* Subspace Group Control */
unsigned long p : 1; /* Private Space Control */
unsigned long s : 1; /* Storage-Alteration-Event Control */
unsigned long x : 1; /* Space-Switch-Event Control */
unsigned long r : 1; /* Real-Space Control */
unsigned long : 1;
unsigned long dt : 2; /* Designation-Type Control */
unsigned long tl : 2; /* Region- or Segment-Table Length */
};
};
/*
 * Table-type encodings.
 * NOTE(review): presumably the values of the two-bit dt (Designation-Type
 * Control) field of union asce -- confirm.
 */
enum {
ASCE_TYPE_SEGMENT = 0,
ASCE_TYPE_REGION3 = 1,
ASCE_TYPE_REGION2 = 2,
ASCE_TYPE_REGION1 = 3
};
/*
 * Region-first-table entry: raw value plus bit-field view (64 bits in
 * total). Unnamed bit-fields are padding.
 */
union region1_table_entry {
unsigned long val; /* raw value */
struct {
unsigned long rto: 52;/* Region-Table Origin */
unsigned long : 2;
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long : 1;
unsigned long tf : 2; /* Region-Second-Table Offset */
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long : 1;
unsigned long tt : 2; /* Table-Type Bits */
unsigned long tl : 2; /* Region-Second-Table Length */
};
};
/*
* Region-second-table entry. Same shape as the region-first-table entry;
* guest_translate() uses rto together with the region-third index of the
* virtual address to locate the region-third-table entry.
*/
union region2_table_entry {
unsigned long val;
struct {
unsigned long rto: 52;/* Region-Table Origin */
unsigned long : 2;
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long : 1;
unsigned long tf : 2; /* Region-Third-Table Offset */
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long : 1;
unsigned long tt : 2; /* Table-Type Bits */
unsigned long tl : 2; /* Region-Third-Table Length */
};
};
/*
* View of a region-third-table entry that points to a segment table.
* guest_translate() uses this view (sto, tf, tl, p) unless both the entry's
* fc bit and edat2 are set.
*/
struct region3_table_entry_fc0 {
unsigned long sto: 52;/* Segment-Table Origin */
unsigned long : 1;
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long : 1;
unsigned long tf : 2; /* Segment-Table Offset */
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long cr : 1; /* Common-Region Bit */
unsigned long tt : 2; /* Table-Type Bits */
unsigned long tl : 2; /* Segment-Table Length */
};
/*
* View of a region-third-table entry that maps a region frame directly.
* guest_translate() uses this view when the entry's fc bit and edat2 are
* both set: it takes p and copies rfaa into the resulting address, skipping
* the segment- and page-table levels.
*/
struct region3_table_entry_fc1 {
unsigned long rfaa : 33; /* Region-Frame Absolute Address */
unsigned long : 14;
unsigned long av : 1; /* ACCF-Validity Control */
unsigned long acc: 4; /* Access-Control Bits */
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long co : 1; /* Change-Recording Override */
unsigned long : 2;
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long cr : 1; /* Common-Region Bit */
unsigned long tt : 2; /* Table-Type Bits */
unsigned long : 2;
};
/*
* Region-third-table entry. @val is the raw value read from guest memory,
* @fc0 and @fc1 are the two format-specific views, and the anonymous struct
* exposes the fields (fc, i, cr, tt) that guest_translate() tests before it
* decides which view applies.
*/
union region3_table_entry {
unsigned long val;
struct region3_table_entry_fc0 fc0;
struct region3_table_entry_fc1 fc1;
struct {
unsigned long : 53;
unsigned long fc : 1; /* Format-Control */
unsigned long : 4;
unsigned long i : 1; /* Region-Invalid Bit */
unsigned long cr : 1; /* Common-Region Bit */
unsigned long tt : 2; /* Table-Type Bits */
unsigned long : 2;
};
};
/*
* View of a segment-table entry that points to a page table.
* guest_translate() multiplies pto by 2048 and adds the page index of the
* virtual address times 8 to find the page-table entry.
*/
struct segment_entry_fc0 {
unsigned long pto: 53;/* Page-Table Origin */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long : 3;
unsigned long i : 1; /* Segment-Invalid Bit */
unsigned long cs : 1; /* Common-Segment Bit */
unsigned long tt : 2; /* Table-Type Bits */
unsigned long : 2;
};
/*
* View of a segment-table entry that maps a segment frame directly.
* guest_translate() uses this view when the entry's fc bit and edat1 are
* both set: it takes p and copies sfaa into the resulting address, skipping
* the page-table level.
*/
struct segment_entry_fc1 {
unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
unsigned long : 3;
unsigned long av : 1; /* ACCF-Validity Control */
unsigned long acc: 4; /* Access-Control Bits */
unsigned long f : 1; /* Fetch-Protection Bit */
unsigned long fc : 1; /* Format-Control */
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long co : 1; /* Change-Recording Override */
unsigned long : 2;
unsigned long i : 1; /* Segment-Invalid Bit */
unsigned long cs : 1; /* Common-Segment Bit */
unsigned long tt : 2; /* Table-Type Bits */
unsigned long : 2;
};
/*
* Segment-table entry. @val is the raw value read from guest memory, @fc0
* and @fc1 are the two format-specific views, and the anonymous struct
* exposes the fields (fc, i, cs, tt) that guest_translate() tests before it
* decides which view applies.
*/
union segment_table_entry {
unsigned long val;
struct segment_entry_fc0 fc0;
struct segment_entry_fc1 fc1;
struct {
unsigned long : 53;
unsigned long fc : 1; /* Format-Control */
unsigned long : 4;
unsigned long i : 1; /* Segment-Invalid Bit */
unsigned long cs : 1; /* Common-Segment Bit */
unsigned long tt : 2; /* Table-Type Bits */
unsigned long : 2;
};
};
/*
* Expected values of the tt (table-type) field of region- and segment-table
* entries. guest_translate() reports PGM_TRANSLATION_SPEC when an entry's tt
* does not match the level it was fetched for.
*/
enum {
TABLE_TYPE_SEGMENT = 0,
TABLE_TYPE_REGION3 = 1,
TABLE_TYPE_REGION2 = 2,
TABLE_TYPE_REGION1 = 3
};
/*
* Page-table entry. guest_translate() reads it through @val, rejects it if
* i or z is set (or if co is set while edat1 is off), folds p into the
* protection state and copies pfra into the resulting address.
*/
union page_table_entry {
unsigned long val;
struct {
unsigned long pfra : 52; /* Page-Frame Real Address */
unsigned long z : 1; /* Zero Bit */
unsigned long i : 1; /* Page-Invalid Bit */
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long co : 1; /* Change-Recording Override */
unsigned long : 8;
};
};
/*
* vaddress union in order to easily decode a virtual address into its
* region first index, region second index etc. parts.
*
* The first anonymous struct splits the address into the full table indexes
* (rfx, rsx, rtx, sx, px) plus the byte index bx. The second one exposes only
* the leading two bits of each region/segment index (rfx01, rsx01, rtx01,
* sx01), which guest_translate() compares against the two-bit table
* offset/length fields (tf/tl) of the ASCE and the table entries.
*/
union vaddress {
unsigned long addr;
struct {
unsigned long rfx : 11;
unsigned long rsx : 11;
unsigned long rtx : 11;
unsigned long sx : 11;
unsigned long px : 8;
unsigned long bx : 12;
};
struct {
unsigned long rfx01 : 2;
unsigned long : 9;
unsigned long rsx01 : 2;
unsigned long : 9;
unsigned long rtx01 : 2;
unsigned long : 9;
unsigned long sx01 : 2;
unsigned long : 29;
};
};
/*
* raddress union which will contain the result (real or absolute address)
* after a page table walk. The rfaa, sfaa and pfra members are used to
* simply assign them the value of a region, segment or page table entry.
*
* guest_translate() initialises @addr with the virtual address and then
* overwrites exactly one of the bit-field members, so the remaining bits of
* @addr keep the offset taken from the virtual address.
* NOTE(review): this relies on how the compiler places these bit-fields
* within the word - confirm for the target ABI.
*/
union raddress {
unsigned long addr;
unsigned long rfaa : 33; /* Region-Frame Absolute Address */
unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
unsigned long pfra : 52; /* Page-Frame Real Address */
};
/*
* State of the "simple" IPTE lock variant (ipte_lock_simple() and
* ipte_unlock_simple()): ipte_lock_count counts nested holders and is only
* modified while ipte_mutex is held.
* NOTE(review): both objects are file-scope and therefore shared by all VMs,
* while the k bit they guard lives in each VM's own sca->ipte_control -
* confirm that this sharing is intended.
*/
static int ipte_lock_count;
static DEFINE_MUTEX(ipte_mutex);
/*
 * ipte_lock_held - tell whether the IPTE lock is currently taken by the host
 * @vcpu: virtual cpu whose VM is inspected
 *
 * When the low-order bit of the SIE block's eca field is set, the answer
 * comes from the kh counter in the VM's ipte_control word; otherwise it comes
 * from the nesting counter of the simple lock variant.
 *
 * Returns non-zero if the lock is held, zero otherwise.
 */
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
	union ipte_control *ctl = &vcpu->kvm->arch.sca->ipte_control;
	int held;

	if (vcpu->arch.sie_block->eca & 1)
		held = (ctl->kh != 0);
	else
		held = (ipte_lock_count != 0);
	return held;
}
/*
* Take the IPTE lock using the simple variant: a nesting counter protected
* by ipte_mutex plus the k bit in the VM's ipte_control word. Only the first
* holder (counter going from 0 to 1) actually sets the k bit.
*/
static void ipte_lock_simple(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
mutex_lock(&ipte_mutex);
ipte_lock_count++;
/* nested acquisition: the k bit was already set by the first holder */
if (ipte_lock_count > 1)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
old = ACCESS_ONCE(*ic);
/* somebody else has k set: yield and re-read until it is clear */
while (old.k) {
cond_resched();
old = ACCESS_ONCE(*ic);
}
new = old;
new.k = 1;
/* retry if the control word changed between the read and the update */
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
out:
mutex_unlock(&ipte_mutex);
}
/*
* Release the IPTE lock taken by ipte_lock_simple(). Only the last holder
* (nesting counter dropping to zero) clears the k bit and wakes up waiters
* on the VM's ipte_wq.
*/
static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
mutex_lock(&ipte_mutex);
ipte_lock_count--;
/* still held by an outer caller: leave the k bit alone */
if (ipte_lock_count)
goto out;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
new = old = ACCESS_ONCE(*ic);
new.k = 0;
/* retry if the control word changed between the read and the update */
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
/*
* The counter is known to be zero here (checked above, mutex still held),
* so this condition is always true.
*/
if (!ipte_lock_count)
wake_up(&vcpu->kvm->arch.ipte_wq);
out:
mutex_unlock(&ipte_mutex);
}
/*
* Take the IPTE lock using the variant selected when the low-order bit of
* the SIE block's eca field is set (see ipte_lock()). No mutex is involved:
* the k bit and the kh holder counter are updated together with a single
* cmpxchg on the VM's ipte_control word.
*/
static void ipte_lock_siif(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
old = ACCESS_ONCE(*ic);
/* wait (yielding the CPU) until the kg field is observed as zero */
while (old.kg) {
cond_resched();
old = ACCESS_ONCE(*ic);
}
new = old;
new.k = 1;
/* one more holder counted in kh */
new.kh++;
/* retry if the control word changed between the read and the update */
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
}
/*
* Release the IPTE lock taken by ipte_lock_siif(): decrement the kh holder
* counter and, when it reaches zero, clear the k bit in the same cmpxchg.
* Waiters on the VM's ipte_wq are woken once kh has dropped to zero.
*/
static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
{
union ipte_control old, new, *ic;
ic = &vcpu->kvm->arch.sca->ipte_control;
do {
new = old = ACCESS_ONCE(*ic);
new.kh--;
if (!new.kh)
new.k = 0;
/* retry if the control word changed between the read and the update */
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
/* 'new' holds the value that was successfully stored by the loop above */
if (!new.kh)
wake_up(&vcpu->kvm->arch.ipte_wq);
}
/*
 * Take the IPTE lock, choosing the implementation by the low-order bit of
 * the SIE block's eca field: set -> ipte_lock_siif(), clear ->
 * ipte_lock_simple().
 */
static void ipte_lock(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.sie_block->eca & 1)) {
		ipte_lock_simple(vcpu);
		return;
	}
	ipte_lock_siif(vcpu);
}
/*
 * Release the IPTE lock, choosing the implementation by the low-order bit of
 * the SIE block's eca field: set -> ipte_unlock_siif(), clear ->
 * ipte_unlock_simple().
 */
static void ipte_unlock(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.sie_block->eca & 1)) {
		ipte_unlock_simple(vcpu);
		return;
	}
	ipte_unlock_siif(vcpu);
}
/*
 * Return the raw ASCE selected by the address-space bits of the guest PSW:
 * control register 1 for primary, 7 for secondary and 13 for home space.
 * Any other address-space value yields 0.
 */
static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
{
	unsigned long asce = 0;

	switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
	case PSW_AS_PRIMARY:
		asce = vcpu->arch.sie_block->gcr[1];
		break;
	case PSW_AS_SECONDARY:
		asce = vcpu->arch.sie_block->gcr[7];
		break;
	case PSW_AS_HOME:
		asce = vcpu->arch.sie_block->gcr[13];
		break;
	}
	return asce;
}
/*
 * Fetch one table entry (a single unsigned long) from guest address @gpa
 * into *@val. Returns whatever kvm_read_guest() returns.
 */
static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
{
	int rc;

	rc = kvm_read_guest(kvm, gpa, val, sizeof(unsigned long));
	return rc;
}
/**
* guest_translate - translate a guest virtual into a guest absolute address
* @vcpu: virtual cpu
* @gva: guest virtual address
* @gpa: points to where guest physical (absolute) address should be stored
* @write: indicates if access is a write access
*
* Translate a guest virtual address into a guest absolute address by means
* of dynamic address translation as specified by the architecture.
* If the resulting absolute address is not available in the configuration
* an addressing exception is indicated and @gpa will not be changed.
*
* The walk starts at the table level named by the ASCE's designation type
* and falls through the remaining levels (region first -> region second ->
* region third -> segment -> page). DAT-protection bits found on the way are
* accumulated and only evaluated at the very end, for write accesses.
*
* Returns: - zero on success; @gpa contains the resulting absolute address
* - a negative value if guest access failed due to e.g. broken
* guest mapping
* - a positive value if an access exception happened. In this case
* the returned value is the program interruption code as defined
* by the architecture
*
* NOTE(review): the function is declared to return unsigned long although
* -EFAULT is returned below; the caller visible in this file stores the
* result in an int before testing it for "< 0".
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
unsigned long *gpa, int write)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
union page_table_entry pte;
int dat_protection = 0;
union ctlreg0 ctlreg0;
unsigned long ptr;
int edat1, edat2;
union asce asce;
ctlreg0.val = vcpu->arch.sie_block->gcr[0];
/* edat2 can only be active if edat1 is */
edat1 = ctlreg0.edat && test_vfacility(8);
edat2 = edat1 && test_vfacility(78);
asce.val = get_vcpu_asce(vcpu);
/* real-space designation: no table walk, only prefixing is applied */
if (asce.r)
goto real_address;
ptr = asce.origin * 4096;
/*
* First switch: check that the address fits the designated table type and
* length, and compute the address of the first table entry to fetch.
*/
switch (asce.dt) {
case ASCE_TYPE_REGION1:
if (vaddr.rfx01 > asce.tl)
return PGM_REGION_FIRST_TRANS;
ptr += vaddr.rfx * 8;
break;
case ASCE_TYPE_REGION2:
if (vaddr.rfx)
return PGM_ASCE_TYPE;
if (vaddr.rsx01 > asce.tl)
return PGM_REGION_SECOND_TRANS;
ptr += vaddr.rsx * 8;
break;
case ASCE_TYPE_REGION3:
if (vaddr.rfx || vaddr.rsx)
return PGM_ASCE_TYPE;
if (vaddr.rtx01 > asce.tl)
return PGM_REGION_THIRD_TRANS;
ptr += vaddr.rtx * 8;
break;
case ASCE_TYPE_SEGMENT:
if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
return PGM_ASCE_TYPE;
if (vaddr.sx01 > asce.tl)
return PGM_SEGMENT_TRANSLATION;
ptr += vaddr.sx * 8;
break;
}
/*
* Second switch: the actual walk. Each case fetches the entry at ptr,
* validates it and leaves the address of the next-level entry in ptr
* before falling through to the next level.
*/
switch (asce.dt) {
case ASCE_TYPE_REGION1: {
union region1_table_entry rfte;
if (kvm_is_error_gpa(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rfte.val))
return -EFAULT;
if (rfte.i)
return PGM_REGION_FIRST_TRANS;
if (rfte.tt != TABLE_TYPE_REGION1)
return PGM_TRANSLATION_SPEC;
if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
return PGM_REGION_SECOND_TRANS;
if (edat1)
dat_protection |= rfte.p;
ptr = rfte.rto * 4096 + vaddr.rsx * 8;
}
/* fallthrough */
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
if (kvm_is_error_gpa(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rste.val))
return -EFAULT;
if (rste.i)
return PGM_REGION_SECOND_TRANS;
if (rste.tt != TABLE_TYPE_REGION2)
return PGM_TRANSLATION_SPEC;
if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
return PGM_REGION_THIRD_TRANS;
if (edat1)
dat_protection |= rste.p;
ptr = rste.rto * 4096 + vaddr.rtx * 8;
}
/* fallthrough */
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
if (kvm_is_error_gpa(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rtte.val))
return -EFAULT;
if (rtte.i)
return PGM_REGION_THIRD_TRANS;
if (rtte.tt != TABLE_TYPE_REGION3)
return PGM_TRANSLATION_SPEC;
if (rtte.cr && asce.p && edat2)
return PGM_TRANSLATION_SPEC;
/* entry maps a region frame directly: the walk ends here */
if (rtte.fc && edat2) {
dat_protection |= rtte.fc1.p;
raddr.rfaa = rtte.fc1.rfaa;
goto absolute_address;
}
if (vaddr.sx01 < rtte.fc0.tf)
return PGM_SEGMENT_TRANSLATION;
if (vaddr.sx01 > rtte.fc0.tl)
return PGM_SEGMENT_TRANSLATION;
if (edat1)
dat_protection |= rtte.fc0.p;
ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
}
/* fallthrough */
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
if (kvm_is_error_gpa(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &ste.val))
return -EFAULT;
if (ste.i)
return PGM_SEGMENT_TRANSLATION;
if (ste.tt != TABLE_TYPE_SEGMENT)
return PGM_TRANSLATION_SPEC;
if (ste.cs && asce.p)
return PGM_TRANSLATION_SPEC;
/* entry maps a segment frame directly: the walk ends here */
if (ste.fc && edat1) {
dat_protection |= ste.fc1.p;
raddr.sfaa = ste.fc1.sfaa;
goto absolute_address;
}
dat_protection |= ste.fc0.p;
/* the page-table origin is scaled by 2048, unlike the 4096 used above */
ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
}
}
/* last level: the page-table entry */
if (kvm_is_error_gpa(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &pte.val))
return -EFAULT;
if (pte.i)
return PGM_PAGE_TRANSLATION;
if (pte.z)
return PGM_TRANSLATION_SPEC;
if (pte.co && !edat1)
return PGM_TRANSLATION_SPEC;
dat_protection |= pte.p;
raddr.pfra = pte.pfra;
real_address:
/* apply the vcpu's prefix to turn the real address into an absolute one */
raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
absolute_address:
/* accumulated DAT protection only blocks write accesses */
if (write && dat_protection)
return PGM_PROTECTION;
if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
*gpa = raddr.addr;
return 0;
}
/*
 * Return 1 if @ga lies in one of the two ranges 0..511 or 4096..4607,
 * 0 otherwise.
 */
static inline int is_low_address(unsigned long ga)
{
	/* only bits 0-8 (offset below 512) and bit 12 (4096) may be set */
	const unsigned long allowed_bits = 0x11fful;

	if (ga & ~allowed_bits)
		return 0;
	return 1;
}
static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
{
union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
psw_t *psw = &vcpu->arch.sie_block->gpsw;
union asce asce;
if (!ctlreg0.lap)
return 0;
asce.val = get_vcpu_asce(vcpu);
if (psw_bits(*psw).t && asce.p)
return 0;
return 1;
}
/*
* Bit-field view of the translation-exception code. guest_page_range()
* overlays it on pgm->trans_exc_code to fill in the page number of the
* failing address, the fetch/store indication, the address-space bits taken
* from the PSW, and b61 (set when the result is PGM_PROTECTION coming from
* guest_translate()).
*/
struct trans_exc_code_bits {
unsigned long addr : 52; /* Translation-exception Address */
unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
unsigned long : 7;
unsigned long b61 : 1;
unsigned long as : 2; /* ASCE Identifier */
};
/* Values for the fsi field of struct trans_exc_code_bits. */
enum {
FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
FSI_STORE = 1, /* Exception was due to store operation */
FSI_FETCH = 2 /* Exception was due to fetch operation */
};
/*
 * guest_page_range - resolve a run of guest pages to absolute addresses
 * @vcpu: virtual cpu
 * @ga: guest logical address of the first byte to access
 * @pages: array receiving one page-aligned absolute address per page
 * @nr_pages: number of consecutive pages to resolve
 * @write: non-zero for a write access
 *
 * vcpu->arch.pgm is cleared first and then primed with the fetch/store
 * indication and the PSW address-space bits, so that it describes the
 * exception if one is detected.
 *
 * Returns zero on success, a negative value passed through from
 * guest_translate(), or the (positive) program interruption code that was
 * stored in pgm->code.
 */
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
			    unsigned long *pages, unsigned long nr_pages,
			    int write)
{
	struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	struct trans_exc_code_bits *tec;
	unsigned long idx;
	int lap_enabled, rc;

	memset(pgm, 0, sizeof(*pgm));
	tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
	tec->fsi = write ? FSI_STORE : FSI_FETCH;
	tec->as = psw_bits(*psw).as;
	lap_enabled = low_address_protection_enabled(vcpu);

	for (idx = 0; idx < nr_pages; idx++, ga += PAGE_SIZE) {
		/* re-apply the addressing mode: ga may have wrapped */
		ga = kvm_s390_logical_to_effective(vcpu, ga);
		tec->addr = ga >> PAGE_SHIFT;

		/* low-address protection is checked on the untruncated address */
		if (write && lap_enabled && is_low_address(ga)) {
			pgm->code = PGM_PROTECTION;
			return pgm->code;
		}
		ga &= PAGE_MASK;

		if (!psw_bits(*psw).t) {
			/* DAT off: the address is real, only prefixing applies */
			pages[idx] = kvm_s390_real_to_abs(vcpu, ga);
			if (kvm_is_error_gpa(vcpu->kvm, pages[idx]))
				pgm->code = PGM_ADDRESSING;
		} else {
			rc = guest_translate(vcpu, ga, &pages[idx], write);
			if (rc < 0)
				return rc;
			if (rc == PGM_PROTECTION)
				tec->b61 = 1;
			if (rc)
				pgm->code = rc;
		}

		if (pgm->code)
			return pgm->code;
	}
	return 0;
}
/*
 * access_guest - copy data between kernel space and guest logical memory
 * @vcpu: virtual cpu
 * @ga: guest logical address
 * @data: kernel buffer (source for writes, destination for reads)
 * @len: number of bytes to copy
 * @write: non-zero to copy to the guest, zero to copy from the guest
 *
 * All pages of the range are resolved by guest_page_range() before any byte
 * is copied. The IPTE lock is held around resolution and copy when DAT is on
 * and the current ASCE is not a real-space designation.
 *
 * Returns zero on success, -EOPNOTSUPP for access-register mode, -ENOMEM if
 * the page array cannot be allocated, or the non-zero result of
 * guest_page_range() / kvm_{read,write}_guest().
 */
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
		 unsigned long len, int write)
{
	psw_t *psw = &vcpu->arch.sie_block->gpsw;
	unsigned long chunk, nr_pages, gpa, idx;
	unsigned long pages_array[2];
	unsigned long *pages = pages_array;
	int need_ipte_lock;
	union asce asce;
	int rc;

	if (len == 0)
		return 0;
	/* Access register mode is not supported yet. */
	if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
		return -EOPNOTSUPP;

	nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
	/* small accesses use the on-stack array, larger ones allocate */
	if (nr_pages > ARRAY_SIZE(pages_array)) {
		pages = vmalloc(nr_pages * sizeof(unsigned long));
		if (!pages)
			return -ENOMEM;
	}

	asce.val = get_vcpu_asce(vcpu);
	need_ipte_lock = psw_bits(*psw).t && !asce.r;
	if (need_ipte_lock)
		ipte_lock(vcpu);

	rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
	for (idx = 0; !rc && idx < nr_pages; idx++) {
		/* page address from the lookup plus the offset within the page */
		gpa = pages[idx] + (ga & ~PAGE_MASK);
		chunk = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
		rc = write ? kvm_write_guest(vcpu->kvm, gpa, data, chunk)
			   : kvm_read_guest(vcpu->kvm, gpa, data, chunk);
		len -= chunk;
		ga += chunk;
		data += chunk;
	}

	if (need_ipte_lock)
		ipte_unlock(vcpu);
	if (pages != pages_array)
		vfree(pages);
	return rc;
}
/*
 * access_guest_real - copy data between kernel space and guest real memory
 * @vcpu: virtual cpu
 * @gra: guest real address
 * @data: kernel buffer (source for writes, destination for reads)
 * @len: number of bytes to copy
 * @write: non-zero to copy to the guest, zero to copy from the guest
 *
 * The range is processed page by page; prefixing is applied to the start of
 * every piece, and the piece is then accessed as absolute memory.
 *
 * Returns zero on success or the first non-zero result of
 * write_guest_abs() / read_guest_abs().
 */
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
		      void *data, unsigned long len, int write)
{
	unsigned long chunk, gpa;
	int rc = 0;

	for (; len && !rc; len -= chunk, gra += chunk, data += chunk) {
		gpa = kvm_s390_real_to_abs(vcpu, gra);
		/* never cross a page boundary within one piece */
		chunk = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
		rc = write ? write_guest_abs(vcpu, gpa, data, chunk)
			   : read_guest_abs(vcpu, gpa, data, chunk);
	}
	return rc;
}
/*
* access guest memory
*
* Copyright IBM Corp. 2008, 2009
* Copyright IBM Corp. 2008, 2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
......@@ -15,100 +15,315 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
#include <asm/uaccess.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include "kvm-s390.h"
/* Convert real to absolute address by applying the prefix of the CPU */
/**
* kvm_s390_real_to_abs - convert guest real address to guest absolute address
* @vcpu - guest virtual cpu
* @gra - guest real address
*
* Returns the guest absolute address that corresponds to the passed guest real
* address @gra of a virtual guest cpu by applying its prefix.
*/
static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
unsigned long gaddr)
unsigned long gra)
{
unsigned long prefix = vcpu->arch.sie_block->prefix;
if (gaddr < 2 * PAGE_SIZE)
gaddr += prefix;
else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
gaddr -= prefix;
return gaddr;
unsigned long prefix = vcpu->arch.sie_block->prefix;
if (gra < 2 * PAGE_SIZE)
gra += prefix;
else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
gra -= prefix;
return gra;
}
static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
void __user *gptr,
int prefixing)
/**
* kvm_s390_logical_to_effective - convert guest logical to effective address
* @vcpu: guest virtual cpu
* @ga: guest logical address
*
* Convert a guest vcpu logical address to a guest vcpu effective address by
* applying the rules of the vcpu's addressing mode defined by PSW bits 31
* and 32 (extended/basic addressing mode).
*
* Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
* mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
* of @ga will be zeroed and the remaining bits will be returned.
*/
static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
unsigned long ga)
{
unsigned long gaddr = (unsigned long) gptr;
unsigned long uaddr;
if (prefixing)
gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
if (IS_ERR_VALUE(uaddr))
uaddr = -EFAULT;
return (void __user *)uaddr;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
return ga;
if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
return ga & ((1UL << 31) - 1);
return ga & ((1UL << 24) - 1);
}
#define get_guest(vcpu, x, gptr) \
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
int __ret; \
\
if (IS_ERR((void __force *)__uptr)) { \
__ret = PTR_ERR((void __force *)__uptr); \
} else { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = get_user(x, __uptr); \
} \
__ret; \
})
/*
* put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
* which shall only be used to access the lowcore of a vcpu.
* These functions should be used for e.g. interrupt handlers where no
* guest memory access protection facilities, like key or low address
* protection, are applicable.
* At a later point guest vcpu lowcore access should happen via pinned
* prefix pages, so that these pages can be accessed directly via the
* kernel mapping. All of these *_lc functions can be removed then.
*/
#define put_guest(vcpu, x, gptr) \
/**
* put_guest_lc - write a simple variable to a guest vcpu's lowcore
* @vcpu: virtual cpu
* @x: value to copy to guest
* @gra: vcpu's destination guest real address
*
* Copies a simple value from kernel space to a guest vcpu's lowcore.
* The size of the variable may be 1, 2, 4 or 8 bytes. The destination
* must be located in the vcpu's lowcore. Otherwise the result is undefined.
*
* Returns zero on success or -EFAULT on error.
*
* Note: an error indicates that either the kernel is out of memory or
* the guest memory mapping is broken. In any case the best solution
* would be to terminate the guest.
* It is wrong to inject a guest exception.
*/
#define put_guest_lc(vcpu, x, gra) \
({ \
__typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
int __mask = sizeof(__typeof__(*(gptr))) - 1; \
int __ret; \
struct kvm_vcpu *__vcpu = (vcpu); \
__typeof__(*(gra)) __x = (x); \
unsigned long __gpa; \
\
if (IS_ERR((void __force *)__uptr)) { \
__ret = PTR_ERR((void __force *)__uptr); \
} else { \
BUG_ON((unsigned long)__uptr & __mask); \
__ret = put_user(x, __uptr); \
} \
__ret; \
__gpa = (unsigned long)(gra); \
__gpa += __vcpu->arch.sie_block->prefix; \
kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \
})
static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
unsigned long from, unsigned long len,
int to_guest, int prefixing)
/**
* write_guest_lc - copy data from kernel space to guest vcpu's lowcore
* @vcpu: virtual cpu
* @gra: vcpu's destination guest real address
* @data: source address in kernel space
* @len: number of bytes to copy
*
* Copy data from kernel space to guest vcpu's lowcore. The entire range must
* be located within the vcpu's lowcore, otherwise the result is undefined.
*
* Returns zero on success or -EFAULT on error.
*
* Note: an error indicates that either the kernel is out of memory or
* the guest memory mapping is broken. In any case the best solution
* would be to terminate the guest.
* It is wrong to inject a guest exception.
*/
static inline __must_check
int write_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
		   unsigned long len)
{
	/* lowcore offset -> guest address: add the vcpu's prefix */
	const unsigned long prefix = vcpu->arch.sie_block->prefix;

	return kvm_write_guest(vcpu->kvm, prefix + gra, data, len);
}
/**
* read_guest_lc - copy data from guest vcpu's lowcore to kernel space
* @vcpu: virtual cpu
* @gra: vcpu's source guest real address
* @data: destination address in kernel space
* @len: number of bytes to copy
*
* Copy data from guest vcpu's lowcore to kernel space. The entire range must
* be located within the vcpu's lowcore, otherwise the result is undefined.
*
* Returns zero on success or -EFAULT on error.
*
* Note: an error indicates that either the kernel is out of memory or
* the guest memory mapping is broken. In any case the best solution
* would be to terminate the guest.
* It is wrong to inject a guest exception.
*/
static inline __must_check
int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
		  unsigned long len)
{
	/* lowcore offset -> guest address: add the vcpu's prefix */
	const unsigned long prefix = vcpu->arch.sie_block->prefix;

	return kvm_read_guest(vcpu->kvm, prefix + gra, data, len);
}
int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
unsigned long len, int write);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, int write);
/**
* write_guest - copy data from kernel space to guest space
* @vcpu: virtual cpu
* @ga: guest address
* @data: source address in kernel space
* @len: number of bytes to copy
*
* Copy @len bytes from @data (kernel space) to @ga (guest address).
* In order to copy data to guest space the PSW of the vcpu is inspected:
* If DAT is off data will be copied to guest real or absolute memory.
* If DAT is on data will be copied to the address space as specified by
* the address space bits of the PSW:
* Primary, secondary or home space (access register mode is currently not
* implemented).
* The addressing mode of the PSW is also inspected, so that address wrap
* around is taken into account for 24-, 31- and 64-bit addressing mode,
* if the to be copied data crosses page boundaries in guest address space.
* In addition also low address and DAT protection are inspected before
* copying any data (key protection is currently not implemented).
*
* This function modifies the 'struct kvm_s390_pgm_info pgm' member of @vcpu.
* In case of an access exception (e.g. protection exception) pgm will contain
* all data necessary so that a subsequent call to 'kvm_s390_inject_prog_vcpu()'
* will inject a correct exception into the guest.
* If no access exception happened, the contents of pgm are undefined when
* this function returns.
*
* Returns: - zero on success
* - a negative value if e.g. the guest mapping is broken or in
* case of out-of-memory. In this case the contents of pgm are
* undefined. Also parts of @data may have been copied to guest
* space.
* - a positive value if an access exception happened. In this case
* the returned value is the program interruption code and the
* contents of pgm may be used to inject an exception into the
* guest. No data has been copied to guest space.
*
* Note: in case an access exception is recognized no data has been copied to
* guest space (this is also true, if the to be copied data would cross
* one or more page boundaries in guest space).
* Therefore this function may be used for nullifying and suppressing
* instruction emulation.
* It may also be used for terminating instructions, if it is undefined
* if data has been changed in guest space in case of an exception.
*/
static inline __must_check
int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
		unsigned long len)
{
	int rc;

	/* last argument non-zero: copy from @data into the guest */
	rc = access_guest(vcpu, ga, data, len, 1);
	return rc;
}
/**
* read_guest - copy data from guest space to kernel space
* @vcpu: virtual cpu
* @ga: guest address
* @data: destination address in kernel space
* @len: number of bytes to copy
*
* Copy @len bytes from @ga (guest address) to @data (kernel space).
*
* The behaviour of read_guest is identical to write_guest, except that
* data will be copied from guest space to kernel space.
*/
static inline __must_check
int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
	       unsigned long len)
{
	int rc;

	/* last argument zero: copy from the guest into @data */
	rc = access_guest(vcpu, ga, data, len, 0);
	return rc;
}
/**
* write_guest_abs - copy data from kernel space to guest space absolute
* @vcpu: virtual cpu
* @gpa: guest physical (absolute) address
* @data: source address in kernel space
* @len: number of bytes to copy
*
* Copy @len bytes from @data (kernel space) to @gpa (guest absolute address).
* It is up to the caller to ensure that the entire guest memory range is
* valid memory before calling this function.
* Guest low address and key protection are not checked.
*
* Returns zero on success or -EFAULT on error.
*
* If an error occurs data may have been copied partially to guest memory.
*/
static inline __must_check
int write_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
		    unsigned long len)
{
	int rc;

	/* @gpa is handed to kvm_write_guest() unchanged: no prefixing */
	rc = kvm_write_guest(vcpu->kvm, gpa, data, len);
	return rc;
}
/**
* read_guest_abs - copy data from guest space absolute to kernel space
* @vcpu: virtual cpu
* @gpa: guest physical (absolute) address
* @data: destination address in kernel space
* @len: number of bytes to copy
*
* Copy @len bytes from @gpa (guest absolute address) to @data (kernel space).
* It is up to the caller to ensure that the entire guest memory range is
* valid memory before calling this function.
* Guest key protection is not checked.
*
* Returns zero on success or -EFAULT on error.
*
* If an error occurs data may have been copied partially to kernel space.
*/
static inline __must_check
int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
		   unsigned long len)
{
	int rc;

	/* @gpa is handed to kvm_read_guest() unchanged: no prefixing */
	rc = kvm_read_guest(vcpu->kvm, gpa, data, len);
	return rc;
}
/**
* write_guest_real - copy data from kernel space to guest space real
* @vcpu: virtual cpu
* @gra: guest real address
* @data: source address in kernel space
* @len: number of bytes to copy
*
* Copy @len bytes from @data (kernel space) to @gra (guest real address).
* It is up to the caller to ensure that the entire guest memory range is
* valid memory before calling this function.
* Guest low address and key protection are not checked.
*
* Returns zero on success or -EFAULT on error.
*
* If an error occurs data may have been copied partially to guest memory.
*/
static inline __must_check
int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
		     unsigned long len)
{
	int rc;

	/* last argument non-zero: copy from @data into guest real memory */
	rc = access_guest_real(vcpu, gra, data, len, 1);
	return rc;
}
/**
* read_guest_real - copy data from guest space real to kernel space
* @vcpu: virtual cpu
* @gra: guest real address
* @data: destination address in kernel space
* @len: number of bytes to copy
*
* Copy @len bytes from @gra (guest real address) to @data (kernel space).
* It is up to the caller to ensure that the entire guest memory range is
* valid memory before calling this function.
* Guest key protection is not checked.
*
* Returns zero on success or -EFAULT on error.
*
* If an error occurs data may have been copied partially to kernel space.
*/
static inline __must_check
int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
unsigned long len)
{
unsigned long _len, rc;
void __user *uptr;
while (len) {
uptr = to_guest ? (void __user *)to : (void __user *)from;
uptr = __gptr_to_uptr(vcpu, uptr, prefixing);
if (IS_ERR((void __force *)uptr))
return -EFAULT;
_len = PAGE_SIZE - ((unsigned long)uptr & (PAGE_SIZE - 1));
_len = min(_len, len);
if (to_guest)
rc = copy_to_user((void __user *) uptr, (void *)from, _len);
else
rc = copy_from_user((void *)to, (void __user *)uptr, _len);
if (rc)
return -EFAULT;
len -= _len;
from += _len;
to += _len;
}
return 0;
return access_guest_real(vcpu, gra, data, len, 0);
}
#define copy_to_guest(vcpu, to, from, size) \
__copy_guest(vcpu, to, (unsigned long)from, size, 1, 1)
#define copy_from_guest(vcpu, to, from, size) \
__copy_guest(vcpu, (unsigned long)to, from, size, 0, 1)
#define copy_to_guest_absolute(vcpu, to, from, size) \
__copy_guest(vcpu, to, (unsigned long)from, size, 1, 0)
#define copy_from_guest_absolute(vcpu, to, from, size) \
__copy_guest(vcpu, (unsigned long)to, from, size, 0, 0)
int ipte_lock_held(struct kvm_vcpu *vcpu);
#endif /* __KVM_S390_GACCESS_H */
/*
* kvm guest debug support
*
* Copyright IBM Corp. 2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
*
* Author(s): David Hildenbrand <dahi@linux.vnet.ibm.com>
*/
#include <linux/kvm_host.h>
#include <linux/errno.h>
#include "kvm-s390.h"
#include "gaccess.h"
/*
 * Extends the address range given by *start and *stop so that it also covers
 * the range beginning at estart with length len (a non-positive len is
 * treated as a single address). A range with *start > *stop is a wrapping
 * ("overflowing") interval; for those the function tries to keep the overall
 * interval as small as possible.
 */
static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len)
{
	u64 estop;

	/* turn the length into the offset of the last address, never negative */
	len = (len > 0) ? len - 1 : 0;
	estop = estart + len;

	/* 0-0 range represents "not set" */
	if (!*start && !*stop) {
		*start = estart;
		*stop = estop;
		return;
	}

	if (*start <= *stop) {
		/* ordinary interval: grow it on whichever side is needed */
		if (estart < *start)
			*start = estart;
		if (estop > *stop)
			*stop = estop;
		return;
	}

	/* wrapping interval, i.e. *start > *stop */
	if (estart <= *stop) {
		if (estop > *stop)
			*stop = estop;
	} else if (estop > *start) {
		if (estart < *start)
			*start = estart;
	} else if ((estop - *stop) < (*start - estart)) {
		/* new range lies in the gap: extend the closer end */
		*stop = estop;
	} else {
		*start = estart;
	}
}
/*
* Number of bytes by which enable_all_hw_bp() moves the start of each
* breakpoint range backwards (and lengthens it), so that the instruction in
* front of the breakpoint address is covered as well.
*/
#define MAX_INST_SIZE 6
/*
 * Patch guest control registers 9-11 so that instruction-fetch and branch
 * events are reported for an address range covering every hardware
 * breakpoint in vcpu->arch.guestdbg.hw_bp_info. Does nothing when no
 * breakpoints are registered.
 */
static void enable_all_hw_bp(struct kvm_vcpu *vcpu)
{
	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
	unsigned long start, len;
	int i;

	if (vcpu->arch.guestdbg.nr_hw_bp <= 0 ||
	    vcpu->arch.guestdbg.hw_bp_info == NULL)
		return;

	/*
	 * If the guest is not interested in branching events, we can safely
	 * limit them to the PER address range.
	 */
	if (!(*cr9 & PER_EVENT_BRANCH))
		*cr9 |= PER_CONTROL_BRANCH_ADDRESS;
	*cr9 |= PER_EVENT_IFETCH | PER_EVENT_BRANCH;

	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_bp; i++) {
		start = vcpu->arch.guestdbg.hw_bp_info[i].addr;
		len = vcpu->arch.guestdbg.hw_bp_info[i].len;

		/*
		 * The instruction in front of the desired bp has to
		 * report instruction-fetching events, so widen the range
		 * backwards by MAX_INST_SIZE, clamping at address 0.
		 */
		if (start >= MAX_INST_SIZE) {
			start -= MAX_INST_SIZE;
			len += MAX_INST_SIZE;
		} else {
			len += start;
			start = 0;
		}

		extend_address_range(cr10, cr11, start, len);
	}
}
/*
 * Patch guest control registers 9-11 so that store events are reported for
 * an address range covering every hardware watchpoint in
 * vcpu->arch.guestdbg.hw_wp_info. Does nothing when no watchpoints are
 * registered.
 */
static void enable_all_hw_wp(struct kvm_vcpu *vcpu)
{
	u64 *cr9 = &vcpu->arch.sie_block->gcr[9];
	u64 *cr10 = &vcpu->arch.sie_block->gcr[10];
	u64 *cr11 = &vcpu->arch.sie_block->gcr[11];
	unsigned long start, len;
	int alter_all;
	int i;

	if (vcpu->arch.guestdbg.nr_hw_wp <= 0 ||
	    vcpu->arch.guestdbg.hw_wp_info == NULL)
		return;

	/* sample the condition before the alteration control is cleared */
	alter_all = (*cr9 & PER_EVENT_STORE) && (*cr9 & PER_CONTROL_ALTERATION);
	*cr9 &= ~PER_CONTROL_ALTERATION;

	if (alter_all) {
		/*
		 * Store events were already requested together with storage
		 * alteration control for special address spaces: enable all
		 * events (full address range) and give all to the guest.
		 */
		*cr10 = 0;
		*cr11 = PSW_ADDR_INSN;
		return;
	}

	*cr9 |= PER_EVENT_STORE;
	for (i = 0; i < vcpu->arch.guestdbg.nr_hw_wp; i++) {
		start = vcpu->arch.guestdbg.hw_wp_info[i].addr;
		len = vcpu->arch.guestdbg.hw_wp_info[i].len;
		extend_address_range(cr10, cr11, start, len);
	}
}
/*
 * Save the guest's control registers 0, 9, 10 and 11 from the SIE block
 * into the vcpu's guest debugging state.
 */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu)
{
	/* PER control and address range registers */
	vcpu->arch.guestdbg.cr11 = vcpu->arch.sie_block->gcr[11];
	vcpu->arch.guestdbg.cr10 = vcpu->arch.sie_block->gcr[10];
	vcpu->arch.guestdbg.cr9 = vcpu->arch.sie_block->gcr[9];

	/* CR0 is saved as well since it gets patched for single stepping */
	vcpu->arch.guestdbg.cr0 = vcpu->arch.sie_block->gcr[0];
}
/*
 * Write the saved copies of control registers 0, 9, 10 and 11 from the
 * vcpu's guest debugging state back into the SIE block.
 */
void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu)
{
	/* PER control and address range registers */
	vcpu->arch.sie_block->gcr[11] = vcpu->arch.guestdbg.cr11;
	vcpu->arch.sie_block->gcr[10] = vcpu->arch.guestdbg.cr10;
	vcpu->arch.sie_block->gcr[9] = vcpu->arch.guestdbg.cr9;

	vcpu->arch.sie_block->gcr[0] = vcpu->arch.guestdbg.cr0;
}
void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu)
{
/*
* TODO: if guest psw has per enabled, otherwise 0s!
* This reduces the amount of reported events.
* Need to intercept all psw changes!
*/
if (guestdbg_sstep_enabled(vcpu)) {
/* disable timer (clock-comparator) interrupts */
vcpu->arch.sie_block->gcr[0] &= ~0x800ul;
vcpu->arch.sie_block->gcr[9] |= PER_EVENT_IFETCH;
vcpu->arch.sie_block->gcr[10] = 0;
vcpu->arch.sie_block->gcr[11] = PSW_ADDR_INSN;
}
if (guestdbg_hw_bp_enabled(vcpu)) {
enable_all_hw_bp(vcpu);
enable_all_hw_wp(vcpu);
}
/* TODO: Instruction-fetching-nullification not allowed for now */
if (vcpu->arch.sie_block->gcr[9] & PER_EVENT_NULLIFICATION)
vcpu->arch.sie_block->gcr[9] &= ~PER_EVENT_NULLIFICATION;
}
#define MAX_WP_SIZE 100
/*
 * Fill one watchpoint descriptor from a userspace breakpoint definition and
 * snapshot the currently watched guest memory into wp_info->old_data.
 *
 * Returns 0 on success, -EINVAL for a length outside 0..MAX_WP_SIZE,
 * -ENOMEM if the snapshot buffer cannot be allocated, or the error reported
 * by read_guest(). On any failure wp_info->old_data is left NULL.
 */
static int __import_wp_info(struct kvm_vcpu *vcpu,
			    struct kvm_hw_breakpoint *bp_data,
			    struct kvm_hw_wp_info_arch *wp_info)
{
	int rc;

	wp_info->old_data = NULL;
	wp_info->phys_addr = bp_data->phys_addr;
	wp_info->addr = bp_data->addr;
	wp_info->len = bp_data->len;

	if (wp_info->len > MAX_WP_SIZE || wp_info->len < 0)
		return -EINVAL;

	wp_info->old_data = kmalloc(bp_data->len, GFP_KERNEL);
	if (wp_info->old_data == NULL)
		return -ENOMEM;

	/* try to backup the original value */
	rc = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
			wp_info->len);
	if (rc == 0)
		return 0;

	kfree(wp_info->old_data);
	wp_info->old_data = NULL;
	return rc;
}
#define MAX_BP_COUNT 50
int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
int ret = 0, nr_wp = 0, nr_bp = 0, i, size;
struct kvm_hw_breakpoint *bp_data = NULL;
struct kvm_hw_wp_info_arch *wp_info = NULL;
struct kvm_hw_bp_info_arch *bp_info = NULL;
if (dbg->arch.nr_hw_bp <= 0 || !dbg->arch.hw_bp)
return 0;
else if (dbg->arch.nr_hw_bp > MAX_BP_COUNT)
return -EINVAL;
size = dbg->arch.nr_hw_bp * sizeof(struct kvm_hw_breakpoint);
bp_data = kmalloc(size, GFP_KERNEL);
if (!bp_data) {
ret = -ENOMEM;
goto error;
}
ret = copy_from_user(bp_data, dbg->arch.hw_bp, size);
if (ret)
goto error;
for (i = 0; i < dbg->arch.nr_hw_bp; i++) {
switch (bp_data[i].type) {
case KVM_HW_WP_WRITE:
nr_wp++;
break;
case KVM_HW_BP:
nr_bp++;
break;
default:
break;
}
}
size = nr_wp * sizeof(struct kvm_hw_wp_info_arch);
if (size > 0) {
wp_info = kmalloc(size, GFP_KERNEL);
if (!wp_info) {
ret = -ENOMEM;
goto error;
}
}
size = nr_bp * sizeof(struct kvm_hw_bp_info_arch);
if (size > 0) {
bp_info = kmalloc(size, GFP_KERNEL);
if (!bp_info) {
ret = -ENOMEM;
goto error;
}
}
for (nr_wp = 0, nr_bp = 0, i = 0; i < dbg->arch.nr_hw_bp; i++) {
switch (bp_data[i].type) {
case KVM_HW_WP_WRITE:
ret = __import_wp_info(vcpu, &bp_data[i],
&wp_info[nr_wp]);
if (ret)
goto error;
nr_wp++;
break;
case KVM_HW_BP:
bp_info[nr_bp].len = bp_data[i].len;
bp_info[nr_bp].addr = bp_data[i].addr;
nr_bp++;
break;
}
}
vcpu->arch.guestdbg.nr_hw_bp = nr_bp;
vcpu->arch.guestdbg.hw_bp_info = bp_info;
vcpu->arch.guestdbg.nr_hw_wp = nr_wp;
vcpu->arch.guestdbg.hw_wp_info = wp_info;
return 0;
error:
kfree(bp_data);
kfree(wp_info);
kfree(bp_info);
return ret;
}
/*
 * Free all hardware breakpoint and watchpoint data attached to the vcpu,
 * including the memory snapshot of every watchpoint, and reset the counters.
 */
void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu)
{
	int idx;

	/* each watchpoint owns a snapshot of the watched memory */
	for (idx = 0; idx < vcpu->arch.guestdbg.nr_hw_wp; idx++) {
		kfree(vcpu->arch.guestdbg.hw_wp_info[idx].old_data);
		vcpu->arch.guestdbg.hw_wp_info[idx].old_data = NULL;
	}

	kfree(vcpu->arch.guestdbg.hw_wp_info);
	vcpu->arch.guestdbg.hw_wp_info = NULL;
	vcpu->arch.guestdbg.nr_hw_wp = 0;

	kfree(vcpu->arch.guestdbg.hw_bp_info);
	vcpu->arch.guestdbg.hw_bp_info = NULL;
	vcpu->arch.guestdbg.nr_hw_bp = 0;
}
/*
 * Check whether addr lies within the inclusive interval [a, b].
 *
 * If a > b the interval wraps around the end of the address space, i.e. it
 * consists of [a, max] plus [0, b]; addr is then inside if it is at or above
 * the start OR at or below the end.
 *
 * Returns nonzero if addr is inside the interval, 0 otherwise.
 */
static inline int in_addr_range(u64 addr, u64 a, u64 b)
{
	if (a <= b)
		return (addr >= a) && (addr <= b);

	/* "overflowing" interval */
	return (addr >= a) || (addr <= b);
}
#define end_of_range(bp_info) (bp_info->addr + bp_info->len - 1)
/*
 * Look up the first registered hardware breakpoint that starts at addr or
 * whose address range contains addr.
 *
 * Returns the matching breakpoint descriptor, or NULL if there is none.
 */
static struct kvm_hw_bp_info_arch *find_hw_bp(struct kvm_vcpu *vcpu,
					      unsigned long addr)
{
	struct kvm_hw_bp_info_arch *bp;
	int idx;

	for (idx = 0; idx < vcpu->arch.guestdbg.nr_hw_bp; idx++) {
		bp = &vcpu->arch.guestdbg.hw_bp_info[idx];

		/* addr is directly the start of a bp ... */
		if (bp->addr == addr)
			return bp;

		/* ... or lies in the range of a bp with a valid length */
		if (bp->len > 0 &&
		    in_addr_range(addr, bp->addr, end_of_range(bp)))
			return bp;
	}

	return NULL;
}
/*
 * Re-read the guest memory behind every registered watchpoint and compare
 * it with the snapshot taken when the watchpoint was imported.
 *
 * Watchpoints without a snapshot or with a non-positive length are skipped,
 * as are watchpoints for which the temporary buffer cannot be allocated or
 * the guest memory cannot be read.
 *
 * Returns the first watchpoint whose content differs from its snapshot, or
 * NULL if none changed.
 */
static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
{
	struct kvm_hw_wp_info_arch *wp;
	void *snapshot;
	int idx, differs;

	for (idx = 0; idx < vcpu->arch.guestdbg.nr_hw_wp; idx++) {
		wp = &vcpu->arch.guestdbg.hw_wp_info[idx];
		if (!wp || !wp->old_data || wp->len <= 0)
			continue;

		snapshot = kmalloc(wp->len, GFP_KERNEL);
		if (!snapshot)
			continue;

		/* refetch the wp data and compare it to the old value */
		differs = !read_guest(vcpu, wp->phys_addr, snapshot,
				      wp->len) &&
			  memcmp(snapshot, wp->old_data, wp->len);
		kfree(snapshot);

		if (differs)
			return wp;
	}

	return NULL;
}
/*
 * Turn a pending debug exit into an actual one: clear the pending flag and
 * report KVM_EXIT_DEBUG as exit reason in the run structure.
 */
void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu)
{
	vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
	vcpu->run->exit_reason = KVM_EXIT_DEBUG;
}
#define per_bp_event(code) \
(code & (PER_EVENT_IFETCH | PER_EVENT_BRANCH))
#define per_write_wp_event(code) \
(code & (PER_EVENT_STORE | PER_EVENT_STORE_REAL))
/*
 * Decide whether the PER event recorded in the SIE block has to be reported
 * to userspace as a debug exit, and if so fill in the address and type of
 * the debug event in vcpu->run->debug.arch.
 *
 * Checked in this order: changed hardware watchpoint, hardware breakpoint
 * at the PSW address, hardware breakpoint at the PER address, single step.
 *
 * Returns 1 if a debug exit is required, 0 otherwise.
 */
static int debug_exit_required(struct kvm_vcpu *vcpu)
{
	struct kvm_debug_exit_arch *exit_info = &vcpu->run->debug.arch;
	unsigned long psw_addr = vcpu->arch.sie_block->gpsw.addr;
	unsigned long per_addr = vcpu->arch.sie_block->peraddr;
	u32 per_code = (vcpu->arch.sie_block->perc << 24);
	struct kvm_hw_wp_info_arch *changed_wp;

	if (guestdbg_hw_bp_enabled(vcpu)) {
		if (per_write_wp_event(per_code) &&
		    vcpu->arch.guestdbg.nr_hw_wp > 0) {
			changed_wp = any_wp_changed(vcpu);
			if (changed_wp) {
				exit_info->addr = changed_wp->addr;
				exit_info->type = KVM_HW_WP_WRITE;
				return 1;
			}
		}

		if (per_bp_event(per_code) &&
		    vcpu->arch.guestdbg.nr_hw_bp > 0) {
			/* remove duplicate events if PC==PER address */
			if (find_hw_bp(vcpu, psw_addr) &&
			    psw_addr != per_addr) {
				exit_info->addr = psw_addr;
				exit_info->type = KVM_HW_BP;
				vcpu->arch.guestdbg.last_bp = psw_addr;
				return 1;
			}

			/* breakpoint missed */
			if (find_hw_bp(vcpu, per_addr) &&
			    vcpu->arch.guestdbg.last_bp != per_addr) {
				exit_info->addr = per_addr;
				exit_info->type = KVM_HW_BP;
				return 1;
			}
		}
	}

	if (guestdbg_sstep_enabled(vcpu) && per_bp_event(per_code)) {
		exit_info->addr = psw_addr;
		exit_info->type = KVM_SINGLESTEP;
		return 1;
	}

	return 0;
}
#define guest_per_enabled(vcpu) \
(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
/*
 * Reduce the PER code in the SIE block to the events the guest itself asked
 * for (via its PSW PER bit and control registers 9-11). If nothing remains,
 * the PER indication is removed from the program interruption code.
 */
static void filter_guest_per_event(struct kvm_vcpu *vcpu)
{
	u64 per_ctl = vcpu->arch.sie_block->gcr[9];
	u64 range_lo = vcpu->arch.sie_block->gcr[10];
	u64 range_hi = vcpu->arch.sie_block->gcr[11];
	u64 psw_addr = vcpu->arch.sie_block->gpsw.addr;
	u64 per_addr = vcpu->arch.sie_block->peraddr;
	u32 raised = vcpu->arch.sie_block->perc << 24;
	u32 guest_perc = 0;

	/* keep only the events demanded by a guest that has PER enabled */
	if (guest_per_enabled(vcpu))
		guest_perc = raised & per_ctl & PER_EVENT_MASK;

	/* filter "successful-branching" events */
	if ((guest_perc & PER_EVENT_BRANCH) &&
	    (per_ctl & PER_CONTROL_BRANCH_ADDRESS) &&
	    !in_addr_range(psw_addr, range_lo, range_hi))
		guest_perc &= ~PER_EVENT_BRANCH;

	/* filter "instruction-fetching" events */
	if ((guest_perc & PER_EVENT_IFETCH) &&
	    !in_addr_range(per_addr, range_lo, range_hi))
		guest_perc &= ~PER_EVENT_IFETCH;

	/* All other PER events will be given to the guest */
	/* TODO: Check altered address/address space */

	vcpu->arch.sie_block->perc = guest_perc >> 24;
	if (guest_perc == 0)
		vcpu->arch.sie_block->iprcc &= ~PGM_PER;
}
/*
 * Handle a PER event: flag a pending debug exit if userspace has to be
 * informed, then filter the event down to what the guest may see.
 */
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu)
{
	int exit_needed = debug_exit_required(vcpu);

	if (exit_needed)
		vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;

	filter_guest_per_event(vcpu);
}
......@@ -16,6 +16,7 @@
#include <linux/pagemap.h>
#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include "kvm-s390.h"
#include "gaccess.h"
......@@ -29,6 +30,7 @@ static const intercept_handler_t instruction_handlers[256] = {
[0x83] = kvm_s390_handle_diag,
[0xae] = kvm_s390_handle_sigp,
[0xb2] = kvm_s390_handle_b2,
[0xb6] = kvm_s390_handle_stctl,
[0xb7] = kvm_s390_handle_lctl,
[0xb9] = kvm_s390_handle_b9,
[0xe5] = kvm_s390_handle_e5,
......@@ -109,22 +111,112 @@ static int handle_instruction(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
/*
 * Collect the description of the program interruption recorded in the SIE
 * block into *pgm_info.
 *
 * pgm_info is zeroed first, so only the fields that belong to the given
 * interruption code are set. The code itself (including a possible PGM_PER
 * bit) is always copied; the PER fields are copied in addition whenever
 * PGM_PER is set.
 */
static void __extract_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info)
{
memset(pgm_info, 0, sizeof(struct kvm_s390_pgm_info));
pgm_info->code = vcpu->arch.sie_block->iprcc;
/* select the auxiliary fields by interruption code, ignoring the PER bit */
switch (vcpu->arch.sie_block->iprcc & ~PGM_PER) {
/* codes that only carry a translation-exception code */
case PGM_AFX_TRANSLATION:
case PGM_ASX_TRANSLATION:
case PGM_EX_TRANSLATION:
case PGM_LFX_TRANSLATION:
case PGM_LSTE_SEQUENCE:
case PGM_LSX_TRANSLATION:
case PGM_LX_TRANSLATION:
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
case PGM_SPACE_SWITCH:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
break;
/* codes that only carry an exception access id */
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_INSTANCE:
case PGM_ASTE_SEQUENCE:
case PGM_ASTE_VALIDITY:
case PGM_EXTENDED_AUTHORITY:
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
break;
/* codes that carry translation-exception code plus both access ids */
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
case PGM_SEGMENT_TRANSLATION:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
pgm_info->op_access_id = vcpu->arch.sie_block->oai;
break;
case PGM_MONITOR:
pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
/* the monitor code is taken from the same SIE field (tecmc) */
pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
break;
case PGM_DATA:
pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
break;
case PGM_PROTECTION:
pgm_info->trans_exc_code = vcpu->arch.sie_block->tecmc;
pgm_info->exc_access_id = vcpu->arch.sie_block->eai;
break;
default:
/* all other codes carry no additional information here */
break;
}
/* a PER event may accompany any of the codes above */
if (vcpu->arch.sie_block->iprcc & PGM_PER) {
pgm_info->per_code = vcpu->arch.sie_block->perc;
pgm_info->per_atmid = vcpu->arch.sie_block->peratmid;
pgm_info->per_address = vcpu->arch.sie_block->peraddr;
pgm_info->per_access_id = vcpu->arch.sie_block->peraid;
}
}
/*
 * Restore the ITDB to the program-interruption TDB in the guest lowcore
 * and clear the ITDB afterwards.
 *
 * Nothing is done (and 0 is returned) unless IS_TE_ENABLED() and
 * IS_ITDB_VALID() both hold for the vcpu and the current thread does not
 * have PER_FLAG_NO_TE set.
 *
 * Returns 0 on success or the error reported by write_guest_lc().
 */
static int handle_itdb(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_itdb *tdb;
	int rc;

	if (!IS_TE_ENABLED(vcpu) || !IS_ITDB_VALID(vcpu) ||
	    (current->thread.per_flags & PER_FLAG_NO_TE))
		return 0;

	tdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
	rc = write_guest_lc(vcpu, __LC_PGM_TDB, tdb, sizeof(*tdb));
	/* only wipe the ITDB once it has reached the guest */
	if (!rc)
		memset(tdb, 0, sizeof(*tdb));
	return rc;
}
#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
static int handle_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_pgm_info pgm_info;
int rc;
vcpu->stat.exit_program_interruption++;
/* Restore ITDB to Program-Interruption TDB in guest memory */
if (IS_TE_ENABLED(vcpu) &&
!(current->thread.per_flags & PER_FLAG_NO_TE) &&
IS_ITDB_VALID(vcpu)) {
copy_to_guest(vcpu, TDB_ADDR, vcpu->arch.sie_block->itdba,
sizeof(struct kvm_s390_itdb));
memset((void *) vcpu->arch.sie_block->itdba, 0,
sizeof(struct kvm_s390_itdb));
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
kvm_s390_handle_per_event(vcpu);
/* the interrupt might have been filtered out completely */
if (vcpu->arch.sie_block->iprcc == 0)
return 0;
}
trace_kvm_s390_intercept_prog(vcpu, vcpu->arch.sie_block->iprcc);
return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
rc = handle_itdb(vcpu);
if (rc)
return rc;
__extract_prog_irq(vcpu, &pgm_info);
return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
......
......@@ -56,6 +56,17 @@ static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
return 1;
}
/*
 * Tell whether clock-comparator interrupts may currently be delivered to
 * the vcpu.
 *
 * Returns 1 if external interrupts are not disabled in the PSW, bit 0x800
 * is set in guest CR0 and the vcpu is not being single stepped; 0 otherwise.
 */
static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
{
	if (psw_extint_disabled(vcpu))
		return 0;
	if ((vcpu->arch.sie_block->gcr[0] & 0x800ul) == 0)
		return 0;
	/* No timer interrupts when single stepping */
	return !(guestdbg_enabled(vcpu) && guestdbg_sstep_enabled(vcpu));
}
static u64 int_word_to_isc_bits(u32 int_word)
{
u8 isc = (int_word & 0x38000000) >> 27;
......@@ -131,7 +142,13 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
&vcpu->arch.sie_block->cpuflags);
vcpu->arch.sie_block->lctl = 0x0000;
vcpu->arch.sie_block->ictl &= ~ICTL_LPSW;
vcpu->arch.sie_block->ictl &= ~(ICTL_LPSW | ICTL_STCTL | ICTL_PINT);
if (guestdbg_enabled(vcpu)) {
vcpu->arch.sie_block->lctl |= (LCTL_CR0 | LCTL_CR9 |
LCTL_CR10 | LCTL_CR11);
vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
}
}
static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
......@@ -174,6 +191,106 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
}
}
/*
 * Deliver the program interruption described by *pgm_info to the guest:
 * store the code-specific fields, the PER information (if PGM_PER is set),
 * the instruction length code and the interruption code into the guest
 * lowcore, then save the current guest PSW as program old PSW and load the
 * program new PSW.
 *
 * Returns 0 if all lowcore accesses succeeded. The results of the individual
 * accesses are ORed together, so a nonzero return only signals that at least
 * one access failed; it is not necessarily a meaningful error code.
 */
static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info)
{
/* instruction length in bytes, indexed by ipa >> 14 */
const unsigned short table[] = { 2, 4, 4, 6 };
int rc = 0;
/* store the fields that belong to the interruption code (PER bit ignored) */
switch (pgm_info->code & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
case PGM_ASX_TRANSLATION:
case PGM_EX_TRANSLATION:
case PGM_LFX_TRANSLATION:
case PGM_LSTE_SEQUENCE:
case PGM_LSX_TRANSLATION:
case PGM_LX_TRANSLATION:
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
break;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_INSTANCE:
case PGM_ASTE_SEQUENCE:
case PGM_ASTE_VALIDITY:
case PGM_EXTENDED_AUTHORITY:
rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
break;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
case PGM_REGION_SECOND_TRANS:
case PGM_REGION_THIRD_TRANS:
case PGM_SEGMENT_TRANSLATION:
rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
(u8 *)__LC_OP_ACCESS_ID);
break;
case PGM_MONITOR:
rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
(u64 *)__LC_MON_CLASS_NR);
rc |= put_guest_lc(vcpu, pgm_info->mon_code,
(u64 *)__LC_MON_CODE);
break;
case PGM_DATA:
rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
(u32 *)__LC_DATA_EXC_CODE);
break;
case PGM_PROTECTION:
rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
break;
}
/* PER information accompanies the interruption if PGM_PER is set */
if (pgm_info->code & PGM_PER) {
rc |= put_guest_lc(vcpu, pgm_info->per_code,
(u8 *) __LC_PER_CODE);
rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
(u8 *)__LC_PER_ATMID);
rc |= put_guest_lc(vcpu, pgm_info->per_address,
(u64 *) __LC_PER_ADDRESS);
rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
(u8 *) __LC_PER_ACCESS_ID);
}
/* the source of the instruction length code depends on the intercept */
switch (vcpu->arch.sie_block->icptcode) {
case ICPT_INST:
case ICPT_INSTPROGI:
case ICPT_OPEREXC:
case ICPT_PARTEXEC:
case ICPT_IOINST:
/* last instruction only stored for these icptcodes */
rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
(u16 *) __LC_PGM_ILC);
break;
case ICPT_PROGI:
/* take the length code recorded in the SIE block */
rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
(u16 *) __LC_PGM_ILC);
break;
default:
/* no instruction length available: store 0 */
rc |= put_guest_lc(vcpu, 0,
(u16 *) __LC_PGM_ILC);
}
rc |= put_guest_lc(vcpu, pgm_info->code,
(u16 *)__LC_PGM_INT_CODE);
/* PSW swap: save the current guest PSW, then load the program new PSW */
rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
return rc;
}
static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
......@@ -186,26 +303,28 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->emerg.code, 0);
rc = put_guest(vcpu, 0x1201, (u16 __user *)__LC_EXT_INT_CODE);
rc |= put_guest(vcpu, inti->emerg.code,
(u16 __user *)__LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
rc |= put_guest_lc(vcpu, inti->emerg.code,
(u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
break;
case KVM_S390_INT_EXTERNAL_CALL:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
vcpu->stat.deliver_external_call++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->extcall.code, 0);
rc = put_guest(vcpu, 0x1202, (u16 __user *)__LC_EXT_INT_CODE);
rc |= put_guest(vcpu, inti->extcall.code,
(u16 __user *)__LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
rc |= put_guest_lc(vcpu, inti->extcall.code,
(u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
break;
case KVM_S390_INT_SERVICE:
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
......@@ -213,37 +332,39 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_service_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params, 0);
rc = put_guest(vcpu, 0x2401, (u16 __user *)__LC_EXT_INT_CODE);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc |= put_guest(vcpu, inti->ext.ext_params,
(u32 __user *)__LC_EXT_PARAMS);
rc |= put_guest_lc(vcpu, inti->ext.ext_params,
(u32 *)__LC_EXT_PARAMS);
break;
case KVM_S390_INT_PFAULT_INIT:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
inti->ext.ext_params2);
rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
rc |= put_guest(vcpu, 0x0600, (u16 __user *) __LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc |= put_guest(vcpu, inti->ext.ext_params2,
(u64 __user *) __LC_EXT_PARAMS2);
rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
(u64 *) __LC_EXT_PARAMS2);
break;
case KVM_S390_INT_PFAULT_DONE:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
inti->ext.ext_params2);
rc = put_guest(vcpu, 0x2603, (u16 __user *) __LC_EXT_INT_CODE);
rc |= put_guest(vcpu, 0x0680, (u16 __user *) __LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc |= put_guest(vcpu, inti->ext.ext_params2,
(u64 __user *) __LC_EXT_PARAMS2);
rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
(u64 *)__LC_EXT_PARAMS2);
break;
case KVM_S390_INT_VIRTIO:
VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
......@@ -252,16 +373,17 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params,
inti->ext.ext_params2);
rc = put_guest(vcpu, 0x2603, (u16 __user *)__LC_EXT_INT_CODE);
rc |= put_guest(vcpu, 0x0d00, (u16 __user *)__LC_EXT_CPU_ADDR);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
rc = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc |= put_guest(vcpu, inti->ext.ext_params,
(u32 __user *)__LC_EXT_PARAMS);
rc |= put_guest(vcpu, inti->ext.ext_params2,
(u64 __user *)__LC_EXT_PARAMS2);
rc |= put_guest_lc(vcpu, inti->ext.ext_params,
(u32 *)__LC_EXT_PARAMS);
rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
(u64 *)__LC_EXT_PARAMS2);
break;
case KVM_S390_SIGP_STOP:
VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
......@@ -285,12 +407,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
0, 0);
rc = copy_to_guest(vcpu,
offsetof(struct _lowcore, restart_old_psw),
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
offsetof(struct _lowcore, restart_psw),
sizeof(psw_t));
rc = write_guest_lc(vcpu,
offsetof(struct _lowcore, restart_old_psw),
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
break;
case KVM_S390_PROGRAM_INT:
......@@ -300,13 +422,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_program_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->pgm.code, 0);
rc = put_guest(vcpu, inti->pgm.code, (u16 __user *)__LC_PGM_INT_CODE);
rc |= put_guest(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
(u16 __user *)__LC_PGM_ILC);
rc |= copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_PGM_NEW_PSW, sizeof(psw_t));
rc = __deliver_prog_irq(vcpu, &inti->pgm);
break;
case KVM_S390_MCHK:
......@@ -317,11 +433,12 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
inti->mchk.mcic);
rc = kvm_s390_vcpu_store_status(vcpu,
KVM_S390_STORE_STATUS_PREFIXED);
rc |= put_guest(vcpu, inti->mchk.mcic, (u64 __user *) __LC_MCCK_CODE);
rc |= copy_to_guest(vcpu, __LC_MCK_OLD_PSW,
rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_MCK_NEW_PSW, sizeof(psw_t));
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
......@@ -334,18 +451,20 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
vcpu->stat.deliver_io_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
param0, param1);
rc = put_guest(vcpu, inti->io.subchannel_id,
(u16 __user *) __LC_SUBCHANNEL_ID);
rc |= put_guest(vcpu, inti->io.subchannel_nr,
(u16 __user *) __LC_SUBCHANNEL_NR);
rc |= put_guest(vcpu, inti->io.io_int_parm,
(u32 __user *) __LC_IO_INT_PARM);
rc |= put_guest(vcpu, inti->io.io_int_word,
(u32 __user *) __LC_IO_INT_WORD);
rc |= copy_to_guest(vcpu, __LC_IO_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_IO_NEW_PSW, sizeof(psw_t));
rc = put_guest_lc(vcpu, inti->io.subchannel_id,
(u16 *)__LC_SUBCHANNEL_ID);
rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
(u16 *)__LC_SUBCHANNEL_NR);
rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
(u32 *)__LC_IO_INT_PARM);
rc |= put_guest_lc(vcpu, inti->io.io_int_word,
(u32 *)__LC_IO_INT_WORD);
rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
break;
}
default:
......@@ -358,25 +477,21 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
}
}
static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
{
int rc;
if (psw_extint_disabled(vcpu))
return 0;
if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
return 0;
rc = put_guest(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
rc |= copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
__LC_EXT_NEW_PSW, sizeof(psw_t));
rc = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
if (rc) {
printk("kvm: The guest lowcore is not mapped during interrupt "
"delivery, killing userspace\n");
do_exit(SIGKILL);
}
return 1;
}
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
......@@ -406,19 +521,20 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
spin_unlock(&fi->lock);
}
if ((!rc) && (vcpu->arch.sie_block->ckc <
get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) {
if ((!psw_extint_disabled(vcpu)) &&
(vcpu->arch.sie_block->gcr[0] & 0x800ul))
rc = 1;
}
if (!rc && kvm_cpu_has_pending_timer(vcpu))
rc = 1;
return rc;
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
return 0;
if (!(vcpu->arch.sie_block->ckc <
get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
return 0;
if (!ckc_interrupts_enabled(vcpu))
return 0;
return 1;
}
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
......@@ -441,8 +557,7 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP; /* disabled wait */
}
if (psw_extint_disabled(vcpu) ||
(!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
if (!ckc_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
goto no_timer;
}
......@@ -554,9 +669,8 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
} while (deliver);
}
if ((vcpu->arch.sie_block->ckc <
get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
__try_deliver_ckc_interrupt(vcpu);
if (kvm_cpu_has_pending_timer(vcpu))
deliver_ckc_interrupt(vcpu);
if (atomic_read(&fi->active)) {
do {
......@@ -660,6 +774,31 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
return 0;
}
/*
 * Queue the program interruption described by *pgm_info on the vcpu's local
 * interrupt list.
 *
 * The contents of *pgm_info are copied into a newly allocated interrupt
 * info, so the caller keeps ownership of pgm_info.
 *
 * Returns 0 on success or -ENOMEM if the interrupt info cannot be allocated.
 */
int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_interrupt_info *inti;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
return -ENOMEM;
VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
pgm_info->code);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info->code, 0, 1);
inti->type = KVM_S390_PROGRAM_INT;
memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
/* the local interrupt list is protected by li->lock */
spin_lock_bh(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
/* nobody is expected to be waiting on this vcpu's wait queue here */
BUG_ON(waitqueue_active(li->wq));
spin_unlock_bh(&li->lock);
return 0;
}
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid)
{
......@@ -810,6 +949,12 @@ int kvm_s390_inject_vm(struct kvm *kvm,
return __inject_vm(kvm, inti);
}
/*
 * Put an already allocated interrupt info back via __inject_vm().
 * The return value of __inject_vm() is ignored.
 */
void kvm_s390_reinject_io_int(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti)
{
__inject_vm(kvm, inti);
}
int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int)
{
......@@ -900,7 +1045,7 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
return 0;
}
static void clear_floating_interrupts(struct kvm *kvm)
void kvm_s390_clear_float_irqs(struct kvm *kvm)
{
struct kvm_s390_float_interrupt *fi;
struct kvm_s390_interrupt_info *n, *inti = NULL;
......@@ -1246,7 +1391,7 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
break;
case KVM_DEV_FLIC_CLEAR_IRQS:
r = 0;
clear_floating_interrupts(dev->kvm);
kvm_s390_clear_float_irqs(dev->kvm);
break;
case KVM_DEV_FLIC_APF_ENABLE:
dev->kvm->arch.gmap->pfault_enabled = 1;
......
......@@ -11,6 +11,7 @@
* Christian Borntraeger <borntraeger@de.ibm.com>
* Heiko Carstens <heiko.carstens@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
* Jason J. Herne <jjherne@us.ibm.com>
*/
#include <linux/compiler.h>
......@@ -51,6 +52,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
......@@ -66,6 +69,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
......@@ -90,7 +94,7 @@ unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;
/* test availability of vfacility */
static inline int test_vfacility(unsigned long nr)
int test_vfacility(unsigned long nr)
{
return __test_facility(nr, (void *) vfacilities);
}
......@@ -161,6 +165,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_VM_ATTRIBUTES:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
......@@ -179,6 +184,25 @@ int kvm_dev_ioctl_check_extension(long ext)
return r;
}
/*
 * Transfer the dirty state of all pages of a memory slot into KVM's dirty
 * bitmap: every page of the slot is tested (and its dirty state cleared)
 * via gmap_test_and_clear_dirty() and marked dirty in KVM if it was dirty.
 *
 * The mmap_sem of the gmap's mm is held for reading while the slot is
 * walked.
 */
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, end_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	down_read(&gmap->mm->mmap_sem);
	/*
	 * Loop over all guest pages. The slot covers the gfns
	 * [base_gfn, base_gfn + npages), so the end is exclusive; visiting
	 * it would touch a page that does not belong to this slot.
	 */
	end_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < end_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (gmap_test_and_clear_dirty(address, gmap))
			mark_page_dirty(kvm, cur_gfn);
	}
	up_read(&gmap->mm->mmap_sem);
}
/* Section: vm related */
/*
* Get (and clear) the dirty memory log for a memory slot.
......@@ -186,7 +210,36 @@ int kvm_dev_ioctl_check_extension(long ext)
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
return 0;
int r;
unsigned long n;
struct kvm_memory_slot *memslot;
int is_dirty = 0;
mutex_lock(&kvm->slots_lock);
r = -EINVAL;
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
memslot = id_to_memslot(kvm->memslots, log->slot);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
kvm_s390_sync_dirty_log(kvm, memslot);
r = kvm_get_dirty_log(kvm, log, &is_dirty);
if (r)
goto out;
/* Clear the dirty log */
if (is_dirty) {
n = kvm_dirty_bitmap_bytes(memslot);
memset(memslot->dirty_bitmap, 0, n);
}
r = 0;
out:
mutex_unlock(&kvm->slots_lock);
return r;
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
......@@ -208,11 +261,86 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
return r;
}
/*
 * Handle a "set" request for an attribute of the KVM_S390_VM_MEM_CTRL group.
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA: sets kvm->arch.use_cmma, but only while no
 *	vcpu is online; otherwise -EBUSY is returned.
 * KVM_S390_VM_MEM_CLR_CMMA: calls page_table_reset_pgste() for the range
 *	0..TASK_SIZE of the gmap's mm.
 *
 * Returns 0 on success, -EBUSY as described above and -ENXIO for any other
 * attribute.
 */
static int kvm_s390_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	unsigned int srcu_idx;
	int rc = -ENXIO;

	if (attr->attr == KVM_S390_VM_MEM_ENABLE_CMMA) {
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			rc = 0;
		} else {
			rc = -EBUSY;
		}
		mutex_unlock(&kvm->lock);
	} else if (attr->attr == KVM_S390_VM_MEM_CLR_CMMA) {
		mutex_lock(&kvm->lock);
		srcu_idx = srcu_read_lock(&kvm->srcu);
		page_table_reset_pgste(kvm->arch.gmap->mm, 0, TASK_SIZE, false);
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		mutex_unlock(&kvm->lock);
		rc = 0;
	}

	return rc;
}
/*
 * Dispatch a per-VM "set attribute" request by attribute group.
 *
 * Only KVM_S390_VM_MEM_CTRL is handled (by kvm_s390_mem_control()); every
 * other group yields -ENXIO.
 */
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (attr->group == KVM_S390_VM_MEM_CTRL)
		return kvm_s390_mem_control(kvm, attr);

	return -ENXIO;
}
/*
 * Per-VM "get attribute" request: no attribute can be read here, so every
 * request is answered with -ENXIO regardless of @attr.
 */
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
return -ENXIO;
}
/*
 * Report whether the per-VM attribute described by @attr is known.
 *
 * Returns 0 for KVM_S390_VM_MEM_ENABLE_CMMA and KVM_S390_VM_MEM_CLR_CMMA in
 * group KVM_S390_VM_MEM_CTRL, -ENXIO for everything else.
 */
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (attr->group != KVM_S390_VM_MEM_CTRL)
		return -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
	case KVM_S390_VM_MEM_CLR_CMMA:
		return 0;
	default:
		return -ENXIO;
	}
}
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
struct kvm_device_attr attr;
int r;
switch (ioctl) {
......@@ -245,6 +373,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
break;
}
case KVM_SET_DEVICE_ATTR: {
r = -EFAULT;
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
break;
r = kvm_s390_vm_set_attr(kvm, &attr);
break;
}
case KVM_GET_DEVICE_ATTR: {
r = -EFAULT;
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
break;
r = kvm_s390_vm_get_attr(kvm, &attr);
break;
}
case KVM_HAS_DEVICE_ATTR: {
r = -EFAULT;
if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
break;
r = kvm_s390_vm_has_attr(kvm, &attr);
break;
}
default:
r = -ENOTTY;
}
......@@ -291,6 +440,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.float_int.lock);
INIT_LIST_HEAD(&kvm->arch.float_int.list);
init_waitqueue_head(&kvm->arch.ipte_wq);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "%s", "vm created");
......@@ -321,6 +471,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
......@@ -334,9 +485,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);
if (vcpu->arch.sie_block->cbrlo)
__free_page(__pfn_to_page(
vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
if (kvm_s390_cmma_enabled(vcpu->kvm))
kvm_s390_vcpu_unsetup_cmma(vcpu);
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
......@@ -371,6 +521,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
kvm_s390_clear_float_irqs(kvm);
}
/* Section: vcpu related */
......@@ -450,9 +601,26 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
return 0;
}
/*
 * Undo kvm_s390_vcpu_setup_cmma(): free the page whose address is stored in
 * the SIE block's cbrlo field and reset that field to 0 so the stale
 * address is not left behind.
 */
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
free_page(vcpu->arch.sie_block->cbrlo);
vcpu->arch.sie_block->cbrlo = 0;
}
/*
 * Prepare @vcpu for CMMA: allocate a zeroed page, store its address in the
 * SIE block's cbrlo field and adjust the ecb2 control bits.
 *
 * Returns 0 on success or -ENOMEM if no page could be allocated (ecb2 is
 * left untouched in that case).
 */
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (vcpu->arch.sie_block->cbrlo == 0)
		return -ENOMEM;

	/* switch bit 0x80 on and bit 0x08 off in a single update */
	vcpu->arch.sie_block->ecb2 =
		(vcpu->arch.sie_block->ecb2 | 0x80) & ~0x08;

	return 0;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
struct page *cbrl;
int rc = 0;
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
......@@ -463,15 +631,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->ecb |= 0x10;
vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002001U;
vcpu->arch.sie_block->eca = 0xC1002000U;
if (sclp_has_siif())
vcpu->arch.sie_block->eca |= 1;
vcpu->arch.sie_block->fac = (int) (long) vfacilities;
if (kvm_enabled_cmma()) {
cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (cbrl) {
vcpu->arch.sie_block->ecb2 |= 0x80;
vcpu->arch.sie_block->ecb2 &= ~0x08;
vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
}
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
if (kvm_s390_cmma_enabled(vcpu->kvm)) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
if (rc)
return rc;
}
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
......@@ -479,7 +647,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
return 0;
return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
......@@ -768,10 +936,40 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
KVM_GUESTDBG_USE_HW_BP | \
KVM_GUESTDBG_ENABLE)
/*
 * Configure guest debugging for @vcpu according to the request in @dbg.
 *
 * Any previously installed debug state (flags and breakpoint data) is
 * dropped first. If KVM_GUESTDBG_ENABLE is requested, the control flags are
 * taken over, CPUSTAT_P is set in the SIE block's cpuflags and, for
 * KVM_GUESTDBG_USE_HW_BP, the breakpoint data is imported. Otherwise
 * CPUSTAT_P is cleared again. A failing import rolls everything back.
 *
 * Returns 0 on success, -EINVAL if @dbg->control contains flags outside
 * VALID_GUESTDBG_FLAGS, or the error from kvm_s390_import_bp_data().
 */
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	/*
	 * Validate the flags requested by userspace. vcpu->guest_debug was
	 * reset to 0 just above, so testing it here could never reject
	 * anything; the request itself has to be checked.
	 */
	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_set_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		/* importing breakpoints failed: leave debugging disabled */
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_clear_mask(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
......@@ -786,6 +984,18 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL; /* not implemented yet */
}
/*
 * Tell whether CMMA is in effect for @kvm: the machine must be an LPAR, must
 * have EDAT1 (only enable for z10 and later), and CMMA must have been
 * switched on for this VM (kvm->arch.use_cmma).
 */
bool kvm_s390_cmma_enabled(struct kvm *kvm)
{
	return MACHINE_IS_LPAR && MACHINE_HAS_EDAT1 && kvm->arch.use_cmma;
}
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
/*
......@@ -882,8 +1092,9 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
if (!vcpu->arch.gmap->pfault_enabled)
return 0;
hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
hva += current->thread.gmap_addr & ~PAGE_MASK;
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
return 0;
rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
......@@ -916,6 +1127,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
if (rc)
return rc;
if (guestdbg_enabled(vcpu)) {
kvm_s390_backup_guest_per_regs(vcpu);
kvm_s390_patch_guest_per_regs(vcpu);
}
vcpu->arch.sie_block->icptcode = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
......@@ -932,6 +1148,9 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
if (guestdbg_enabled(vcpu))
kvm_s390_restore_guest_per_regs(vcpu);
if (exit_reason >= 0) {
rc = 0;
} else if (kvm_is_ucontrol(vcpu->kvm)) {
......@@ -968,16 +1187,6 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return rc;
}
bool kvm_enabled_cmma(void)
{
if (!MACHINE_IS_LPAR)
return false;
/* only enable for z10 and later */
if (!MACHINE_HAS_EDAT1)
return false;
return true;
}
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
......@@ -1007,7 +1216,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !rc);
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
return rc;
......@@ -1018,6 +1227,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int rc;
sigset_t sigsaved;
if (guestdbg_exit_pending(vcpu)) {
kvm_s390_prepare_debug_exit(vcpu);
return 0;
}
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
......@@ -1030,6 +1244,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
case KVM_EXIT_S390_RESET:
case KVM_EXIT_S390_UCONTROL:
case KVM_EXIT_S390_TSCH:
case KVM_EXIT_DEBUG:
break;
default:
BUG();
......@@ -1055,6 +1270,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = -EINTR;
}
if (guestdbg_exit_pending(vcpu) && !rc) {
kvm_s390_prepare_debug_exit(vcpu);
rc = 0;
}
if (rc == -EOPNOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
......@@ -1082,83 +1302,50 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return rc;
}
static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
unsigned long n, int prefix)
{
if (prefix)
return copy_to_guest(vcpu, guestdest, from, n);
else
return copy_to_guest_absolute(vcpu, guestdest, from, n);
}
/*
* store status at address
* we have two special cases:
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
*/
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
unsigned char archmode = 1;
int prefix;
u64 clkcomp;
int rc;
if (addr == KVM_S390_STORE_STATUS_NOADDR) {
if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
if (write_guest_abs(vcpu, 163, &archmode, 1))
return -EFAULT;
addr = SAVE_AREA_BASE;
prefix = 0;
} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
if (copy_to_guest(vcpu, 163ul, &archmode, 1))
gpa = SAVE_AREA_BASE;
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
if (write_guest_real(vcpu, 163, &archmode, 1))
return -EFAULT;
addr = SAVE_AREA_BASE;
prefix = 1;
} else
prefix = 0;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128, prefix))
return -EFAULT;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
vcpu->run->s.regs.gprs, 128, prefix))
return -EFAULT;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
&vcpu->arch.sie_block->gpsw, 16, prefix))
return -EFAULT;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
&vcpu->arch.sie_block->prefix, 4, prefix))
return -EFAULT;
if (__guestcopy(vcpu,
addr + offsetof(struct save_area, fp_ctrl_reg),
&vcpu->arch.guest_fpregs.fpc, 4, prefix))
return -EFAULT;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
&vcpu->arch.sie_block->todpr, 4, prefix))
return -EFAULT;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
&vcpu->arch.sie_block->cputm, 8, prefix))
return -EFAULT;
gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
}
rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
vcpu->run->s.regs.gprs, 128);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
&vcpu->arch.sie_block->gpsw, 16);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
&vcpu->arch.sie_block->prefix, 4);
rc |= write_guest_abs(vcpu,
gpa + offsetof(struct save_area, fp_ctrl_reg),
&vcpu->arch.guest_fpregs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
&vcpu->arch.sie_block->todpr, 4);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
&vcpu->arch.sie_block->cputm, 8);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
&clkcomp, 8, prefix))
return -EFAULT;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
&vcpu->run->s.regs.acrs, 64, prefix))
return -EFAULT;
if (__guestcopy(vcpu,
addr + offsetof(struct save_area, ctrl_regs),
&vcpu->arch.sie_block->gcr, 128, prefix))
return -EFAULT;
return 0;
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
&clkcomp, 8);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
&vcpu->run->s.regs.acrs, 64);
rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
&vcpu->arch.sie_block->gcr, 128);
return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
......
......@@ -28,7 +28,6 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
/* Transactional Memory Execution related macros */
#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
#define TDB_ADDR 0x1800UL
#define TDB_FORMAT1 1
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
......@@ -130,6 +129,7 @@ void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
void kvm_s390_clear_float_irqs(struct kvm *kvm);
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
......@@ -137,6 +137,8 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);
void kvm_s390_reinject_io_int(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti);
int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in priv.c */
......@@ -145,6 +147,7 @@ int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
......@@ -158,14 +161,64 @@ void s390_vcpu_block(struct kvm_vcpu *vcpu);
void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
void exit_sie_sync(struct kvm_vcpu *vcpu);
/* are we going to support cmma? */
bool kvm_enabled_cmma(void);
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu);
/* is cmma enabled */
bool kvm_s390_cmma_enabled(struct kvm *kvm);
int test_vfacility(unsigned long nr);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
/* implemented in interrupt.c */
int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
struct kvm_s390_pgm_info *pgm_info);
/**
* kvm_s390_inject_prog_cond - conditionally inject a program check
* @vcpu: virtual cpu
* @rc: original return/error code
*
* This function is supposed to be used after regular guest access functions
* failed, to conditionally inject a program check to a vcpu. The typical
* pattern would look like
*
* rc = write_guest(vcpu, addr, data, len);
* if (rc)
* return kvm_s390_inject_prog_cond(vcpu, rc);
*
* A negative return code from guest access functions implies an internal error
* like e.g. out of memory. In these cases no program check should be injected
* to the guest.
* A positive value implies that an exception happened while accessing a guest's
* memory. In this case all data belonging to the corresponding program check
* has been stored in vcpu->arch.pgm and can be injected with
* kvm_s390_inject_prog_irq().
*
* Returns: - the original @rc value if @rc was negative (internal error)
* - zero if @rc was already zero
* - zero or error code from injecting if @rc was positive
* (program check injected to @vcpu)
*/
static inline int kvm_s390_inject_prog_cond(struct kvm_vcpu *vcpu, int rc)
{
	/* zero and negative (internal error) codes are passed through as is */
	return rc > 0 ? kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm) : rc;
}
/* implemented in interrupt.c */
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
void kvm_s390_destroy_adapters(struct kvm *kvm);
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
void kvm_s390_restore_guest_per_regs(struct kvm_vcpu *vcpu);
void kvm_s390_patch_guest_per_regs(struct kvm_vcpu *vcpu);
int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
#endif
......@@ -35,8 +35,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu *cpup;
s64 hostclk, val;
int i, rc;
u64 op2;
int i;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
......@@ -44,8 +44,9 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
op2 = kvm_s390_get_base_disp_s(vcpu);
if (op2 & 7) /* Operand must be on a doubleword boundary */
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (get_guest(vcpu, val, (u64 __user *) op2))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = read_guest(vcpu, op2, &val, sizeof(val));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (store_tod_clock(&hostclk)) {
kvm_s390_set_psw_cc(vcpu, 3);
......@@ -65,8 +66,8 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
static int handle_set_prefix(struct kvm_vcpu *vcpu)
{
u64 operand2;
u32 address = 0;
u8 tmp;
u32 address;
int rc;
vcpu->stat.instruction_spx++;
......@@ -80,14 +81,18 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* get the value */
if (get_guest(vcpu, address, (u32 __user *) operand2))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = read_guest(vcpu, operand2, &address, sizeof(address));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
address = address & 0x7fffe000u;
address &= 0x7fffe000u;
/* make sure that the new value is valid memory */
if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
(copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)))
/*
* Make sure the new value is valid memory. We only need to check the
* first page, since address is 8k aligned and memory pieces are always
* at least 1MB aligned and have at least a size of 1MB.
*/
if (kvm_is_error_gpa(vcpu->kvm, address))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
kvm_s390_set_prefix(vcpu, address);
......@@ -101,6 +106,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
{
u64 operand2;
u32 address;
int rc;
vcpu->stat.instruction_stpx++;
......@@ -117,8 +123,9 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
address = address & 0x7fffe000u;
/* store the value */
if (put_guest(vcpu, address, (u32 __user *)operand2))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = write_guest(vcpu, operand2, &address, sizeof(address));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
trace_kvm_s390_handle_prefix(vcpu, 0, address);
......@@ -127,28 +134,44 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
{
u64 useraddr;
u16 vcpu_id = vcpu->vcpu_id;
u64 ga;
int rc;
vcpu->stat.instruction_stap++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
useraddr = kvm_s390_get_base_disp_s(vcpu);
ga = kvm_s390_get_base_disp_s(vcpu);
if (useraddr & 1)
if (ga & 1)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (put_guest(vcpu, vcpu->vcpu_id, (u16 __user *)useraddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", useraddr);
trace_kvm_s390_handle_stap(vcpu, useraddr);
VCPU_EVENT(vcpu, 5, "storing cpu address to %llx", ga);
trace_kvm_s390_handle_stap(vcpu, ga);
return 0;
}
/*
 * Enable storage key handling on first use.
 *
 * As long as any of the ICTL_ISKE, ICTL_SSKE or ICTL_RRBE bits is still set
 * in the SIE block's ictl field, call s390_enable_skey(), emit the
 * corresponding trace event and clear all three bits. Once the bits are
 * gone this function does nothing.
 */
static void __skey_check_enable(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) {
		s390_enable_skey();
		trace_kvm_s390_skey_related_inst(vcpu);
		vcpu->arch.sie_block->ictl &=
			~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
	}
}
static int handle_skey(struct kvm_vcpu *vcpu)
{
__skey_check_enable(vcpu);
vcpu->stat.instruction_storage_key++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
......@@ -160,9 +183,21 @@ static int handle_skey(struct kvm_vcpu *vcpu)
return 0;
}
/*
 * Intercept handler shared by several entries of the b2 and b9 handler
 * tables.
 *
 * Injects a privileged-operation program interrupt if the guest PSW has the
 * problem-state bit set. Otherwise it sleeps on the VM's ipte_wq until
 * ipte_lock_held() no longer reports the lock as held and then returns 0.
 */
static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
vcpu->stat.instruction_ipte_interlock++;
if (psw_bits(*psw).p)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
/*
 * NOTE(review): presumably steps the PSW address back by the 4-byte
 * instruction length so the guest re-executes the instruction (the event
 * text below says "retrying") - confirm against __rewind_psw().
 */
psw->addr = __rewind_psw(*psw, 4);
VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
return 0;
}
static int handle_test_block(struct kvm_vcpu *vcpu)
{
unsigned long hva;
gpa_t addr;
int reg2;
......@@ -173,14 +208,13 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
addr = kvm_s390_real_to_abs(vcpu, addr);
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
if (kvm_is_error_hva(hva))
if (kvm_is_error_gpa(vcpu->kvm, addr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
/*
* We don't expect errors on modern systems, and do not care
* about storage keys (yet), so let's just clear the page.
*/
if (clear_user((void __user *)hva, PAGE_SIZE) != 0)
if (kvm_clear_guest(vcpu->kvm, addr, PAGE_SIZE))
return -EFAULT;
kvm_s390_set_psw_cc(vcpu, 0);
vcpu->run->s.regs.gprs[0] = 0;
......@@ -190,9 +224,12 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
static int handle_tpi(struct kvm_vcpu *vcpu)
{
struct kvm_s390_interrupt_info *inti;
unsigned long len;
u32 tpi_data[3];
int cc, rc;
u64 addr;
int cc;
rc = 0;
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
......@@ -201,30 +238,41 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
if (!inti)
goto no_interrupt;
cc = 1;
tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
tpi_data[1] = inti->io.io_int_parm;
tpi_data[2] = inti->io.io_int_word;
if (addr) {
/*
* Store the two-word I/O interruption code into the
* provided area.
*/
if (put_guest(vcpu, inti->io.subchannel_id, (u16 __user *)addr)
|| put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *)(addr + 2))
|| put_guest(vcpu, inti->io.io_int_parm, (u32 __user *)(addr + 4)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
len = sizeof(tpi_data) - 4;
rc = write_guest(vcpu, addr, &tpi_data, len);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
} else {
/*
* Store the three-word I/O interruption code into
* the appropriate lowcore area.
*/
put_guest(vcpu, inti->io.subchannel_id, (u16 __user *) __LC_SUBCHANNEL_ID);
put_guest(vcpu, inti->io.subchannel_nr, (u16 __user *) __LC_SUBCHANNEL_NR);
put_guest(vcpu, inti->io.io_int_parm, (u32 __user *) __LC_IO_INT_PARM);
put_guest(vcpu, inti->io.io_int_word, (u32 __user *) __LC_IO_INT_WORD);
len = sizeof(tpi_data);
if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
rc = -EFAULT;
}
kfree(inti);
/*
* If we encounter a problem storing the interruption code, the
* instruction is suppressed from the guest's view: reinject the
* interrupt.
*/
if (!rc)
kfree(inti);
else
kvm_s390_reinject_io_int(vcpu->kvm, inti);
no_interrupt:
/* Set condition code and we're done. */
kvm_s390_set_psw_cc(vcpu, cc);
return 0;
if (!rc)
kvm_s390_set_psw_cc(vcpu, cc);
return rc ? -EFAULT : 0;
}
static int handle_tsch(struct kvm_vcpu *vcpu)
......@@ -292,10 +340,10 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
vfacilities, 4);
rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
vfacilities, 4);
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
return rc;
VCPU_EVENT(vcpu, 5, "store facility list value %x",
*(unsigned int *) vfacilities);
trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
......@@ -333,6 +381,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
psw_t *gpsw = &vcpu->arch.sie_block->gpsw;
psw_compat_t new_psw;
u64 addr;
int rc;
if (gpsw->mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
......@@ -340,8 +389,10 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (!(new_psw.mask & PSW32_MASK_BASE))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
gpsw->mask = (new_psw.mask & ~PSW32_MASK_BASE) << 32;
......@@ -357,6 +408,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
{
psw_t new_psw;
u64 addr;
int rc;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
......@@ -364,8 +416,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
addr = kvm_s390_get_base_disp_s(vcpu);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (copy_from_guest(vcpu, &new_psw, addr, sizeof(new_psw)))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
vcpu->arch.sie_block->gpsw = new_psw;
if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
......@@ -375,7 +428,9 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
static int handle_stidp(struct kvm_vcpu *vcpu)
{
u64 stidp_data = vcpu->arch.stidp_data;
u64 operand2;
int rc;
vcpu->stat.instruction_stidp++;
......@@ -387,8 +442,9 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
if (operand2 & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
if (put_guest(vcpu, vcpu->arch.stidp_data, (u64 __user *)operand2))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
return 0;
......@@ -474,9 +530,10 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
break;
}
if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
goto out_exception;
rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
if (rc) {
rc = kvm_s390_inject_prog_cond(vcpu, rc);
goto out;
}
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
......@@ -485,7 +542,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
return 0;
out_no_data:
kvm_s390_set_psw_cc(vcpu, 3);
out_exception:
out:
free_page(mem);
return rc;
}
......@@ -496,6 +553,7 @@ static const intercept_handler_t b2_handlers[256] = {
[0x10] = handle_set_prefix,
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
[0x21] = handle_ipte_interlock,
[0x29] = handle_skey,
[0x2a] = handle_skey,
[0x2b] = handle_skey,
......@@ -513,6 +571,7 @@ static const intercept_handler_t b2_handlers[256] = {
[0x3a] = handle_io_inst,
[0x3b] = handle_io_inst,
[0x3c] = handle_io_inst,
[0x50] = handle_ipte_interlock,
[0x5f] = handle_io_inst,
[0x74] = handle_io_inst,
[0x76] = handle_io_inst,
......@@ -618,6 +677,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
__skey_check_enable(vcpu);
if (set_guest_storage_key(current->mm, useraddr,
vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
......@@ -642,7 +702,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
gmap = vcpu->arch.gmap;
vcpu->stat.instruction_essa++;
if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo)
if (!kvm_s390_cmma_enabled(vcpu->kvm))
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
......@@ -672,7 +732,10 @@ static int handle_essa(struct kvm_vcpu *vcpu)
}
/*
 * Table of intercept handlers with 256 slots; slots without an explicit
 * initializer are NULL.
 * NOTE(review): presumably indexed by the low opcode byte of 0xb9xx
 * instructions - confirm against the dispatching caller.
 */
static const intercept_handler_t b9_handlers[256] = {
[0x8a] = handle_ipte_interlock,
[0x8d] = handle_epsw,
[0x8e] = handle_ipte_interlock,
[0x8f] = handle_ipte_interlock,
[0xab] = handle_essa,
[0xaf] = handle_pfmf,
};
......@@ -693,32 +756,67 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
{
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
u64 useraddr;
u32 val = 0;
int reg, rc;
u64 ga;
vcpu->stat.instruction_lctl++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
useraddr = kvm_s390_get_base_disp_rs(vcpu);
ga = kvm_s390_get_base_disp_rs(vcpu);
if (useraddr & 3)
if (ga & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3,
useraddr);
trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, useraddr);
VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
reg = reg1;
do {
rc = get_guest(vcpu, val, (u32 __user *) useraddr);
rc = read_guest(vcpu, ga, &val, sizeof(val));
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
return kvm_s390_inject_prog_cond(vcpu, rc);
vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
vcpu->arch.sie_block->gcr[reg] |= val;
useraddr += 4;
ga += 4;
if (reg == reg3)
break;
reg = (reg + 1) % 16;
} while (1);
return 0;
}
int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
{
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
u64 ga;
u32 val;
int reg, rc;
vcpu->stat.instruction_stctl++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
ga = kvm_s390_get_base_disp_rs(vcpu);
if (ga & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
VCPU_EVENT(vcpu, 5, "stctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
reg = reg1;
do {
val = vcpu->arch.sie_block->gcr[reg] & 0x00000000fffffffful;
rc = write_guest(vcpu, ga, &val, sizeof(val));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
ga += 4;
if (reg == reg3)
break;
reg = (reg + 1) % 16;
......@@ -731,7 +829,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
{
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
u64 useraddr;
u64 ga, val;
int reg, rc;
vcpu->stat.instruction_lctlg++;
......@@ -739,23 +837,58 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
useraddr = kvm_s390_get_base_disp_rsy(vcpu);
ga = kvm_s390_get_base_disp_rsy(vcpu);
if (useraddr & 7)
if (ga & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
reg = reg1;
VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3,
useraddr);
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, useraddr);
VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
do {
rc = get_guest(vcpu, vcpu->arch.sie_block->gcr[reg],
(u64 __user *) useraddr);
rc = read_guest(vcpu, ga, &val, sizeof(val));
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
useraddr += 8;
return kvm_s390_inject_prog_cond(vcpu, rc);
vcpu->arch.sie_block->gcr[reg] = val;
ga += 8;
if (reg == reg3)
break;
reg = (reg + 1) % 16;
} while (1);
return 0;
}
static int handle_stctg(struct kvm_vcpu *vcpu)
{
int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
u64 ga, val;
int reg, rc;
vcpu->stat.instruction_stctg++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
ga = kvm_s390_get_base_disp_rsy(vcpu);
if (ga & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
reg = reg1;
VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
do {
val = vcpu->arch.sie_block->gcr[reg];
rc = write_guest(vcpu, ga, &val, sizeof(val));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
ga += 8;
if (reg == reg3)
break;
reg = (reg + 1) % 16;
......@@ -766,6 +899,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
/*
 * Handler table with 256 slots. Only slot 0x2f (handle_lctlg) and slot 0x25
 * (handle_stctg) are populated; every slot not named by a designated
 * initializer is zero-initialized.
 * NOTE(review): presumably indexed by the low opcode byte of 0xeb-prefixed
 * instructions in kvm_s390_handle_eb() -- confirm against that function.
 */
static const intercept_handler_t eb_handlers[256] = {
[0x2f] = handle_lctlg,
[0x25] = handle_stctg,
};
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
......
......@@ -235,7 +235,6 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
struct kvm_vcpu *dst_vcpu = NULL;
struct kvm_s390_interrupt_info *inti;
int rc;
u8 tmp;
if (cpu_addr < KVM_MAX_VCPUS)
dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
......@@ -243,10 +242,13 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
return SIGP_CC_NOT_OPERATIONAL;
li = &dst_vcpu->arch.local_int;
/* make sure that the new value is valid memory */
address = address & 0x7fffe000u;
if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1)) {
/*
* Make sure the new value is valid memory. We only need to check the
* first page, since address is 8k aligned and memory pieces are always
* at least 1MB aligned and have at least a size of 1MB.
*/
address &= 0x7fffe000u;
if (kvm_is_error_gpa(vcpu->kvm, address)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INVALID_PARAMETER;
return SIGP_CC_STATUS_STORED;
......
......@@ -30,6 +30,20 @@
TP_printk("%02d[%016lx-%016lx]: " p_str, __entry->id, \
__entry->pswmask, __entry->pswaddr, p_args)
/*
 * Tracepoint that records only the common vcpu fields and prints the fixed
 * message "first instruction related to skeys on vcpu".
 * NOTE(review): the call site is not visible here; going by the message it
 * is meant to fire once per vcpu, on the first storage key instruction --
 * confirm against the caller.
 */
TRACE_EVENT(kvm_s390_skey_related_inst,
TP_PROTO(VCPU_PROTO_COMMON),
TP_ARGS(VCPU_ARGS_COMMON),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
),
VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
);
TRACE_EVENT(kvm_s390_major_guest_pfault,
TP_PROTO(VCPU_PROTO_COMMON),
TP_ARGS(VCPU_ARGS_COMMON),
......@@ -301,6 +315,31 @@ TRACE_EVENT(kvm_s390_handle_lctl,
__entry->reg1, __entry->reg3, __entry->addr)
);
/*
 * Tracepoint for the store-control handlers.
 *
 * @g:    selects the mnemonic that is printed: non-zero prints "stctg",
 *        zero prints "stctl"
 * @reg1: first control register of the stored range
 * @reg3: last control register of the stored range
 * @addr: guest address the registers are stored to
 *
 * Output format: "<mnemonic>: storing cr <reg1>-<reg3> to <addr>", with the
 * registers printed in hex and the address as a 16-digit hex value.
 */
TRACE_EVENT(kvm_s390_handle_stctl,
TP_PROTO(VCPU_PROTO_COMMON, int g, int reg1, int reg3, u64 addr),
TP_ARGS(VCPU_ARGS_COMMON, g, reg1, reg3, addr),
TP_STRUCT__entry(
VCPU_FIELD_COMMON
__field(int, g)
__field(int, reg1)
__field(int, reg3)
__field(u64, addr)
),
TP_fast_assign(
VCPU_ASSIGN_COMMON
__entry->g = g;
__entry->reg1 = reg1;
__entry->reg3 = reg3;
__entry->addr = addr;
),
VCPU_TP_PRINTK("%s: storing cr %x-%x to %016llx",
__entry->g ? "stctg" : "stctl",
__entry->reg1, __entry->reg3, __entry->addr)
);
TRACE_EVENT(kvm_s390_handle_prefix,
TP_PROTO(VCPU_PROTO_COMMON, int set, u32 address),
TP_ARGS(VCPU_ARGS_COMMON, set, address),
......
......@@ -832,6 +832,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
}
spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
static inline int page_table_with_pgste(struct page *page)
{
......@@ -864,8 +865,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
return table;
}
......@@ -883,8 +883,8 @@ static inline void page_table_free_pgste(unsigned long *table)
__free_page(page);
}
static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
pmd_t *pmd, unsigned long addr, unsigned long end)
static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end, bool init_skey)
{
pte_t *start_pte, *pte;
spinlock_t *ptl;
......@@ -895,6 +895,22 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
do {
pgste = pgste_get_lock(pte);
pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
if (init_skey) {
unsigned long address;
pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
PGSTE_GR_BIT | PGSTE_GC_BIT);
/* skip invalid and not writable pages */
if (pte_val(*pte) & _PAGE_INVALID ||
!(pte_val(*pte) & _PAGE_WRITE)) {
pgste_set_unlock(pte, pgste);
continue;
}
address = pte_val(*pte) & PAGE_MASK;
page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
}
pgste_set_unlock(pte, pgste);
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(start_pte, ptl);
......@@ -902,8 +918,8 @@ static inline unsigned long page_table_reset_pte(struct mm_struct *mm,
return addr;
}
static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
pud_t *pud, unsigned long addr, unsigned long end)
static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end, bool init_skey)
{
unsigned long next;
pmd_t *pmd;
......@@ -913,14 +929,14 @@ static inline unsigned long page_table_reset_pmd(struct mm_struct *mm,
next = pmd_addr_end(addr, end);
if (pmd_none_or_clear_bad(pmd))
continue;
next = page_table_reset_pte(mm, pmd, addr, next);
next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
} while (pmd++, addr = next, addr != end);
return addr;
}
static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
pgd_t *pgd, unsigned long addr, unsigned long end)
static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end, bool init_skey)
{
unsigned long next;
pud_t *pud;
......@@ -930,14 +946,14 @@ static inline unsigned long page_table_reset_pud(struct mm_struct *mm,
next = pud_addr_end(addr, end);
if (pud_none_or_clear_bad(pud))
continue;
next = page_table_reset_pmd(mm, pud, addr, next);
next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
} while (pud++, addr = next, addr != end);
return addr;
}
void page_table_reset_pgste(struct mm_struct *mm,
unsigned long start, unsigned long end)
void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
unsigned long end, bool init_skey)
{
unsigned long addr, next;
pgd_t *pgd;
......@@ -949,7 +965,7 @@ void page_table_reset_pgste(struct mm_struct *mm,
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
continue;
next = page_table_reset_pud(mm, pgd, addr, next);
next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
} while (pgd++, addr = next, addr != end);
up_read(&mm->mmap_sem);
}
......@@ -989,7 +1005,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
/* changing the guest storage key is considered a change of the page */
if ((pgste_val(new) ^ pgste_val(old)) &
(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
pgste_val(new) |= PGSTE_HC_BIT;
pgste_val(new) |= PGSTE_UC_BIT;
pgste_set_unlock(ptep, new);
pte_unmap_unlock(*ptep, ptl);
......@@ -1011,6 +1027,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
return NULL;
}
/*
 * Empty variant of page_table_reset_pgste(): all arguments are ignored and
 * nothing is done.
 * NOTE(review): presumably the variant for builds without PGSTE support --
 * the enclosing preprocessor conditional is not visible here, confirm.
 */
void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
unsigned long end, bool init_skey)
{
}
static inline void page_table_free_pgste(unsigned long *table)
{
}
......@@ -1357,6 +1378,50 @@ int s390_enable_sie(void)
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
*/
void s390_enable_skey(void)
{
/*
* To avoid races between multiple vcpus, ending in calling
* page_table_reset twice or more,
* the page_table_lock is taken for serialization.
*/
spin_lock(&current->mm->page_table_lock);
if (mm_use_skey(current->mm)) {
spin_unlock(&current->mm->page_table_lock);
return;
}
current->mm->context.use_skey = 1;
spin_unlock(&current->mm->page_table_lock);
page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
/*
 * Test whether a guest page is dirty and reset its dirty state.
 *
 * @address: address within gmap->mm whose pte is examined
 * @gmap:    guest address space; only gmap->mm is used here
 *
 * Returns true if ptep_test_and_clear_user_dirty() reported the page as
 * dirty, false if it did not or if no pte could be obtained for @address.
 */
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
{
	spinlock_t *ptl;
	pte_t *pte;
	bool dirty;

	pte = get_locked_pte(gmap->mm, address, &ptl);
	if (unlikely(!pte))
		return false;

	dirty = ptep_test_and_clear_user_dirty(gmap->mm, address, pte);

	/*
	 * get_locked_pte() hands back a mapped and locked pte, so release it
	 * with the matching pte_unmap_unlock() rather than a bare
	 * spin_unlock(), as the other page table walkers in this file do.
	 */
	pte_unmap_unlock(pte, ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
......
......@@ -22,7 +22,8 @@ struct read_info_sccb {
u8 rnsize; /* 10 */
u8 _reserved0[16 - 11]; /* 11-15 */
u16 ncpurl; /* 16-17 */
u8 _reserved7[24 - 18]; /* 18-23 */
u16 cpuoff; /* 18-19 */
u8 _reserved7[24 - 20]; /* 20-23 */
u8 loadparm[8]; /* 24-31 */
u8 _reserved1[48 - 32]; /* 32-47 */
u64 facilities; /* 48-55 */
......@@ -45,6 +46,7 @@ static unsigned int sclp_con_has_linemode __initdata;
static unsigned long sclp_hsa_size;
static unsigned int sclp_max_cpu;
static struct sclp_ipl_info sclp_ipl_info;
static unsigned char sclp_siif;
u64 sclp_facilities;
u8 sclp_fac84;
......@@ -96,6 +98,9 @@ static int __init sclp_read_info_early(struct read_info_sccb *sccb)
static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
{
struct sclp_cpu_entry *cpue;
u16 boot_cpu_address, cpu;
if (sclp_read_info_early(sccb))
return;
......@@ -116,6 +121,15 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb)
sclp_max_cpu = sccb->hcpua + 1;
}
boot_cpu_address = stap();
cpue = (void *)sccb + sccb->cpuoff;
for (cpu = 0; cpu < sccb->ncpurl; cpue++, cpu++) {
if (boot_cpu_address != cpue->address)
continue;
sclp_siif = cpue->siif;
break;
}
/* Save IPL information */
sclp_ipl_info.is_valid = 1;
if (sccb->flags & 0x2)
......@@ -148,6 +162,12 @@ unsigned int sclp_get_max_cpu(void)
return sclp_max_cpu;
}
/*
 * Return the siif value that sclp_facilities_detect() copied from the SCLP
 * cpu entry whose address matches the boot CPU. The value stays 0 if the
 * detection returned early or found no matching entry.
 */
int sclp_has_siif(void)
{
return sclp_siif;
}
EXPORT_SYMBOL(sclp_has_siif);
/*
* This function will be called after sclp_facilities_detect(), which gets
* called from early.c code. The sclp_facilities_detect() function retrieves
......
......@@ -880,6 +880,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
return (hpa_t)pfn << PAGE_SHIFT;
}
/*
 * Check a guest physical address for validity.
 *
 * The gpa is converted to its guest frame number and looked up with
 * gfn_to_hva(). Returns true when the resulting host virtual address is an
 * error value according to kvm_is_error_hva(), false otherwise.
 */
static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
{
	return kvm_is_error_hva(gfn_to_hva(kvm, gpa_to_gfn(gpa)));
}
static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
......
......@@ -745,6 +745,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_ENABLE_CAP_VM 98
#define KVM_CAP_S390_IRQCHIP 99
#define KVM_CAP_IOEVENTFD_NO_LENGTH 100
#define KVM_CAP_VM_ATTRIBUTES 101
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -637,14 +637,12 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
*/
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
	/* The buffer is twice the size reported by kvm_dirty_bitmap_bytes(). */
	unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);

	/*
	 * Allocate on every architecture. The former CONFIG_S390 exclusion
	 * is dropped because s390 now provides a dirty bitmap as well;
	 * keeping it would return success with no bitmap allocated.
	 */
	memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
	if (!memslot->dirty_bitmap)
		return -ENOMEM;

	/* 0 on success, -ENOMEM above if the allocation failed. */
	return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment