Commit 72f293de authored by Shuo Liu's avatar Shuo Liu Committed by Greg Kroah-Hartman

virt: acrn: Introduce I/O request management

An I/O request of a User VM, which is constructed by the hypervisor, is
distributed by the ACRN Hypervisor Service Module to an I/O client
corresponding to the address range of the I/O request.

For each User VM, there is a shared 4-KByte memory region used for I/O
requests communication between the hypervisor and Service VM. An I/O
request is a 256-byte structure buffer, which is 'struct
acrn_io_request', that is filled by an I/O handler of the hypervisor
when a trapped I/O access happens in a User VM. ACRN userspace in the
Service VM first allocates a 4-KByte page and passes the GPA (Guest
Physical Address) of the buffer to the hypervisor. The buffer is used as
an array of 16 I/O request slots with each I/O request slot being 256
bytes. This array is indexed by vCPU ID.

An I/O client, which is 'struct acrn_ioreq_client', is responsible for
handling User VM I/O requests whose accessed GPA falls in a certain
range. Multiple I/O clients can be associated with each User VM. There
is a special client associated with each User VM, called the default
client, that handles all I/O requests that do not fit into the range of
any other I/O clients. The ACRN userspace acts as the default client for
each User VM.

The state transitions of an ACRN I/O request are as follows.

   FREE -> PENDING -> PROCESSING -> COMPLETE -> FREE -> ...

FREE: this I/O request slot is empty
PENDING: a valid I/O request is pending in this slot
PROCESSING: the I/O request is being processed
COMPLETE: the I/O request has been processed

An I/O request in COMPLETE or FREE state is owned by the hypervisor. HSM
and ACRN userspace are in charge of processing the others.

The processing flow of I/O requests is as follows:

a) The I/O handler of the hypervisor will fill an I/O request with
   PENDING state when a trapped I/O access happens in a User VM.
b) The hypervisor makes an upcall, which is a notification interrupt, to
   the Service VM.
c) The upcall handler schedules a worker to dispatch I/O requests.
d) The worker looks for the PENDING I/O requests, assigns them to
   different registered clients based on the address of the I/O accesses,
   updates their state to PROCESSING, and notifies the corresponding
   client to handle.
e) The notified client handles the assigned I/O requests.
f) The HSM updates I/O requests states to COMPLETE and notifies the
   hypervisor of the completion via hypercalls.

Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Yu Wang <yu1.wang@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: default avatarZhi Wang <zhi.a.wang@intel.com>
Reviewed-by: default avatarReinette Chatre <reinette.chatre@intel.com>
Acked-by: default avatarDavidlohr Bueso <dbueso@suse.de>
Signed-off-by: default avatarShuo Liu <shuo.a.liu@intel.com>
Link: https://lore.kernel.org/r/20210207031040.49576-10-shuo.a.liu@intel.com
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 88f537d5
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_ACRN_HSM) := acrn.o
acrn-y := hsm.o vm.o mm.o
acrn-y := hsm.o vm.o mm.o ioreq.o
......@@ -12,10 +12,15 @@
extern struct miscdevice acrn_dev;
#define ACRN_NAME_LEN 16
#define ACRN_MEM_MAPPING_MAX 256
#define ACRN_MEM_REGION_ADD 0
#define ACRN_MEM_REGION_DEL 2
struct acrn_vm;
struct acrn_ioreq_client;
/**
* struct vm_memory_region_op - Hypervisor memory operation
* @type: Operation type (ACRN_MEM_REGION_*)
......@@ -75,9 +80,63 @@ struct vm_memory_mapping {
size_t size;
};
/**
 * struct acrn_ioreq_buffer - Data for setting the ioreq buffer of User VM
 * @ioreq_buf: The GPA of the IO request shared buffer of a VM
 *
 * The parameter for the HC_SET_IOREQ_BUFFER hypercall used to set up
 * the shared I/O request buffer between Service VM and ACRN hypervisor.
 */
struct acrn_ioreq_buffer {
	u64 ioreq_buf;	/* Service VM GPA of the 4-KByte shared page */
};
/**
 * struct acrn_ioreq_range - An I/O address range served by an I/O client
 * @list:	List node within &acrn_ioreq_client.range_list
 * @type:	Access type of the range (ACRN_IOREQ_TYPE_*) —
 *		NOTE(review): inferred from the UAPI type values; confirm
 *		against the range-matching code in ioreq.c
 * @start:	Start address of the range
 * @end:	End address of the range
 */
struct acrn_ioreq_range {
	struct list_head list;
	u32 type;
	u64 start;
	u64 end;
};
/* Bit in &acrn_ioreq_client.flags: the client is being torn down */
#define ACRN_IOREQ_CLIENT_DESTROYING	0U

/*
 * Per-client callback invoked for each I/O request assigned to the client.
 * Presumably returns 0 on success and a negative errno on failure —
 * TODO confirm against the dispatch code in ioreq.c.
 */
typedef int (*ioreq_handler_t)(struct acrn_ioreq_client *client,
			       struct acrn_io_request *req);

/**
 * struct acrn_ioreq_client - Structure of I/O client.
 * @name:	Client name
 * @vm:		The VM that the client belongs to
 * @list:	List node for this acrn_ioreq_client
 * @is_default:	If this client is the default one
 * @flags:	Flags (ACRN_IOREQ_CLIENT_*)
 * @range_list:	I/O ranges
 * @range_lock:	Lock to protect range_list
 * @ioreqs_map:	The pending I/O requests bitmap
 * @handler:	I/O requests handler of this client
 * @thread:	The thread which executes the handler
 * @wq:		The wait queue for the handler thread parking
 * @priv:	Data for the thread
 */
struct acrn_ioreq_client {
	char name[ACRN_NAME_LEN];
	struct acrn_vm *vm;
	struct list_head list;
	bool is_default;
	unsigned long flags;
	struct list_head range_list;
	rwlock_t range_lock;
	DECLARE_BITMAP(ioreqs_map, ACRN_IO_REQUEST_MAX);
	ioreq_handler_t handler;
	struct task_struct *thread;
	wait_queue_head_t wq;
	void *priv;
};
#define ACRN_INVALID_VMID (0xffffU)
#define ACRN_VM_FLAG_DESTROYED 0U
#define ACRN_VM_FLAG_CLEARING_IOREQ 1U
extern struct list_head acrn_vm_list;
extern rwlock_t acrn_vm_list_lock;
/**
* struct acrn_vm - Properties of ACRN User VM.
* @list: Entry within global list of all VMs.
......@@ -90,6 +149,11 @@ struct vm_memory_mapping {
* &acrn_vm.regions_mapping_count.
* @regions_mapping: Memory mappings of this VM.
* @regions_mapping_count: Number of memory mapping of this VM.
* @ioreq_clients_lock: Lock to protect ioreq_clients and default_client
* @ioreq_clients: The I/O request clients list of this VM
* @default_client: The default I/O request client
* @ioreq_buf: I/O request shared buffer
* @ioreq_page: The page of the I/O request shared buffer
*/
struct acrn_vm {
struct list_head list;
......@@ -99,6 +163,11 @@ struct acrn_vm {
struct mutex regions_mapping_lock;
struct vm_memory_mapping regions_mapping[ACRN_MEM_MAPPING_MAX];
int regions_mapping_count;
spinlock_t ioreq_clients_lock;
struct list_head ioreq_clients;
struct acrn_ioreq_client *default_client;
struct acrn_io_request_buffer *ioreq_buf;
struct page *ioreq_page;
};
struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
......@@ -112,4 +181,17 @@ int acrn_vm_memseg_unmap(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
void acrn_vm_all_ram_unmap(struct acrn_vm *vm);
int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma);
void acrn_ioreq_deinit(struct acrn_vm *vm);
int acrn_ioreq_intr_setup(void);
void acrn_ioreq_intr_remove(void);
void acrn_ioreq_request_clear(struct acrn_vm *vm);
int acrn_ioreq_client_wait(struct acrn_ioreq_client *client);
int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu);
struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
ioreq_handler_t handler,
void *data, bool is_default,
const char *name);
void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client);
#endif /* __ACRN_HSM_DRV_H */
......@@ -48,6 +48,7 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
struct acrn_vm *vm = filp->private_data;
struct acrn_vm_creation *vm_param;
struct acrn_vcpu_regs *cpu_regs;
struct acrn_ioreq_notify notify;
struct acrn_vm_memmap memmap;
int i, ret = 0;
......@@ -147,6 +148,35 @@ static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
ret = acrn_vm_memseg_unmap(vm, &memmap);
break;
case ACRN_IOCTL_CREATE_IOREQ_CLIENT:
if (vm->default_client)
return -EEXIST;
if (!acrn_ioreq_client_create(vm, NULL, NULL, true, "acrndm"))
ret = -EINVAL;
break;
case ACRN_IOCTL_DESTROY_IOREQ_CLIENT:
if (vm->default_client)
acrn_ioreq_client_destroy(vm->default_client);
break;
case ACRN_IOCTL_ATTACH_IOREQ_CLIENT:
if (vm->default_client)
ret = acrn_ioreq_client_wait(vm->default_client);
else
ret = -ENODEV;
break;
case ACRN_IOCTL_NOTIFY_REQUEST_FINISH:
if (copy_from_user(&notify, (void __user *)ioctl_param,
sizeof(struct acrn_ioreq_notify)))
return -EFAULT;
if (notify.reserved != 0)
return -EINVAL;
ret = acrn_ioreq_request_default_complete(vm, notify.vcpu);
break;
case ACRN_IOCTL_CLEAR_VM_IOREQ:
acrn_ioreq_request_clear(vm);
break;
default:
dev_dbg(acrn_dev.this_device, "Unknown IOCTL 0x%x!\n", cmd);
ret = -ENOTTY;
......@@ -188,14 +218,23 @@ static int __init hsm_init(void)
return -EPERM;
ret = misc_register(&acrn_dev);
if (ret)
if (ret) {
pr_err("Create misc dev failed!\n");
return ret;
}
return ret;
ret = acrn_ioreq_intr_setup();
if (ret) {
pr_err("Setup I/O request handler failed!\n");
misc_deregister(&acrn_dev);
return ret;
}
return 0;
}
/*
 * Module teardown: remove the I/O request notification handler first, then
 * deregister the misc device — the reverse of the setup order in hsm_init().
 */
static void __exit hsm_exit(void)
{
	acrn_ioreq_intr_remove();
	misc_deregister(&acrn_dev);
}
module_init(hsm_init);
......
......@@ -21,6 +21,10 @@
#define HC_RESET_VM _HC_ID(HC_ID, HC_ID_VM_BASE + 0x05)
#define HC_SET_VCPU_REGS _HC_ID(HC_ID, HC_ID_VM_BASE + 0x06)
#define HC_ID_IOREQ_BASE 0x30UL
#define HC_SET_IOREQ_BUFFER _HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x00)
#define HC_NOTIFY_REQUEST_FINISH _HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x01)
#define HC_ID_MEM_BASE 0x40UL
#define HC_VM_SET_MEMORY_REGIONS _HC_ID(HC_ID, HC_ID_MEM_BASE + 0x02)
......@@ -91,6 +95,30 @@ static inline long hcall_set_vcpu_regs(u64 vmid, u64 regs_state)
return acrn_hypercall2(HC_SET_VCPU_REGS, vmid, regs_state);
}
/**
 * hcall_set_ioreq_buffer() - Register a VM's shared I/O request buffer.
 * @vmid: User VM ID
 * @buffer: Service VM GPA of the shared buffer
 *
 * Return: 0 on success, <0 on failure
 */
static inline long hcall_set_ioreq_buffer(u64 vmid, u64 buffer)
{
	long ret;

	ret = acrn_hypercall2(HC_SET_IOREQ_BUFFER, vmid, buffer);
	return ret;
}
/**
 * hcall_notify_req_finish() - Tell the hypervisor an I/O request is complete.
 * @vmid: User VM ID
 * @vcpu: The vCPU which initiated the I/O request
 *
 * Return: 0 on success, <0 on failure
 */
static inline long hcall_notify_req_finish(u64 vmid, u64 vcpu)
{
	long ret;

	ret = acrn_hypercall2(HC_NOTIFY_REQUEST_FINISH, vmid, vcpu);
	return ret;
}
/**
* hcall_set_memory_regions() - Inform the hypervisor to set up EPT mappings
* @regions_pa: Service VM GPA of &struct vm_memory_region_batch
......
This diff is collapsed.
......@@ -15,9 +15,12 @@
#include "acrn_drv.h"
/* List of VMs */
static LIST_HEAD(acrn_vm_list);
/* To protect acrn_vm_list */
static DEFINE_MUTEX(acrn_vm_list_lock);
LIST_HEAD(acrn_vm_list);
/*
 * acrn_vm_list is read in a worker thread which dispatches I/O requests and
 * is written in the VM creation ioctl. Use the rwlock mechanism to protect it.
 */
DEFINE_RWLOCK(acrn_vm_list_lock);
struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
struct acrn_vm_creation *vm_param)
......@@ -32,12 +35,20 @@ struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
}
mutex_init(&vm->regions_mapping_lock);
INIT_LIST_HEAD(&vm->ioreq_clients);
spin_lock_init(&vm->ioreq_clients_lock);
vm->vmid = vm_param->vmid;
vm->vcpu_num = vm_param->vcpu_num;
mutex_lock(&acrn_vm_list_lock);
if (acrn_ioreq_init(vm, vm_param->ioreq_buf) < 0) {
hcall_destroy_vm(vm_param->vmid);
vm->vmid = ACRN_INVALID_VMID;
return NULL;
}
write_lock_bh(&acrn_vm_list_lock);
list_add(&vm->list, &acrn_vm_list);
mutex_unlock(&acrn_vm_list_lock);
write_unlock_bh(&acrn_vm_list_lock);
dev_dbg(acrn_dev.this_device, "VM %u created.\n", vm->vmid);
return vm;
......@@ -52,9 +63,11 @@ int acrn_vm_destroy(struct acrn_vm *vm)
return 0;
/* Remove from global VM list */
mutex_lock(&acrn_vm_list_lock);
write_lock_bh(&acrn_vm_list_lock);
list_del_init(&vm->list);
mutex_unlock(&acrn_vm_list_lock);
write_unlock_bh(&acrn_vm_list_lock);
acrn_ioreq_deinit(vm);
ret = hcall_destroy_vm(vm->vmid);
if (ret < 0) {
......
......@@ -14,6 +14,145 @@
#include <linux/types.h>
#include <linux/uuid.h>
#define ACRN_IO_REQUEST_MAX 16
#define ACRN_IOREQ_STATE_PENDING 0
#define ACRN_IOREQ_STATE_COMPLETE 1
#define ACRN_IOREQ_STATE_PROCESSING 2
#define ACRN_IOREQ_STATE_FREE 3
#define ACRN_IOREQ_TYPE_PORTIO 0
#define ACRN_IOREQ_TYPE_MMIO 1
#define ACRN_IOREQ_DIR_READ 0
#define ACRN_IOREQ_DIR_WRITE 1
/**
* struct acrn_mmio_request - Info of a MMIO I/O request
* @direction: Access direction of this request (ACRN_IOREQ_DIR_*)
* @reserved: Reserved for alignment and should be 0
* @address: Access address of this MMIO I/O request
* @size: Access size of this MMIO I/O request
* @value: Read/write value of this MMIO I/O request
*/
struct acrn_mmio_request {
__u32 direction;
__u32 reserved;
__u64 address;
__u64 size;
__u64 value;
};
/**
* struct acrn_pio_request - Info of a PIO I/O request
* @direction: Access direction of this request (ACRN_IOREQ_DIR_*)
* @reserved: Reserved for alignment and should be 0
* @address: Access address of this PIO I/O request
* @size: Access size of this PIO I/O request
* @value: Read/write value of this PIO I/O request
*/
struct acrn_pio_request {
__u32 direction;
__u32 reserved;
__u64 address;
__u64 size;
__u32 value;
};
/**
* struct acrn_io_request - 256-byte ACRN I/O request
* @type: Type of this request (ACRN_IOREQ_TYPE_*).
* @completion_polling: Polling flag. Hypervisor will poll completion of the
* I/O request if this flag is set.
* @reserved0: Reserved fields.
* @reqs: Union of different types of request. Byte offset: 64.
* @reqs.pio_request: PIO request data of the I/O request.
* @reqs.mmio_request: MMIO request data of the I/O request.
* @reqs.data: Raw data of the I/O request.
* @reserved1: Reserved fields.
* @kernel_handled: Flag indicating this request needs to be handled in kernel.
* @processed: The status of this request (ACRN_IOREQ_STATE_*).
*
* The state transitions of ACRN I/O request:
*
* FREE -> PENDING -> PROCESSING -> COMPLETE -> FREE -> ...
*
* An I/O request in COMPLETE or FREE state is owned by the hypervisor. HSM and
* ACRN userspace are in charge of processing the others.
*
* On basis of the states illustrated above, a typical lifecycle of ACRN IO
* request would look like:
*
* Flow (assume the initial state is FREE)
* |
* | Service VM vCPU 0 Service VM vCPU x User vCPU y
* |
* | hypervisor:
* | fills in type, addr, etc.
* | pauses the User VM vCPU y
* | sets the state to PENDING (a)
* | fires an upcall to Service VM
* |
* | HSM:
* | scans for PENDING requests
* | sets the states to PROCESSING (b)
* | assigns the requests to clients (c)
* V
* | client:
* | scans for the assigned requests
* | handles the requests (d)
* | HSM:
* | sets states to COMPLETE
* | notifies the hypervisor
* |
* | hypervisor:
* | resumes User VM vCPU y (e)
* |
* | hypervisor:
* | post handling (f)
* V sets states to FREE
*
* Note that the procedures (a) to (f) in the illustration above require to be
* strictly processed in the order. One vCPU cannot trigger another request of
* I/O emulation before completing the previous one.
*
* Atomic operations and memory barriers are required when HSM and the
* hypervisor access the state of &struct acrn_io_request.
*
*/
struct acrn_io_request {
	__u32	type;
	__u32	completion_polling;
	__u32	reserved0[14];	/* pads the header so @reqs sits at byte offset 64 */
	union {
		struct acrn_pio_request		pio_request;
		struct acrn_mmio_request	mmio_request;
		__u64				data[8];
	} reqs;
	__u32	reserved1;
	__u32	kernel_handled;
	__u32	processed;
} __attribute__((aligned(256)));	/* each slot occupies exactly 256 bytes */
/**
 * struct acrn_io_request_buffer - The 4-KByte shared I/O request page
 * @req_slot:	Array of %ACRN_IO_REQUEST_MAX (16) I/O request slots,
 *		indexed by vCPU ID
 * @reserved:	Sizes the union to a full 4096-byte page
 */
struct acrn_io_request_buffer {
	union {
		struct acrn_io_request	req_slot[ACRN_IO_REQUEST_MAX];
		__u8			reserved[4096];
	};
};
/**
 * struct acrn_ioreq_notify - The structure of ioreq completion notification
 * @vmid:	User VM ID
 * @reserved:	Reserved and must be 0 (the NOTIFY_REQUEST_FINISH ioctl
 *		rejects nonzero values with -EINVAL)
 * @vcpu:	vCPU ID
 */
struct acrn_ioreq_notify {
	__u16	vmid;
	__u16	reserved;
	__u32	vcpu;
};
/**
* struct acrn_vm_creation - Info to create a User VM
* @vmid: User VM ID returned from the hypervisor
......@@ -218,6 +357,17 @@ struct acrn_vm_memmap {
#define ACRN_IOCTL_SET_VCPU_REGS \
_IOW(ACRN_IOCTL_TYPE, 0x16, struct acrn_vcpu_regs)
#define ACRN_IOCTL_NOTIFY_REQUEST_FINISH \
_IOW(ACRN_IOCTL_TYPE, 0x31, struct acrn_ioreq_notify)
#define ACRN_IOCTL_CREATE_IOREQ_CLIENT \
_IO(ACRN_IOCTL_TYPE, 0x32)
#define ACRN_IOCTL_ATTACH_IOREQ_CLIENT \
_IO(ACRN_IOCTL_TYPE, 0x33)
#define ACRN_IOCTL_DESTROY_IOREQ_CLIENT \
_IO(ACRN_IOCTL_TYPE, 0x34)
#define ACRN_IOCTL_CLEAR_VM_IOREQ \
_IO(ACRN_IOCTL_TYPE, 0x35)
#define ACRN_IOCTL_SET_MEMSEG \
_IOW(ACRN_IOCTL_TYPE, 0x41, struct acrn_vm_memmap)
#define ACRN_IOCTL_UNSET_MEMSEG \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment