Commit adde0476 authored by Marc Zyngier

Merge branch kvm-arm64/selftest/s2-faults into kvmarm-master/next

* kvm-arm64/selftest/s2-faults:
  : .
  : New KVM/arm64 selftests exercising various sorts of S2 faults, courtesy
  : of Ricardo Koller. From the cover letter:
  :
  : "This series adds a new aarch64 selftest for testing stage 2 fault handling
  : for various combinations of guest accesses (e.g., write, S1PTW), backing
  : sources (e.g., anon), and types of faults (e.g., read on hugetlbfs with a
  : hole, write on a readonly memslot). Each test tries a different combination
  : and then checks that the access results in the right behavior (e.g., uffd
  : faults with the right address and write/read flag). [...]"
  : .
  KVM: selftests: aarch64: Add mix of tests into page_fault_test
  KVM: selftests: aarch64: Add readonly memslot tests into page_fault_test
  KVM: selftests: aarch64: Add dirty logging tests into page_fault_test
  KVM: selftests: aarch64: Add userfaultfd tests into page_fault_test
  KVM: selftests: aarch64: Add aarch64/page_fault_test
  KVM: selftests: Use the right memslot for code, page-tables, and data allocations
  KVM: selftests: Fix alignment in virt_arch_pgd_alloc() and vm_vaddr_alloc()
  KVM: selftests: Add vm->memslots[] and enum kvm_mem_region_type
  KVM: selftests: Stash backing_src_type in struct userspace_mem_region
  tools: Copy bitfield.h from the kernel sources
  KVM: selftests: aarch64: Construct DEFAULT_MAIR_EL1 using sysreg.h macros
  KVM: selftests: Add missing close and munmap in __vm_mem_region_delete()
  KVM: selftests: aarch64: Add virt_get_pte_hva() library function
  KVM: selftests: Add a userfaultfd library
Signed-off-by: Marc Zyngier <maz@kernel.org>
parents 02f6fdd4 ff2b5509
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2014 Felix Fietkau <nbd@nbd.name>
* Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com>
*/
#ifndef _LINUX_BITFIELD_H
#define _LINUX_BITFIELD_H
#include <linux/build_bug.h>
#include <asm/byteorder.h>
/*
* Bitfield access macros
*
* FIELD_{GET,PREP} macros take as first parameter shifted mask
* from which they extract the base mask and shift amount.
* Mask must be a compilation time constant.
*
* Example:
*
* #define REG_FIELD_A GENMASK(6, 0)
* #define REG_FIELD_B BIT(7)
* #define REG_FIELD_C GENMASK(15, 8)
* #define REG_FIELD_D GENMASK(31, 16)
*
* Get:
* a = FIELD_GET(REG_FIELD_A, reg);
* b = FIELD_GET(REG_FIELD_B, reg);
*
* Set:
* reg = FIELD_PREP(REG_FIELD_A, 1) |
* FIELD_PREP(REG_FIELD_B, 0) |
* FIELD_PREP(REG_FIELD_C, c) |
* FIELD_PREP(REG_FIELD_D, 0x40);
*
* Modify:
* reg &= ~REG_FIELD_C;
* reg |= FIELD_PREP(REG_FIELD_C, c);
*/
#define __bf_shf(x) (__builtin_ffsll(x) - 1)
#define __scalar_type_to_unsigned_cases(type) \
unsigned type: (unsigned type)0, \
signed type: (unsigned type)0
#define __unsigned_scalar_typeof(x) typeof( \
_Generic((x), \
char: (unsigned char)0, \
__scalar_type_to_unsigned_cases(char), \
__scalar_type_to_unsigned_cases(short), \
__scalar_type_to_unsigned_cases(int), \
__scalar_type_to_unsigned_cases(long), \
__scalar_type_to_unsigned_cases(long long), \
default: (x)))
#define __bf_cast_unsigned(type, x) ((__unsigned_scalar_typeof(type))(x))
#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \
({ \
BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \
_pfx "mask is not constant"); \
BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero"); \
BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \
~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
_pfx "value too large for the field"); \
BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) > \
__bf_cast_unsigned(_reg, ~0ull), \
_pfx "type of reg too small for mask"); \
__BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \
(1ULL << __bf_shf(_mask))); \
})
/**
* FIELD_MAX() - produce the maximum value representable by a field
* @_mask: shifted mask defining the field's length and position
*
* FIELD_MAX() returns the maximum value that can be held in the field
* specified by @_mask.
*/
#define FIELD_MAX(_mask) \
({ \
__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: "); \
(typeof(_mask))((_mask) >> __bf_shf(_mask)); \
})
/**
* FIELD_FIT() - check if value fits in the field
* @_mask: shifted mask defining the field's length and position
* @_val: value to test against the field
*
* Return: true if @_val can fit inside @_mask, false if @_val is too big.
*/
#define FIELD_FIT(_mask, _val) \
({ \
__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \
!((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \
})
/**
* FIELD_PREP() - prepare a bitfield element
* @_mask: shifted mask defining the field's length and position
* @_val: value to put in the field
*
* FIELD_PREP() masks and shifts up the value. The result should
* be combined with other fields of the bitfield using logical OR.
*/
#define FIELD_PREP(_mask, _val) \
({ \
__BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \
((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \
})
/**
* FIELD_GET() - extract a bitfield element
* @_mask: shifted mask defining the field's length and position
* @_reg: value of entire bitfield
*
* FIELD_GET() extracts the field specified by @_mask from the
* bitfield passed in as @_reg by masking and shifting it down.
*/
#define FIELD_GET(_mask, _reg) \
({ \
__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \
(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
})
extern void __compiletime_error("value doesn't fit into mask")
__field_overflow(void);
extern void __compiletime_error("bad bitfield mask")
__bad_mask(void);
static __always_inline u64 field_multiplier(u64 field)
{
if ((field | (field - 1)) & ((field | (field - 1)) + 1))
__bad_mask();
return field & -field;
}
static __always_inline u64 field_mask(u64 field)
{
return field / field_multiplier(field);
}
#define field_max(field) ((typeof(field))field_mask(field))
#define ____MAKE_OP(type,base,to,from) \
static __always_inline __##type type##_encode_bits(base v, base field) \
{ \
if (__builtin_constant_p(v) && (v & ~field_mask(field))) \
__field_overflow(); \
return to((v & field_mask(field)) * field_multiplier(field)); \
} \
static __always_inline __##type type##_replace_bits(__##type old, \
base val, base field) \
{ \
return (old & ~to(field)) | type##_encode_bits(val, field); \
} \
static __always_inline void type##p_replace_bits(__##type *p, \
base val, base field) \
{ \
*p = (*p & ~to(field)) | type##_encode_bits(val, field); \
} \
static __always_inline base type##_get_bits(__##type v, base field) \
{ \
return (from(v) & field)/field_multiplier(field); \
}
#define __MAKE_OP(size) \
____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \
____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \
____MAKE_OP(u##size,u##size,,)
____MAKE_OP(u8,u8,,)
__MAKE_OP(16)
__MAKE_OP(32)
__MAKE_OP(64)
#undef __MAKE_OP
#undef ____MAKE_OP
#endif
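
Aside (not part of the commit): the FIELD_*() macros above are used with a GENMASK()-based mask defined once per register layout; the typed helpers further down (e.g. le32_get_bits()) do the same for fixed-endian storage. A minimal usage sketch, with a made-up register layout purely for illustration:

#include <linux/types.h>
#include <linux/bits.h>
#include <linux/bitfield.h>

/* Hypothetical 32-bit register layout, not from this commit. */
#define EX_REG_MODE	GENMASK(2, 0)	/* bits [2:0]  */
#define EX_REG_LEN	GENMASK(15, 8)	/* bits [15:8] */

static inline u32 ex_reg_pack(u32 mode, u32 len)
{
	/* Mask and shift each value into place, then OR the fields together. */
	return FIELD_PREP(EX_REG_MODE, mode) | FIELD_PREP(EX_REG_LEN, len);
}

static inline u32 ex_reg_unpack_len(u32 reg)
{
	/* Mask the field out of the register and shift it back down to bit 0. */
	return FIELD_GET(EX_REG_LEN, reg);
}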
@@ -4,6 +4,7 @@
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
 /aarch64/hypercalls
+/aarch64/page_fault_test
 /aarch64/psci_test
 /aarch64/vcpu_width_config
 /aarch64/vgic_init
......
@@ -47,6 +47,7 @@ LIBKVM += lib/perf_test_util.c
 LIBKVM += lib/rbtree.c
 LIBKVM += lib/sparsebit.c
 LIBKVM += lib/test_util.c
+LIBKVM += lib/userfaultfd_util.c

 LIBKVM_STRING += lib/string_override.c
@@ -152,6 +153,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
+TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
 TEST_GEN_PROGS_aarch64 += aarch64/psci_test
 TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
......
This diff is collapsed.
@@ -22,23 +22,13 @@
 #include "test_util.h"
 #include "perf_test_util.h"
 #include "guest_modes.h"
+#include "userfaultfd_util.h"

 #ifdef __NR_userfaultfd

-#ifdef PRINT_PER_PAGE_UPDATES
-#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
-#ifdef PRINT_PER_VCPU_UPDATES
-#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
 static int nr_vcpus = 1;
 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 static size_t demand_paging_size;
 static char *guest_data_prototype;
@@ -67,9 +57,11 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
                 ts_diff.tv_sec, ts_diff.tv_nsec);
 }

-static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
+static int handle_uffd_page_request(int uffd_mode, int uffd,
+                                    struct uffd_msg *msg)
 {
         pid_t tid = syscall(__NR_gettid);
+        uint64_t addr = msg->arg.pagefault.address;
         struct timespec start;
         struct timespec ts_diff;
         int r;
@@ -116,174 +108,32 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
         return 0;
 }

-bool quit_uffd_thread;
-
-struct uffd_handler_args {
+struct test_params {
         int uffd_mode;
-        int uffd;
-        int pipefd;
-        useconds_t delay;
+        useconds_t uffd_delay;
+        enum vm_mem_backing_src_type src_type;
+        bool partition_vcpu_memory_access;
 };

-static void *uffd_handler_thread_fn(void *arg)
-{
-        struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
-        int uffd = uffd_args->uffd;
-        int pipefd = uffd_args->pipefd;
-        useconds_t delay = uffd_args->delay;
-        int64_t pages = 0;
-        struct timespec start;
-        struct timespec ts_diff;
-
-        clock_gettime(CLOCK_MONOTONIC, &start);
-        while (!quit_uffd_thread) {
-                struct uffd_msg msg;
-                struct pollfd pollfd[2];
-                char tmp_chr;
-                int r;
-                uint64_t addr;
-
-                pollfd[0].fd = uffd;
-                pollfd[0].events = POLLIN;
-                pollfd[1].fd = pipefd;
-                pollfd[1].events = POLLIN;
-
-                r = poll(pollfd, 2, -1);
-                switch (r) {
-                case -1:
-                        pr_info("poll err");
-                        continue;
-                case 0:
-                        continue;
-                case 1:
-                        break;
-                default:
-                        pr_info("Polling uffd returned %d", r);
-                        return NULL;
-                }
-
-                if (pollfd[0].revents & POLLERR) {
-                        pr_info("uffd revents has POLLERR");
-                        return NULL;
-                }
-
-                if (pollfd[1].revents & POLLIN) {
-                        r = read(pollfd[1].fd, &tmp_chr, 1);
-                        TEST_ASSERT(r == 1,
-                                    "Error reading pipefd in UFFD thread\n");
-                        return NULL;
-                }
-
-                if (!(pollfd[0].revents & POLLIN))
-                        continue;
-
-                r = read(uffd, &msg, sizeof(msg));
-                if (r == -1) {
-                        if (errno == EAGAIN)
-                                continue;
-                        pr_info("Read of uffd got errno %d\n", errno);
-                        return NULL;
-                }
-
-                if (r != sizeof(msg)) {
-                        pr_info("Read on uffd returned unexpected size: %d bytes", r);
-                        return NULL;
-                }
-
-                if (!(msg.event & UFFD_EVENT_PAGEFAULT))
-                        continue;
-
-                if (delay)
-                        usleep(delay);
-                addr = msg.arg.pagefault.address;
-                r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
-                if (r < 0)
-                        return NULL;
-                pages++;
-        }
-
-        ts_diff = timespec_elapsed(start);
-        PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
-                       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
-                       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
-
-        return NULL;
-}
-
-static void setup_demand_paging(struct kvm_vm *vm,
-                                pthread_t *uffd_handler_thread, int pipefd,
-                                int uffd_mode, useconds_t uffd_delay,
-                                struct uffd_handler_args *uffd_args,
-                                void *hva, void *alias, uint64_t len)
+static void prefault_mem(void *alias, uint64_t len)
 {
-        bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
-        int uffd;
-        struct uffdio_api uffdio_api;
-        struct uffdio_register uffdio_register;
-        uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
-        int ret;
+        size_t p;

-        PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
-                       is_minor ? "MINOR" : "MISSING",
-                       is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
-
-        /* In order to get minor faults, prefault via the alias. */
-        if (is_minor) {
-                size_t p;
-
-                expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
-
-                TEST_ASSERT(alias != NULL, "Alias required for minor faults");
-                for (p = 0; p < (len / demand_paging_size); ++p) {
-                        memcpy(alias + (p * demand_paging_size),
-                               guest_data_prototype, demand_paging_size);
-                }
+        TEST_ASSERT(alias != NULL, "Alias required for minor faults");
+        for (p = 0; p < (len / demand_paging_size); ++p) {
+                memcpy(alias + (p * demand_paging_size),
+                       guest_data_prototype, demand_paging_size);
         }
-
-        uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
-        TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd));
-
-        uffdio_api.api = UFFD_API;
-        uffdio_api.features = 0;
-        ret = ioctl(uffd, UFFDIO_API, &uffdio_api);
-        TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret));
-
-        uffdio_register.range.start = (uint64_t)hva;
-        uffdio_register.range.len = len;
-        uffdio_register.mode = uffd_mode;
-        ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
-        TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret));
-        TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
-                    expected_ioctls, "missing userfaultfd ioctls");
-
-        uffd_args->uffd_mode = uffd_mode;
-        uffd_args->uffd = uffd;
-        uffd_args->pipefd = pipefd;
-        uffd_args->delay = uffd_delay;
-        pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
-                       uffd_args);
-
-        PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
-                       hva, hva + len);
 }

-struct test_params {
-        int uffd_mode;
-        useconds_t uffd_delay;
-        enum vm_mem_backing_src_type src_type;
-        bool partition_vcpu_memory_access;
-};
-
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
         struct test_params *p = arg;
-        pthread_t *uffd_handler_threads = NULL;
-        struct uffd_handler_args *uffd_args = NULL;
+        struct uffd_desc **uffd_descs = NULL;
         struct timespec start;
         struct timespec ts_diff;
-        int *pipefds = NULL;
         struct kvm_vm *vm;
-        int r, i;
+        int i;

         vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
                                  p->src_type, p->partition_vcpu_memory_access);
@@ -296,15 +146,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
         memset(guest_data_prototype, 0xAB, demand_paging_size);

         if (p->uffd_mode) {
-                uffd_handler_threads =
-                        malloc(nr_vcpus * sizeof(*uffd_handler_threads));
-                TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
-
-                uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
-                TEST_ASSERT(uffd_args, "Memory allocation failed");
-
-                pipefds = malloc(sizeof(int) * nr_vcpus * 2);
-                TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
+                uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
+                TEST_ASSERT(uffd_descs, "Memory allocation failed");

                 for (i = 0; i < nr_vcpus; i++) {
                         struct perf_test_vcpu_args *vcpu_args;
@@ -317,19 +160,17 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                         vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
                         vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);

+                        prefault_mem(vcpu_alias,
+                                vcpu_args->pages * perf_test_args.guest_page_size);
+
                         /*
                          * Set up user fault fd to handle demand paging
                          * requests.
                          */
-                        r = pipe2(&pipefds[i * 2],
-                                  O_CLOEXEC | O_NONBLOCK);
-                        TEST_ASSERT(!r, "Failed to set up pipefd");
-
-                        setup_demand_paging(vm, &uffd_handler_threads[i],
-                                            pipefds[i * 2], p->uffd_mode,
-                                            p->uffd_delay, &uffd_args[i],
-                                            vcpu_hva, vcpu_alias,
-                                            vcpu_args->pages * perf_test_args.guest_page_size);
+                        uffd_descs[i] = uffd_setup_demand_paging(
+                                p->uffd_mode, p->uffd_delay, vcpu_hva,
+                                vcpu_args->pages * perf_test_args.guest_page_size,
+                                &handle_uffd_page_request);
                 }
         }
@@ -344,15 +185,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
         pr_info("All vCPU threads joined\n");

         if (p->uffd_mode) {
-                char c;
-
                 /* Tell the user fault fd handler threads to quit */
-                for (i = 0; i < nr_vcpus; i++) {
-                        r = write(pipefds[i * 2 + 1], &c, 1);
-                        TEST_ASSERT(r == 1, "Unable to write to pipefd");
-
-                        pthread_join(uffd_handler_threads[i], NULL);
-                }
+                for (i = 0; i < nr_vcpus; i++)
+                        uffd_stop_demand_paging(uffd_descs[i]);
         }

         pr_info("Total guest execution time: %ld.%.9lds\n",
@@ -364,11 +199,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
         perf_test_destroy_vm(vm);

         free(guest_data_prototype);
-        if (p->uffd_mode) {
-                free(uffd_handler_threads);
-                free(uffd_args);
-                free(pipefds);
-        }
+        if (p->uffd_mode)
+                free(uffd_descs);
 }

 static void help(char *name)
......
@@ -38,12 +38,25 @@
  * NORMAL             4     1111:1111
  * NORMAL_WT          5     1011:1011
  */
-#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
-                          (0x04ul << (1 * 8)) | \
-                          (0x0cul << (2 * 8)) | \
-                          (0x44ul << (3 * 8)) | \
-                          (0xfful << (4 * 8)) | \
-                          (0xbbul << (5 * 8)))
+
+/* Linux doesn't use these memory types, so let's define them. */
+#define MAIR_ATTR_DEVICE_GRE    UL(0x0c)
+#define MAIR_ATTR_NORMAL_WT     UL(0xbb)
+
+#define MT_DEVICE_nGnRnE        0
+#define MT_DEVICE_nGnRE         1
+#define MT_DEVICE_GRE           2
+#define MT_NORMAL_NC            3
+#define MT_NORMAL               4
+#define MT_NORMAL_WT            5
+
+#define DEFAULT_MAIR_EL1                                                \
+        (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |     \
+         MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |       \
+         MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |           \
+         MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |             \
+         MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |                   \
+         MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))

 #define MPIDR_HWID_BITMASK (0xff00fffffful)
@@ -92,11 +105,19 @@ enum {
 #define ESR_EC_MASK             (ESR_EC_NUM - 1)

 #define ESR_EC_SVC64            0x15
+#define ESR_EC_IABT             0x21
+#define ESR_EC_DABT             0x25
 #define ESR_EC_HW_BP_CURRENT    0x31
 #define ESR_EC_SSTEP_CURRENT    0x33
 #define ESR_EC_WP_CURRENT       0x35
 #define ESR_EC_BRK_INS          0x3c

+/* Access flag */
+#define PTE_AF                  (1ULL << 10)
+
+/* Access flag update enable/disable */
+#define TCR_EL1_HA              (1ULL << 39)
+
 void aarch64_get_supported_page_sizes(uint32_t ipa,
                                       bool *ps4k, bool *ps16k, bool *ps64k);
@@ -109,6 +130,8 @@ void vm_install_exception_handler(struct kvm_vm *vm,
 void vm_install_sync_handler(struct kvm_vm *vm,
                              int vector, int ec, handler_fn handler);

+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+
 static inline void cpu_relax(void)
 {
         asm volatile("yield" ::: "memory");
......
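
An aside on the DEFAULT_MAIR_EL1 rework above (not part of the diff): MAIR_ATTRIDX(attr, idx) is assumed here to place the 8-bit attribute attr into byte idx of MAIR_EL1, i.e. roughly (attr) << (idx * 8), so the macro-based definition evaluates to the same constant as the old open-coded one. A quick host-side check under that assumption:

#include <assert.h>
#include <stdint.h>

/* Assumed behaviour of MAIR_ATTRIDX(); the real macro comes from sysreg.h. */
#define EX_MAIR_ATTRIDX(attr, idx)	((uint64_t)(attr) << ((idx) * 8))

int main(void)
{
	/* Old open-coded value vs. the macro-based construction. */
	uint64_t old_mair = (0x00ull << (0 * 8)) | (0x04ull << (1 * 8)) |
			    (0x0cull << (2 * 8)) | (0x44ull << (3 * 8)) |
			    (0xffull << (4 * 8)) | (0xbbull << (5 * 8));
	uint64_t new_mair = EX_MAIR_ATTRIDX(0x00, 0) | EX_MAIR_ATTRIDX(0x04, 1) |
			    EX_MAIR_ATTRIDX(0x0c, 2) | EX_MAIR_ATTRIDX(0x44, 3) |
			    EX_MAIR_ATTRIDX(0xff, 4) | EX_MAIR_ATTRIDX(0xbb, 5);

	assert(old_mair == new_mair);
	return 0;
}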
@@ -34,6 +34,7 @@ struct userspace_mem_region {
         struct sparsebit *unused_phy_pages;
         int fd;
         off_t offset;
+        enum vm_mem_backing_src_type backing_src_type;
         void *host_mem;
         void *host_alias;
         void *mmap_start;
@@ -64,6 +65,14 @@ struct userspace_mem_regions {
         DECLARE_HASHTABLE(slot_hash, 9);
 };

+enum kvm_mem_region_type {
+        MEM_REGION_CODE,
+        MEM_REGION_DATA,
+        MEM_REGION_PT,
+        MEM_REGION_TEST_DATA,
+        NR_MEM_REGIONS,
+};
+
 struct kvm_vm {
         int mode;
         unsigned long type;
@@ -92,6 +101,13 @@ struct kvm_vm {
         int stats_fd;
         struct kvm_stats_header stats_header;
         struct kvm_stats_desc *stats_desc;
+
+        /*
+         * KVM region slots. These are the default memslots used by page
+         * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
+         * memslot.
+         */
+        uint32_t memslots[NR_MEM_REGIONS];
 };
@@ -104,6 +120,13 @@ struct kvm_vm {
 struct userspace_mem_region *
 memslot2region(struct kvm_vm *vm, uint32_t memslot);

+static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
+                                                             enum kvm_mem_region_type type)
+{
+        assert(type < NR_MEM_REGIONS);
+        return memslot2region(vm, vm->memslots[type]);
+}
+
 /* Minimum allocated guest virtual and physical addresses */
 #define KVM_UTIL_MIN_VADDR 0x2000
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
@@ -384,7 +407,11 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+                            enum kvm_mem_region_type type);
 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
+                                 enum kvm_mem_region_type type);
 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);

 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -646,13 +673,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
  * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
  * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
  */
-struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages);
+struct kvm_vm *____vm_create(enum vm_guest_mode mode);
 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
                            uint64_t nr_extra_pages);

 static inline struct kvm_vm *vm_create_barebones(void)
 {
-        return ____vm_create(VM_MODE_DEFAULT, 0);
+        return ____vm_create(VM_MODE_DEFAULT);
 }

 static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
......
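
To illustrate the typed-allocation interface declared above (hypothetical snippet, not from the series): a caller allocates guest virtual memory against a region type, and the allocation is served from whichever memslot the VM has recorded for that type in vm->memslots[]:

/* Hypothetical helper; assumes kvm_util.h has been included. */
static inline vm_vaddr_t alloc_test_data_page(struct kvm_vm *vm)
{
	struct userspace_mem_region *region;
	vm_vaddr_t gva;

	/* Backed by the memslot registered for MEM_REGION_TEST_DATA (slot 0 by default). */
	gva = __vm_vaddr_alloc_page(vm, MEM_REGION_TEST_DATA);

	/* The backing region, e.g. its backing_src_type, can be looked up by type. */
	region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	(void)region;

	return gva;
}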
/* SPDX-License-Identifier: GPL-2.0 */
/*
* KVM userfaultfd util
*
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019-2022 Google LLC
*/
#define _GNU_SOURCE /* for pipe2 */
#include <inttypes.h>
#include <time.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include "test_util.h"
typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
struct uffd_desc {
int uffd_mode;
int uffd;
int pipefds[2];
useconds_t delay;
uffd_handler_t handler;
pthread_t thread;
};
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len,
uffd_handler_t handler);
void uffd_stop_demand_paging(struct uffd_desc *uffd);
#ifdef PRINT_PER_PAGE_UPDATES
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
#else
#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
#endif
#ifdef PRINT_PER_VCPU_UPDATES
#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
#else
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
#endif
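
A minimal sketch of how a test consumes this interface (the handler below is hypothetical; demand_paging_test.c above shows the real UFFDIO_COPY/UFFDIO_CONTINUE handler). The library spawns one handler thread per registered range, invokes the handler once per userfaultfd message, and stops the thread via the pipe when uffd_stop_demand_paging() is called:

#include <errno.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>

#include "userfaultfd_util.h"

/* Hypothetical handler: resolve a MISSING fault by zero-filling the page. */
static int zero_fill_handler(int uffd_mode, int uffd, struct uffd_msg *msg)
{
	uint64_t addr = msg->arg.pagefault.address;
	struct uffdio_zeropage zero = {
		.range.start = addr & ~((uint64_t)getpagesize() - 1),
		.range.len = getpagesize(),
	};

	return ioctl(uffd, UFFDIO_ZEROPAGE, &zero) == -1 ? -errno : 0;
}

static void demand_page_range(void *hva, uint64_t len)
{
	struct uffd_desc *desc;

	desc = uffd_setup_demand_paging(UFFDIO_REGISTER_MODE_MISSING,
					0 /* no artificial delay */,
					hva, len, zero_fill_handler);
	/* ... run vCPUs or other work that touches [hva, hva + len) ... */
	uffd_stop_demand_paging(desc);
}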
@@ -77,13 +77,15 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
 void virt_arch_pgd_alloc(struct kvm_vm *vm)
 {
-        if (!vm->pgd_created) {
-                vm_paddr_t paddr = vm_phy_pages_alloc(vm,
-                        page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
-                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-                vm->pgd = paddr;
-                vm->pgd_created = true;
-        }
+        size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+        if (vm->pgd_created)
+                return;
+
+        vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+                                     KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                     vm->memslots[MEM_REGION_PT]);
+        vm->pgd_created = true;
 }

 static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -134,12 +136,12 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
-        uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+        uint64_t attr_idx = MT_NORMAL;

         _virt_pg_map(vm, vaddr, paddr, attr_idx);
 }

-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
 {
         uint64_t *ptep;
@@ -170,11 +172,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
                 TEST_FAIL("Page table levels must be 2, 3, or 4");
         }

-        return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+        return ptep;

 unmapped_gva:
         TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-        exit(1);
+        exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+        uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+        return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
 }

 static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
@@ -319,13 +328,16 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
 struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
                                   struct kvm_vcpu_init *init, void *guest_code)
 {
-        size_t stack_size = vm->page_size == 4096 ?
-                                        DEFAULT_STACK_PGS * vm->page_size :
-                                        vm->page_size;
-        uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-                                        DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
+        size_t stack_size;
+        uint64_t stack_vaddr;
         struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);

+        stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+                                             vm->page_size;
+        stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+                                       DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+                                       MEM_REGION_DATA);
+
         aarch64_vcpu_setup(vcpu, init);

         vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
@@ -429,8 +441,8 @@ void route_exception(struct ex_regs *regs, int vector)
 void vm_init_descriptor_tables(struct kvm_vm *vm)
 {
-        vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
-                        vm->page_size);
+        vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+                        vm->page_size, MEM_REGION_DATA);

         *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
 }
......
@@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
                 seg_vend |= vm->page_size - 1;
                 size_t seg_size = seg_vend - seg_vstart + 1;

-                vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
+                vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
+                                                    MEM_REGION_CODE);
                 TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
                             "virtual memory for segment at requested min addr,\n"
                             "  segment idx: %u\n"
......
@@ -185,13 +185,10 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
                "Missing new mode params?");

-struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
+struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 {
         struct kvm_vm *vm;

-        pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
-                 vm_guest_mode_string(mode), nr_pages);
-
         vm = calloc(1, sizeof(*vm));
         TEST_ASSERT(vm != NULL, "Insufficient Memory");
@@ -287,9 +284,6 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
         /* Allocate and setup memory for guest. */
         vm->vpages_mapped = sparsebit_alloc();
-        if (nr_pages != 0)
-                vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-                                            0, 0, nr_pages, 0);

         return vm;
 }
@@ -335,8 +329,16 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
         uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
                                                  nr_extra_pages);
         struct kvm_vm *vm;
+        int i;
+
+        pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
+                 vm_guest_mode_string(mode), nr_pages);

-        vm = ____vm_create(mode, nr_pages);
+        vm = ____vm_create(mode);
+
+        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+        for (i = 0; i < NR_MEM_REGIONS; i++)
+                vm->memslots[i] = 0;

         kvm_vm_elf_load(vm, program_invocation_name);
@@ -586,6 +588,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
         sparsebit_free(&region->unused_phy_pages);
         ret = munmap(region->mmap_start, region->mmap_size);
         TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+        if (region->fd >= 0) {
+                /* There's an extra map when using shared memory. */
+                ret = munmap(region->mmap_alias, region->mmap_size);
+                TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+                close(region->fd);
+        }

         free(region);
 }
@@ -923,6 +931,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
                     vm_mem_backing_src_alias(src_type)->name);
         }

+        region->backing_src_type = src_type;
         region->unused_phy_pages = sparsebit_alloc();
         sparsebit_set_num(region->unused_phy_pages,
                           guest_paddr >> vm->page_shift, npages);
@@ -1217,32 +1226,15 @@ static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
         return pgidx_start * vm->page_size;
 }

-/*
- * VM Virtual Address Allocate
- *
- * Input Args:
- *   vm - Virtual Machine
- *   sz - Size in bytes
- *   vaddr_min - Minimum starting virtual address
- *
- * Output Args: None
- *
- * Return:
- *   Starting guest virtual address
- *
- * Allocates at least sz bytes within the virtual address space of the vm
- * given by vm.  The allocated bytes are mapped to a virtual address >=
- * the address given by vaddr_min.  Note that each allocation uses a
- * a unique set of pages, with the minimum real allocation being at least
- * a page.
- */
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+                            enum kvm_mem_region_type type)
 {
         uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);

         virt_pgd_alloc(vm);
         vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
-                                              KVM_UTIL_MIN_PFN * vm->page_size, 0);
+                                              KVM_UTIL_MIN_PFN * vm->page_size,
+                                              vm->memslots[type]);

         /*
          * Find an unused range of virtual page addresses of at least
@@ -1263,6 +1255,30 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
         return vaddr_start;
 }

+/*
+ * VM Virtual Address Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size in bytes
+ *   vaddr_min - Minimum starting virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm.  The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min.  Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.  The allocated physical space comes from the TEST_DATA memory region.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
+{
+        return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
+}
+
 /*
  * VM Virtual Address Allocate Pages
  *
@@ -1282,6 +1298,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
         return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
 }

+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
+{
+        return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
+}
+
 /*
  * VM Virtual Address Allocate Page
  *
@@ -1847,7 +1868,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
 {
-        return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+        return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                 vm->memslots[MEM_REGION_PT]);
 }

 /*
......
@@ -55,13 +55,15 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
 void virt_arch_pgd_alloc(struct kvm_vm *vm)
 {
-        if (!vm->pgd_created) {
-                vm_paddr_t paddr = vm_phy_pages_alloc(vm,
-                        page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size,
-                        KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-                vm->pgd = paddr;
-                vm->pgd_created = true;
-        }
+        size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+
+        if (vm->pgd_created)
+                return;
+
+        vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+                                     KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                     vm->memslots[MEM_REGION_PT]);
+        vm->pgd_created = true;
 }

 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -279,15 +281,18 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
                                   void *guest_code)
 {
         int r;
-        size_t stack_size = vm->page_size == 4096 ?
-                                        DEFAULT_STACK_PGS * vm->page_size :
-                                        vm->page_size;
-        unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-                                        DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
+        size_t stack_size;
+        unsigned long stack_vaddr;
         unsigned long current_gp = 0;
         struct kvm_mp_state mps;
         struct kvm_vcpu *vcpu;

+        stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+                                             vm->page_size;
+        stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+                                       DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
+                                       MEM_REGION_DATA);
+
         vcpu = __vm_vcpu_add(vm, vcpu_id);
         riscv_vcpu_mmu_setup(vcpu);
......
@@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
                 return;

         paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
-                                   KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+                                   KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+                                   vm->memslots[MEM_REGION_PT]);
         memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);

         vm->pgd = paddr;
@@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
         TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
                     vm->page_size);

-        stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-                                     DEFAULT_GUEST_STACK_VADDR_MIN);
+        stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+                                       DEFAULT_GUEST_STACK_VADDR_MIN,
+                                       MEM_REGION_DATA);

         vcpu = __vm_vcpu_add(vm, vcpu_id);
......
// SPDX-License-Identifier: GPL-2.0
/*
* KVM userfaultfd util
* Adapted from demand_paging_test.c
*
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019-2022 Google LLC
*/
#define _GNU_SOURCE /* for pipe2 */
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>
#include "kvm_util.h"
#include "test_util.h"
#include "perf_test_util.h"
#include "userfaultfd_util.h"
#ifdef __NR_userfaultfd
static void *uffd_handler_thread_fn(void *arg)
{
struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
int uffd = uffd_desc->uffd;
int pipefd = uffd_desc->pipefds[0];
useconds_t delay = uffd_desc->delay;
int64_t pages = 0;
struct timespec start;
struct timespec ts_diff;
clock_gettime(CLOCK_MONOTONIC, &start);
while (1) {
struct uffd_msg msg;
struct pollfd pollfd[2];
char tmp_chr;
int r;
pollfd[0].fd = uffd;
pollfd[0].events = POLLIN;
pollfd[1].fd = pipefd;
pollfd[1].events = POLLIN;
r = poll(pollfd, 2, -1);
switch (r) {
case -1:
pr_info("poll err");
continue;
case 0:
continue;
case 1:
break;
default:
pr_info("Polling uffd returned %d", r);
return NULL;
}
if (pollfd[0].revents & POLLERR) {
pr_info("uffd revents has POLLERR");
return NULL;
}
if (pollfd[1].revents & POLLIN) {
r = read(pollfd[1].fd, &tmp_chr, 1);
TEST_ASSERT(r == 1,
"Error reading pipefd in UFFD thread\n");
return NULL;
}
if (!(pollfd[0].revents & POLLIN))
continue;
r = read(uffd, &msg, sizeof(msg));
if (r == -1) {
if (errno == EAGAIN)
continue;
pr_info("Read of uffd got errno %d\n", errno);
return NULL;
}
if (r != sizeof(msg)) {
pr_info("Read on uffd returned unexpected size: %d bytes", r);
return NULL;
}
if (!(msg.event & UFFD_EVENT_PAGEFAULT))
continue;
if (delay)
usleep(delay);
r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
if (r < 0)
return NULL;
pages++;
}
ts_diff = timespec_elapsed(start);
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
return NULL;
}
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len,
uffd_handler_t handler)
{
struct uffd_desc *uffd_desc;
bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
int uffd;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
int ret;
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
TEST_ASSERT(uffd_desc, "malloc failed");
/* In order to get minor faults, prefault via the alias. */
if (is_minor)
expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
"ioctl UFFDIO_API failed: %" PRIu64,
(uint64_t)uffdio_api.api);
uffdio_register.range.start = (uint64_t)hva;
uffdio_register.range.len = len;
uffdio_register.mode = uffd_mode;
TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
"ioctl UFFDIO_REGISTER failed");
TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
expected_ioctls, "missing userfaultfd ioctls");
ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(!ret, "Failed to set up pipefd");
uffd_desc->uffd_mode = uffd_mode;
uffd_desc->uffd = uffd;
uffd_desc->delay = delay;
uffd_desc->handler = handler;
pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
uffd_desc);
PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
hva, hva + len);
return uffd_desc;
}
void uffd_stop_demand_paging(struct uffd_desc *uffd)
{
char c = 0;
int ret;
ret = write(uffd->pipefds[1], &c, 1);
TEST_ASSERT(ret == 1, "Unable to write to pipefd");
ret = pthread_join(uffd->thread, NULL);
TEST_ASSERT(ret == 0, "Pthread_join failed.");
close(uffd->uffd);
close(uffd->pipefds[1]);
close(uffd->pipefds[0]);
free(uffd);
}
#endif /* __NR_userfaultfd */
@@ -552,7 +552,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
 {
         if (!vm->gdt)
-                vm->gdt = vm_vaddr_alloc_page(vm);
+                vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);

         dt->base = vm->gdt;
         dt->limit = getpagesize();
@@ -562,7 +562,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
                                 int selector)
 {
         if (!vm->tss)
-                vm->tss = vm_vaddr_alloc_page(vm);
+                vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);

         memset(segp, 0, sizeof(*segp));
         segp->base = vm->tss;
@@ -647,8 +647,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
         vm_vaddr_t stack_vaddr;
         struct kvm_vcpu *vcpu;

-        stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
-                                     DEFAULT_GUEST_STACK_VADDR_MIN);
+        stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+                                       DEFAULT_GUEST_STACK_VADDR_MIN,
+                                       MEM_REGION_DATA);

         vcpu = __vm_vcpu_add(vm, vcpu_id);
         vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
@@ -1145,8 +1146,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
         extern void *idt_handlers;
         int i;

-        vm->idt = vm_vaddr_alloc_page(vm);
-        vm->handlers = vm_vaddr_alloc_page(vm);
+        vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+        vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
         /* Handlers have the same address in both address spaces.*/
         for (i = 0; i < NUM_INTERRUPTS; i++)
                 set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
......