Commit df4ec5aa, authored by Anish Moorthy, committed by Sean Christopherson

KVM: selftests: Allow many vCPUs and reader threads per UFFD in demand paging test

At the moment, demand_paging_test does not support profiling/testing
multiple vCPU threads concurrently faulting on a single uffd because

    (a) "-u" (run test in userfaultfd mode) creates a uffd for each vCPU's
        region, so that each uffd services a single vCPU thread.
    (b) "-u -o" (userfaultfd mode + overlapped vCPU memory accesses)
        simply doesn't work: the test tries to register the same memory
        to multiple uffds, causing an error.

Add support for many vCPUs per uffd by:
    (1) Keeping "-u" behavior unchanged.
    (2) Making "-u -a" create a single uffd for all of guest memory.
    (3) Making "-u -o" implicitly pass "-a", solving the problem in (b).
In cases (2) and (3) all vCPU threads fault on a single uffd.

With potentially multiple vCPUs per UFFD, it makes sense to allow
configuring the number of reader threads per UFFD as well: add the "-r"
flag to do so.
Signed-off-by: Anish Moorthy <amoorthy@google.com>
Acked-by: James Houghton <jthoughton@google.com>
Link: https://lore.kernel.org/r/20240215235405.368539-12-amoorthy@google.com
[sean: fix kernel style violations, use calloc() for arrays]
Signed-off-by: Sean Christopherson <seanjc@google.com>
parent 2ca76c12
...@@ -375,14 +375,14 @@ static void setup_uffd(struct kvm_vm *vm, struct test_params *p, ...@@ -375,14 +375,14 @@ static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
*pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
pt_args.hva, pt_args.hva,
pt_args.paging_size, pt_args.paging_size,
test->uffd_pt_handler); 1, test->uffd_pt_handler);
*data_uffd = NULL; *data_uffd = NULL;
if (test->uffd_data_handler) if (test->uffd_data_handler)
*data_uffd = uffd_setup_demand_paging(uffd_mode, 0, *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
data_args.hva, data_args.hva,
data_args.paging_size, data_args.paging_size,
test->uffd_data_handler); 1, test->uffd_data_handler);
} }
static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
......
...@@ -77,8 +77,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, ...@@ -77,8 +77,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
copy.mode = 0; copy.mode = 0;
r = ioctl(uffd, UFFDIO_COPY, &copy); r = ioctl(uffd, UFFDIO_COPY, &copy);
if (r == -1) { /*
pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", * When multiple vCPU threads fault on a single page and there are
* multiple readers for the UFFD, at least one of the UFFDIO_COPYs
* will fail with EEXIST: handle that case without signaling an
* error.
*
* Note that this also suppresses any EEXISTs occurring from,
* e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
* happens here, but a realistic VMM might potentially maintain
* some external state to correctly surface EEXISTs to userspace
* (or prevent duplicate COPY/CONTINUEs in the first place).
*/
if (r == -1 && errno != EEXIST) {
pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n",
addr, tid, errno); addr, tid, errno);
return r; return r;
} }
...@@ -89,8 +101,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, ...@@ -89,8 +101,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
cont.range.len = demand_paging_size; cont.range.len = demand_paging_size;
r = ioctl(uffd, UFFDIO_CONTINUE, &cont); r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
if (r == -1) { /*
pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", * When multiple vCPU threads fault on a single page and there are
* multiple readers for the UFFD, at least one of the UFFDIO_CONTINUEs
* will fail with EEXIST: handle that case without signaling an
* error.
*
* Note that this also suppresses any EEXISTs occurring from,
* e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
* happens here, but a realistic VMM might potentially maintain
* some external state to correctly surface EEXISTs to userspace
* (or prevent duplicate COPY/CONTINUEs in the first place).
*/
if (r == -1 && errno != EEXIST) {
pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n",
addr, tid, errno); addr, tid, errno);
return r; return r;
} }
...@@ -110,7 +134,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, ...@@ -110,7 +134,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
struct test_params { struct test_params {
int uffd_mode; int uffd_mode;
bool single_uffd;
useconds_t uffd_delay; useconds_t uffd_delay;
int readers_per_uffd;
enum vm_mem_backing_src_type src_type; enum vm_mem_backing_src_type src_type;
bool partition_vcpu_memory_access; bool partition_vcpu_memory_access;
}; };
...@@ -131,11 +157,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) ...@@ -131,11 +157,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct memstress_vcpu_args *vcpu_args; struct memstress_vcpu_args *vcpu_args;
struct test_params *p = arg; struct test_params *p = arg;
struct uffd_desc **uffd_descs = NULL; struct uffd_desc **uffd_descs = NULL;
uint64_t uffd_region_size;
struct timespec start; struct timespec start;
struct timespec ts_diff; struct timespec ts_diff;
double vcpu_paging_rate; double vcpu_paging_rate;
struct kvm_vm *vm; struct kvm_vm *vm;
int i; int i, num_uffds = 0;
vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
p->src_type, p->partition_vcpu_memory_access); p->src_type, p->partition_vcpu_memory_access);
...@@ -148,7 +175,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) ...@@ -148,7 +175,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memset(guest_data_prototype, 0xAB, demand_paging_size); memset(guest_data_prototype, 0xAB, demand_paging_size);
if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
for (i = 0; i < nr_vcpus; i++) { num_uffds = p->single_uffd ? 1 : nr_vcpus;
for (i = 0; i < num_uffds; i++) {
vcpu_args = &memstress_args.vcpu_args[i]; vcpu_args = &memstress_args.vcpu_args[i];
prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
vcpu_args->pages * memstress_args.guest_page_size); vcpu_args->pages * memstress_args.guest_page_size);
...@@ -156,9 +184,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) ...@@ -156,9 +184,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
} }
if (p->uffd_mode) { if (p->uffd_mode) {
uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); num_uffds = p->single_uffd ? 1 : nr_vcpus;
uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds;
uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *));
TEST_ASSERT(uffd_descs, "Memory allocation failed"); TEST_ASSERT(uffd_descs, "Memory allocation failed");
for (i = 0; i < nr_vcpus; i++) { for (i = 0; i < num_uffds; i++) {
struct memstress_vcpu_args *vcpu_args;
void *vcpu_hva; void *vcpu_hva;
vcpu_args = &memstress_args.vcpu_args[i]; vcpu_args = &memstress_args.vcpu_args[i];
...@@ -171,7 +203,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) ...@@ -171,7 +203,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
*/ */
uffd_descs[i] = uffd_setup_demand_paging( uffd_descs[i] = uffd_setup_demand_paging(
p->uffd_mode, p->uffd_delay, vcpu_hva, p->uffd_mode, p->uffd_delay, vcpu_hva,
vcpu_args->pages * memstress_args.guest_page_size, uffd_region_size,
p->readers_per_uffd,
&handle_uffd_page_request); &handle_uffd_page_request);
} }
} }
...@@ -188,7 +221,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) ...@@ -188,7 +221,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (p->uffd_mode) { if (p->uffd_mode) {
/* Tell the user fault fd handler threads to quit */ /* Tell the user fault fd handler threads to quit */
for (i = 0; i < nr_vcpus; i++) for (i = 0; i < num_uffds; i++)
uffd_stop_demand_paging(uffd_descs[i]); uffd_stop_demand_paging(uffd_descs[i]);
} }
...@@ -212,15 +245,20 @@ static void run_test(enum vm_guest_mode mode, void *arg) ...@@ -212,15 +245,20 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name) static void help(char *name)
{ {
puts(""); puts("");
printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
" [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
" [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
guest_modes_help(); guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n"); " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
kvm_print_vcpu_pinning_help(); kvm_print_vcpu_pinning_help();
printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n"
" creating one for each region paged by a unique vCPU\n"
" Set implicitly with -o, and no effect without -u.\n");
printf(" -d: add a delay in usec to the User Fault\n" printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n" " FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n"); " overheads. Ignored without -u.\n");
printf(" -r: Set the number of reader threads per uffd.\n");
printf(" -b: specify the size of the memory region which should be\n" printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n" " demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n"); " Default: 1G\n");
...@@ -239,12 +277,14 @@ int main(int argc, char *argv[]) ...@@ -239,12 +277,14 @@ int main(int argc, char *argv[])
struct test_params p = { struct test_params p = {
.src_type = DEFAULT_VM_MEM_SRC, .src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true, .partition_vcpu_memory_access = true,
.readers_per_uffd = 1,
.single_uffd = false,
}; };
int opt; int opt;
guest_modes_append_default(); guest_modes_append_default();
while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) { while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) {
switch (opt) { switch (opt) {
case 'm': case 'm':
guest_modes_cmdline(optarg); guest_modes_cmdline(optarg);
...@@ -256,6 +296,9 @@ int main(int argc, char *argv[]) ...@@ -256,6 +296,9 @@ int main(int argc, char *argv[])
p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
break; break;
case 'a':
p.single_uffd = true;
break;
case 'd': case 'd':
p.uffd_delay = strtoul(optarg, NULL, 0); p.uffd_delay = strtoul(optarg, NULL, 0);
TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
...@@ -276,6 +319,13 @@ int main(int argc, char *argv[]) ...@@ -276,6 +319,13 @@ int main(int argc, char *argv[])
break; break;
case 'o': case 'o':
p.partition_vcpu_memory_access = false; p.partition_vcpu_memory_access = false;
p.single_uffd = true;
break;
case 'r':
p.readers_per_uffd = atoi(optarg);
TEST_ASSERT(p.readers_per_uffd >= 1,
"Invalid number of readers per uffd %d: must be >=1",
p.readers_per_uffd);
break; break;
case 'h': case 'h':
default: default:
......
...@@ -17,17 +17,27 @@ ...@@ -17,17 +17,27 @@
typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
struct uffd_desc { struct uffd_reader_args {
int uffd_mode; int uffd_mode;
int uffd; int uffd;
int pipefds[2];
useconds_t delay; useconds_t delay;
uffd_handler_t handler; uffd_handler_t handler;
pthread_t thread; /* Holds the read end of the pipe for killing the reader. */
int pipe;
};
struct uffd_desc {
int uffd;
uint64_t num_readers;
/* Holds the write ends of the pipes for killing the readers. */
int *pipefds;
pthread_t *readers;
struct uffd_reader_args *reader_args;
}; };
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len, void *hva, uint64_t len,
uint64_t num_readers,
uffd_handler_t handler); uffd_handler_t handler);
void uffd_stop_demand_paging(struct uffd_desc *uffd); void uffd_stop_demand_paging(struct uffd_desc *uffd);
......
...@@ -27,10 +27,8 @@ ...@@ -27,10 +27,8 @@
static void *uffd_handler_thread_fn(void *arg) static void *uffd_handler_thread_fn(void *arg)
{ {
struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg;
int uffd = uffd_desc->uffd; int uffd = reader_args->uffd;
int pipefd = uffd_desc->pipefds[0];
useconds_t delay = uffd_desc->delay;
int64_t pages = 0; int64_t pages = 0;
struct timespec start; struct timespec start;
struct timespec ts_diff; struct timespec ts_diff;
...@@ -44,7 +42,7 @@ static void *uffd_handler_thread_fn(void *arg) ...@@ -44,7 +42,7 @@ static void *uffd_handler_thread_fn(void *arg)
pollfd[0].fd = uffd; pollfd[0].fd = uffd;
pollfd[0].events = POLLIN; pollfd[0].events = POLLIN;
pollfd[1].fd = pipefd; pollfd[1].fd = reader_args->pipe;
pollfd[1].events = POLLIN; pollfd[1].events = POLLIN;
r = poll(pollfd, 2, -1); r = poll(pollfd, 2, -1);
...@@ -92,9 +90,9 @@ static void *uffd_handler_thread_fn(void *arg) ...@@ -92,9 +90,9 @@ static void *uffd_handler_thread_fn(void *arg)
if (!(msg.event & UFFD_EVENT_PAGEFAULT)) if (!(msg.event & UFFD_EVENT_PAGEFAULT))
continue; continue;
if (delay) if (reader_args->delay)
usleep(delay); usleep(reader_args->delay);
r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
if (r < 0) if (r < 0)
return NULL; return NULL;
pages++; pages++;
...@@ -110,6 +108,7 @@ static void *uffd_handler_thread_fn(void *arg) ...@@ -110,6 +108,7 @@ static void *uffd_handler_thread_fn(void *arg)
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len, void *hva, uint64_t len,
uint64_t num_readers,
uffd_handler_t handler) uffd_handler_t handler)
{ {
struct uffd_desc *uffd_desc; struct uffd_desc *uffd_desc;
...@@ -118,14 +117,25 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, ...@@ -118,14 +117,25 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
struct uffdio_api uffdio_api; struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register; struct uffdio_register uffdio_register;
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
int ret; int ret, i;
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING", is_minor ? "MINOR" : "MISSING",
is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc)); uffd_desc = malloc(sizeof(struct uffd_desc));
TEST_ASSERT(uffd_desc, "malloc failed"); TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
uffd_desc->pipefds = calloc(sizeof(int), num_readers);
TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes");
uffd_desc->readers = calloc(sizeof(pthread_t), num_readers);
TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads");
uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers);
TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args");
uffd_desc->num_readers = num_readers;
/* In order to get minor faults, prefault via the alias. */ /* In order to get minor faults, prefault via the alias. */
if (is_minor) if (is_minor)
...@@ -148,18 +158,28 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, ...@@ -148,18 +158,28 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
expected_ioctls, "missing userfaultfd ioctls"); expected_ioctls, "missing userfaultfd ioctls");
ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(!ret, "Failed to set up pipefd");
uffd_desc->uffd_mode = uffd_mode;
uffd_desc->uffd = uffd; uffd_desc->uffd = uffd;
uffd_desc->delay = delay; for (i = 0; i < uffd_desc->num_readers; ++i) {
uffd_desc->handler = handler; int pipes[2];
pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
uffd_desc); ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK);
TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p",
i, uffd_desc);
PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", uffd_desc->pipefds[i] = pipes[1];
hva, hva + len);
uffd_desc->reader_args[i].uffd_mode = uffd_mode;
uffd_desc->reader_args[i].uffd = uffd;
uffd_desc->reader_args[i].delay = delay;
uffd_desc->reader_args[i].handler = handler;
uffd_desc->reader_args[i].pipe = pipes[0];
pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn,
&uffd_desc->reader_args[i]);
PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n",
i, hva, hva + len);
}
return uffd_desc; return uffd_desc;
} }
...@@ -167,19 +187,26 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, ...@@ -167,19 +187,26 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void uffd_stop_demand_paging(struct uffd_desc *uffd) void uffd_stop_demand_paging(struct uffd_desc *uffd)
{ {
char c = 0; char c = 0;
int ret; int i;
ret = write(uffd->pipefds[1], &c, 1); for (i = 0; i < uffd->num_readers; ++i)
TEST_ASSERT(ret == 1, "Unable to write to pipefd"); TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1,
"Unable to write to pipefd %i for uffd_desc %p", i, uffd);
ret = pthread_join(uffd->thread, NULL); for (i = 0; i < uffd->num_readers; ++i)
TEST_ASSERT(ret == 0, "Pthread_join failed."); TEST_ASSERT(!pthread_join(uffd->readers[i], NULL),
"Pthread_join failed on reader %i for uffd_desc %p", i, uffd);
close(uffd->uffd); close(uffd->uffd);
close(uffd->pipefds[1]); for (i = 0; i < uffd->num_readers; ++i) {
close(uffd->pipefds[0]); close(uffd->pipefds[i]);
close(uffd->reader_args[i].pipe);
}
free(uffd->pipefds);
free(uffd->readers);
free(uffd->reader_args);
free(uffd); free(uffd);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment