Commit a4500b82 authored by Al Viro, committed by Alex Deucher

drm/amdkfd: CRIU fixes

Instead of trying to use close_fd() on failure exits, just have
criu_get_prime_handle() store the file reference without inserting
it into descriptor table.

Then, once the callers are past the last failure exit, they can go
and either insert all those file references into the corresponding
slots of descriptor table, or drop all those file references and
free the unused descriptors.
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6c6ca71b
...@@ -36,7 +36,6 @@ ...@@ -36,7 +36,6 @@
#include <linux/mman.h> #include <linux/mman.h>
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/dma-buf.h> #include <linux/dma-buf.h>
#include <linux/fdtable.h>
#include <linux/processor.h> #include <linux/processor.h>
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_device_queue_manager.h" #include "kfd_device_queue_manager.h"
...@@ -1835,7 +1834,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p) ...@@ -1835,7 +1834,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
} }
static int criu_get_prime_handle(struct kgd_mem *mem, static int criu_get_prime_handle(struct kgd_mem *mem,
int flags, u32 *shared_fd) int flags, u32 *shared_fd,
struct file **file)
{ {
struct dma_buf *dmabuf; struct dma_buf *dmabuf;
int ret; int ret;
...@@ -1846,13 +1846,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem, ...@@ -1846,13 +1846,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
return ret; return ret;
} }
ret = dma_buf_fd(dmabuf, flags); ret = get_unused_fd_flags(flags);
if (ret < 0) { if (ret < 0) {
pr_err("dmabuf create fd failed, ret:%d\n", ret); pr_err("dmabuf create fd failed, ret:%d\n", ret);
goto out_free_dmabuf; goto out_free_dmabuf;
} }
*shared_fd = ret; *shared_fd = ret;
*file = dmabuf->file;
return 0; return 0;
out_free_dmabuf: out_free_dmabuf:
...@@ -1860,6 +1861,25 @@ static int criu_get_prime_handle(struct kgd_mem *mem, ...@@ -1860,6 +1861,25 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
return ret; return ret;
} }
/*
 * Settle the file references gathered in @files, one slot per BO bucket.
 *
 * Entries [0, count) are visited from the highest index down to the lowest;
 * NULL slots are skipped. When @err is zero, each file is installed at the
 * descriptor number stored in the matching bucket's dmabuf_fd. When @err is
 * non-zero, the file reference is dropped and that descriptor number is
 * handed back via put_unused_fd() instead.
 */
static void commit_files(struct file **files,
			 struct kfd_criu_bo_bucket *bo_buckets,
			 unsigned int count,
			 int err)
{
	unsigned int idx;

	for (idx = count; idx > 0; idx--) {
		struct file *f = files[idx - 1];
		unsigned int fd;

		/* No file was stored for this bucket, so nothing to settle. */
		if (!f)
			continue;

		fd = bo_buckets[idx - 1].dmabuf_fd;

		if (!err) {
			/* Success path: publish the file at its descriptor. */
			fd_install(fd, f);
			continue;
		}

		/* Failure path: drop the reference, then give the number back. */
		fput(f);
		put_unused_fd(fd);
	}
}
static int criu_checkpoint_bos(struct kfd_process *p, static int criu_checkpoint_bos(struct kfd_process *p,
uint32_t num_bos, uint32_t num_bos,
uint8_t __user *user_bos, uint8_t __user *user_bos,
...@@ -1868,6 +1888,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, ...@@ -1868,6 +1888,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
{ {
struct kfd_criu_bo_bucket *bo_buckets; struct kfd_criu_bo_bucket *bo_buckets;
struct kfd_criu_bo_priv_data *bo_privs; struct kfd_criu_bo_priv_data *bo_privs;
struct file **files = NULL;
int ret = 0, pdd_index, bo_index = 0, id; int ret = 0, pdd_index, bo_index = 0, id;
void *mem; void *mem;
...@@ -1881,6 +1902,12 @@ static int criu_checkpoint_bos(struct kfd_process *p, ...@@ -1881,6 +1902,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit; goto exit;
} }
files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
if (!files) {
ret = -ENOMEM;
goto exit;
}
for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) { for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
struct kfd_process_device *pdd = p->pdds[pdd_index]; struct kfd_process_device *pdd = p->pdds[pdd_index];
struct amdgpu_bo *dumper_bo; struct amdgpu_bo *dumper_bo;
...@@ -1923,7 +1950,7 @@ static int criu_checkpoint_bos(struct kfd_process *p, ...@@ -1923,7 +1950,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
ret = criu_get_prime_handle(kgd_mem, ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags & bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0, KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
&bo_bucket->dmabuf_fd); &bo_bucket->dmabuf_fd, &files[bo_index]);
if (ret) if (ret)
goto exit; goto exit;
} else { } else {
...@@ -1974,12 +2001,8 @@ static int criu_checkpoint_bos(struct kfd_process *p, ...@@ -1974,12 +2001,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
*priv_offset += num_bos * sizeof(*bo_privs); *priv_offset += num_bos * sizeof(*bo_privs);
exit: exit:
while (ret && bo_index--) { commit_files(files, bo_buckets, bo_index, ret);
if (bo_buckets[bo_index].alloc_flags kvfree(files);
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[bo_index].dmabuf_fd);
}
kvfree(bo_buckets); kvfree(bo_buckets);
kvfree(bo_privs); kvfree(bo_privs);
return ret; return ret;
...@@ -2331,7 +2354,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, ...@@ -2331,7 +2354,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
static int criu_restore_bo(struct kfd_process *p, static int criu_restore_bo(struct kfd_process *p,
struct kfd_criu_bo_bucket *bo_bucket, struct kfd_criu_bo_bucket *bo_bucket,
struct kfd_criu_bo_priv_data *bo_priv) struct kfd_criu_bo_priv_data *bo_priv,
struct file **file)
{ {
struct kfd_process_device *pdd; struct kfd_process_device *pdd;
struct kgd_mem *kgd_mem; struct kgd_mem *kgd_mem;
...@@ -2383,7 +2407,7 @@ static int criu_restore_bo(struct kfd_process *p, ...@@ -2383,7 +2407,7 @@ static int criu_restore_bo(struct kfd_process *p,
if (bo_bucket->alloc_flags if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR, ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
&bo_bucket->dmabuf_fd); &bo_bucket->dmabuf_fd, file);
if (ret) if (ret)
return ret; return ret;
} else { } else {
...@@ -2400,6 +2424,7 @@ static int criu_restore_bos(struct kfd_process *p, ...@@ -2400,6 +2424,7 @@ static int criu_restore_bos(struct kfd_process *p,
{ {
struct kfd_criu_bo_bucket *bo_buckets = NULL; struct kfd_criu_bo_bucket *bo_buckets = NULL;
struct kfd_criu_bo_priv_data *bo_privs = NULL; struct kfd_criu_bo_priv_data *bo_privs = NULL;
struct file **files = NULL;
int ret = 0; int ret = 0;
uint32_t i = 0; uint32_t i = 0;
...@@ -2413,6 +2438,12 @@ static int criu_restore_bos(struct kfd_process *p, ...@@ -2413,6 +2438,12 @@ static int criu_restore_bos(struct kfd_process *p,
if (!bo_buckets) if (!bo_buckets)
return -ENOMEM; return -ENOMEM;
files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
if (!files) {
ret = -ENOMEM;
goto exit;
}
ret = copy_from_user(bo_buckets, (void __user *)args->bos, ret = copy_from_user(bo_buckets, (void __user *)args->bos,
args->num_bos * sizeof(*bo_buckets)); args->num_bos * sizeof(*bo_buckets));
if (ret) { if (ret) {
...@@ -2438,7 +2469,7 @@ static int criu_restore_bos(struct kfd_process *p, ...@@ -2438,7 +2469,7 @@ static int criu_restore_bos(struct kfd_process *p,
/* Create and map new BOs */ /* Create and map new BOs */
for (; i < args->num_bos; i++) { for (; i < args->num_bos; i++) {
ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]); ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
if (ret) { if (ret) {
pr_debug("Failed to restore BO[%d] ret%d\n", i, ret); pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
goto exit; goto exit;
...@@ -2453,11 +2484,8 @@ static int criu_restore_bos(struct kfd_process *p, ...@@ -2453,11 +2484,8 @@ static int criu_restore_bos(struct kfd_process *p,
ret = -EFAULT; ret = -EFAULT;
exit: exit:
while (ret && i--) { commit_files(files, bo_buckets, i, ret);
if (bo_buckets[i].alloc_flags kvfree(files);
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
close_fd(bo_buckets[i].dmabuf_fd);
}
kvfree(bo_buckets); kvfree(bo_buckets);
kvfree(bo_privs); kvfree(bo_privs);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment