Commit b91c23e0 authored by Felix Kuehling's avatar Felix Kuehling Committed by Alex Deucher

drm/amdkfd: Fix error handling in criu_checkpoint

Checkpoint BOs last. That way we don't need to close dmabuf FDs if
something else fails later. This avoids problematic access to user mode
memory in the error handling code path.

criu_checkpoint_bos has its own error handling and cleanup that does not
depend on access to user memory.

In the private data, keep BOs before the remaining objects. This is
necessary to restore things in the correct order as restoring events
depends on the events-page BO being restored first.

Fixes: be072b06 ("drm/amdkfd: CRIU export BOs as prime dmabuf objects")
Reported-by: default avatarJann Horn <jannh@google.com>
CC: Rajneesh Bhardwaj <Rajneesh.Bhardwaj@amd.com>
Signed-off-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Reviewed-and-tested-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
parent 66f79037
...@@ -1950,7 +1950,7 @@ static int criu_checkpoint(struct file *filep, ...@@ -1950,7 +1950,7 @@ static int criu_checkpoint(struct file *filep,
{ {
int ret; int ret;
uint32_t num_devices, num_bos, num_objects; uint32_t num_devices, num_bos, num_objects;
uint64_t priv_size, priv_offset = 0; uint64_t priv_size, priv_offset = 0, bo_priv_offset;
if (!args->devices || !args->bos || !args->priv_data) if (!args->devices || !args->bos || !args->priv_data)
return -EINVAL; return -EINVAL;
...@@ -1994,38 +1994,34 @@ static int criu_checkpoint(struct file *filep, ...@@ -1994,38 +1994,34 @@ static int criu_checkpoint(struct file *filep,
if (ret) if (ret)
goto exit_unlock; goto exit_unlock;
ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos, /* Leave room for BOs in the private data. They need to be restored
(uint8_t __user *)args->priv_data, &priv_offset); * before events, but we checkpoint them last to simplify the error
if (ret) * handling.
goto exit_unlock; */
bo_priv_offset = priv_offset;
priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
if (num_objects) { if (num_objects) {
ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data, ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
&priv_offset); &priv_offset);
if (ret) if (ret)
goto close_bo_fds; goto exit_unlock;
ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data, ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
&priv_offset); &priv_offset);
if (ret) if (ret)
goto close_bo_fds; goto exit_unlock;
ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset); ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
if (ret) if (ret)
goto close_bo_fds; goto exit_unlock;
} }
close_bo_fds: /* This must be the last thing in this function that can fail.
if (ret) { * Otherwise we leak dmabuf file descriptors.
/* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */ */
uint32_t i; ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos; (uint8_t __user *)args->priv_data, &bo_priv_offset);
for (i = 0; i < num_bos; i++) {
if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
close_fd(bo_buckets[i].dmabuf_fd);
}
}
exit_unlock: exit_unlock:
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment