Commit 36988070 authored by Rajneesh Bhardwaj's avatar Rajneesh Bhardwaj Committed by Alex Deucher

drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs

Checkpoint-Restore in userspace (CRIU) is a powerful tool that can
snapshot a running process and later restore it on same or a remote
machine but expects the processes that have a device file (e.g. GPU)
associated with them, provide necessary driver support to assist CRIU
and its extensible plugin interface. Thus, In order to support the
Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute
Kernel driver, needs to provide a set of new APIs that provide
necessary VRAM metadata and its contents to a userspace component
(CRIU plugin) that can store it in form of image files.

This introduces some new ioctls which will be used to checkpoint-Restore
any KFD bound user process. KFD only allows ioctl calls from the same
process that opened the KFD file descriptor. Since these ioctls are
expected to be called from a KFD criu plugin which has elevated ptrace
attached privileges and CAP_CHECKPOINT_RESTORE capabilities attached with
the file descriptors so modify KFD to allow such calls.

(API redesigned by David Yat Sin)
Suggested-by: default avatarFelix Kuehling <felix.kuehling@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarDavid Yat Sin <david.yatsin@amd.com>
Signed-off-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent afa37315
......@@ -33,6 +33,7 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
#include <asm/processor.h>
#include "kfd_priv.h"
......@@ -1859,6 +1860,75 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
}
#endif
static int criu_checkpoint(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
{
return 0;
}
static int criu_restore(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
{
return 0;
}
static int criu_unpause(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
{
return 0;
}
static int criu_resume(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
{
return 0;
}
static int criu_process_info(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
{
return 0;
}
static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
{
struct kfd_ioctl_criu_args *args = data;
int ret;
dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
switch (args->op) {
case KFD_CRIU_OP_PROCESS_INFO:
ret = criu_process_info(filep, p, args);
break;
case KFD_CRIU_OP_CHECKPOINT:
ret = criu_checkpoint(filep, p, args);
break;
case KFD_CRIU_OP_UNPAUSE:
ret = criu_unpause(filep, p, args);
break;
case KFD_CRIU_OP_RESTORE:
ret = criu_restore(filep, p, args);
break;
case KFD_CRIU_OP_RESUME:
ret = criu_resume(filep, p, args);
break;
default:
dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
ret = -EINVAL;
break;
}
if (ret)
dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
return ret;
}
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
......@@ -1962,6 +2032,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
kfd_ioctl_set_xnack_mode, 0),
AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
......@@ -1976,6 +2050,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
char *kdata = NULL;
unsigned int usize, asize;
int retcode = -EINVAL;
bool ptrace_attached = false;
if (nr >= AMDKFD_CORE_IOCTL_COUNT)
goto err_i1;
......@@ -2001,7 +2076,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
* processes need to create their own KFD device context.
*/
process = filep->private_data;
if (process->lead_thread != current->group_leader) {
rcu_read_lock();
if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
ptrace_parent(process->lead_thread) == current)
ptrace_attached = true;
rcu_read_unlock();
if (process->lead_thread != current->group_leader
&& !ptrace_attached) {
dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
retcode = -EBADF;
goto err_i1;
......@@ -2016,6 +2099,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
goto err_i1;
}
/*
* Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
* CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a
* more priviledged access.
*/
if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
if (!capable(CAP_CHECKPOINT_RESTORE) &&
!capable(CAP_SYS_ADMIN)) {
retcode = -EACCES;
goto err_i1;
}
}
if (cmd & (IOC_IN | IOC_OUT)) {
if (asize <= sizeof(stack_kdata)) {
kdata = stack_kdata;
......
......@@ -121,7 +121,26 @@
*/
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
/**
* enum kfd_ioctl_flags - KFD ioctl flags
* Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
* userspace can use a given ioctl.
*/
enum kfd_ioctl_flags {
/*
* @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
* Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
* perform privileged operations and load arbitrary data into MQDs and
* eventually HQD registers when the queue is mapped by HWS. In order to
* prevent this we should perform additional security checks.
*
* This is equivalent to callers with the CHECKPOINT_RESTORE capability.
*
* Note: Since earlier versions of docker do not support CHECKPOINT_RESTORE,
* we also allow ioctls with SYS_ADMIN capability.
*/
KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
};
/*
* Kernel module parameter to specify maximum number of supported queues per
* device
......@@ -1006,6 +1025,50 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
uint64_t tba_addr,
uint64_t tma_addr);
/* CRIU */
/*
* Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
* structures:
* kfd_criu_process_priv_data
* kfd_criu_device_priv_data
* kfd_criu_bo_priv_data
* kfd_criu_queue_priv_data
* kfd_criu_event_priv_data
* kfd_criu_svm_range_priv_data
*/
#define KFD_CRIU_PRIV_VERSION 1
struct kfd_criu_process_priv_data {
uint32_t version;
};
struct kfd_criu_device_priv_data {
/* For future use */
uint64_t reserved;
};
struct kfd_criu_bo_priv_data {
uint64_t reserved;
};
struct kfd_criu_svm_range_priv_data {
uint32_t object_type;
uint32_t reserved;
};
struct kfd_criu_queue_priv_data {
uint32_t object_type;
uint32_t reserved;
};
struct kfd_criu_event_priv_data {
uint32_t object_type;
uint32_t reserved;
};
/* CRIU - End */
/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
......
......@@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args {
__u32 anon_fd; /* from KFD */
};
/**************************************************************************************************
* CRIU IOCTLs (Checkpoint Restore In Userspace)
*
* When checkpointing a process, the userspace application will perform:
* 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts
* all the queues.
* 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges)
* 3. UNPAUSE op to un-evict all the queues
*
* When restoring a process, the CRIU userspace application will perform:
*
* 1. RESTORE op to restore process contents
* 2. RESUME op to start the process
*
* Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User
* application needs to perform an UNPAUSE operation after calling PROCESS_INFO.
*/
enum kfd_criu_op {
KFD_CRIU_OP_PROCESS_INFO,
KFD_CRIU_OP_CHECKPOINT,
KFD_CRIU_OP_UNPAUSE,
KFD_CRIU_OP_RESTORE,
KFD_CRIU_OP_RESUME,
};
/**
* kfd_ioctl_criu_args - Arguments perform CRIU operation
* @devices: [in/out] User pointer to memory location for devices information.
* This is an array of type kfd_criu_device_bucket.
* @bos: [in/out] User pointer to memory location for BOs information
* This is an array of type kfd_criu_bo_bucket.
* @priv_data: [in/out] User pointer to memory location for private data
* @priv_data_size: [in/out] Size of priv_data in bytes
* @num_devices: [in/out] Number of GPUs used by process. Size of @devices array.
* @num_bos [in/out] Number of BOs used by process. Size of @bos array.
* @num_objects: [in/out] Number of objects used by process. Objects are opaque to
* user application.
* @pid: [in/out] PID of the process being checkpointed
* @op [in] Type of operation (kfd_criu_op)
*
* Return: 0 on success, -errno on failure
*/
struct kfd_ioctl_criu_args {
__u64 devices; /* Used during ops: CHECKPOINT, RESTORE */
__u64 bos; /* Used during ops: CHECKPOINT, RESTORE */
__u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */
__u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */
__u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */
__u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */
__u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */
__u32 pid; /* Used during ops: PROCESS_INFO, RESUME */
__u32 op;
};
struct kfd_criu_device_bucket {
__u32 user_gpu_id;
__u32 actual_gpu_id;
__u32 drm_fd;
__u32 pad;
};
struct kfd_criu_bo_bucket {
__u64 addr;
__u64 size;
__u64 offset;
__u64 restored_offset; /* During restore, updated offset for BO */
__u32 gpu_id; /* This is the user_gpu_id */
__u32 alloc_flags;
__u32 dmabuf_fd;
__u32 pad;
};
/* CRIU IOCTLs - END */
/**************************************************************************************************/
/* Register offset inside the remapped mmio page
*/
enum kfd_mmio_remap {
......@@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args {
#define AMDKFD_IOC_SET_XNACK_MODE \
AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
#define AMDKFD_IOC_CRIU_OP \
AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x22
#define AMDKFD_COMMAND_END 0x23
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment