Commit 81a48979 authored by Linus Torvalds

Merge tag 'x86_sgx_for_v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 SGX updates from Borislav Petkov:
 "Add the guest side of SGX support in KVM guests. Work by Sean
  Christopherson, Kai Huang and Jarkko Sakkinen.

  Along with the usual fixes, cleanups and improvements"

* tag 'x86_sgx_for_v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/sgx: Mark sgx_vepc_vm_ops static
  x86/sgx: Do not update sgx_nr_free_pages in sgx_setup_epc_section()
  x86/sgx: Move provisioning device creation out of SGX driver
  x86/sgx: Add helpers to expose ECREATE and EINIT to KVM
  x86/sgx: Add helper to update SGX_LEPUBKEYHASHn MSRs
  x86/sgx: Add encls_faulted() helper
  x86/sgx: Add SGX2 ENCLS leaf definitions (EAUG, EMODPR and EMODT)
  x86/sgx: Move ENCLS leaf definitions to sgx.h
  x86/sgx: Expose SGX architectural definitions to the kernel
  x86/sgx: Initialize virtual EPC driver even when SGX driver is disabled
  x86/cpu/intel: Allow SGX virtualization without Launch Control support
  x86/sgx: Introduce virtual EPC for use by KVM guests
  x86/sgx: Add SGX_CHILD_PRESENT hardware error code
  x86/sgx: Wipe out EREMOVE from sgx_free_epc_page()
  x86/cpufeatures: Add SGX1 and SGX2 sub-features
  x86/cpufeatures: Make SGX_LC feature bit depend on SGX bit
  x86/sgx: Remove unnecessary kmap() from sgx_ioc_enclave_init()
  selftests/sgx: Use getauxval() to simplify test code
  selftests/sgx: Improve error detection and messages
  x86/sgx: Add a basic NUMA allocation scheme to sgx_alloc_epc_page()
  ...
parents 47e12f14 523caed9
......@@ -209,3 +209,44 @@ An application may be loaded into a container enclave which is specially
configured with a library OS and run-time which permits the application to run.
The enclave run-time and library OS work together to execute the application
when a thread enters the enclave.
Impact of Potential Kernel SGX Bugs
===================================
EPC leaks
---------
When EPC page leaks happen, a WARNING like this is shown in dmesg:
"EREMOVE returned ... and an EPC page was leaked. SGX may become unusable..."
This is effectively a kernel use-after-free of an EPC page, and due
to the way SGX works, the bug is detected at freeing. Rather than
adding the page back to the pool of available EPC pages, the kernel
intentionally leaks the page to avoid additional errors in the future.
When this happens, the kernel will likely soon leak more EPC pages, and
SGX will likely become unusable because the memory available to SGX is
limited. However, while this may be fatal to SGX, the rest of the kernel
is unlikely to be impacted and should continue to work.
As a result, when this happens, the user should stop running any new
SGX workloads (or just any new workloads), and migrate all valuable
workloads. Although a machine reboot can recover all EPC memory, the bug
should be reported to Linux developers.
Virtual EPC
===========
The implementation also has a virtual EPC driver to support SGX enclaves
in guests. Unlike the SGX driver, an EPC page allocated by the virtual
EPC driver doesn't have a specific enclave associated with it. This is
because KVM doesn't track how a guest uses EPC pages.
As a result, the SGX core page reclaimer doesn't support reclaiming EPC
pages allocated to KVM guests through the virtual EPC driver. If the
user wants to deploy SGX applications both on the host and in guests
on the same machine, the user should reserve enough EPC (by subtracting the
total virtual EPC size of all SGX VMs from the physical EPC size) for
host SGX applications so they can run with acceptable performance.
......@@ -9282,6 +9282,7 @@ Q: https://patchwork.kernel.org/project/intel-sgx/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/sgx
F: Documentation/x86/sgx.rst
F: arch/x86/entry/vdso/vsgx.S
F: arch/x86/include/asm/sgx.h
F: arch/x86/include/uapi/asm/sgx.h
F: arch/x86/kernel/cpu/sgx/*
F: tools/testing/selftests/sgx/*
......
......@@ -1933,6 +1933,7 @@ config X86_SGX
depends on CRYPTO_SHA256=y
select SRCU
select MMU_NOTIFIER
select NUMA_KEEP_MEMINFO if NUMA
help
Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
that can be used by applications to set aside private regions of code
......
......@@ -292,6 +292,8 @@
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
......
......@@ -2,15 +2,20 @@
/**
* Copyright(c) 2016-20 Intel Corporation.
*
* Contains data structures defined by the SGX architecture. Data structures
* defined by the Linux software stack should not be placed here.
* Intel Software Guard Extensions (SGX) support.
*/
#ifndef _ASM_X86_SGX_ARCH_H
#define _ASM_X86_SGX_ARCH_H
#ifndef _ASM_X86_SGX_H
#define _ASM_X86_SGX_H
#include <linux/bits.h>
#include <linux/types.h>
/*
* This file contains both data structures defined by SGX architecture and Linux
* defined software data structures and functions. The two should not be mixed
* together for better readability. The architectural definitions come first.
*/
/* The SGX specific CPUID function. */
#define SGX_CPUID 0x12
/* EPC enumeration. */
......@@ -22,16 +27,36 @@
/* The bitmask for the EPC section type. */
#define SGX_CPUID_EPC_MASK GENMASK(3, 0)
/*
* ENCLS leaf function numbers. Each enumerator is named after the leaf it
* selects. EAUG, EMODPR and EMODT are the SGX2 leaves. Note that the value
* 0x07 has no enumerator in this list.
*/
enum sgx_encls_function {
ECREATE = 0x00,
EADD = 0x01,
EINIT = 0x02,
EREMOVE = 0x03,
EDGBRD = 0x04,
EDGBWR = 0x05,
EEXTEND = 0x06,
ELDU = 0x08,
EBLOCK = 0x09,
EPA = 0x0A,
EWB = 0x0B,
ETRACK = 0x0C,
EAUG = 0x0D,
EMODPR = 0x0E,
EMODT = 0x0F,
};
/**
* enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV
* %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not
* been completed yet.
* %SGX_CHILD_PRESENT: SECS has child pages present in the EPC.
* %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's
* public key does not match IA32_SGXLEPUBKEYHASH.
* %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received.
*/
enum sgx_return_code {
SGX_NOT_TRACKED = 11,
SGX_CHILD_PRESENT = 13,
SGX_INVALID_EINITTOKEN = 16,
SGX_UNMASKED_EVENT = 128,
};
......@@ -335,4 +360,19 @@ struct sgx_sigstruct {
#define SGX_LAUNCH_TOKEN_SIZE 304
#endif /* _ASM_X86_SGX_ARCH_H */
/*
* Do not put any hardware-defined SGX structure representations below this
* comment!
*/
#ifdef CONFIG_X86_SGX_KVM
int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
int *trapnr);
int sgx_virt_einit(void __user *sigstruct, void __user *token,
void __user *secs, u64 *lepubkeyhash, int *trapnr);
#endif
int sgx_set_attribute(unsigned long *allowed_attributes,
unsigned int attribute_fd);
#endif /* _ASM_X86_SGX_H */
......@@ -72,6 +72,9 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_AVX512_FP16, X86_FEATURE_AVX512BW },
{ X86_FEATURE_ENQCMD, X86_FEATURE_XSAVES },
{ X86_FEATURE_PER_THREAD_MBA, X86_FEATURE_MBA },
{ X86_FEATURE_SGX_LC, X86_FEATURE_SGX },
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
{}
};
......
......@@ -93,15 +93,9 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
}
#endif /* CONFIG_X86_VMX_FEATURE_NAMES */
static void clear_sgx_caps(void)
{
setup_clear_cpu_cap(X86_FEATURE_SGX);
setup_clear_cpu_cap(X86_FEATURE_SGX_LC);
}
static int __init nosgx(char *str)
{
clear_sgx_caps();
setup_clear_cpu_cap(X86_FEATURE_SGX);
return 0;
}
......@@ -110,23 +104,30 @@ early_param("nosgx", nosgx);
void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
{
bool enable_sgx_kvm = false, enable_sgx_driver = false;
bool tboot = tboot_enabled();
bool enable_sgx;
bool enable_vmx;
u64 msr;
if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) {
clear_cpu_cap(c, X86_FEATURE_VMX);
clear_sgx_caps();
clear_cpu_cap(c, X86_FEATURE_SGX);
return;
}
/*
* Enable SGX if and only if the kernel supports SGX and Launch Control
* is supported, i.e. disable SGX if the LE hash MSRs can't be written.
*/
enable_sgx = cpu_has(c, X86_FEATURE_SGX) &&
cpu_has(c, X86_FEATURE_SGX_LC) &&
IS_ENABLED(CONFIG_X86_SGX);
enable_vmx = cpu_has(c, X86_FEATURE_VMX) &&
IS_ENABLED(CONFIG_KVM_INTEL);
if (cpu_has(c, X86_FEATURE_SGX) && IS_ENABLED(CONFIG_X86_SGX)) {
/*
* Separate out SGX driver enabling from KVM. This allows KVM
* guests to use SGX even if the kernel SGX driver refuses to
* use it. This happens if flexible Launch Control is not
* available.
*/
enable_sgx_driver = cpu_has(c, X86_FEATURE_SGX_LC);
enable_sgx_kvm = enable_vmx && IS_ENABLED(CONFIG_X86_SGX_KVM);
}
if (msr & FEAT_CTL_LOCKED)
goto update_caps;
......@@ -142,15 +143,18 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
* i.e. KVM is enabled, to avoid unnecessarily adding an attack vector
* for the kernel, e.g. using VMX to hide malicious code.
*/
if (cpu_has(c, X86_FEATURE_VMX) && IS_ENABLED(CONFIG_KVM_INTEL)) {
if (enable_vmx) {
msr |= FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
if (tboot)
msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX;
}
if (enable_sgx)
msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED;
if (enable_sgx_kvm || enable_sgx_driver) {
msr |= FEAT_CTL_SGX_ENABLED;
if (enable_sgx_driver)
msr |= FEAT_CTL_SGX_LC_ENABLED;
}
wrmsrl(MSR_IA32_FEAT_CTL, msr);
......@@ -173,10 +177,29 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
}
update_sgx:
if (!(msr & FEAT_CTL_SGX_ENABLED) ||
!(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) {
if (enable_sgx)
pr_err_once("SGX disabled by BIOS\n");
clear_sgx_caps();
if (!(msr & FEAT_CTL_SGX_ENABLED)) {
if (enable_sgx_kvm || enable_sgx_driver)
pr_err_once("SGX disabled by BIOS.\n");
clear_cpu_cap(c, X86_FEATURE_SGX);
return;
}
/*
* VMX feature bit may be cleared due to being disabled in BIOS,
* in which case SGX virtualization cannot be supported either.
*/
if (!cpu_has(c, X86_FEATURE_VMX) && enable_sgx_kvm) {
pr_err_once("SGX virtualization disabled due to lack of VMX.\n");
enable_sgx_kvm = 0;
}
if (!(msr & FEAT_CTL_SGX_LC_ENABLED) && enable_sgx_driver) {
if (!enable_sgx_kvm) {
pr_err_once("SGX Launch Control is locked. Disable SGX.\n");
clear_cpu_cap(c, X86_FEATURE_SGX);
} else {
pr_err_once("SGX Launch Control is locked. Support SGX virtualization only.\n");
clear_cpu_cap(c, X86_FEATURE_SGX_LC);
}
}
}
......@@ -36,6 +36,8 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_CDP_L2, CPUID_ECX, 2, 0x00000010, 2 },
{ X86_FEATURE_MBA, CPUID_EBX, 3, 0x00000010, 0 },
{ X86_FEATURE_PER_THREAD_MBA, CPUID_ECX, 0, 0x00000010, 3 },
{ X86_FEATURE_SGX1, CPUID_EAX, 0, 0x00000012, 0 },
{ X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 },
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
......
......@@ -3,3 +3,4 @@ obj-y += \
encl.o \
ioctl.o \
main.o
obj-$(CONFIG_X86_SGX_KVM) += virt.o
......@@ -136,10 +136,6 @@ static const struct file_operations sgx_encl_fops = {
.get_unmapped_area = sgx_get_unmapped_area,
};
const struct file_operations sgx_provision_fops = {
.owner = THIS_MODULE,
};
static struct miscdevice sgx_dev_enclave = {
.minor = MISC_DYNAMIC_MINOR,
.name = "sgx_enclave",
......@@ -147,13 +143,6 @@ static struct miscdevice sgx_dev_enclave = {
.fops = &sgx_encl_fops,
};
static struct miscdevice sgx_dev_provision = {
.minor = MISC_DYNAMIC_MINOR,
.name = "sgx_provision",
.nodename = "sgx_provision",
.fops = &sgx_provision_fops,
};
int __init sgx_drv_init(void)
{
unsigned int eax, ebx, ecx, edx;
......@@ -187,11 +176,5 @@ int __init sgx_drv_init(void)
if (ret)
return ret;
ret = misc_register(&sgx_dev_provision);
if (ret) {
misc_deregister(&sgx_dev_enclave);
return ret;
}
return 0;
}
......@@ -7,7 +7,7 @@
#include <linux/shmem_fs.h>
#include <linux/suspend.h>
#include <linux/sched/mm.h>
#include "arch.h"
#include <asm/sgx.h>
#include "encl.h"
#include "encls.h"
#include "sgx.h"
......@@ -78,7 +78,7 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
ret = __sgx_encl_eldu(encl_page, epc_page, secs_page);
if (ret) {
sgx_free_epc_page(epc_page);
sgx_encl_free_epc_page(epc_page);
return ERR_PTR(ret);
}
......@@ -404,7 +404,7 @@ void sgx_encl_release(struct kref *ref)
if (sgx_unmark_page_reclaimable(entry->epc_page))
continue;
sgx_free_epc_page(entry->epc_page);
sgx_encl_free_epc_page(entry->epc_page);
encl->secs_child_cnt--;
entry->epc_page = NULL;
}
......@@ -415,7 +415,7 @@ void sgx_encl_release(struct kref *ref)
xa_destroy(&encl->page_array);
if (!encl->secs_child_cnt && encl->secs.epc_page) {
sgx_free_epc_page(encl->secs.epc_page);
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
}
......@@ -423,7 +423,7 @@ void sgx_encl_release(struct kref *ref)
va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
list);
list_del(&va_page->list);
sgx_free_epc_page(va_page->epc_page);
sgx_encl_free_epc_page(va_page->epc_page);
kfree(va_page);
}
......@@ -686,7 +686,7 @@ struct sgx_epc_page *sgx_alloc_va_page(void)
ret = __epa(sgx_get_epc_virt_addr(epc_page));
if (ret) {
WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret);
sgx_free_epc_page(epc_page);
sgx_encl_free_epc_page(epc_page);
return ERR_PTR(-EFAULT);
}
......@@ -735,3 +735,24 @@ bool sgx_va_page_full(struct sgx_va_page *va_page)
return slot == SGX_VA_SLOT_COUNT;
}
/**
 * sgx_encl_free_epc_page - release an EPC page that belonged to an enclave
 * @page:	the EPC page to release
 *
 * Runs EREMOVE on @page first. The page is handed to sgx_free_epc_page() only
 * when EREMOVE succeeds. On failure a one-time WARNING is emitted and the page
 * is deliberately not put back on the free list, i.e. it is leaked.
 */
void sgx_encl_free_epc_page(struct sgx_epc_page *page)
{
	int err;

	/* A page still tracked by the reclaimer is not expected here. */
	WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);

	err = __eremove(sgx_get_epc_virt_addr(page));

	/* Only a successfully removed page may rejoin the free pool. */
	if (!WARN_ONCE(err, EREMOVE_ERROR_MESSAGE, err, err))
		sgx_free_epc_page(page);
}
......@@ -115,5 +115,6 @@ struct sgx_epc_page *sgx_alloc_va_page(void);
unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page);
void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset);
bool sgx_va_page_full(struct sgx_va_page *va_page);
void sgx_encl_free_epc_page(struct sgx_epc_page *page);
#endif /* _X86_ENCL_H */
......@@ -11,21 +11,6 @@
#include <asm/traps.h>
#include "sgx.h"
enum sgx_encls_function {
ECREATE = 0x00,
EADD = 0x01,
EINIT = 0x02,
EREMOVE = 0x03,
EDGBRD = 0x04,
EDGBWR = 0x05,
EEXTEND = 0x06,
ELDU = 0x08,
EBLOCK = 0x09,
EPA = 0x0A,
EWB = 0x0B,
ETRACK = 0x0C,
};
/**
* ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr
*
......@@ -55,6 +40,19 @@ enum sgx_encls_function {
} while (0); \
}
/**
 * encls_faulted() - Tell whether an ENCLS leaf return value encodes a fault
 * @ret:	the return value of an ENCLS leaf function call
 *
 * Return:
 * - true:	ENCLS_FAULT_FLAG is set in @ret, i.e. the leaf faulted.
 * - false:	Otherwise.
 */
static inline bool encls_faulted(int ret)
{
	return (ret & ENCLS_FAULT_FLAG) != 0;
}
/**
* encls_failed() - Check if an ENCLS function failed
* @ret: the return value of an ENCLS function call
......@@ -65,7 +63,7 @@ enum sgx_encls_function {
*/
static inline bool encls_failed(int ret)
{
if (ret & ENCLS_FAULT_FLAG)
if (encls_faulted(ret))
return ENCLS_TRAPNR(ret) != X86_TRAP_PF;
return !!ret;
......
......@@ -2,6 +2,7 @@
/* Copyright(c) 2016-20 Intel Corporation. */
#include <asm/mman.h>
#include <asm/sgx.h>
#include <linux/mman.h>
#include <linux/delay.h>
#include <linux/file.h>
......@@ -47,7 +48,7 @@ static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
encl->page_cnt--;
if (va_page) {
sgx_free_epc_page(va_page->epc_page);
sgx_encl_free_epc_page(va_page->epc_page);
list_del(&va_page->list);
kfree(va_page);
}
......@@ -117,7 +118,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
return 0;
err_out:
sgx_free_epc_page(encl->secs.epc_page);
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
err_out_backing:
......@@ -365,7 +366,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
mmap_read_unlock(current->mm);
err_out_free:
sgx_free_epc_page(epc_page);
sgx_encl_free_epc_page(epc_page);
kfree(encl_page);
return ret;
......@@ -495,7 +496,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
void *token)
{
u64 mrsigner[4];
int i, j, k;
int i, j;
void *addr;
int ret;
......@@ -544,8 +545,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
preempt_disable();
for (k = 0; k < 4; k++)
wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + k, mrsigner[k]);
sgx_update_lepubkeyhash(mrsigner);
ret = __einit(sigstruct, token, addr);
......@@ -568,7 +568,7 @@ static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
}
}
if (ret & ENCLS_FAULT_FLAG) {
if (encls_faulted(ret)) {
if (encls_failed(ret))
ENCLS_WARN(ret, "EINIT");
......@@ -604,7 +604,6 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
{
struct sgx_sigstruct *sigstruct;
struct sgx_enclave_init init_arg;
struct page *initp_page;
void *token;
int ret;
......@@ -615,11 +614,15 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
if (copy_from_user(&init_arg, arg, sizeof(init_arg)))
return -EFAULT;
initp_page = alloc_page(GFP_KERNEL);
if (!initp_page)
/*
* 'sigstruct' must be on a page boundary and 'token' on a 512 byte
* boundary. kmalloc() will give this alignment when allocating
* PAGE_SIZE bytes.
*/
sigstruct = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!sigstruct)
return -ENOMEM;
sigstruct = kmap(initp_page);
token = (void *)((unsigned long)sigstruct + PAGE_SIZE / 2);
memset(token, 0, SGX_LAUNCH_TOKEN_SIZE);
......@@ -645,8 +648,7 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
ret = sgx_encl_init(encl, sigstruct, token);
out:
kunmap(initp_page);
__free_page(initp_page);
kfree(sigstruct);
return ret;
}
......@@ -665,24 +667,11 @@ static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
{
struct sgx_enclave_provision params;
struct file *file;
if (copy_from_user(&params, arg, sizeof(params)))
return -EFAULT;
file = fget(params.fd);
if (!file)
return -EINVAL;
if (file->f_op != &sgx_provision_fops) {
fput(file);
return -EINVAL;
}
encl->attributes_mask |= SGX_ATTR_PROVISIONKEY;
fput(file);
return 0;
return sgx_set_attribute(&encl->attributes_mask, params.fd);
}
long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */
#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/ratelimit.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <asm/sgx.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"
......@@ -23,42 +26,58 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
* with sgx_reclaimer_lock acquired.
*/
static LIST_HEAD(sgx_active_page_list);
static DEFINE_SPINLOCK(sgx_reclaimer_lock);
/* The free page list lock protected variables prepend the lock. */
static unsigned long sgx_nr_free_pages;
/* Nodes with one or more EPC sections. */
static nodemask_t sgx_numa_mask;
/*
* Array with one list_head for each possible NUMA node. Each
* list contains all the sgx_epc_section's which are on that
* node.
*/
static struct sgx_numa_node *sgx_numa_nodes;
static LIST_HEAD(sgx_dirty_page_list);
/*
* Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS
* pages whose child pages blocked EREMOVE.
* Reset post-kexec EPC pages to the uninitialized state. The pages are removed
* from the input list, and made available for the page allocator. SECS pages
* preceding their children in the input list are left intact.
*/
static void sgx_sanitize_section(struct sgx_epc_section *section)
static void __sgx_sanitize_pages(struct list_head *dirty_page_list)
{
struct sgx_epc_page *page;
LIST_HEAD(dirty);
int ret;
/* init_laundry_list is thread-local, no need for a lock: */
while (!list_empty(&section->init_laundry_list)) {
/* dirty_page_list is thread-local, no need for a lock: */
while (!list_empty(dirty_page_list)) {
if (kthread_should_stop())
return;
/* needed for access to ->page_list: */
spin_lock(&section->lock);
page = list_first_entry(&section->init_laundry_list,
struct sgx_epc_page, list);
page = list_first_entry(dirty_page_list, struct sgx_epc_page, list);
ret = __eremove(sgx_get_epc_virt_addr(page));
if (!ret)
list_move(&page->list, &section->page_list);
else
if (!ret) {
/*
* page is now sanitized. Make it available via the SGX
* page allocator:
*/
list_del(&page->list);
sgx_free_epc_page(page);
} else {
/* The page is not yet clean - move to the dirty list. */
list_move_tail(&page->list, &dirty);
spin_unlock(&section->lock);
}
cond_resched();
}
list_splice(&dirty, &section->init_laundry_list);
list_splice(&dirty, dirty_page_list);
}
static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page)
......@@ -278,7 +297,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
sgx_free_epc_page(encl->secs.epc_page);
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
sgx_encl_put_backing(&secs_backing, true);
......@@ -308,6 +327,7 @@ static void sgx_reclaim_pages(void)
struct sgx_epc_section *section;
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
struct sgx_numa_node *node;
pgoff_t page_index;
int cnt = 0;
int ret;
......@@ -379,50 +399,33 @@ static void sgx_reclaim_pages(void)
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
section = &sgx_epc_sections[epc_page->section];
spin_lock(&section->lock);
list_add_tail(&epc_page->list, &section->page_list);
section->free_cnt++;
spin_unlock(&section->lock);
}
}
static unsigned long sgx_nr_free_pages(void)
{
unsigned long cnt = 0;
int i;
for (i = 0; i < sgx_nr_epc_sections; i++)
cnt += sgx_epc_sections[i].free_cnt;
node = section->node;
return cnt;
spin_lock(&node->lock);
list_add_tail(&epc_page->list, &node->free_page_list);
sgx_nr_free_pages++;
spin_unlock(&node->lock);
}
}
static bool sgx_should_reclaim(unsigned long watermark)
{
return sgx_nr_free_pages() < watermark &&
!list_empty(&sgx_active_page_list);
return sgx_nr_free_pages < watermark && !list_empty(&sgx_active_page_list);
}
static int ksgxd(void *p)
{
int i;
set_freezable();
/*
* Sanitize pages in order to recover from kexec(). The 2nd pass is
* required for SECS pages, whose child pages blocked EREMOVE.
*/
for (i = 0; i < sgx_nr_epc_sections; i++)
sgx_sanitize_section(&sgx_epc_sections[i]);
for (i = 0; i < sgx_nr_epc_sections; i++) {
sgx_sanitize_section(&sgx_epc_sections[i]);
__sgx_sanitize_pages(&sgx_dirty_page_list);
__sgx_sanitize_pages(&sgx_dirty_page_list);
/* Should never happen. */
if (!list_empty(&sgx_epc_sections[i].init_laundry_list))
WARN(1, "EPC section %d has unsanitized pages.\n", i);
}
/* sanity check: */
WARN_ON(!list_empty(&sgx_dirty_page_list));
while (!kthread_should_stop()) {
if (try_to_freeze())
......@@ -454,45 +457,56 @@ static bool __init sgx_page_reclaimer_init(void)
return true;
}
static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_section *section)
static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
{
struct sgx_epc_page *page;
struct sgx_numa_node *node = &sgx_numa_nodes[nid];
struct sgx_epc_page *page = NULL;
spin_lock(&section->lock);
spin_lock(&node->lock);
if (list_empty(&section->page_list)) {
spin_unlock(&section->lock);
if (list_empty(&node->free_page_list)) {
spin_unlock(&node->lock);
return NULL;
}
page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
list_del_init(&page->list);
section->free_cnt--;
sgx_nr_free_pages--;
spin_unlock(&node->lock);
spin_unlock(&section->lock);
return page;
}
/**
* __sgx_alloc_epc_page() - Allocate an EPC page
*
* Iterate through EPC sections and borrow a free EPC page to the caller. When a
* page is no longer needed it must be released with sgx_free_epc_page().
* Iterate through NUMA nodes and reserve a free EPC page for the caller. Start
* from the NUMA node where the caller is executing.
*
* Return:
* an EPC page,
* -errno on error
* - an EPC page: A borrowed EPC page, if one was available.
* - NULL: Out of EPC pages.
*/
struct sgx_epc_page *__sgx_alloc_epc_page(void)
{
struct sgx_epc_section *section;
struct sgx_epc_page *page;
int i;
int nid_of_current = numa_node_id();
int nid = nid_of_current;
for (i = 0; i < sgx_nr_epc_sections; i++) {
section = &sgx_epc_sections[i];
if (node_isset(nid_of_current, sgx_numa_mask)) {
page = __sgx_alloc_epc_page_from_node(nid_of_current);
if (page)
return page;
}
/* Fall back to the non-local NUMA nodes: */
while (true) {
nid = next_node_in(nid, sgx_numa_mask);
if (nid == nid_of_current)
break;
page = __sgx_alloc_epc_page_from_section(section);
page = __sgx_alloc_epc_page_from_node(nid);
if (page)
return page;
}
......@@ -598,23 +612,22 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
* sgx_free_epc_page() - Free an EPC page
* @page: an EPC page
*
* Call EREMOVE for an EPC page and insert it back to the list of free pages.
* Put the EPC page back to the list of free pages. It's the caller's
* responsibility to make sure that the page is in uninitialized state. In other
* words, do EREMOVE, EWB or whatever operation is necessary before calling
* this function.
*/
void sgx_free_epc_page(struct sgx_epc_page *page)
{
struct sgx_epc_section *section = &sgx_epc_sections[page->section];
int ret;
struct sgx_numa_node *node = section->node;
WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
spin_lock(&node->lock);
ret = __eremove(sgx_get_epc_virt_addr(page));
if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret))
return;
list_add_tail(&page->list, &node->free_page_list);
sgx_nr_free_pages++;
spin_lock(&section->lock);
list_add_tail(&page->list, &section->page_list);
section->free_cnt++;
spin_unlock(&section->lock);
spin_unlock(&node->lock);
}
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
......@@ -635,18 +648,14 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
}
section->phys_addr = phys_addr;
spin_lock_init(&section->lock);
INIT_LIST_HEAD(&section->page_list);
INIT_LIST_HEAD(&section->init_laundry_list);
for (i = 0; i < nr_pages; i++) {
section->pages[i].section = index;
section->pages[i].flags = 0;
section->pages[i].owner = NULL;
list_add_tail(&section->pages[i].list, &section->init_laundry_list);
list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
}
section->free_cnt = nr_pages;
return true;
}
......@@ -665,8 +674,13 @@ static bool __init sgx_page_cache_init(void)
{
u32 eax, ebx, ecx, edx, type;
u64 pa, size;
int nid;
int i;
sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
if (!sgx_numa_nodes)
return false;
for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);
......@@ -689,6 +703,21 @@ static bool __init sgx_page_cache_init(void)
break;
}
nid = numa_map_to_online_node(phys_to_target_node(pa));
if (nid == NUMA_NO_NODE) {
/* The physical address is already printed above. */
pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
nid = 0;
}
if (!node_isset(nid, sgx_numa_mask)) {
spin_lock_init(&sgx_numa_nodes[nid].lock);
INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
node_set(nid, sgx_numa_mask);
}
sgx_epc_sections[i].node = &sgx_numa_nodes[nid];
sgx_nr_epc_sections++;
}
......@@ -700,6 +729,67 @@ static bool __init sgx_page_cache_init(void)
return true;
}
/*
 * Program the SGX_LEPUBKEYHASH MSRs with the four values supplied by the
 * caller. The bare-metal driver sets them to the hash of the enclave's signer
 * before EINIT; KVM sets them to the guest's virtual MSR values before running
 * EINIT on behalf of the guest.
 *
 * Warns (once) if called from a preemptible context.
 */
void sgx_update_lepubkeyhash(u64 *lepubkeyhash)
{
	int msr_idx = 0;

	WARN_ON_ONCE(preemptible());

	/* The hash MSRs are consecutive, starting at ...HASH0. */
	while (msr_idx < 4) {
		wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + msr_idx,
		       lepubkeyhash[msr_idx]);
		msr_idx++;
	}
}
/*
* File operations of the provisioning device. Only .owner is set. The object's
* address serves as an identity token: sgx_set_attribute() compares a file's
* f_op pointer against it.
*/
const struct file_operations sgx_provision_fops = {
.owner = THIS_MODULE,
};
/*
* Misc device "sgx_provision", with a dynamically assigned minor number and
* sgx_provision_fops as its file operations.
*/
static struct miscdevice sgx_dev_provision = {
.minor = MISC_DYNAMIC_MINOR,
.name = "sgx_provision",
.nodename = "sgx_provision",
.fops = &sgx_provision_fops,
};
/**
 * sgx_set_attribute() - Grant an enclave attribute identified by a file descriptor
 * @allowed_attributes:	Pointer to the mask of allowed enclave attributes
 * @attribute_fd:	File descriptor that stands for a specific attribute
 *
 * Look up @attribute_fd and, if it refers to a file opened on the provisioning
 * device (its file operations are sgx_provision_fops), OR the matching
 * attribute into @allowed_attributes. SGX_ATTR_PROVISIONKEY, represented by
 * /dev/sgx_provision, is the only attribute handled so far.
 *
 * Return:
 * * 0		- SGX_ATTR_PROVISIONKEY was added to @allowed_attributes
 * * -EINVAL	- Invalid or unsupported file descriptor
 */
int sgx_set_attribute(unsigned long *allowed_attributes,
		      unsigned int attribute_fd)
{
	struct file *attr_file = fget(attribute_fd);
	int err = -EINVAL;

	if (!attr_file)
		return err;

	/* Only a file backed by the provisioning device grants the key. */
	if (attr_file->f_op == &sgx_provision_fops) {
		*allowed_attributes |= SGX_ATTR_PROVISIONKEY;
		err = 0;
	}

	fput(attr_file);
	return err;
}
EXPORT_SYMBOL_GPL(sgx_set_attribute);
static int __init sgx_init(void)
{
int ret;
......@@ -716,12 +806,28 @@ static int __init sgx_init(void)
goto err_page_cache;
}
ret = sgx_drv_init();
ret = misc_register(&sgx_dev_provision);
if (ret)
goto err_kthread;
/*
* Always try to initialize the native *and* KVM drivers.
* The KVM driver is less picky than the native one and
* can function if the native one is not supported on the
* current system or fails to initialize.
*
* Error out only if both fail to initialize.
*/
ret = sgx_drv_init();
if (sgx_vepc_init() && ret)
goto err_provision;
return 0;
err_provision:
misc_deregister(&sgx_dev_provision);
err_kthread:
kthread_stop(ksgxd_tsk);
......
......@@ -8,11 +8,15 @@
#include <linux/rwsem.h>
#include <linux/types.h>
#include <asm/asm.h>
#include "arch.h"
#include <asm/sgx.h>
#undef pr_fmt
#define pr_fmt(fmt) "sgx: " fmt
/*
* Format string for the warning issued when EREMOVE fails and the EPC page is
* therefore leaked. It consumes two arguments: the return code printed as
* decimal (%d) and again as hexadecimal (0x%x).
*/
#define EREMOVE_ERROR_MESSAGE \
"EREMOVE returned %d (0x%x) and an EPC page was leaked. SGX may become unusable. " \
"Refer to Documentation/x86/sgx.rst for more information."
#define SGX_MAX_EPC_SECTIONS 8
#define SGX_EEXTEND_BLOCK_SIZE 256
#define SGX_NR_TO_SCAN 16
......@@ -29,29 +33,26 @@ struct sgx_epc_page {
struct list_head list;
};
/*
* Contains the tracking data for NUMA nodes having EPC pages. Most importantly,
* the free page list local to the node is stored here.
*/
struct sgx_numa_node {
/* Free EPC pages that belong to this node. */
struct list_head free_page_list;
/* Taken around additions to and removals from free_page_list. */
spinlock_t lock;
};
/*
* The firmware can define multiple chunks of EPC in different areas of
* physical memory, e.g. for the memory areas of each node. This structure is
* used to store EPC pages for one EPC section and virtual memory area where
* the pages have been mapped.
*
* 'lock' must be held before accessing 'page_list' or 'free_cnt'.
*/
struct sgx_epc_section {
unsigned long phys_addr;
void *virt_addr;
struct sgx_epc_page *pages;
spinlock_t lock;
struct list_head page_list;
unsigned long free_cnt;
/*
* Pages which need EREMOVE run on them before they can be
* used. Only safe to be accessed in ksgxd and init code.
* Not protected by locks.
*/
struct list_head init_laundry_list;
struct sgx_numa_node *node;
};
extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
......@@ -83,4 +84,15 @@ void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
/*
* Virtual EPC initialization. With CONFIG_X86_SGX_KVM disabled, the inline
* stub below stands in for the real function and always returns -ENODEV, so
* callers do not need their own #ifdef.
*/
#ifdef CONFIG_X86_SGX_KVM
int __init sgx_vepc_init(void);
#else
static inline int __init sgx_vepc_init(void)
{
return -ENODEV;
}
#endif
void sgx_update_lepubkeyhash(u64 *lepubkeyhash);
#endif /* _X86_SGX_H */
// SPDX-License-Identifier: GPL-2.0
/*
* Device driver to expose SGX enclave memory to KVM guests.
*
* Copyright(c) 2021 Intel Corporation.
*/
#include <linux/miscdevice.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/xarray.h>
#include <asm/sgx.h>
#include <uapi/asm/sgx.h>
#include "encls.h"
#include "sgx.h"
/*
 * State of one virtual EPC instance. One is allocated per open() of the
 * device and stored in file->private_data; it is freed on release().
 */
struct sgx_vepc {
/* EPC pages of this instance, indexed by page offset (see __sgx_vepc_fault()). */
struct xarray page_array;
/* Held by the fault handler while it looks up and populates page_array. */
struct mutex lock;
};
/*
 * SECS pages that could not be EREMOVE'd when their virtual EPC instance was
 * released because they still have child pages in other virtual EPC
 * instances, and the lock protecting the list. sgx_vepc_release() retries
 * EREMOVE on every page on this list each time an instance is released.
 * Both objects are initialized in sgx_vepc_init().
 */
static struct mutex zombie_secs_pages_lock;
static struct list_head zombie_secs_pages;
/*
 * Back the page at @addr in @vma with an EPC page, allocating a new one if
 * this offset of the virtual EPC instance has not been populated yet.
 *
 * The caller is expected to hold vepc->lock.
 *
 * Return: 0 on success or when the offset already has a page, the error from
 * sgx_alloc_epc_page() or xa_store() when those fail, or -EFAULT when the PFN
 * could not be inserted into @vma.
 */
static int __sgx_vepc_fault(struct sgx_vepc *vepc,
			    struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long pg_index, epc_pfn;
	struct sgx_epc_page *page;
	int err;

	WARN_ON(!mutex_is_locked(&vepc->lock));

	/* Offset of the faulting page inside the virtual EPC, in pages */
	pg_index = vma->vm_pgoff + PFN_DOWN(addr - vma->vm_start);

	/* This offset already has an EPC page: nothing to allocate. */
	if (xa_load(&vepc->page_array, pg_index))
		return 0;

	page = sgx_alloc_epc_page(vepc, false);
	if (IS_ERR(page))
		return PTR_ERR(page);

	err = xa_err(xa_store(&vepc->page_array, pg_index, page, GFP_KERNEL));
	if (err) {
		sgx_free_epc_page(page);
		return err;
	}

	epc_pfn = PFN_DOWN(sgx_get_epc_phys_addr(page));
	if (vmf_insert_pfn(vma, addr, epc_pfn) != VM_FAULT_NOPAGE) {
		/* Undo both the xarray insertion and the allocation. */
		xa_erase(&vepc->page_array, pg_index);
		sgx_free_epc_page(page);
		return -EFAULT;
	}

	return 0;
}
/*
 * Page fault handler for virtual EPC mappings.
 *
 * Runs __sgx_vepc_fault() under the instance lock and translates its result:
 * success becomes VM_FAULT_NOPAGE, -EBUSY becomes VM_FAULT_RETRY when the
 * fault allows a retry (the mmap read lock is dropped first), and anything
 * else becomes VM_FAULT_SIGBUS.
 */
static vm_fault_t sgx_vepc_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct sgx_vepc *vepc = vma->vm_private_data;
	int err;

	mutex_lock(&vepc->lock);
	err = __sgx_vepc_fault(vepc, vma, vmf->address);
	mutex_unlock(&vepc->lock);

	if (err == 0)
		return VM_FAULT_NOPAGE;

	if (err != -EBUSY || !(vmf->flags & FAULT_FLAG_ALLOW_RETRY))
		return VM_FAULT_SIGBUS;

	mmap_read_unlock(vma->vm_mm);
	return VM_FAULT_RETRY;
}
/* VMA operations for virtual EPC mappings; installed by sgx_vepc_mmap(). */
static const struct vm_operations_struct sgx_vepc_vm_ops = {
.fault = sgx_vepc_fault,
};
/*
 * mmap() handler of the virtual EPC device.
 *
 * Only shared mappings are accepted. The VMA is tied to the virtual EPC
 * instance of @file and is populated lazily by sgx_vepc_fault().
 *
 * Return: 0 on success, -EINVAL if the mapping is not VM_SHARED.
 */
static int sgx_vepc_mmap(struct file *file, struct vm_area_struct *vma)
{
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;

	vma->vm_private_data = file->private_data;
	vma->vm_ops = &sgx_vepc_vm_ops;

	/* VM_DONTCOPY: don't copy the VMA in fork() */
	vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_IO | VM_PFNMAP;

	return 0;
}
/*
 * Take a previously guest-owned EPC page and return it to the general EPC
 * page pool.
 *
 * Guests cannot be trusted to have left the page in a good state, so EREMOVE
 * is run on it unconditionally. If the guest already EREMOVE'd the page, the
 * extra EREMOVE is harmless.
 *
 * Return: 0 if the page was EREMOVE'd and freed, otherwise the EREMOVE error
 * code; in that case the page is NOT freed and stays owned by the caller.
 */
static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
{
	int err = __eremove(sgx_get_epc_virt_addr(epc_page));

	if (!err) {
		sgx_free_epc_page(epc_page);
		return 0;
	}

	/*
	 * Only SGX_CHILD_PRESENT is expected here. It means an SECS page was
	 * EREMOVE'd while it still has child pages; the caller handles this
	 * by EREMOVE'ing the SECS again once all pages in the virtual EPC
	 * have been EREMOVE'd. See the comments in sgx_vepc_release().
	 *
	 * The user of virtual EPC (KVM) must guarantee that no logical
	 * processor is still running inside the guest's enclave. Otherwise
	 * EREMOVE fails with SGX_ENCLAVE_ACT, which cannot be handled here.
	 */
	WARN_ONCE(err != SGX_CHILD_PRESENT, EREMOVE_ERROR_MESSAGE, err, err);

	return err;
}
/*
 * release() handler of the virtual EPC device: EREMOVE every EPC page of the
 * instance and return it to the free pool, then free the instance itself.
 *
 * SECS pages can only be EREMOVE'd after all of their child pages, so this
 * takes up to three passes:
 *   1) EREMOVE everything; SECS pages with children fail and stay in the
 *      xarray.
 *   2) Retry what is left; pages that still fail have children in another
 *      instance and are collected on a local list.
 *   3) Retry the global zombie list, since pages removed above may have
 *      unpinned an SECS page from an earlier release(); then park whatever
 *      is still pinned on the zombie list.
 *
 * Each loop can iterate over a very large number of pages, so every pass
 * offers a reschedule point per page to avoid soft lockups on kernels
 * without full preemption.
 *
 * Always returns 0.
 */
static int sgx_vepc_release(struct inode *inode, struct file *file)
{
	struct sgx_vepc *vepc = file->private_data;
	struct sgx_epc_page *epc_page, *tmp, *entry;
	unsigned long index;

	LIST_HEAD(secs_pages);

	xa_for_each(&vepc->page_array, index, entry) {
		/*
		 * Remove all normal, child pages.  sgx_vepc_free_page()
		 * will fail if EREMOVE fails, but this is OK and expected on
		 * SECS pages.  Those can only be EREMOVE'd *after* all their
		 * child pages. Retries below will clean them up.
		 */
		if (!sgx_vepc_free_page(entry))
			xa_erase(&vepc->page_array, index);

		cond_resched();
	}

	/*
	 * Retry EREMOVE'ing pages.  This will clean up any SECS pages that
	 * only had children in this 'epc' area.
	 */
	xa_for_each(&vepc->page_array, index, entry) {
		epc_page = entry;
		/*
		 * An EREMOVE failure here means that the SECS page still
		 * has children.  But, since all children in this 'sgx_vepc'
		 * have been removed, the SECS page must have a child on
		 * another instance.
		 */
		if (sgx_vepc_free_page(epc_page))
			list_add_tail(&epc_page->list, &secs_pages);

		xa_erase(&vepc->page_array, index);
		cond_resched();
	}

	/*
	 * SECS pages are "pinned" by child pages, and "unpinned" once all
	 * children have been EREMOVE'd.  A child page in this instance
	 * may have pinned an SECS page encountered in an earlier release(),
	 * creating a zombie.  Since some children were EREMOVE'd above,
	 * try to EREMOVE all zombies in the hopes that one was unpinned.
	 */
	mutex_lock(&zombie_secs_pages_lock);
	list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
		/*
		 * Speculatively remove the page from the list of zombies,
		 * if the page is successfully EREMOVE'd it will be added to
		 * the list of free pages.  If EREMOVE fails, throw the page
		 * on the local list, which will be spliced on at the end.
		 */
		list_del(&epc_page->list);

		if (sgx_vepc_free_page(epc_page))
			list_add_tail(&epc_page->list, &secs_pages);

		/* Sleeping is fine here: only a mutex is held. */
		cond_resched();
	}

	if (!list_empty(&secs_pages))
		list_splice_tail(&secs_pages, &zombie_secs_pages);
	mutex_unlock(&zombie_secs_pages_lock);

	/*
	 * Every entry was erased by the second pass; pair xa_init() from
	 * open() with xa_destroy() before the containing object goes away.
	 */
	xa_destroy(&vepc->page_array);
	kfree(vepc);

	return 0;
}
static int sgx_vepc_open(struct inode *inode, struct file *file)
{
struct sgx_vepc *vepc;
vepc = kzalloc(sizeof(struct sgx_vepc), GFP_KERNEL);
if (!vepc)
return -ENOMEM;
mutex_init(&vepc->lock);
xa_init(&vepc->page_array);
file->private_data = vepc;
return 0;
}
/*
 * File operations of the virtual EPC device: open() creates an instance,
 * mmap() attaches it to a VMA, and release() tears the instance down.
 */
static const struct file_operations sgx_vepc_fops = {
.owner = THIS_MODULE,
.open = sgx_vepc_open,
.release = sgx_vepc_release,
.mmap = sgx_vepc_mmap,
};
/*
 * Misc device for virtual EPC, registered by sgx_vepc_init() with a
 * dynamically assigned minor number and the node name "sgx_vepc".
 */
static struct miscdevice sgx_vepc_dev = {
.minor = MISC_DYNAMIC_MINOR,
.name = "sgx_vepc",
.nodename = "sgx_vepc",
.fops = &sgx_vepc_fops,
};
/*
 * Initialize the virtual EPC driver: set up the zombie SECS page list and
 * its lock, then register the misc device.
 *
 * Return: -ENODEV if VMX is not enabled, otherwise the result of
 * misc_register().
 */
int __init sgx_vepc_init(void)
{
	/* SGX virtualization requires KVM to work */
	if (cpu_feature_enabled(X86_FEATURE_VMX)) {
		mutex_init(&zombie_secs_pages_lock);
		INIT_LIST_HEAD(&zombie_secs_pages);

		return misc_register(&sgx_vepc_dev);
	}

	return -ENODEV;
}
/**
 * sgx_virt_ecreate() - Run ECREATE on behalf of guest
 * @pageinfo:	Pointer to PAGEINFO structure
 * @secs:	Userspace pointer to SECS page
 * @trapnr:	trap number injected to guest in case of ECREATE error
 *
 * Run ECREATE on behalf of a guest after KVM has trapped the guest's ECREATE
 * in order to enforce policies on the guest's enclaves. When ECREATE faults,
 * the trap number that should be injected into the guest is stored in
 * @trapnr.
 *
 * Return:
 * -  0:	ECREATE did not fault.
 * -  -EINVAL:	@secs is not a valid userspace range (also triggers a WARN).
 * -  -EFAULT:	ECREATE faulted; @trapnr holds the trap number.
 */
int sgx_virt_ecreate(struct sgx_pageinfo *pageinfo, void __user *secs,
		     int *trapnr)
{
	int encls_ret;

	/*
	 * @secs is an untrusted, userspace-provided address handed over by
	 * KVM. It is assumed to point somewhere in userspace, but accessing
	 * it can still fault and invoke the SGX or other fault handlers when
	 * no userspace mapping exists for it.
	 *
	 * KVM should already have rejected invalid pointers (for instance,
	 * ones crafted by a malicious guest), so WARN if @secs is not a valid
	 * userspace pointer. All other checks, such as the alignment of
	 * @secs, are deferred to ENCLS itself.
	 */
	if (WARN_ON_ONCE(!access_ok(secs, PAGE_SIZE)))
		return -EINVAL;

	__uaccess_begin();
	encls_ret = __ecreate(pageinfo, (void *)secs);
	__uaccess_end();

	if (!encls_faulted(encls_ret)) {
		/* ECREATE doesn't return an error code, it faults or succeeds. */
		WARN_ON_ONCE(encls_ret);
		return 0;
	}

	*trapnr = ENCLS_TRAPNR(encls_ret);
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(sgx_virt_ecreate);
/*
 * Validate the three userspace pointers and run EINIT on them.
 *
 * Return: -EINVAL (with a WARN) if any pointer fails access_ok(), otherwise
 * the raw value returned by __einit().
 */
static int __sgx_virt_einit(void __user *sigstruct, void __user *token,
			    void __user *secs)
{
	bool ptrs_valid;
	int ret;

	/*
	 * Make sure all userspace pointers from caller (KVM) are valid.
	 * All other checks deferred to ENCLS itself.  Also see comment
	 * for @secs in sgx_virt_ecreate().
	 */
#define SGX_EINITTOKEN_SIZE 304
	ptrs_valid = access_ok(sigstruct, sizeof(struct sgx_sigstruct)) &&
		     access_ok(token, SGX_EINITTOKEN_SIZE) &&
		     access_ok(secs, PAGE_SIZE);
	if (WARN_ON_ONCE(!ptrs_valid))
		return -EINVAL;

	__uaccess_begin();
	ret = __einit((void *)sigstruct, (void *)token, (void *)secs);
	__uaccess_end();

	return ret;
}
/**
 * sgx_virt_einit() - Run EINIT on behalf of guest
 * @sigstruct:		Userspace pointer to SIGSTRUCT structure
 * @token:		Userspace pointer to EINITTOKEN structure
 * @secs:		Userspace pointer to SECS page
 * @lepubkeyhash:	Pointer to guest's *virtual* SGX_LEPUBKEYHASH MSR values
 * @trapnr:		trap number injected to guest in case of EINIT error
 *
 * Run EINIT on behalf of a guest after KVM has trapped the guest's EINIT.
 * When SGX_LC is available on the host, the SGX driver may rewrite the
 * hardware SGX_LEPUBKEYHASH values at will, so the hardware values are first
 * set to the guest's virtual MSR values to ensure EINIT executes with the
 * values the guest expects. Preemption is disabled across the MSR update and
 * EINIT.
 *
 * Return:
 * -  0:	EINIT was successful.
 * -  -EINVAL:	one of the userspace pointers is invalid.
 * -  -EFAULT:	EINIT faulted; @trapnr holds the trap number.
 * -  other:	any other value returned by EINIT is passed through unchanged.
 */
int sgx_virt_einit(void __user *sigstruct, void __user *token,
		   void __user *secs, u64 *lepubkeyhash, int *trapnr)
{
	bool launch_control = cpu_feature_enabled(X86_FEATURE_SGX_LC);
	int ret;

	if (launch_control) {
		preempt_disable();
		sgx_update_lepubkeyhash(lepubkeyhash);
	}

	ret = __sgx_virt_einit(sigstruct, token, secs);

	if (launch_control)
		preempt_enable();

	/*
	 * -EINVAL comes from the WARN_ON_ONCE in __sgx_virt_einit() and is
	 * propagated as is; it must not be interpreted as an ENCLS fault.
	 */
	if (ret != -EINVAL && encls_faulted(ret)) {
		*trapnr = ENCLS_TRAPNR(ret);
		return -EFAULT;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sgx_virt_einit);
......@@ -84,6 +84,18 @@ config KVM_INTEL
To compile this as a module, choose M here: the module
will be called kvm-intel.
config X86_SGX_KVM
bool "Software Guard eXtensions (SGX) Virtualization"
depends on X86_SGX && KVM_INTEL
help
Enables KVM guests to create SGX enclaves.
This includes support to expose "raw" unreclaimable enclave memory to
guests via a device node, e.g. /dev/sgx_vepc.
If unsure, say N.
config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM
......
......@@ -14,7 +14,7 @@
#define __aligned(x) __attribute__((__aligned__(x)))
#define __packed __attribute__((packed))
#include "../../../../arch/x86/kernel/cpu/sgx/arch.h"
#include "../../../../arch/x86/include/asm/sgx.h"
#include "../../../../arch/x86/include/asm/enclu.h"
#include "../../../../arch/x86/include/uapi/asm/sgx.h"
......
......@@ -45,19 +45,19 @@ static bool encl_map_bin(const char *path, struct encl *encl)
fd = open(path, O_RDONLY);
if (fd == -1) {
perror("open()");
perror("enclave executable open()");
return false;
}
ret = stat(path, &sb);
if (ret) {
perror("stat()");
perror("enclave executable stat()");
goto err;
}
bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (bin == MAP_FAILED) {
perror("mmap()");
perror("enclave executable mmap()");
goto err;
}
......@@ -90,8 +90,7 @@ static bool encl_ioc_create(struct encl *encl)
ioc.src = (unsigned long)secs;
rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc);
if (rc) {
fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n",
errno);
perror("SGX_IOC_ENCLAVE_CREATE failed");
munmap((void *)secs->base, encl->encl_size);
return false;
}
......@@ -116,31 +115,72 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
if (rc < 0) {
fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n",
errno);
perror("SGX_IOC_ENCLAVE_ADD_PAGES failed");
return false;
}
return true;
}
bool encl_load(const char *path, struct encl *encl)
{
const char device_path[] = "/dev/sgx_enclave";
Elf64_Phdr *phdr_tbl;
off_t src_offset;
Elf64_Ehdr *ehdr;
struct stat sb;
void *ptr;
int i, j;
int ret;
int fd = -1;
memset(encl, 0, sizeof(*encl));
ret = open("/dev/sgx_enclave", O_RDWR);
if (ret < 0) {
fprintf(stderr, "Unable to open /dev/sgx_enclave\n");
fd = open(device_path, O_RDWR);
if (fd < 0) {
perror("Unable to open /dev/sgx_enclave");
goto err;
}
ret = stat(device_path, &sb);
if (ret) {
perror("device file stat()");
goto err;
}
/*
* This just checks if the /dev file has these permission
* bits set. It does not check that the current user is
* the owner or in the owning group.
*/
if (!(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
fprintf(stderr, "no execute permissions on device file %s\n", device_path);
goto err;
}
ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
if (ptr == (void *)-1) {
perror("mmap for read");
goto err;
}
munmap(ptr, PAGE_SIZE);
#define ERR_MSG \
"mmap() succeeded for PROT_READ, but failed for PROT_EXEC.\n" \
" Check that current user has execute permissions on %s and \n" \
" that /dev does not have noexec set: mount | grep \"/dev .*noexec\"\n" \
" If so, remount it executable: mount -o remount,exec /dev\n\n"
ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0);
if (ptr == (void *)-1) {
fprintf(stderr, ERR_MSG, device_path);
goto err;
}
munmap(ptr, PAGE_SIZE);
encl->fd = ret;
encl->fd = fd;
if (!encl_map_bin(path, encl))
goto err;
......@@ -217,6 +257,8 @@ bool encl_load(const char *path, struct encl *encl)
return true;
err:
if (fd != -1)
close(fd);
encl_delete(encl);
return false;
}
......@@ -229,7 +271,7 @@ static bool encl_map_area(struct encl *encl)
area = mmap(NULL, encl_size * 2, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (area == MAP_FAILED) {
perror("mmap");
perror("reservation mmap()");
return false;
}
......@@ -268,8 +310,7 @@ bool encl_build(struct encl *encl)
ioc.sigstruct = (uint64_t)&encl->sigstruct;
ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc);
if (ret) {
fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n",
errno);
perror("SGX_IOC_ENCLAVE_INIT failed");
return false;
}
......
......@@ -15,6 +15,7 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/auxv.h>
#include "defines.h"
#include "main.h"
#include "../kselftest.h"
......@@ -28,24 +29,6 @@ struct vdso_symtab {
Elf64_Word *elf_hashtab;
};
/*
 * Find the vDSO base address by scanning the ELF auxiliary vector.
 *
 * The memory immediately following the NULL terminator of @envp is treated
 * as an array of Elf64_auxv_t entries terminated by AT_NULL.
 *
 * Return: the AT_SYSINFO_EHDR value as a pointer, or NULL if no such entry
 * is found before AT_NULL.
 */
static void *vdso_get_base_addr(char *envp[])
{
	char **cursor = envp;
	Elf64_auxv_t *entry;

	/* Skip over the environment strings to their NULL terminator. */
	while (*cursor)
		cursor++;

	for (entry = (Elf64_auxv_t *)(cursor + 1); entry->a_type != AT_NULL; entry++) {
		if (entry->a_type == AT_SYSINFO_EHDR)
			return (void *)entry->a_un.a_val;
	}

	return NULL;
}
static Elf64_Dyn *vdso_get_dyntab(void *addr)
{
Elf64_Ehdr *ehdr = addr;
......@@ -162,7 +145,7 @@ static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r
return 0;
}
int main(int argc, char *argv[], char *envp[])
int main(int argc, char *argv[])
{
struct sgx_enclave_run run;
struct vdso_symtab symtab;
......@@ -195,7 +178,7 @@ int main(int argc, char *argv[], char *envp[])
addr = mmap((void *)encl.encl_base + seg->offset, seg->size,
seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0);
if (addr == MAP_FAILED) {
fprintf(stderr, "mmap() failed, errno=%d.\n", errno);
perror("mmap() segment failed");
exit(KSFT_FAIL);
}
}
......@@ -203,7 +186,8 @@ int main(int argc, char *argv[], char *envp[])
memset(&run, 0, sizeof(run));
run.tcs = encl.encl_base;
addr = vdso_get_base_addr(envp);
/* Get vDSO base address */
addr = (void *)getauxval(AT_SYSINFO_EHDR);
if (!addr)
goto err;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment