[RFC PATCH v4 04/12] x86/sgx: Require userspace to define enclave pages' protection bits
Xing, Cedric
cedric.xing at intel.com
Fri Jun 21 16:42:54 UTC 2019
> From: Christopherson, Sean J
> Sent: Wednesday, June 19, 2019 3:24 PM
>
> diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h index
> 6dba9f282232..67a3babbb24d 100644
> --- a/arch/x86/include/uapi/asm/sgx.h
> +++ b/arch/x86/include/uapi/asm/sgx.h
> @@ -35,15 +35,17 @@ struct sgx_enclave_create {
> * @src: address for the page data
> * @secinfo: address for the SECINFO data
> * @mrmask: bitmask for the measured 256 byte chunks
> + * @prot: maximal PROT_{READ,WRITE,EXEC} protections for the page
> */
> struct sgx_enclave_add_page {
> __u64 addr;
> __u64 src;
> __u64 secinfo;
> - __u64 mrmask;
> + __u16 mrmask;
> + __u8 prot;
> + __u8 pad;
> };
Given EPCM permissions cannot change in SGX1, these maximal PROT_* flags can be the same as EPCM permissions, so don't have to be specified by user code until SGX2. Given we don't have a clear picture on how SGX2 will work yet, I think we shall take "prot" off until it is proven necessary.
> diff --git a/arch/x86/kernel/cpu/sgx/driver/main.c b/arch/x86/kernel/cpu/sgx/driver/main.c
> index 29384cdd0842..dabfe2a7245a 100644
> --- a/arch/x86/kernel/cpu/sgx/driver/main.c
> +++ b/arch/x86/kernel/cpu/sgx/driver/main.c
> @@ -93,15 +93,64 @@ static long sgx_compat_ioctl(struct file *filep, unsigned int cmd, }
> #endif
>
> +/*
> + * Returns the AND of VM_{READ,WRITE,EXEC} permissions across all pages
> + * covered by the specific VMA. A non-existent (or yet to be added)
> +enclave
> + * page is considered to have no RWX permissions, i.e. is inaccessible.
> + */
> +static unsigned long sgx_allowed_rwx(struct sgx_encl *encl,
> + struct vm_area_struct *vma)
> +{
> + unsigned long allowed_rwx = VM_READ | VM_WRITE | VM_EXEC;
> + unsigned long idx, idx_start, idx_end;
> + struct sgx_encl_page *page;
> +
> + idx_start = PFN_DOWN(vma->vm_start);
> + idx_end = PFN_DOWN(vma->vm_end - 1);
> +
> + for (idx = idx_start; idx <= idx_end; ++idx) {
> + /*
> + * No need to take encl->lock, vm_prot_bits is set prior to
> + * insertion and never changes, and racing with adding pages is
> + * a userspace bug.
> + */
> + rcu_read_lock();
> + page = radix_tree_lookup(&encl->page_tree, idx);
> + rcu_read_unlock();
This loop iterates through every page in the range, which could be very slow if the range is large.
> +
> + /* Do not allow R|W|X to a non-existent page. */
> + if (!page)
> + allowed_rwx = 0;
> + else
> + allowed_rwx &= page->vm_prot_bits;
> + if (!allowed_rwx)
> + break;
> + }
> +
> + return allowed_rwx;
> +}
> +
> static int sgx_mmap(struct file *file, struct vm_area_struct *vma) {
> struct sgx_encl *encl = file->private_data;
> + unsigned long allowed_rwx;
> int ret;
>
> + allowed_rwx = sgx_allowed_rwx(encl, vma);
> + if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC) & ~allowed_rwx)
> + return -EACCES;
> +
> ret = sgx_encl_mm_add(encl, vma->vm_mm);
> if (ret)
> return ret;
>
> + if (!(allowed_rwx & VM_READ))
> + vma->vm_flags &= ~VM_MAYREAD;
> + if (!(allowed_rwx & VM_WRITE))
> + vma->vm_flags &= ~VM_MAYWRITE;
> + if (!(allowed_rwx & VM_EXEC))
> + vma->vm_flags &= ~VM_MAYEXEC;
> +
Say a range comprised of a RW sub-range and a RX sub-range is being mmap()'ed as R here. It'd succeed but mprotect(<RW sub-range>, RW) afterwards will fail because VM_MAYWRITE is cleared here. However, if those two sub-ranges are mapped by separate mmap() calls then the same mprotect() would succeed. The inconsistence here is unexpected and unprecedented.
> vma->vm_ops = &sgx_vm_ops;
> vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
> vma->vm_private_data = encl;
More information about the Linux-security-module-archive
mailing list