[RFC PATCH v4 04/12] x86/sgx: Require userspace to define enclave pages' protection bits

Xing, Cedric cedric.xing at intel.com
Fri Jun 21 16:42:54 UTC 2019


> From: Christopherson, Sean J
> Sent: Wednesday, June 19, 2019 3:24 PM
> 
> diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
> index 6dba9f282232..67a3babbb24d 100644
> --- a/arch/x86/include/uapi/asm/sgx.h
> +++ b/arch/x86/include/uapi/asm/sgx.h
> @@ -35,15 +35,17 @@ struct sgx_enclave_create  {
>   * @src:	address for the page data
>   * @secinfo:	address for the SECINFO data
>   * @mrmask:	bitmask for the measured 256 byte chunks
> + * @prot:	maximal PROT_{READ,WRITE,EXEC} protections for the page
>   */
>  struct sgx_enclave_add_page {
>  	__u64	addr;
>  	__u64	src;
>  	__u64	secinfo;
> -	__u64	mrmask;
> +	__u16	mrmask;
> +	__u8	prot;
> +	__u8	pad;
>  };

Given that EPCM permissions cannot change in SGX1, these maximal PROT_* flags can be the same as the EPCM permissions, so they don't have to be specified by user code until SGX2. Given that we don't have a clear picture of how SGX2 will work yet, I think we should take "prot" off until it is proven necessary.

> diff --git a/arch/x86/kernel/cpu/sgx/driver/main.c b/arch/x86/kernel/cpu/sgx/driver/main.c
> index 29384cdd0842..dabfe2a7245a 100644
> --- a/arch/x86/kernel/cpu/sgx/driver/main.c
> +++ b/arch/x86/kernel/cpu/sgx/driver/main.c
> @@ -93,15 +93,64 @@ static long sgx_compat_ioctl(struct file *filep, unsigned int cmd,  }
> #endif
> 
> +/*
> + * Returns the AND of VM_{READ,WRITE,EXEC} permissions across all pages
> + * covered by the specific VMA.  A non-existent (or yet to be added) enclave
> + * page is considered to have no RWX permissions, i.e. is inaccessible.
> + */
> +static unsigned long sgx_allowed_rwx(struct sgx_encl *encl,
> +				     struct vm_area_struct *vma)
> +{
> +	unsigned long allowed_rwx = VM_READ | VM_WRITE | VM_EXEC;
> +	unsigned long idx, idx_start, idx_end;
> +	struct sgx_encl_page *page;
> +
> +	idx_start = PFN_DOWN(vma->vm_start);
> +	idx_end = PFN_DOWN(vma->vm_end - 1);
> +
> +	for (idx = idx_start; idx <= idx_end; ++idx) {
> +		/*
> +		 * No need to take encl->lock, vm_prot_bits is set prior to
> +		 * insertion and never changes, and racing with adding pages is
> +		 * a userspace bug.
> +		 */
> +		rcu_read_lock();
> +		page = radix_tree_lookup(&encl->page_tree, idx);
> +		rcu_read_unlock();

This loop iterates through every page in the range, which could be very slow if the range is large.

> +
> +		/* Do not allow R|W|X to a non-existent page. */
> +		if (!page)
> +			allowed_rwx = 0;
> +		else
> +			allowed_rwx &= page->vm_prot_bits;
> +		if (!allowed_rwx)
> +			break;
> +	}
> +
> +	return allowed_rwx;
> +}
> +
>  static int sgx_mmap(struct file *file, struct vm_area_struct *vma)  {
>  	struct sgx_encl *encl = file->private_data;
> +	unsigned long allowed_rwx;
>  	int ret;
> 
> +	allowed_rwx = sgx_allowed_rwx(encl, vma);
> +	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC) & ~allowed_rwx)
> +		return -EACCES;
> +
>  	ret = sgx_encl_mm_add(encl, vma->vm_mm);
>  	if (ret)
>  		return ret;
> 
> +	if (!(allowed_rwx & VM_READ))
> +		vma->vm_flags &= ~VM_MAYREAD;
> +	if (!(allowed_rwx & VM_WRITE))
> +		vma->vm_flags &= ~VM_MAYWRITE;
> +	if (!(allowed_rwx & VM_EXEC))
> +		vma->vm_flags &= ~VM_MAYEXEC;
> +

Say a range composed of an RW sub-range and an RX sub-range is being mmap()'ed as R here. It would succeed, but a subsequent mprotect(<RW sub-range>, RW) will fail because VM_MAYWRITE is cleared here. However, if those two sub-ranges were mapped by separate mmap() calls, then the same mprotect() would succeed. The inconsistency here is unexpected and unprecedented.

>  	vma->vm_ops = &sgx_vm_ops;
>  	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
>  	vma->vm_private_data = encl;



More information about the Linux-security-module-archive mailing list