[PATCH v2 06/25] capability: provide helpers for converting between xattrs and vfs_caps

Christian Brauner brauner at kernel.org
Thu Feb 22 15:20:08 UTC 2024


On Wed, Feb 21, 2024 at 03:24:37PM -0600, Seth Forshee (DigitalOcean) wrote:
> To pass around vfs_caps instead of raw xattr data we will need to
> convert between the two representations near userspace and disk
> boundaries. We already convert xattrs from disks to vfs_caps, so move
> that code into a helper, and change get_vfs_caps_from_disk() to use the
> helper.
> 
> When converting vfs_caps to xattrs we have different considerations
> depending on the destination of the xattr data. For xattrs which will be
> written to disk we need to reject the xattr if the rootid does not map
> into the filesystem's user namespace, whereas xattrs read by userspace
> may need to undergo a conversion from v3 to v2 format when the rootid
> does not map. So this helper is split into an internal and an external
> interface. The internal interface does not return an error if the rootid
> has no mapping in the target user namespace and will be used for
> conversions targeting userspace. The external interface returns
> EOVERFLOW if the rootid has no mapping and will be used for all other
> conversions.
> 
> Signed-off-by: Seth Forshee (DigitalOcean) <sforshee at kernel.org>
> ---
>  include/linux/capability.h |  10 ++
>  security/commoncap.c       | 228 +++++++++++++++++++++++++++++++++++----------
>  2 files changed, 187 insertions(+), 51 deletions(-)
> 
> diff --git a/include/linux/capability.h b/include/linux/capability.h
> index eb46d346bbbc..a0893ac4664b 100644
> --- a/include/linux/capability.h
> +++ b/include/linux/capability.h
> @@ -209,6 +209,16 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns)
>  		ns_capable(ns, CAP_SYS_ADMIN);
>  }
>  
> +/* helpers to convert between xattr and in-kernel representations */
> +int vfs_caps_from_xattr(struct mnt_idmap *idmap,
> +			struct user_namespace *src_userns,
> +			struct vfs_caps *vfs_caps,
> +			const void *data, size_t size);
> +ssize_t vfs_caps_to_xattr(struct mnt_idmap *idmap,
> +			  struct user_namespace *dest_userns,
> +			  const struct vfs_caps *vfs_caps,
> +			  void *data, size_t size);
> +
>  /* audit system wants to get cap info from files as well */
>  int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
>  			   const struct dentry *dentry,
> diff --git a/security/commoncap.c b/security/commoncap.c
> index a0b5c9740759..7531c9634997 100644
> --- a/security/commoncap.c
> +++ b/security/commoncap.c
> @@ -619,54 +619,41 @@ static inline int bprm_caps_from_vfs_caps(struct vfs_caps *caps,
>  }
>  
>  /**
> - * get_vfs_caps_from_disk - retrieve vfs caps from disk
> + * vfs_caps_from_xattr - convert raw caps xattr data to vfs_caps
>   *
> - * @idmap:	idmap of the mount the inode was found from
> - * @dentry:	dentry from which @inode is retrieved
> - * @cpu_caps:	vfs capabilities
> + * @idmap:      idmap of the mount the inode was found from
> + * @src_userns: user namespace for ids in xattr data
> + * @vfs_caps:   destination buffer for vfs_caps data
> + * @data:       rax xattr caps data
> + * @size:       size of xattr data
>   *
> - * Extract the on-exec-apply capability sets for an executable file.
> + * Converts a raw security.capability xattr into the kernel-internal
> + * capabilities format.
>   *
> - * If the inode has been found through an idmapped mount the idmap of
> - * the vfsmount must be passed through @idmap. This function will then
> - * take care to map the inode according to @idmap before checking
> - * permissions. On non-idmapped mounts or if permission checking is to be
> - * performed on the raw inode simply pass @nop_mnt_idmap.
> + * If the xattr is being read or written through an idmapped mount the
> + * idmap of the vfsmount must be passed through @idmap. This function
> + * will then take care to map the rootid according to @idmap.
> + *
> + * Return: On success, return 0; on error, return < 0.
>   */
> -int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
> -			   const struct dentry *dentry,
> -			   struct vfs_caps *cpu_caps)
> +int vfs_caps_from_xattr(struct mnt_idmap *idmap,
> +			struct user_namespace *src_userns,
> +			struct vfs_caps *vfs_caps,
> +			const void *data, size_t size)
>  {
> -	struct inode *inode = d_backing_inode(dentry);
>  	__u32 magic_etc;
> -	int size;
> -	struct vfs_ns_cap_data data, *nscaps = &data;
> -	struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
> +	const struct vfs_ns_cap_data *ns_caps = data;
> +	struct vfs_cap_data *caps = (struct vfs_cap_data *)ns_caps;
>  	kuid_t rootkuid;
> -	vfsuid_t rootvfsuid;
> -	struct user_namespace *fs_ns;
> -
> -	memset(cpu_caps, 0, sizeof(struct vfs_caps));
> -
> -	if (!inode)
> -		return -ENODATA;
>  
> -	fs_ns = inode->i_sb->s_user_ns;
> -	size = __vfs_getxattr((struct dentry *)dentry, inode,
> -			      XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
> -	if (size == -ENODATA || size == -EOPNOTSUPP)
> -		/* no data, that's ok */
> -		return -ENODATA;
> -
> -	if (size < 0)
> -		return size;
> +	memset(vfs_caps, 0, sizeof(*vfs_caps));
>  
>  	if (size < sizeof(magic_etc))
>  		return -EINVAL;
>  
> -	cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
> +	vfs_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
>  
> -	rootkuid = make_kuid(fs_ns, 0);
> +	rootkuid = make_kuid(src_userns, 0);
>  	switch (magic_etc & VFS_CAP_REVISION_MASK) {
>  	case VFS_CAP_REVISION_1:
>  		if (size != XATTR_CAPS_SZ_1)
> @@ -679,39 +666,178 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
>  	case VFS_CAP_REVISION_3:
>  		if (size != XATTR_CAPS_SZ_3)
>  			return -EINVAL;
> -		rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
> +		rootkuid = make_kuid(src_userns, le32_to_cpu(ns_caps->rootid));
>  		break;
>  
>  	default:
>  		return -EINVAL;
>  	}
>  
> -	rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid);
> -	if (!vfsuid_valid(rootvfsuid))
> -		return -ENODATA;
> +	vfs_caps->rootid = make_vfsuid(idmap, src_userns, rootkuid);
> +	if (!vfsuid_valid(vfs_caps->rootid))
> +		return -EOVERFLOW;
>  
> -	/* Limit the caps to the mounter of the filesystem
> -	 * or the more limited uid specified in the xattr.
> +	vfs_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
> +	vfs_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
> +
> +	/*
> +	 * Rev1 had just a single 32-bit word, later expanded
> +	 * to a second one for the high bits
>  	 */
> -	if (!rootid_owns_currentns(rootvfsuid))
> -		return -ENODATA;
> +	if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
> +		vfs_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
> +		vfs_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;

That + makes this even more difficult to read. This should be rewritten.

> +	}
> +
> +	vfs_caps->permitted.val &= CAP_VALID_MASK;
> +	vfs_caps->inheritable.val &= CAP_VALID_MASK;
> +
> +	return 0;
> +}
> +
> +/*
> + * Inner implementation of vfs_caps_to_xattr() which does not return an
> + * error if the rootid does not map into @dest_userns.
> + */
> +static ssize_t __vfs_caps_to_xattr(struct mnt_idmap *idmap,
> +				   struct user_namespace *dest_userns,
> +				   const struct vfs_caps *vfs_caps,
> +				   void *data, size_t size)
> +{
> +	struct vfs_ns_cap_data *ns_caps = data;
> +	struct vfs_cap_data *caps = (struct vfs_cap_data *)ns_caps;
> +	kuid_t rootkuid;
> +	uid_t rootid;
> +
> +	memset(ns_caps, 0, size);
> +
> +	rootid = 0;
> +	switch (vfs_caps->magic_etc & VFS_CAP_REVISION_MASK) {
> +	case VFS_CAP_REVISION_1:
> +		if (size < XATTR_CAPS_SZ_1)
> +			return -EINVAL;
> +		size = XATTR_CAPS_SZ_1;
> +		break;
> +	case VFS_CAP_REVISION_2:
> +		if (size < XATTR_CAPS_SZ_2)
> +			return -EINVAL;
> +		size = XATTR_CAPS_SZ_2;
> +		break;
> +	case VFS_CAP_REVISION_3:
> +		if (size < XATTR_CAPS_SZ_3)
> +			return -EINVAL;
> +		size = XATTR_CAPS_SZ_3;
> +		rootkuid = from_vfsuid(idmap, dest_userns, vfs_caps->rootid);
> +		rootid = from_kuid(dest_userns, rootkuid);
> +		ns_caps->rootid = cpu_to_le32(rootid);
> +		break;
>  
> -	cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
> -	cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	caps->magic_etc = cpu_to_le32(vfs_caps->magic_etc);
> +
> +	caps->data[0].permitted = cpu_to_le32(lower_32_bits(vfs_caps->permitted.val));
> +	caps->data[0].inheritable = cpu_to_le32(lower_32_bits(vfs_caps->inheritable.val));
>  
>  	/*
>  	 * Rev1 had just a single 32-bit word, later expanded
>  	 * to a second one for the high bits
>  	 */
> -	if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
> -		cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
> -		cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
> +	if ((vfs_caps->magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
> +		caps->data[1].permitted =
> +			cpu_to_le32(upper_32_bits(vfs_caps->permitted.val));
> +		caps->data[1].inheritable =
> +			cpu_to_le32(upper_32_bits(vfs_caps->inheritable.val));
>  	}
>  
> -	cpu_caps->permitted.val &= CAP_VALID_MASK;
> -	cpu_caps->inheritable.val &= CAP_VALID_MASK;
> +	return size;
> +}
> +
> +
> +/**
> + * vfs_caps_to_xattr - convert vfs_caps to raw caps xattr data
> + *
> + * @idmap:       idmap of the mount the inode was found from
> + * @dest_userns: user namespace for ids in xattr data
> + * @vfs_caps:    source vfs_caps data
> + * @data:        destination buffer for rax xattr caps data
> + * @size:        size of the @data buffer
> + *
> + * Converts a kernel-internal capability into the raw security.capability
> + * xattr format.
> + *
> + * If the xattr is being read or written through an idmapped mount the
> + * idmap of the vfsmount must be passed through @idmap. This function
> + * will then take care to map the rootid according to @idmap.
> + *
> + * Return: On success, return the size of the xattr data. On error,
> + * return < 0.
> + */
> +ssize_t vfs_caps_to_xattr(struct mnt_idmap *idmap,
> +			  struct user_namespace *dest_userns,
> +			  const struct vfs_caps *vfs_caps,
> +			  void *data, size_t size)
> +{
> +	struct vfs_ns_cap_data *caps = data;
> +	int ret;

This should very likely be ssize_t ret.

> +
> +	ret = __vfs_caps_to_xattr(idmap, dest_userns, vfs_caps, data, size);
> +	if (ret > 0 &&
> +	    (vfs_caps->magic_etc & VFS_CAP_REVISION_MASK) == VFS_CAP_REVISION_3 &&
> +	     le32_to_cpu(caps->rootid) == (uid_t)-1)
> +		return -EOVERFLOW;
> +	return ret;
> +}
> +
> +/**
> + * get_vfs_caps_from_disk - retrieve vfs caps from disk
> + *
> + * @idmap:	idmap of the mount the inode was found from
> + * @dentry:	dentry from which @inode is retrieved
> + * @cpu_caps:	vfs capabilities
> + *
> + * Extract the on-exec-apply capability sets for an executable file.
> + *
> + * If the inode has been found through an idmapped mount the idmap of
> + * the vfsmount must be passed through @idmap. This function will then
> + * take care to map the inode according to @idmap before checking
> + * permissions. On non-idmapped mounts or if permission checking is to be
> + * performed on the raw inode simply pass @nop_mnt_idmap.
> + */
> +int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
> +			   const struct dentry *dentry,
> +			   struct vfs_caps *cpu_caps)
> +{
> +	struct inode *inode = d_backing_inode(dentry);
> +	int size, ret;
> +	struct vfs_ns_cap_data data, *nscaps = &data;
> +
> +	if (!inode)
> +		return -ENODATA;
>  
> -	cpu_caps->rootid = rootvfsuid;
> +	size = __vfs_getxattr((struct dentry *)dentry, inode,
> +			      XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
> +	if (size == -ENODATA || size == -EOPNOTSUPP)
> +		/* no data, that's ok */
> +		return -ENODATA;
> +
> +	if (size < 0)
> +		return size;
> +
> +	ret = vfs_caps_from_xattr(idmap, inode->i_sb->s_user_ns,
> +				  cpu_caps, nscaps, size);
> +	if (ret == -EOVERFLOW)
> +		return -ENODATA;
> +	if (ret)
> +		return ret;
> +
> +	/* Limit the caps to the mounter of the filesystem
> +	 * or the more limited uid specified in the xattr.
> +	 */
> +	if (!rootid_owns_currentns(cpu_caps->rootid))
> +		return -ENODATA;
>  
>  	return 0;
>  }
> 
> -- 
> 2.43.0
> 



More information about the Linux-security-module-archive mailing list