[PATCH v2 20/25] ovl: add fscaps handlers

Christian Brauner brauner at kernel.org
Fri Feb 23 09:04:37 UTC 2024


On Wed, Feb 21, 2024 at 03:24:51PM -0600, Seth Forshee (DigitalOcean) wrote:
> Add handlers which read fs caps from the lower or upper filesystem and
> write/remove fs caps to the upper filesystem, performing copy-up as
> necessary.
> 
> While fscaps only really make sense on regular files, the general policy
> is to allow most xattr namespaces on all different inode types, so
> fscaps handlers are installed in the inode operations for all types of
> inodes.
> 
> Signed-off-by: Seth Forshee (DigitalOcean) <sforshee at kernel.org>
> ---
>  fs/overlayfs/dir.c       |  2 ++
>  fs/overlayfs/inode.c     | 72 ++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/overlayfs/overlayfs.h |  5 ++++
>  3 files changed, 79 insertions(+)
> 
> diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
> index 0f8b4a719237..4ff360fe10c9 100644
> --- a/fs/overlayfs/dir.c
> +++ b/fs/overlayfs/dir.c
> @@ -1307,6 +1307,8 @@ const struct inode_operations ovl_dir_inode_operations = {
>  	.get_inode_acl	= ovl_get_inode_acl,
>  	.get_acl	= ovl_get_acl,
>  	.set_acl	= ovl_set_acl,
> +	.get_fscaps	= ovl_get_fscaps,
> +	.set_fscaps	= ovl_set_fscaps,
>  	.update_time	= ovl_update_time,
>  	.fileattr_get	= ovl_fileattr_get,
>  	.fileattr_set	= ovl_fileattr_set,
> diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
> index c63b31a460be..7a8978ea6fe1 100644
> --- a/fs/overlayfs/inode.c
> +++ b/fs/overlayfs/inode.c
> @@ -568,6 +568,72 @@ int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
>  }
>  #endif
>  
> +int ovl_get_fscaps(struct mnt_idmap *idmap, struct dentry *dentry,
> +		   struct vfs_caps *caps)
> +{
> +	int err;
> +	const struct cred *old_cred;
> +	struct path realpath;
> +
> +	ovl_path_real(dentry, &realpath);
> +	old_cred = ovl_override_creds(dentry->d_sb);
> +	err = vfs_get_fscaps(mnt_idmap(realpath.mnt), realpath.dentry, caps);

Right, vfs_get_fscaps() returns a struct vfs_caps which contains a
vfs{g,u}id and has the lower/upper layer's idmap taken into account.

That confused me at first because vfs_get_acl() returns a struct
posix_acl which contains k{g,u}id.

Reading through this made me realize that we need a few more words about
the translations. The reason is that we do distinct things for POSIX
ACLs and for fscaps. For POSIX ACLs, when we call vfs_get_acl() what we
get is a struct posix_acl which contains k{g,u}id_t types. That is because
struct posix_acl is cached filesystem-wide and thus shared among
concurrent retrievers from different mounts with different idmappings,
which means that we can't put vfs{g,u}id_t types in there. Instead we
perform translations on the fly. We do that in the VFS during path
lookup and we do that for overlayfs when it retrieves POSIX ACLs.

However, for fscaps we seem to do it differently because they're not
cached, which is ok because, unlike POSIX ACLs, they don't matter during
path lookup. So performance here doesn't matter too much. But that means
overall that the translations are quite distinct. And that gets
confusing when we have a stacking filesystem in the mix where we have to
take into account the privileges of the mounter of the overlayfs
instance and the idmap of the lower/upper layer.

I only skimmed my old commit but I think that commit 0c5fd887d2bb ("acl: move
idmapped mount fixup into vfs_{g,s}etxattr()") contains a detailed explanation
of this as I see:

    > For POSIX ACLs we need to do something similar. However, in contrast to fscaps
    > we cannot apply the fix directly to the kernel internal posix acl data
    > structure as this would alter the cached values and would also require a rework
    > of how we currently deal with POSIX ACLs in general which almost never take the
    > filesystem idmapping into account (the noteable exception being FUSE but even
    > there the implementation is special) and instead retrieve the raw values based
    > on the initial idmapping.

Could you please add a diagram/explanation illustrating the translations for
fscaps in the general case and for stacking filesystems? It doesn't really
matter too much where you put it. Either add a section to
Documentation/filesystems/porting.rst or add a section to
Documentation/filesystems/idmapping.rst.

> +	revert_creds(old_cred);
> +	return err;
> +}
> +
> +int ovl_set_fscaps(struct mnt_idmap *idmap, struct dentry *dentry,
> +		   const struct vfs_caps *caps, int setxattr_flags)
> +{
> +	int err;
> +	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
> +	struct dentry *upperdentry = ovl_dentry_upper(dentry);
> +	struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry);
> +	const struct cred *old_cred;
> +
> +	/*
> +	 * If the fscaps are to be remove from a lower file, check that they
> +	 * exist before copying up.
> +	 */
> +	if (!caps && !upperdentry) {
> +		struct path realpath;
> +		struct vfs_caps lower_caps;
> +
> +		ovl_path_lower(dentry, &realpath);
> +		old_cred = ovl_override_creds(dentry->d_sb);
> +		err = vfs_get_fscaps(mnt_idmap(realpath.mnt), realdentry,
> +				     &lower_caps);
> +		revert_creds(old_cred);
> +		if (err)
> +			goto out;
> +	}
> +
> +	err = ovl_want_write(dentry);
> +	if (err)
> +		goto out;
> +
> +	err = ovl_copy_up(dentry);
> +	if (err)
> +		goto out_drop_write;
> +	upperdentry = ovl_dentry_upper(dentry);
> +
> +	old_cred = ovl_override_creds(dentry->d_sb);
> +	if (!caps)
> +		err = vfs_remove_fscaps(ovl_upper_mnt_idmap(ofs), upperdentry);
> +	else
> +		err = vfs_set_fscaps(ovl_upper_mnt_idmap(ofs), upperdentry,
> +				     caps, setxattr_flags);
> +	revert_creds(old_cred);
> +
> +	/* copy c/mtime */
> +	ovl_copyattr(d_inode(dentry));
> +
> +out_drop_write:
> +	ovl_drop_write(dentry);
> +out:
> +	return err;
> +}
> +
>  int ovl_update_time(struct inode *inode, int flags)
>  {
>  	if (flags & S_ATIME) {
> @@ -747,6 +813,8 @@ static const struct inode_operations ovl_file_inode_operations = {
>  	.get_inode_acl	= ovl_get_inode_acl,
>  	.get_acl	= ovl_get_acl,
>  	.set_acl	= ovl_set_acl,
> +	.get_fscaps	= ovl_get_fscaps,
> +	.set_fscaps	= ovl_set_fscaps,
>  	.update_time	= ovl_update_time,
>  	.fiemap		= ovl_fiemap,
>  	.fileattr_get	= ovl_fileattr_get,
> @@ -758,6 +826,8 @@ static const struct inode_operations ovl_symlink_inode_operations = {
>  	.get_link	= ovl_get_link,
>  	.getattr	= ovl_getattr,
>  	.listxattr	= ovl_listxattr,
> +	.get_fscaps	= ovl_get_fscaps,
> +	.set_fscaps	= ovl_set_fscaps,
>  	.update_time	= ovl_update_time,
>  };
>  
> @@ -769,6 +839,8 @@ static const struct inode_operations ovl_special_inode_operations = {
>  	.get_inode_acl	= ovl_get_inode_acl,
>  	.get_acl	= ovl_get_acl,
>  	.set_acl	= ovl_set_acl,
> +	.get_fscaps	= ovl_get_fscaps,
> +	.set_fscaps	= ovl_set_fscaps,
>  	.update_time	= ovl_update_time,
>  };
>  
> diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
> index ee949f3e7c77..4f948749ee02 100644
> --- a/fs/overlayfs/overlayfs.h
> +++ b/fs/overlayfs/overlayfs.h
> @@ -781,6 +781,11 @@ static inline struct posix_acl *ovl_get_acl_path(const struct path *path,
>  }
>  #endif
>  
> +int ovl_get_fscaps(struct mnt_idmap *idmap, struct dentry *dentry,
> +		   struct vfs_caps *caps);
> +int ovl_set_fscaps(struct mnt_idmap *idmap, struct dentry *dentry,
> +		   const struct vfs_caps *caps, int setxattr_flags);
> +
>  int ovl_update_time(struct inode *inode, int flags);
>  bool ovl_is_private_xattr(struct super_block *sb, const char *name);
>  
> 
> -- 
> 2.43.0
> 



More information about the Linux-security-module-archive mailing list