[PATCH] vfs: allow unprivileged whiteout creation

Fri May 1 07:31:27 UTC 2020

On Fri, May 01, 2020 at 05:14:44AM +0100, Al Viro wrote:
> On Thu, Apr 09, 2020 at 11:28:59PM +0200, Miklos Szeredi wrote:
> > From: Miklos Szeredi <mszeredi at redhat.com>
> > 
> > Whiteouts, unlike real device node should not require privileges to create.
> > 
> > The general concern with device nodes is that opening them can have side
> > effects.  The kernel already avoids zero major (see
> > Documentation/admin-guide/devices.txt).  To be on the safe side the patch
> > explicitly forbids registering a char device with 0/0 number (see
> > cdev_add()).
> > 
> > This guarantees that a non-O_PATH open on a whiteout will fail with ENODEV;
> > i.e. it won't have any side effect.
> 
> >  int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
> >  {
> > +	bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
> >  	int error = may_create(dir, dentry);
> >  
> >  	if (error)
> >  		return error;
> >  
> > -	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
> > +	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD) &&
> > +	    !is_whiteout)
> >  		return -EPERM;
> 
> Hmm...  That exposes vfs_whiteout() to LSM; are you sure that you won't
> end up with regressions for overlayfs on sufficiently weird setups?

You're right.  OTOH, what can we do?  We can't fix the weird setups, only the
distros/admins can.

Can we just try this, and revert to calling ->mknod directly from overlayfs if
it turns out to be a problem that people can't fix easily?

I guess we could add a new ->whiteout security hook as well, but I'm not sure
it's worth it.  Cc: LMS mailing list; patch re-added for context.

Thanks,
Miklos

---
 fs/char_dev.c                 |    3 +++
 fs/namei.c                    |   17 ++++-------------
 include/linux/device_cgroup.h |    3 +++
 3 files changed, 10 insertions(+), 13 deletions(-)

--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -483,6 +483,9 @@ int cdev_add(struct cdev *p, dev_t dev,
 	p->dev = dev;
 	p->count = count;
 
+	if (WARN_ON(dev == WHITEOUT_DEV))
+		return -EBUSY;
+
 	error = kobj_map(cdev_map, dev, count, NULL,
 			 exact_match, exact_lock, p);
 	if (error)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3505,12 +3505,14 @@ EXPORT_SYMBOL(user_path_create);
 
 int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
 {
+	bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
 	int error = may_create(dir, dentry);
 
 	if (error)
 		return error;
 
-	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
+	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD) &&
+	    !is_whiteout)
 		return -EPERM;
 
 	if (!dir->i_op->mknod)
@@ -4345,9 +4347,6 @@ static int do_renameat2(int olddfd, cons
 	    (flags & RENAME_EXCHANGE))
 		return -EINVAL;
 
-	if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
-		return -EPERM;
-
 	if (flags & RENAME_EXCHANGE)
 		target_flags = 0;
 
@@ -4485,15 +4484,7 @@ SYSCALL_DEFINE2(rename, const char __use
 
 int vfs_whiteout(struct inode *dir, struct dentry *dentry)
 {
-	int error = may_create(dir, dentry);
-	if (error)
-		return error;
-
-	if (!dir->i_op->mknod)
-		return -EPERM;
-
-	return dir->i_op->mknod(dir, dentry,
-				S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+	return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
 }
 EXPORT_SYMBOL(vfs_whiteout);
 
--- a/include/linux/device_cgroup.h
+++ b/include/linux/device_cgroup.h
@@ -51,6 +51,9 @@ static inline int devcgroup_inode_mknod(
 	if (!S_ISBLK(mode) && !S_ISCHR(mode))
 		return 0;
 
+	if (S_ISCHR(mode) && dev == WHITEOUT_DEV)
+		return 0;
+
 	if (S_ISBLK(mode))
 		type = DEVCG_DEV_BLOCK;
 	else