[RFC PATCH v3 3/3] devguard: added device guard for mknod in non-initial userns

Christian Brauner brauner at kernel.org
Fri Dec 15 16:36:12 UTC 2023


On Fri, Dec 15, 2023 at 03:15:33PM +0100, Christian Brauner wrote:
> On Fri, Dec 15, 2023 at 02:26:53PM +0100, Michael Weiß wrote:
> > On 15.12.23 13:31, Christian Brauner wrote:
> > > On Wed, Dec 13, 2023 at 03:38:13PM +0100, Michael Weiß wrote:
> > >> devguard is a simple LSM to allow CAP_MKNOD in non-initial user
> > >> namespace in cooperation of an attached cgroup device program. We
> > >> just need to implement the security_inode_mknod() hook for this.
> > >> In the hook, we check if the current task is guarded by a device
> > >> cgroup using the lately introduced cgroup_bpf_current_enabled()
> > >> helper. If so, we strip out SB_I_NODEV from the super block.
> > >>
> > >> Access decisions to those device nodes are then guarded by existing
> > >> device cgroups mechanism.
> > >>
> > >> Signed-off-by: Michael Weiß <michael.weiss at aisec.fraunhofer.de>
> > >> ---
> > > 
> > > I think you misunderstood me... My point was that I believe you don't
> > > need an additional LSM at all and no additional LSM hook. But I might be
> > > wrong. Only a POC would show.
> > 
> > Yeah sorry, I got your point now.
> 
> I think I might have had a misconception about how this works.
> A bpf LSM program can't easily alter a kernel object such as struct
> super_block I've been told.

Which is why you need that new hook in there. I get it now. In any case,
I think we can do this slightly nicer (for some definition of nice)...

So the thing below moves the capability check for mknod into the
security_inode_mknod() hook (This should be a separate patch.).

It moves raising SB_I_NODEV into security_sb_device_access() and the old
semantics are retained if no LSM claims device management. If an LSM
claims device management we raise the new flag and don't even raise
SB_I_NODEV. The capability check is namespace aware if device management
is claimed by an LSM. That's backward compatible. And we don't need any
sysctl.

What's missing is that all devcgroup_*() calls should be moved into a
new, unified security_device_access() hook that's called consistently in
all places where that matters such as blkdev_get_by_dev() and so on. Let
the bpf lsm implement that new hook.

Then write a sample BPF LSM as POC that this works. This would also
all other LSMs to do device management if they wanted to.

Thoughts?

>From 7f4177e4f87e0b0182022f114c0287a0f0985752 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner at kernel.org>
Date: Fri, 15 Dec 2023 17:22:26 +0100
Subject: [PATCH] UNTESTED, UNCOMPILED, PROBABLY BUGGY

Signed-off-and-definitely-neither-compiled-nor-tested-by: Christian Brauner <brauner at kernel.org>
---
 fs/namei.c                    |  5 -----
 fs/namespace.c                | 11 +++++++----
 fs/super.c                    |  6 ++++--
 include/linux/fs.h            |  1 +
 include/linux/lsm_hook_defs.h |  1 +
 include/linux/security.h      | 15 ++++++++++++---
 security/commoncap.c          | 19 +++++++++++++++++++
 security/security.c           | 22 ++++++++++++++++++++++
 8 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 71c13b2990b4..da481e6a696c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3959,16 +3959,11 @@ EXPORT_SYMBOL(user_path_create);
 int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	      struct dentry *dentry, umode_t mode, dev_t dev)
 {
-	bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
 	int error = may_create(idmap, dir, dentry);
 
 	if (error)
 		return error;
 
-	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
-	    !capable(CAP_MKNOD))
-		return -EPERM;
-
 	if (!dir->i_op->mknod)
 		return -EPERM;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index fbf0e596fcd3..e87cc0320091 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4887,7 +4887,6 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
 
 static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
 {
-	const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
 	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
 	unsigned long s_iflags;
 
@@ -4899,9 +4898,13 @@ static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags
 	if (!(s_iflags & SB_I_USERNS_VISIBLE))
 		return false;
 
-	if ((s_iflags & required_iflags) != required_iflags) {
-		WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
-			  required_iflags);
+	if (!(s_iflags & SB_I_NOEXEC)) {
+		WARN_ONCE(1, "Expected s_iflags to contain SB_I_NOEXEC\n");
+		return true;
+	}
+
+	if (!(s_iflags & (SB_I_NODEV | SB_I_MANAGED_DEVICES))) {
+		WARN_ONCE(1, "Expected s_iflags to contain device access mask\n");
 		return true;
 	}
 
diff --git a/fs/super.c b/fs/super.c
index 076392396e72..7b8098db17c9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -362,8 +362,10 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	}
 	s->s_bdi = &noop_backing_dev_info;
 	s->s_flags = flags;
-	if (s->s_user_ns != &init_user_ns)
-		s->s_iflags |= SB_I_NODEV;
+
+	if (security_sb_device_access(s))
+		goto fail;
+
 	INIT_HLIST_NODE(&s->s_instances);
 	INIT_HLIST_BL_HEAD(&s->s_roots);
 	mutex_init(&s->s_sync_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 98b7a7a8c42e..6ca0fe922478 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1164,6 +1164,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_I_USERNS_VISIBLE		0x00000010 /* fstype already mounted */
 #define SB_I_IMA_UNVERIFIABLE_SIGNATURE	0x00000020
 #define SB_I_UNTRUSTED_MOUNTER		0x00000040
+#define SB_I_MANAGED_DEVICES		0x00000080
 
 #define SB_I_SKIP_SYNC	0x00000100	/* Skip superblock at global sync */
 #define SB_I_PERSB_BDI	0x00000200	/* has a per-sb bdi */
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 3fdd00b452ac..8c8a0d8aa71d 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -60,6 +60,7 @@ LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
 LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
 	 struct fs_parameter *param)
 LSM_HOOK(int, 0, sb_alloc_security, struct super_block *sb)
+LSM_HOOK(int, 0, sb_device_access, struct super_block *sb)
 LSM_HOOK(void, LSM_RET_VOID, sb_delete, struct super_block *sb)
 LSM_HOOK(void, LSM_RET_VOID, sb_free_security, struct super_block *sb)
 LSM_HOOK(void, LSM_RET_VOID, sb_free_mnt_opts, void *mnt_opts)
diff --git a/include/linux/security.h b/include/linux/security.h
index 00809d2d5c38..a174f8c09594 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -155,6 +155,8 @@ extern int cap_capset(struct cred *new, const struct cred *old,
 extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file);
 int cap_inode_setxattr(struct dentry *dentry, const char *name,
 		       const void *value, size_t size, int flags);
+int cap_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+		    dev_t dev);
 int cap_inode_removexattr(struct mnt_idmap *idmap,
 			  struct dentry *dentry, const char *name);
 int cap_inode_need_killpriv(struct dentry *dentry);
@@ -348,6 +350,7 @@ int security_inode_symlink(struct inode *dir, struct dentry *dentry,
 int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
 int security_inode_rmdir(struct inode *dir, struct dentry *dentry);
 int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev);
+int security_sb_device_access(struct super_block *sb);
 int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 			  struct inode *new_dir, struct dentry *new_dentry,
 			  unsigned int flags);
@@ -823,10 +826,16 @@ static inline int security_inode_rmdir(struct inode *dir,
 	return 0;
 }
 
-static inline int security_inode_mknod(struct inode *dir,
-					struct dentry *dentry,
-					int mode, dev_t dev)
+static inline int security_inode_mknod(struct inode *dir, struct dentry *dentry,
+				       int mode, dev_t dev)
+{
+	return cap_inode_mknod(dir, dentry, mode, dev);
+}
+
+static inline int security_sb_device_access(struct super_block *sb)
 {
+	if (s->s_user_ns != &init_user_ns)
+		sb->s_iflags |= SB_I_NODEV;
 	return 0;
 }
 
diff --git a/security/commoncap.c b/security/commoncap.c
index 8e8c630ce204..f4a208fdf939 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1438,6 +1438,24 @@ int cap_mmap_file(struct file *file, unsigned long reqprot,
 	return 0;
 }
 
+int cap_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+		    dev_t dev)
+{
+	bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
+	struct super_block *sb = dir->i_sb;
+	struct user_namespace *userns;
+
+	if (dir->i_sb->s_iflags & SB_I_MANAGED_DEVICES)
+		userns = sb->s_user_ns;
+	else
+		userns = &init_user_ns;
+	if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
+	    !ns_capable(userns, CAP_MKNOD))
+		return -EPERM;
+
+	return 0;
+}
+
 #ifdef CONFIG_SECURITY
 
 static struct security_hook_list capability_hooks[] __ro_after_init = {
@@ -1448,6 +1466,7 @@ static struct security_hook_list capability_hooks[] __ro_after_init = {
 	LSM_HOOK_INIT(capget, cap_capget),
 	LSM_HOOK_INIT(capset, cap_capset),
 	LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
+	LSM_HOOK_INIT(inode_mknod, cap_inode_mknod),
 	LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
 	LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
 	LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
diff --git a/security/security.c b/security/security.c
index 088a79c35c26..192b992f1a34 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1221,6 +1221,28 @@ int security_sb_alloc(struct super_block *sb)
 	return rc;
 }
 
+int security_sb_device_access(struct super_block *sb)
+{
+	int rc;
+
+	rc = call_int_hook(sb_device_access, 0, sb);
+	switch (rc) {
+	case 0:
+		/*
+		 * LSM doesn't do device access management and this is an
+		 * untrusted mount so block all device access.
+		 */
+		if (sb->s_user_ns != &init_user_ns)
+			sb->s_iflags |= SB_I_NODEV;
+		return 0;
+	case 1:
+		sb->s_iflags |= SB_I_MANAGED_DEVICES;
+		return 0;
+	}
+
+	return rc;
+}
+
 /**
  * security_sb_delete() - Release super_block LSM associated objects
  * @sb: filesystem superblock
-- 
2.42.0




More information about the Linux-security-module-archive mailing list