[PATCH 1/2] Add namespace tags that can be used for matching without pinning a ns

David Howells dhowells at redhat.com
Thu Feb 4 17:47:39 UTC 2021


Add a ns tag struct that consists of just a refcount.  It's address can be
used to compare namespaces without the need to pin a namespace.  Just the
tag needs pinning.

Signed-off-by: David Howells <dhowells at redhat.com>
---

 fs/namespace.c            |   18 ++++++++----------
 include/linux/ns_common.h |   23 +++++++++++++++++++++++
 include/linux/proc_ns.h   |   38 +++++++++++++++++++++++++++++++++++---
 init/version.c            |    9 ++++++++-
 ipc/msgutil.c             |    7 ++++++-
 ipc/namespace.c           |    8 +++-----
 kernel/cgroup/cgroup.c    |    5 +++++
 kernel/cgroup/namespace.c |    6 +++---
 kernel/pid.c              |    5 +++++
 kernel/pid_namespace.c    |   18 +++++++++---------
 kernel/time/namespace.c   |   13 +++++--------
 kernel/user.c             |    5 +++++
 kernel/user_namespace.c   |    7 +++----
 kernel/utsname.c          |   24 +++++++++++++-----------
 net/core/net_namespace.c  |   38 +++++++++++++++-----------------------
 15 files changed, 146 insertions(+), 78 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 9d33909d0f9e..f8da9be8c6f7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3238,10 +3238,9 @@ static void dec_mnt_namespaces(struct ucounts *ucounts)
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
-	if (!is_anon_ns(ns))
-		ns_free_inum(&ns->ns);
 	dec_mnt_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
+	destroy_ns_common(&ns->ns);
 	kfree(ns);
 }
 
@@ -3269,18 +3268,17 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
 		dec_mnt_namespaces(ucounts);
 		return ERR_PTR(-ENOMEM);
 	}
-	if (!anon) {
-		ret = ns_alloc_inum(&new_ns->ns);
-		if (ret) {
-			kfree(new_ns);
-			dec_mnt_namespaces(ucounts);
-			return ERR_PTR(ret);
-		}
+
+	ret = init_ns_common(&new_ns->ns, anon);
+	if (ret) {
+		destroy_ns_common(&new_ns->ns);
+		kfree(new_ns);
+		dec_mnt_namespaces(ucounts);
+		return ERR_PTR(ret);
 	}
 	new_ns->ns.ops = &mntns_operations;
 	if (!anon)
 		new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
-	refcount_set(&new_ns->ns.count, 1);
 	INIT_LIST_HEAD(&new_ns->list);
 	init_waitqueue_head(&new_ns->poll);
 	spin_lock_init(&new_ns->ns_lock);
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 0f1d024bd958..45174ad8a435 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -3,14 +3,37 @@
 #define _LINUX_NS_COMMON_H
 
 #include <linux/refcount.h>
+#include <linux/slab.h>
 
 struct proc_ns_operations;
 
+/*
+ * Comparable tag for namespaces so that namespaces don't have to be pinned by
+ * something that wishes to detect if a namespace matches a criterion.
+ */
+struct ns_tag {
+	refcount_t	usage;
+};
+
 struct ns_common {
 	atomic_long_t stashed;
 	const struct proc_ns_operations *ops;
+	struct ns_tag *tag;
 	unsigned int inum;
 	refcount_t count;
 };
 
+static inline struct ns_tag *get_ns_tag(struct ns_tag *tag)
+{
+	if (tag)
+		refcount_inc(&tag->usage);
+	return tag;
+}
+
+static inline void put_ns_tag(struct ns_tag *tag)
+{
+	if (tag && refcount_dec_and_test(&tag->usage))
+		kfree(tag);
+}
+
 #endif
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 75807ecef880..9fb7eb403923 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -64,13 +64,45 @@ static inline void proc_free_inum(unsigned int inum) {}
 
 #endif /* CONFIG_PROC_FS */
 
-static inline int ns_alloc_inum(struct ns_common *ns)
+/**
+ * init_ns_common - Initialise the common part of a namespace
+ * @ns: The namespace to initialise
+ * @anon: The namespace will be anonymous
+ *
+ * Set up the common part of a namespace, assigning an inode number and
+ * creating a tag.  Returns 0 on success and a negative error code on failure.
+ * On failure, the caller must call destroy_ns_common().
+ */
+static inline int init_ns_common(struct ns_common *ns, bool anon)
 {
+	struct ns_tag *tag;
+
+	tag = kzalloc(sizeof(*tag), GFP_KERNEL);
+	if (!tag)
+		return -ENOMEM;
+
+	refcount_set(&tag->usage, 1);
+	ns->tag = tag;
+	ns->inum = 0;
 	atomic_long_set(&ns->stashed, 0);
-	return proc_alloc_inum(&ns->inum);
+	refcount_set(&ns->count, 1);
+
+	return anon ? 0 : proc_alloc_inum(&ns->inum);
 }
 
-#define ns_free_inum(ns) proc_free_inum((ns)->inum)
+/**
+ * destroy_ns_common - Clean up the common part of a namespace
+ * @ns: The namespace to clean up
+ */
+static inline void destroy_ns_common(struct ns_common *ns)
+{
+	put_ns_tag(ns->tag);
+	ns->tag = NULL;
+	if (ns->inum) {
+		proc_free_inum(ns->inum);
+		ns->inum = 0;
+	}
+}
 
 extern struct file *proc_ns_fget(int fd);
 #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
diff --git a/init/version.c b/init/version.c
index 80d2b7566b39..3c867b6c4aa4 100644
--- a/init/version.c
+++ b/init/version.c
@@ -24,8 +24,15 @@ extern int version_string(LINUX_VERSION_CODE);
 int version_string(LINUX_VERSION_CODE);
 #endif
 
+static struct ns_tag init_uts_ns_tag = {
+	.usage		= REFCOUNT_INIT(1),
+};
+
 struct uts_namespace init_uts_ns = {
-	.ns.count = REFCOUNT_INIT(2),
+	.ns = {
+		.count		= REFCOUNT_INIT(2),
+		.tag		= &init_uts_ns_tag,
+	},
 	.name = {
 		.sysname	= UTS_SYSNAME,
 		.nodename	= UTS_NODENAME,
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index d0a0e877cadd..62bf194c38c6 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -20,13 +20,18 @@
 
 DEFINE_SPINLOCK(mq_lock);
 
+static struct ns_tag init_ipc_ns_tag = {
+	.usage		= REFCOUNT_INIT(1),
+};
+
 /*
  * The next 2 defines are here bc this is the only file
  * compiled when either CONFIG_SYSVIPC and CONFIG_POSIX_MQUEUE
  * and not CONFIG_IPC_NS.
  */
 struct ipc_namespace init_ipc_ns = {
-	.ns.count = REFCOUNT_INIT(1),
+	.ns.tag	= &init_ipc_ns_tag,
+	.ns.count = REFCOUNT_INIT(2),
 	.user_ns = &init_user_ns,
 	.ns.inum = PROC_IPC_INIT_INO,
 #ifdef CONFIG_IPC_NS
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 7bd0766ddc3b..06c0829ab866 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -46,12 +46,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	if (ns == NULL)
 		goto fail_dec;
 
-	err = ns_alloc_inum(&ns->ns);
+	err = init_ns_common(&ns->ns, false);
 	if (err)
 		goto fail_free;
 	ns->ns.ops = &ipcns_operations;
-
-	refcount_set(&ns->ns.count, 1);
 	ns->user_ns = get_user_ns(user_ns);
 	ns->ucounts = ucounts;
 
@@ -67,8 +65,8 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 
 fail_put:
 	put_user_ns(ns->user_ns);
-	ns_free_inum(&ns->ns);
 fail_free:
+	destroy_ns_common(&ns->ns);
 	kfree(ns);
 fail_dec:
 	dec_ipc_namespaces(ucounts);
@@ -127,7 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
 
 	dec_ipc_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
-	ns_free_inum(&ns->ns);
+	destroy_ns_common(&ns->ns);
 	kfree(ns);
 }
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 613845769103..fb397fa2386f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -197,8 +197,13 @@ static u16 have_exit_callback __read_mostly;
 static u16 have_release_callback __read_mostly;
 static u16 have_canfork_callback __read_mostly;
 
+static struct ns_tag init_cgroup_ns_tag = {
+	.usage		= REFCOUNT_INIT(1),
+};
+
 /* cgroup namespace for init task */
 struct cgroup_namespace init_cgroup_ns = {
+	.ns.tag		= &init_cgroup_ns_tag,
 	.ns.count	= REFCOUNT_INIT(2),
 	.user_ns	= &init_user_ns,
 	.ns.ops		= &cgroupns_operations,
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index f5e8828c109c..7c8c0ccd1feb 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -27,12 +27,12 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
 	new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL);
 	if (!new_ns)
 		return ERR_PTR(-ENOMEM);
-	ret = ns_alloc_inum(&new_ns->ns);
+	ret = init_ns_common(&new_ns->ns, false);
 	if (ret) {
+		destroy_ns_common(&new_ns->ns);
 		kfree(new_ns);
 		return ERR_PTR(ret);
 	}
-	refcount_set(&new_ns->ns.count, 1);
 	new_ns->ns.ops = &cgroupns_operations;
 	return new_ns;
 }
@@ -42,7 +42,7 @@ void free_cgroup_ns(struct cgroup_namespace *ns)
 	put_css_set(ns->root_cset);
 	dec_cgroup_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
-	ns_free_inum(&ns->ns);
+	destroy_ns_common(&ns->ns);
 	kfree(ns);
 }
 EXPORT_SYMBOL(free_cgroup_ns);
diff --git a/kernel/pid.c b/kernel/pid.c
index ebdf9c60cd0b..65015c5b26db 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -66,6 +66,10 @@ int pid_max = PID_MAX_DEFAULT;
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
+static struct ns_tag init_pid_ns_tag = {
+	.usage		= REFCOUNT_INIT(1),
+};
+
 /*
  * PID-map pages start out as NULL, they get allocated upon
  * first use and are never deallocated. This way a low pid_max
@@ -73,6 +77,7 @@ int pid_max_max = PID_MAX_LIMIT;
  * the scheme scales to up to 4 million PIDs, runtime.
  */
 struct pid_namespace init_pid_ns = {
+	.ns.tag = &init_pid_ns_tag,
 	.ns.count = REFCOUNT_INIT(2),
 	.idr = IDR_INIT(init_pid_ns.idr),
 	.pid_allocated = PIDNS_ADDING,
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index ca43239a255a..a562071e52e1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -93,16 +93,15 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 
 	idr_init(&ns->idr);
 
-	ns->pid_cachep = create_pid_cachep(level);
-	if (ns->pid_cachep == NULL)
-		goto out_free_idr;
-
-	err = ns_alloc_inum(&ns->ns);
+	err = init_ns_common(&ns->ns, false);
 	if (err)
-		goto out_free_idr;
+		goto out_free;
 	ns->ns.ops = &pidns_operations;
 
-	refcount_set(&ns->ns.count, 1);
+	ns->pid_cachep = create_pid_cachep(level);
+	if (ns->pid_cachep == NULL)
+		goto out_free;
+
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
 	ns->user_ns = get_user_ns(user_ns);
@@ -111,8 +110,9 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 
 	return ns;
 
-out_free_idr:
+out_free:
 	idr_destroy(&ns->idr);
+	destroy_ns_common(&ns->ns);
 	kmem_cache_free(pid_ns_cachep, ns);
 out_dec:
 	dec_pid_namespaces(ucounts);
@@ -132,7 +132,7 @@ static void delayed_free_pidns(struct rcu_head *p)
 
 static void destroy_pid_namespace(struct pid_namespace *ns)
 {
-	ns_free_inum(&ns->ns);
+	destroy_ns_common(&ns->ns);
 
 	idr_destroy(&ns->idr);
 	call_rcu(&ns->rcu, delayed_free_pidns);
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 6ca625f5e554..5c5847048900 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -92,16 +92,14 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
 	if (!ns)
 		goto fail_dec;
 
-	refcount_set(&ns->ns.count, 1);
+	err = init_ns_common(&ns->ns, false);
+	if (err)
+		goto fail_free;
 
 	ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!ns->vvar_page)
 		goto fail_free;
 
-	err = ns_alloc_inum(&ns->ns);
-	if (err)
-		goto fail_free_page;
-
 	ns->ucounts = ucounts;
 	ns->ns.ops = &timens_operations;
 	ns->user_ns = get_user_ns(user_ns);
@@ -109,9 +107,8 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
 	ns->frozen_offsets = false;
 	return ns;
 
-fail_free_page:
-	__free_page(ns->vvar_page);
 fail_free:
+	destroy_ns_common(&ns->ns);
 	kfree(ns);
 fail_dec:
 	dec_time_namespaces(ucounts);
@@ -230,7 +227,7 @@ void free_time_ns(struct time_namespace *ns)
 {
 	dec_time_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
-	ns_free_inum(&ns->ns);
+	destroy_ns_common(&ns->ns);
 	__free_page(ns->vvar_page);
 	kfree(ns);
 }
diff --git a/kernel/user.c b/kernel/user.c
index a2478cddf536..78ee75f4cd21 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -20,6 +20,10 @@
 #include <linux/user_namespace.h>
 #include <linux/proc_ns.h>
 
+static struct ns_tag init_user_ns_tag = {
+	.usage	= REFCOUNT_INIT(1),
+};
+
 /*
  * userns count is 1 for root user, 1 for init_uts_ns,
  * and 1 for... ?
@@ -55,6 +59,7 @@ struct user_namespace init_user_ns = {
 			},
 		},
 	},
+	.ns.tag	= &init_user_ns_tag,
 	.ns.count = REFCOUNT_INIT(3),
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index af612945a4d0..f60cf7b5973c 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -106,12 +106,11 @@ int create_user_ns(struct cred *new)
 	if (!ns)
 		goto fail_dec;
 
-	ret = ns_alloc_inum(&ns->ns);
+	ret = init_ns_common(&ns->ns, false);
 	if (ret)
 		goto fail_free;
 	ns->ns.ops = &userns_operations;
 
-	refcount_set(&ns->ns.count, 1);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
 	ns->level = parent_ns->level + 1;
@@ -142,8 +141,8 @@ int create_user_ns(struct cred *new)
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 	key_put(ns->persistent_keyring_register);
 #endif
-	ns_free_inum(&ns->ns);
 fail_free:
+	destroy_ns_common(&ns->ns);
 	kmem_cache_free(user_ns_cachep, ns);
 fail_dec:
 	dec_user_namespaces(ucounts);
@@ -193,7 +192,7 @@ static void free_user_ns(struct work_struct *work)
 		}
 		retire_userns_sysctls(ns);
 		key_free_user_ns(ns);
-		ns_free_inum(&ns->ns);
+		destroy_ns_common(&ns->ns);
 		kmem_cache_free(user_ns_cachep, ns);
 		dec_user_namespaces(ucounts);
 		ns = parent;
diff --git a/kernel/utsname.c b/kernel/utsname.c
index b1ac3ca870f2..4755f007199f 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -30,10 +30,17 @@ static void dec_uts_namespaces(struct ucounts *ucounts)
 static struct uts_namespace *create_uts_ns(void)
 {
 	struct uts_namespace *uts_ns;
+	int err;
 
 	uts_ns = kmem_cache_alloc(uts_ns_cache, GFP_KERNEL);
-	if (uts_ns)
-		refcount_set(&uts_ns->ns.count, 1);
+	if (uts_ns) {
+		err = init_ns_common(&uts_ns->ns, false);
+		if (err < 0) {
+			destroy_ns_common(&uts_ns->ns);
+			kmem_cache_free(uts_ns_cache, uts_ns);
+			return ERR_PTR(err);
+		}
+	}
 	return uts_ns;
 }
 
@@ -54,14 +61,11 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	if (!ucounts)
 		goto fail;
 
-	err = -ENOMEM;
 	ns = create_uts_ns();
-	if (!ns)
+	if (IS_ERR(ns)) {
+		err = PTR_ERR(ns);
 		goto fail_dec;
-
-	err = ns_alloc_inum(&ns->ns);
-	if (err)
-		goto fail_free;
+	}
 
 	ns->ucounts = ucounts;
 	ns->ns.ops = &utsns_operations;
@@ -72,8 +76,6 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 	up_read(&uts_sem);
 	return ns;
 
-fail_free:
-	kmem_cache_free(uts_ns_cache, ns);
 fail_dec:
 	dec_uts_namespaces(ucounts);
 fail:
@@ -107,7 +109,7 @@ void free_uts_ns(struct uts_namespace *ns)
 {
 	dec_uts_namespaces(ns->ucounts);
 	put_user_ns(ns->user_ns);
-	ns_free_inum(&ns->ns);
+	destroy_ns_common(&ns->ns);
 	kmem_cache_free(uts_ns_cache, ns);
 }
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2ef3b4557f40..f53f7ddec553 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -44,8 +44,14 @@ EXPORT_SYMBOL_GPL(net_rwsem);
 static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
 #endif
 
+static struct ns_tag init_net_tag = {
+	.usage		= REFCOUNT_INIT(1),
+};
+
 struct net init_net = {
-	.ns.count	= REFCOUNT_INIT(1),
+	.ns.tag		= &init_net_tag,
+	.ns.count	= REFCOUNT_INIT(2),
+	.ns.ops		= &netns_operations,
 	.dev_base_head	= LIST_HEAD_INIT(init_net.dev_base_head),
 #ifdef CONFIG_KEYS
 	.key_domain	= &init_net_key_domain,
@@ -329,7 +335,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 	int error = 0;
 	LIST_HEAD(net_exit_list);
 
-	refcount_set(&net->ns.count, 1);
 	refcount_set(&net->passive, 1);
 	get_random_bytes(&net->hash_mix, sizeof(u32));
 	net->dev_base_seq = 1;
@@ -419,6 +424,10 @@ static struct net *net_alloc(void)
 	if (!net)
 		goto out_free;
 
+	if (init_ns_common(&net->ns, false) < 0)
+		goto out_free_2;
+	net->ns.ops = &netns_operations;
+
 #ifdef CONFIG_KEYS
 	net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
 	if (!net->key_domain)
@@ -432,6 +441,7 @@ static struct net *net_alloc(void)
 
 #ifdef CONFIG_KEYS
 out_free_2:
+	destroy_ns_common(&net->ns);
 	kmem_cache_free(net_cachep, net);
 	net = NULL;
 #endif
@@ -443,6 +453,7 @@ static struct net *net_alloc(void)
 static void net_free(struct net *net)
 {
 	kfree(rcu_access_pointer(net->gen));
+	destroy_ns_common(&net->ns);
 	kmem_cache_free(net_cachep, net);
 }
 
@@ -700,24 +711,6 @@ struct net *get_net_ns_by_pid(pid_t pid)
 }
 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 
-static __net_init int net_ns_net_init(struct net *net)
-{
-#ifdef CONFIG_NET_NS
-	net->ns.ops = &netns_operations;
-#endif
-	return ns_alloc_inum(&net->ns);
-}
-
-static __net_exit void net_ns_net_exit(struct net *net)
-{
-	ns_free_inum(&net->ns);
-}
-
-static struct pernet_operations __net_initdata net_ns_ops = {
-	.init = net_ns_net_init,
-	.exit = net_ns_net_exit,
-};
-
 static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
 	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
 	[NETNSA_NSID]		= { .type = NLA_S32 },
@@ -1097,6 +1090,8 @@ static int __init net_ns_init(void)
 		panic("Could not create netns workq");
 #endif
 
+	proc_alloc_inum(&init_net.ns.inum);
+
 	ng = net_alloc_generic();
 	if (!ng)
 		panic("Could not allocate generic netns");
@@ -1114,9 +1109,6 @@ static int __init net_ns_init(void)
 	init_net_initialized = true;
 	up_write(&pernet_ops_rwsem);
 
-	if (register_pernet_subsys(&net_ns_ops))
-		panic("Could not register network namespace subsystems");
-
 	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,




More information about the Linux-security-module-archive mailing list