[RFC PATCH] getting misc stats/attributes via xattr API

Miklos Szeredi miklos at szeredi.hu
Tue May 3 12:23:23 UTC 2022


This is a simplification of the getvalues(2) prototype and moving it to the
getxattr(2) interface, as suggested by Dave.

The patch itself just adds the possibility to retrieve a single line of
/proc/$$/mountinfo (which was the basic requirement from which the fsinfo
patchset grew out of).

But this should be able to serve Amir's per-sb iostats, as well as a host of
other cases where some statistic needs to be retrieved from some object.  Note:
a filesystem object often represents other kinds of objects (such as processes
in /proc) so this is not limited to fs attributes.

This also opens up the interface to setting attributes via setxattr(2).

After some pondering I made the namespace so:

: - root
bar - an attribute
foo: - a folder (can contain attributes and/or folders)

The contents of a folder is represented by a null separated list of names.

Examples:

$ getfattr -etext -n ":" .
# file: .
:="mnt:\000mntns:"

$ getfattr -etext -n ":mnt:" .
# file: .
:mnt:="info"

$ getfattr -etext -n ":mnt:info" .
# file: .
:mnt:info="21 1 254:0 / / rw,relatime - ext4 /dev/root rw\012"

$ getfattr -etext -n ":mntns:" .
# file: .
:mntns:="21:\00022:\00024:\00025:\00023:\00026:\00027:\00028:\00029:\00030:\00031:"

$ getfattr -etext -n ":mntns:28:" .
# file: .
:mntns:28:="info"

Comments?

Thanks,
Miklos

---
 fs/Makefile            |    2 
 fs/mount.h             |    8 +
 fs/namespace.c         |   15 ++-
 fs/pnode.h             |    2 
 fs/proc_namespace.c    |   15 ++-
 fs/values.c            |  242 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xattr.c             |   16 ++-
 include/linux/values.h |   11 ++
 8 files changed, 295 insertions(+), 16 deletions(-)

--- a/fs/Makefile
+++ b/fs/Makefile
@@ -16,7 +16,7 @@ obj-y :=	open.o read_write.o file_table.
 		pnode.o splice.o sync.o utimes.o d_path.o \
 		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
 		fs_types.o fs_context.o fs_parser.o fsopen.o init.o \
-		kernel_read_file.o remap_range.o
+		kernel_read_file.o remap_range.o values.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o direct-io.o mpage.o
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -148,3 +148,11 @@ static inline bool is_anon_ns(struct mnt
 }
 
 extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+
+struct mount *mnt_list_next(struct mnt_namespace *ns, struct list_head *p);
+extern void namespace_lock_read(void);
+extern void namespace_unlock_read(void);
+extern int show_mountinfo_root(struct seq_file *m, struct vfsmount *mnt,
+			       struct path *root);
+extern bool is_path_reachable(struct mount *, struct dentry *,
+			      const struct path *root);
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1332,9 +1332,7 @@ struct vfsmount *mnt_clone_internal(cons
 	return &p->mnt;
 }
 
-#ifdef CONFIG_PROC_FS
-static struct mount *mnt_list_next(struct mnt_namespace *ns,
-				   struct list_head *p)
+struct mount *mnt_list_next(struct mnt_namespace *ns, struct list_head *p)
 {
 	struct mount *mnt, *ret = NULL;
 
@@ -1351,6 +1349,7 @@ static struct mount *mnt_list_next(struc
 	return ret;
 }
 
+#ifdef CONFIG_PROC_FS
 /* iterator; we want it to have access to namespace_sem, thus here... */
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
@@ -1507,6 +1506,16 @@ static inline void namespace_lock(void)
 	down_write(&namespace_sem);
 }
 
+void namespace_lock_read(void)
+{
+	down_read(&namespace_sem);
+}
+
+void namespace_unlock_read(void)
+{
+	up_read(&namespace_sem);
+}
+
 enum umount_tree_flags {
 	UMOUNT_SYNC = 1,
 	UMOUNT_PROPAGATE = 2,
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -50,7 +50,5 @@ void mnt_set_mountpoint(struct mount *,
 void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
 			   struct mount *mnt);
 struct mount *copy_tree(struct mount *, struct dentry *, int);
-bool is_path_reachable(struct mount *, struct dentry *,
-			 const struct path *root);
 int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
 #endif /* _LINUX_PNODE_H */
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -132,9 +132,9 @@ static int show_vfsmnt(struct seq_file *
 	return err;
 }
 
-static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
+int show_mountinfo_root(struct seq_file *m, struct vfsmount *mnt,
+			struct path *root)
 {
-	struct proc_mounts *p = m->private;
 	struct mount *r = real_mount(mnt);
 	struct super_block *sb = mnt->mnt_sb;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -152,7 +152,7 @@ static int show_mountinfo(struct seq_fil
 	seq_putc(m, ' ');
 
 	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
-	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+	err = seq_path_root(m, &mnt_path, root, " \t\n\\");
 	if (err)
 		goto out;
 
@@ -164,7 +164,7 @@ static int show_mountinfo(struct seq_fil
 		seq_printf(m, " shared:%i", r->mnt_group_id);
 	if (IS_MNT_SLAVE(r)) {
 		int master = r->mnt_master->mnt_group_id;
-		int dom = get_dominating_id(r, &p->root);
+		int dom = get_dominating_id(r, root);
 		seq_printf(m, " master:%i", master);
 		if (dom && dom != master)
 			seq_printf(m, " propagate_from:%i", dom);
@@ -194,6 +194,13 @@ static int show_mountinfo(struct seq_fil
 	return err;
 }
 
+static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
+{
+	struct proc_mounts *p = m->private;
+
+	return show_mountinfo_root(m, mnt, &p->root);
+}
+
 static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 {
 	struct proc_mounts *p = m->private;
--- /dev/null
+++ b/fs/values.c
@@ -0,0 +1,242 @@
+#include <linux/values.h>
+#include <linux/fs_struct.h>
+#include <linux/seq_file.h>
+#include <linux/nsproxy.h>
+#include "../lib/kstrtox.h"
+#include "mount.h"
+
+struct val_string {
+	const char *str;
+	size_t len;
+};
+
+struct val_iter {
+	struct val_string name;
+	struct seq_file seq;
+	int error;
+};
+
+struct val_desc {
+	struct val_string name;
+	union {
+		u64 idx;
+		int (*get)(struct val_iter *vi, const struct path *path);
+	};
+};
+
+#define VAL_STRING(x) { .str = x, .len = sizeof(x) - 1 }
+#define VD_NAME(x) .name = VAL_STRING(x)
+
+static int val_err(struct val_iter *vi, int err)
+{
+	vi->error = err;
+	return 0;
+}
+
+static int val_end_seq(struct val_iter *vi)
+{
+	if (vi->seq.count == vi->seq.size)
+		return -EOVERFLOW;
+
+	return 0;
+}
+
+static inline void val_string_skip(struct val_string *s, size_t count)
+{
+	WARN_ON(s->len < count);
+	s->str += count;
+	s->len -= count;
+}
+
+static bool val_string_prefix(const struct val_string *p,
+			      const struct val_string *s)
+{
+	return s->len >= p->len && !memcmp(s->str, p->str, p->len);
+}
+
+static struct val_desc *val_lookup(struct val_iter *vi, struct val_desc *vd)
+{
+	for (; vd->name.len; vd++) {
+		if (val_string_prefix(&vd->name, &vi->name)) {
+			val_string_skip(&vi->name, vd->name.len);
+			break;
+		}
+	}
+	return vd;
+}
+
+static int val_get_group(struct val_iter *vi, struct val_desc *vd)
+{
+	for (; vd->name.len; vd++)
+		seq_write(&vi->seq, vd->name.str, vd->name.len + 1);
+
+	return val_end_seq(vi);
+}
+
+enum {
+	VAL_MNT_INFO,
+};
+
+static struct val_desc val_mnt_group[] = {
+	{ VD_NAME("info"),		.idx = VAL_MNT_INFO		},
+	{ }
+};
+
+static int val_mnt_show(struct val_iter *vi, struct vfsmount *mnt)
+{
+	struct val_desc *vd = val_lookup(vi, val_mnt_group);
+	struct path root;
+
+	if (!vd->name.str)
+		return val_err(vi, -ENOENT);
+
+	switch(vd->idx) {
+	case VAL_MNT_INFO:
+		get_fs_root(current->fs, &root);
+		show_mountinfo_root(&vi->seq, mnt, &root);
+		path_put(&root);
+		break;
+	}
+
+	return 0;
+}
+
+static int val_mnt_get(struct val_iter *vi, const struct path *path)
+{
+	int err;
+
+	if (!vi->name.len)
+		return val_get_group(vi, val_mnt_group);
+
+	namespace_lock_read();
+	err = val_mnt_show(vi, path->mnt);
+	namespace_unlock_read();
+
+	return err;
+}
+
+/* called with namespace_sem held for read */
+static struct vfsmount *mnt_lookup_by_id(struct mnt_namespace *ns,
+					 struct path *root, int id)
+{
+	struct mount *m;
+
+	for (m = mnt_list_next(ns, &ns->list); m; m = mnt_list_next(ns, &m->mnt_list)) {
+		if (m->mnt_id == id) {
+			if (is_path_reachable(m, m->mnt.mnt_root, root))
+				return mntget(&m->mnt);
+			else
+				return NULL;
+		}
+	}
+	return NULL;
+}
+
+static void seq_mnt_list(struct seq_file *seq, struct mnt_namespace *ns,
+			 struct path *root)
+{
+	struct mount *m;
+
+	namespace_lock_read();
+	for (m = mnt_list_next(ns, &ns->list); m; m = mnt_list_next(ns, &m->mnt_list)) {
+		if (is_path_reachable(m, m->mnt.mnt_root, root)) {
+			seq_printf(seq, "%i:", m->mnt_id);
+			seq_putc(seq, '\0');
+		}
+	}
+	namespace_unlock_read();
+}
+
+static int val_mntns_get(struct val_iter *vi, const struct path *path)
+{
+	struct mnt_namespace *mnt_ns = current->nsproxy->mnt_ns;
+	struct vfsmount *mnt;
+	struct path root;
+	unsigned long long mnt_id;
+	unsigned int end;
+	int err;
+
+	if (!vi->name.len) {
+		get_fs_root(current->fs, &root);
+		seq_mnt_list(&vi->seq, mnt_ns, &root);
+		path_put(&root);
+		return val_end_seq(vi);
+	}
+
+	end = _parse_integer(vi->name.str, 10, &mnt_id);
+	if (end & KSTRTOX_OVERFLOW)
+		return val_err(vi, -ENOENT);
+	if (vi->name.str[end] != VAL_SEP)
+		return val_err(vi, -ENOENT);
+	val_string_skip(&vi->name, end + 1);
+
+	namespace_lock_read();
+	get_fs_root(current->fs, &root);
+	mnt = mnt_lookup_by_id(mnt_ns, &root, mnt_id);
+	path_put(&root);
+	if (!mnt) {
+		namespace_unlock_read();
+		return val_err(vi, -ENOENT);
+	}
+	if (vi->name.len)
+		err = val_mnt_show(vi, mnt);
+	else
+		err = val_get_group(vi, val_mnt_group);
+
+	namespace_unlock_read();
+	mntput(mnt);
+
+	return err;
+}
+
+
+
+static struct val_desc val_toplevel_group[] = {
+	{ VD_NAME("mnt:"),	.get = val_mnt_get,	},
+	{ VD_NAME("mntns:"),	.get = val_mntns_get,	},
+	{ },
+};
+
+static int getvalues(struct val_iter *vi, const struct path *path)
+{
+	struct val_desc *vd;
+	int err;
+
+	if (!vi->name.len)
+		return val_get_group(vi, val_toplevel_group);
+
+	vd = val_lookup(vi, val_toplevel_group);
+	if (!vd->name.len)
+		err = val_err(vi, -ENOENT);
+	else
+		err = vd->get(vi, path);
+
+	return err ?: vi->error;
+}
+
+ssize_t val_getxattr(struct path *path, const char *name, size_t namelen,
+		     void __user *value, size_t size)
+{
+	int err;
+	char val[1024];
+	struct val_iter vi = {
+		.name = { .str = name, .len = namelen },
+		.seq = { .buf = val, .size = min(sizeof(val), size) },
+	};
+
+	if (!size)
+		return sizeof(val);
+
+	val_string_skip(&vi.name, 1);
+
+	err = getvalues(&vi, path);
+	if (err < 0)
+		return err;
+
+	WARN_ON(vi.seq.count > size);
+	if (copy_to_user(value, vi.seq.buf, vi.seq.count))
+		return -EFAULT;
+
+	return vi.seq.count;
+}
+
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -22,6 +22,7 @@
 #include <linux/audit.h>
 #include <linux/vmalloc.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/values.h>
 
 #include <linux/uaccess.h>
 
@@ -643,12 +644,13 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, cons
  * Extended attribute GET operations
  */
 static ssize_t
-getxattr(struct user_namespace *mnt_userns, struct dentry *d,
-	 const char __user *name, void __user *value, size_t size)
+getxattr(struct path *path, const char __user *name,
+	 void __user *value, size_t size)
 {
 	ssize_t error;
 	void *kvalue = NULL;
 	char kname[XATTR_NAME_MAX + 1];
+	struct user_namespace *mnt_userns = mnt_user_ns(path->mnt);
 
 	error = strncpy_from_user(kname, name, sizeof(kname));
 	if (error == 0 || error == sizeof(kname))
@@ -656,6 +658,9 @@ getxattr(struct user_namespace *mnt_user
 	if (error < 0)
 		return error;
 
+	if (kname[0] == VAL_SEP)
+		return val_getxattr(path, kname, error, value, size);
+
 	if (size) {
 		if (size > XATTR_SIZE_MAX)
 			size = XATTR_SIZE_MAX;
@@ -664,7 +669,7 @@ getxattr(struct user_namespace *mnt_user
 			return -ENOMEM;
 	}
 
-	error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
+	error = vfs_getxattr(mnt_userns, path->dentry, kname, kvalue, size);
 	if (error > 0) {
 		if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
 		    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
@@ -693,7 +698,7 @@ static ssize_t path_getxattr(const char
 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
 	if (error)
 		return error;
-	error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
+	error = getxattr(&path, name, value, size);
 	path_put(&path);
 	if (retry_estale(error, lookup_flags)) {
 		lookup_flags |= LOOKUP_REVAL;
@@ -723,8 +728,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, cons
 	if (!f.file)
 		return error;
 	audit_file(f.file);
-	error = getxattr(file_mnt_user_ns(f.file), f.file->f_path.dentry,
-			 name, value, size);
+	error = getxattr(&f.file->f_path, name, value, size);
 	fdput(f);
 	return error;
 }
--- /dev/null
+++ b/include/linux/values.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/types.h>
+
+#define VAL_SEP ':'
+
+struct path;
+
+ssize_t val_getxattr(struct path *path, const char *name, size_t namelen,
+		     void __user *value, size_t size);
+



More information about the Linux-security-module-archive mailing list