[PATCH v7 5/9] pidfs, coredump: add PIDFD_INFO_COREDUMP
Alexander Mikhalitsyn
alexander at mihalicyn.com
Thu May 15 14:08:13 UTC 2025
Am Do., 15. Mai 2025 um 00:04 Uhr schrieb Christian Brauner
<brauner at kernel.org>:
>
> Extend the PIDFD_GET_INFO ioctl() with the new PIDFD_INFO_COREDUMP
> mask flag. This adds the fields @coredump_mask and @coredump_cookie to
> struct pidfd_info.
>
> When a task coredumps the kernel will provide the following information
> to userspace in @coredump_mask:
>
> * PIDFD_COREDUMPED is raised if the task did actually coredump.
> * PIDFD_COREDUMP_SKIP is raised if the task skipped coredumping (e.g.,
> undumpable).
> * PIDFD_COREDUMP_USER is raised if this is a regular coredump and
> doesn't need special care by the coredump server.
> * PIDFD_COREDUMP_ROOT is raised if the generated coredump should be
> treated as sensitive and the coredump server should restrict access to
> the generated coredump to sufficiently privileged users.
>
> If userspace uses the coredump socket to process coredumps it needs to
> be able to discern connections from the kernel from connections from
> userspace (e.g., Python generating its own coredumps and forwarding
> them to systemd). The @coredump_cookie extension uses the SO_COOKIE of
> the new connection. This allows userspace to validate that the
> connection has been made from the kernel by a crashing task:
>
> fd_coredump = accept4(fd_socket, NULL, NULL, SOCK_CLOEXEC);
> getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd, &fd_peer_pidfd_len);
>
> struct pidfd_info info = {
> .mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP,
> };
>
> ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info);
> /* Refuse connections that aren't from a crashing task. */
> if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED) )
> close(fd_coredump);
>
> /*
> * Make sure that the coredump cookie matches the connection cookie.
> * If they don't it's not the coredump connection from the kernel.
> * We'll get another connection request in a bit.
> */
> getsockopt(fd_coredump, SOL_SOCKET, SO_COOKIE, &peer_cookie, &peer_cookie_len);
> if (!info.coredump_cookie || (info.coredump_cookie != peer_cookie))
> close(fd_coredump);
>
> The kernel guarantees that by the time the connection is made all
> PIDFD_INFO_COREDUMP info is available.
>
> Signed-off-by: Christian Brauner <brauner at kernel.org>
Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn at canonical.com>
> ---
> fs/coredump.c | 34 ++++++++++++++++++++
> fs/pidfs.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/pidfs.h | 10 ++++++
> include/uapi/linux/pidfd.h | 22 +++++++++++++
> net/unix/af_unix.c | 7 ++++
> 5 files changed, 152 insertions(+)
>
> diff --git a/fs/coredump.c b/fs/coredump.c
> index e1256ebb89c1..bfc4a32f737c 100644
> --- a/fs/coredump.c
> +++ b/fs/coredump.c
> @@ -46,7 +46,9 @@
> #include <linux/pidfs.h>
> #include <linux/net.h>
> #include <linux/socket.h>
> +#include <net/af_unix.h>
> #include <net/net_namespace.h>
> +#include <net/sock.h>
> #include <uapi/linux/pidfd.h>
> #include <uapi/linux/un.h>
>
> @@ -598,6 +600,8 @@ static int umh_coredump_setup(struct subprocess_info *info, struct cred *new)
> if (IS_ERR(pidfs_file))
> return PTR_ERR(pidfs_file);
>
> + pidfs_coredump(cp);
> +
> /*
> * Usermode helpers are childen of either
> * system_unbound_wq or of kthreadd. So we know that
> @@ -876,8 +880,34 @@ void do_coredump(const kernel_siginfo_t *siginfo)
> goto close_fail;
> }
>
> + /*
> + * Set the thread-group leader pid which is used for the
> + * peer credentials during connect() below. Then
> + * immediately register it in pidfs...
> + */
> + cprm.pid = task_tgid(current);
> + retval = pidfs_register_pid(cprm.pid);
> + if (retval) {
> + sock_release(socket);
> + goto close_fail;
> + }
> +
> + /*
> + * ... and set the coredump information so userspace
> + * has it available after connect()...
> + */
> + pidfs_coredump(&cprm);
> +
> + /*
> + * ... On connect() the peer credentials are recorded
> + * and @cprm.pid registered in pidfs...
> + */
> retval = kernel_connect(socket, (struct sockaddr *)(&addr),
> addr_len, O_NONBLOCK | SOCK_COREDUMP);
> +
> + /* ... So we can safely put our pidfs reference now... */
> + pidfs_put_pid(cprm.pid);
> +
> if (retval) {
> if (retval == -EAGAIN)
> coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path);
> @@ -886,6 +916,10 @@ void do_coredump(const kernel_siginfo_t *siginfo)
> goto close_fail;
> }
>
> + /* ... and validate that @sk_peer_pid matches @cprm.pid. */
> + if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid))
> + goto close_fail;
> +
> cprm.limit = RLIM_INFINITY;
> cprm.file = no_free_ptr(file);
> #else
> diff --git a/fs/pidfs.c b/fs/pidfs.c
> index 3b39e471840b..d7b9a0dd2db6 100644
> --- a/fs/pidfs.c
> +++ b/fs/pidfs.c
> @@ -20,6 +20,7 @@
> #include <linux/time_namespace.h>
> #include <linux/utsname.h>
> #include <net/net_namespace.h>
> +#include <linux/coredump.h>
>
> #include "internal.h"
> #include "mount.h"
> @@ -33,6 +34,8 @@ static struct kmem_cache *pidfs_cachep __ro_after_init;
> struct pidfs_exit_info {
> __u64 cgroupid;
> __s32 exit_code;
> + __u32 coredump_mask;
> + __u64 coredump_cookie;
> };
>
> struct pidfs_inode {
> @@ -240,6 +243,22 @@ static inline bool pid_in_current_pidns(const struct pid *pid)
> return false;
> }
>
> +static __u32 pidfs_coredump_mask(unsigned long mm_flags)
> +{
> + switch (__get_dumpable(mm_flags)) {
> + case SUID_DUMP_USER:
> + return PIDFD_COREDUMP_USER;
> + case SUID_DUMP_ROOT:
> + return PIDFD_COREDUMP_ROOT;
> + case SUID_DUMP_DISABLE:
> + return PIDFD_COREDUMP_SKIP;
> + default:
> + WARN_ON_ONCE(true);
> + }
> +
> + return 0;
> +}
> +
> static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
> {
> struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
> @@ -280,6 +299,13 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
> }
> }
>
> + if (mask & PIDFD_INFO_COREDUMP) {
> + kinfo.mask |= PIDFD_INFO_COREDUMP;
> + smp_rmb();
> + kinfo.coredump_cookie = READ_ONCE(pidfs_i(inode)->__pei.coredump_cookie);
> + kinfo.coredump_mask = READ_ONCE(pidfs_i(inode)->__pei.coredump_mask);
> + }
> +
> task = get_pid_task(pid, PIDTYPE_PID);
> if (!task) {
> /*
> @@ -296,6 +322,16 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
> if (!c)
> return -ESRCH;
>
> + if (!(kinfo.mask & PIDFD_INFO_COREDUMP)) {
> + task_lock(task);
> + if (task->mm) {
> + smp_rmb();
> + kinfo.coredump_cookie = READ_ONCE(pidfs_i(inode)->__pei.coredump_cookie);
> + kinfo.coredump_mask = pidfs_coredump_mask(task->mm->flags);
> + }
> + task_unlock(task);
> + }
> +
> /* Unconditionally return identifiers and credentials, the rest only on request */
>
> user_ns = current_user_ns();
> @@ -559,6 +595,49 @@ void pidfs_exit(struct task_struct *tsk)
> }
> }
>
> +#if defined(CONFIG_COREDUMP) && defined(CONFIG_UNIX)
> +void pidfs_coredump_cookie(struct pid *pid, u64 coredump_cookie)
> +{
> + struct pidfs_exit_info *exit_info;
> + struct dentry *dentry = pid->stashed;
> + struct inode *inode;
> +
> + if (WARN_ON_ONCE(!dentry))
> + return;
> +
> + inode = d_inode(dentry);
> + exit_info = &pidfs_i(inode)->__pei;
> + /* Can't use smp_store_release() because of 32bit. */
> + smp_wmb();
> + WRITE_ONCE(exit_info->coredump_cookie, coredump_cookie);
> +}
> +#endif
> +
> +#ifdef CONFIG_COREDUMP
> +void pidfs_coredump(const struct coredump_params *cprm)
> +{
> + struct pid *pid = cprm->pid;
> + struct pidfs_exit_info *exit_info;
> + struct dentry *dentry;
> + struct inode *inode;
> + __u32 coredump_mask = 0;
> +
> + dentry = pid->stashed;
> + if (WARN_ON_ONCE(!dentry))
> + return;
> +
> + inode = d_inode(dentry);
> + exit_info = &pidfs_i(inode)->__pei;
> + /* Note how we were coredumped. */
> + coredump_mask = pidfs_coredump_mask(cprm->mm_flags);
> + /* Note that we actually did coredump. */
> + coredump_mask |= PIDFD_COREDUMPED;
> + /* If coredumping is set to skip we should never end up here. */
> + VFS_WARN_ON_ONCE(coredump_mask & PIDFD_COREDUMP_SKIP);
> + smp_store_release(&exit_info->coredump_mask, coredump_mask);
> +}
> +#endif
> +
> static struct vfsmount *pidfs_mnt __ro_after_init;
>
> /*
> diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
> index 2676890c4d0d..497997bc5e34 100644
> --- a/include/linux/pidfs.h
> +++ b/include/linux/pidfs.h
> @@ -2,11 +2,21 @@
> #ifndef _LINUX_PID_FS_H
> #define _LINUX_PID_FS_H
>
> +struct coredump_params;
> +
> struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
> void __init pidfs_init(void);
> void pidfs_add_pid(struct pid *pid);
> void pidfs_remove_pid(struct pid *pid);
> void pidfs_exit(struct task_struct *tsk);
> +#ifdef CONFIG_COREDUMP
> +void pidfs_coredump(const struct coredump_params *cprm);
> +#endif
> +#if defined(CONFIG_COREDUMP) && defined(CONFIG_UNIX)
> +void pidfs_coredump_cookie(struct pid *pid, u64 coredump_cookie);
> +#elif defined(CONFIG_UNIX)
> +static inline void pidfs_coredump_cookie(struct pid *pid, u64 coredump_cookie) { }
> +#endif
> extern const struct dentry_operations pidfs_dentry_operations;
> int pidfs_register_pid(struct pid *pid);
> void pidfs_get_pid(struct pid *pid);
> diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> index 8c1511edd0e9..69267c5ae6d0 100644
> --- a/include/uapi/linux/pidfd.h
> +++ b/include/uapi/linux/pidfd.h
> @@ -25,9 +25,28 @@
> #define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
> #define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
> #define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */
> +#define PIDFD_INFO_COREDUMP (1UL << 4) /* Only returned if requested. */
>
> #define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
>
> +/*
> + * Values for @coredump_mask in pidfd_info.
> + * Only valid if PIDFD_INFO_COREDUMP is set in @mask.
> + *
> + * Note, the @PIDFD_COREDUMP_ROOT flag indicates that the generated
> + * coredump should be treated as sensitive and access should only be
> + * granted to privileged users.
> + *
> + * If the coredump AF_UNIX socket is used for processing coredumps
> + * @coredump_cookie will be set to the socket SO_COOKIE of the receivers
> + * client socket. This allows the coredump handler to detect whether an
> + * incoming coredump connection was initiated from the crashing task.
> + */
> +#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
> +#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */
> +#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */
> +#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */
> +
> /*
> * The concept of process and threads in userland and the kernel is a confusing
> * one - within the kernel every thread is a 'task' with its own individual PID,
> @@ -92,6 +111,9 @@ struct pidfd_info {
> __u32 fsuid;
> __u32 fsgid;
> __s32 exit_code;
> + __u32 coredump_mask;
> + __u32 __spare1;
> + __u64 coredump_cookie;
> };
>
> #define PIDFS_IOCTL_MAGIC 0xFF
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index a9d1c9ba2961..053d2e48e918 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -99,6 +99,7 @@
> #include <linux/seq_file.h>
> #include <linux/skbuff.h>
> #include <linux/slab.h>
> +#include <linux/sock_diag.h>
> #include <linux/socket.h>
> #include <linux/splice.h>
> #include <linux/string.h>
> @@ -742,6 +743,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
>
> struct unix_peercred {
> struct pid *peer_pid;
> + u64 cookie;
> const struct cred *peer_cred;
> };
>
> @@ -777,6 +779,8 @@ static void drop_peercred(struct unix_peercred *peercred)
> static inline void init_peercred(struct sock *sk,
> const struct unix_peercred *peercred)
> {
> + if (peercred->cookie)
> + pidfs_coredump_cookie(peercred->peer_pid, peercred->cookie);
> sk->sk_peer_pid = peercred->peer_pid;
> sk->sk_peer_cred = peercred->peer_cred;
> }
> @@ -1713,6 +1717,9 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
> unix_peer(newsk) = sk;
> newsk->sk_state = TCP_ESTABLISHED;
> newsk->sk_type = sk->sk_type;
> + /* Prepare a new socket cookie for the receiver. */
> + if (flags & SOCK_COREDUMP)
> + peercred.cookie = sock_gen_cookie(newsk);
> init_peercred(newsk, &peercred);
> newu = unix_sk(newsk);
> newu->listener = other;
>
> --
> 2.47.2
>
More information about the Linux-security-module-archive
mailing list