[v1] security: add trace event for cap_capable

Fri Oct 25 01:15:56 UTC 2024

On Thu, Oct 24, 2024 at 4:28 PM <sergeh at kernel.org> wrote:
>
> On Thu, Oct 24, 2024 at 03:40:12AM -0700, Jordan Rome wrote:
> > In cases where we want a stable way to observe/trace
> > cap_capable (e.g. protection from inlining and API updates)
> > add a tracepoint that passes:
> > - The credentials used
> > - The user namespace which needs the capability
>
> "the user namespace which needs the capability" is not quite the
> right way to put this.  It's the user namespace against which the
> capability is needed.  It's an object, not a subject.  Or maybe
> "the user namespace of the resource being accessed".
>

I like "The user namespace of the resource being accessed".

> > - The user namespace that actually has the capability (if one exists)
>
> How about "the user namespace in which the task has the
> capability targeted at the resource"?  (It's not the user
> namespace itself that has the capability)
>

This phrasing seems a little confusing to me. How about
"The user namespace that has the capability to access the targeted resource"?

> > - The capability to check for
> > - Bitmask of options defined in include/linux/security.h
> > - The return value of the check
> >
> > Signed-off-by: Jordan Rome <linux at jordanrome.com>
> > ---
> >  MAINTAINERS                       |  1 +
> >  include/trace/events/capability.h | 58 +++++++++++++++++++++++++++++++
> >  security/commoncap.c              | 21 +++++++----
> >  3 files changed, 74 insertions(+), 6 deletions(-)
> >  create mode 100644 include/trace/events/capability.h
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index cc40a9d9b8cd..210e9076c858 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -4994,6 +4994,7 @@ M:      Serge Hallyn <serge at hallyn.com>
> >  L:   linux-security-module at vger.kernel.org
> >  S:   Supported
> >  F:   include/linux/capability.h
> > +F:   include/trace/events/capability.h
> >  F:   include/uapi/linux/capability.h
> >  F:   kernel/capability.c
> >  F:   security/commoncap.c
> > diff --git a/include/trace/events/capability.h b/include/trace/events/capability.h
> > new file mode 100644
> > index 000000000000..092b8e77063a
> > --- /dev/null
> > +++ b/include/trace/events/capability.h
> > @@ -0,0 +1,58 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#undef TRACE_SYSTEM
> > +#define TRACE_SYSTEM capability
> > +
> > +#if !defined(_TRACE_CAPABILITY_H) || defined(TRACE_HEADER_MULTI_READ)
> > +#define _TRACE_CAPABILITY_H
> > +
> > +#include <linux/cred.h>
> > +#include <linux/tracepoint.h>
> > +#include <linux/user_namespace.h>
> > +
> > +/**
> > + * capable - called after it's determined if a task has a particular
> > + * effective capability
> > + *
> > + * @cred: The credentials used
> > + * @targ_ns:  The user namespace which needs the capability
>
> (same here)
>
> > + * @capable_ns:  The user namespace that actually has the capability
> > + *               if ret is 0 otherwise this will be NULL
> > + * @cap: The capability to check for
> > + * @opts: Bitmask of options defined in include/linux/security.h
> > + * @ret: The return value of the check: 0 if it does, -ve if it does not
> > + *
> > + * Allows to trace calls to cap_capable in commoncap.c
> > + */
> > +TRACE_EVENT(capable,
> > +
> > +     TP_PROTO(const struct cred *cred, struct user_namespace *targ_ns,
> > +             struct user_namespace *capable_ns, int cap, unsigned int opts, int ret),
> > +
> > +     TP_ARGS(cred, targ_ns, capable_ns, cap, opts, ret),
> > +
> > +     TP_STRUCT__entry(
> > +             __field(const struct cred *, cred)
> > +             __field(struct user_namespace *, targ_ns)
> > +             __field(struct user_namespace *, capable_ns)
> > +             __field(int, cap)
> > +             __field(unsigned int, opts)
> > +             __field(int, ret)
> > +     ),
> > +
> > +     TP_fast_assign(
> > +             __entry->cred       = cred;
> > +             __entry->targ_ns    = targ_ns;
> > +             __entry->capable_ns = capable_ns;
> > +             __entry->cap        = cap;
> > +             __entry->opts       = opts;
> > +             __entry->ret        = ret;
> > +     ),
> > +
> > +     TP_printk("cap %d, opts %u, ret %d",
> > +             __entry->cap, __entry->opts, __entry->ret)
> > +);
> > +
> > +#endif /* _TRACE_CAPABILITY_H */
> > +
> > +/* This part must be outside protection */
> > +#include <trace/define_trace.h>
> > diff --git a/security/commoncap.c b/security/commoncap.c
> > index 162d96b3a676..675d40fbaa77 100644
> > --- a/security/commoncap.c
> > +++ b/security/commoncap.c
> > @@ -27,6 +27,9 @@
> >  #include <linux/mnt_idmapping.h>
> >  #include <uapi/linux/lsm.h>
> >
> > +#define CREATE_TRACE_POINTS
> > +#include <trace/events/capability.h>
> > +
> >  /*
> >   * If a non-root user executes a setuid-root binary in
> >   * !secure(SECURE_NOROOT) mode, then we raise capabilities.
> > @@ -68,6 +71,7 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
> >               int cap, unsigned int opts)
> >  {
> >       struct user_namespace *ns = targ_ns;
> > +     int ret = 0;
> >
> >       /* See if cred has the capability in the target user namespace
> >        * by examining the target user namespace and all of the target
> > @@ -75,22 +79,26 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
> >        */
> >       for (;;) {
> >               /* Do we have the necessary capabilities? */
> > -             if (ns == cred->user_ns)
> > -                     return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
> > +             if (ns == cred->user_ns) {
> > +                     ret = cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
> > +                     break;
> > +             }
> >
> >               /*
> >                * If we're already at a lower level than we're looking for,
> >                * we're done searching.
> >                */
> > -             if (ns->level <= cred->user_ns->level)
> > -                     return -EPERM;
> > +             if (ns->level <= cred->user_ns->level) {
> > +                     ret = -EPERM;
> > +                     break;
> > +             }
> >
> >               /*
> >                * The owner of the user namespace in the parent of the
> >                * user namespace has all caps.
> >                */
> >               if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
> > -                     return 0;
> > +                     break;
> >
> >               /*
> >                * If you have a capability in a parent user ns, then you have
> > @@ -99,7 +107,8 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
> >               ns = ns->parent;
> >       }
> >
> > -     /* We never get here */
>
> With this change, I become less comfortable with us assuming that it is
> the case that we'll never just drop off the end of the while loop.  I'd
> be more comfortable if you set ret = -EPERM at the top, and set it to 0
> in the last break.
>

Sure. I can make this change.

> > +     trace_capable(cred, targ_ns, ret == 0 ? ns : NULL, cap, opts, ret);
> > +     return ret;
> >  }
> >
> >  /**
> > --
> > 2.43.5
> >