[PATCH 2/3] gfp: mm: introduce __GFP_NOINIT

Kees Cook keescook at chromium.org
Tue Apr 23 19:11:09 UTC 2019


On Thu, Apr 18, 2019 at 8:42 AM Alexander Potapenko <glider at google.com> wrote:
>
> When passed to an allocator (either pagealloc or SL[AOU]B), __GFP_NOINIT
> tells it to not initialize the requested memory if the init_allocations
> boot option is enabled. This can be useful in the cases the newly
> allocated memory is going to be initialized by the caller right away.

Maybe add "... as seen when the slab allocator needs to allocate new
pages from the page allocator." just to help clarify it here (instead
of from the end of the commit log where you mention it offhand).

>
> __GFP_NOINIT basically defeats the hardening against information leaks
> provided by the init_allocations feature, so one should use it with
> caution.
>
> This patch also adds __GFP_NOINIT to alloc_pages() calls in SL[AOU]B.
>
> Signed-off-by: Alexander Potapenko <glider at google.com>
> Cc: Andrew Morton <akpm at linux-foundation.org>
> Cc: Masahiro Yamada <yamada.masahiro at socionext.com>
> Cc: James Morris <jmorris at namei.org>
> Cc: "Serge E. Hallyn" <serge at hallyn.com>
> Cc: Nick Desaulniers <ndesaulniers at google.com>
> Cc: Kostya Serebryany <kcc at google.com>
> Cc: Dmitry Vyukov <dvyukov at google.com>
> Cc: Kees Cook <keescook at chromium.org>
> Cc: Sandeep Patil <sspatil at android.com>
> Cc: Laura Abbott <labbott at redhat.com>
> Cc: Randy Dunlap <rdunlap at infradead.org>
> Cc: Jann Horn <jannh at google.com>
> Cc: Mark Rutland <mark.rutland at arm.com>
> Cc: Qian Cai <cai at lca.pw>
> Cc: Vlastimil Babka <vbabka at suse.cz>
> Cc: linux-mm at kvack.org
> Cc: linux-security-module at vger.kernel.org
> Cc: kernel-hardening at lists.openwall.com
> ---
>  include/linux/gfp.h | 6 +++++-
>  include/linux/mm.h  | 2 +-
>  kernel/kexec_core.c | 2 +-
>  mm/slab.c           | 2 +-
>  mm/slob.c           | 1 +
>  mm/slub.c           | 1 +
>  6 files changed, 10 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index fdab7de7490d..66d7f5604fe2 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -44,6 +44,7 @@ struct vm_area_struct;
>  #else
>  #define ___GFP_NOLOCKDEP       0
>  #endif
> +#define ___GFP_NOINIT          0x1000000u
>  /* If the above are modified, __GFP_BITS_SHIFT may need updating */

I think you want to add NOINIT below GFP_ACCOUNT, update NOLOCKDEP and
then adjust GFP_BITS_SHIFT differently, noted below.

>
>  /*
> @@ -208,16 +209,19 @@ struct vm_area_struct;
>   * %__GFP_COMP address compound page metadata.
>   *
>   * %__GFP_ZERO returns a zeroed page on success.
> + *
> + * %__GFP_NOINIT requests non-initialized memory from the underlying allocator.
>   */
>  #define __GFP_NOWARN   ((__force gfp_t)___GFP_NOWARN)
>  #define __GFP_COMP     ((__force gfp_t)___GFP_COMP)
>  #define __GFP_ZERO     ((__force gfp_t)___GFP_ZERO)
> +#define __GFP_NOINIT   ((__force gfp_t)___GFP_NOINIT)
>
>  /* Disable lockdep for GFP context tracking */
>  #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
>
>  /* Room for N __GFP_FOO bits */
> -#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
> +#define __GFP_BITS_SHIFT (25)

This should just be 24 + ...   with the bit field added above NOLOCKDEP.

>  #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
>
>  /**
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index b38b71a5efaa..8f03334a9033 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2601,7 +2601,7 @@ DECLARE_STATIC_KEY_FALSE(init_allocations);
>  static inline bool want_init_memory(gfp_t flags)
>  {
>         if (static_branch_unlikely(&init_allocations))
> -               return true;
> +               return !(flags & __GFP_NOINIT);
>         return flags & __GFP_ZERO;
>  }

You need to test for GFP_ZERO here too: return ((flags & __GFP_NOINIT
| __GFP_ZERO) == 0)

Also, I wonder, for the sake of readability, if this should be named
__GFP_NO_AUTOINIT ?

I'd also like to see each use of __GFP_NOINIT include a comment above
its use where the logic is explained for _why_ it's safe (or
reasonable) to use __GFP_NOINIT in each place.

>
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index be84f5f95c97..f9d1f1236cd0 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -302,7 +302,7 @@ static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
>  {
>         struct page *pages;
>
> -       pages = alloc_pages(gfp_mask & ~__GFP_ZERO, order);
> +       pages = alloc_pages((gfp_mask & ~__GFP_ZERO) | __GFP_NOINIT, order);
>         if (pages) {
>                 unsigned int count, i;
>
> diff --git a/mm/slab.c b/mm/slab.c
> index dcc5b73cf767..762cb0e7bcc1 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1393,7 +1393,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
>         struct page *page;
>         int nr_pages;
>
> -       flags |= cachep->allocflags;
> +       flags |= (cachep->allocflags | __GFP_NOINIT);
>
>         page = __alloc_pages_node(nodeid, flags, cachep->gfporder);
>         if (!page) {
> diff --git a/mm/slob.c b/mm/slob.c
> index 18981a71e962..867d2d68a693 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -192,6 +192,7 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
>  {
>         void *page;
>
> +       gfp |= __GFP_NOINIT;
>  #ifdef CONFIG_NUMA
>         if (node != NUMA_NO_NODE)
>                 page = __alloc_pages_node(node, gfp, order);
> diff --git a/mm/slub.c b/mm/slub.c
> index e4efb6575510..a79b4cb768a2 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1493,6 +1493,7 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,

What about kmalloc_large_node()?

>         struct page *page;
>         unsigned int order = oo_order(oo);
>
> +       flags |= __GFP_NOINIT;
>         if (node == NUMA_NO_NODE)
>                 page = alloc_pages(flags, order);
>         else

And just so I make sure I'm understanding this correctly: __GFP_NOINIT
is passed to the page allocator because we know each allocation from
the slab will get initialized at "sub allocation" time.

Looks good!

-- 
Kees Cook



More information about the Linux-security-module-archive mailing list