[PATCH 1/6] Generic radix trees

Liu Bo obuil.liubo at gmail.com
Sat May 26 03:16:42 UTC 2018


Hi Kent,

(Adding all the mailing lists to Cc this time.)

On Wed, May 23, 2018 at 9:18 AM, Kent Overstreet
<kent.overstreet at gmail.com> wrote:
> Very simple radix tree implementation that supports storing arbitrary
> size entries, up to PAGE_SIZE - upcoming patches will convert existing
> flex_array users to genradixes. The new genradix code has a much simpler
> API and implementation, and doesn't have a hard limit on the number of
> elements like flex_array does.
>
> Signed-off-by: Kent Overstreet <kent.overstreet at gmail.com>
> ---
>  include/linux/generic-radix-tree.h | 222 +++++++++++++++++++++++++++++
>  lib/Makefile                       |   3 +-
>  lib/generic-radix-tree.c           | 180 +++++++++++++++++++++++
>  3 files changed, 404 insertions(+), 1 deletion(-)
>  create mode 100644 include/linux/generic-radix-tree.h
>  create mode 100644 lib/generic-radix-tree.c
>
> diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
> new file mode 100644
> index 0000000000..3328813322
> --- /dev/null
> +++ b/include/linux/generic-radix-tree.h
> @@ -0,0 +1,222 @@
> +#ifndef _LINUX_GENERIC_RADIX_TREE_H
> +#define _LINUX_GENERIC_RADIX_TREE_H
> +
> +/*
> + * Generic radix trees/sparse arrays:
> + *
> + * Very simple and minimalistic, supporting arbitrary size entries up to
> + * PAGE_SIZE.
> + *
> + * A genradix is defined with the type it will store, like so:
> + * static GENRADIX(struct foo) foo_genradix;
> + *
> + * The main operations are:
> + * - genradix_init(radix) - initialize an empty genradix
> + *
> + * - genradix_free(radix) - free all memory owned by the genradix and
> + *   reinitialize it
> + *
> + * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning
> + *   NULL if that entry does not exist
> + *
> + * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry,
> + *   allocating it if necessary
> + *
> + * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix
> + *
> + * The radix tree allocates one page of entries at a time, so entries may exist
> + * that were never explicitly allocated - they will be initialized to all
> + * zeroes.
> + *
> + * Internally, a genradix is just a radix tree of pages, and indexing works in
> + * terms of byte offsets. The wrappers in this header file use sizeof on the
> + * type the radix contains to calculate a byte offset from the index - see
> + * __idx_to_offset.
> + */
> +
> +#include <asm/page.h>
> +#include <linux/bug.h>
> +#include <linux/kernel.h>
> +#include <linux/log2.h>
> +
> +struct genradix_node;
> +
> +struct __genradix {
> +       struct genradix_node            *root;
> +       size_t                          depth;
> +};
> +
> +#define __GENRADIX_INITIALIZER                                 \
> +       {                                                       \
> +               .tree = {                                       \
> +                       .root = NULL,                           \
> +                       .depth = 0,                             \
> +               }                                               \
> +       }
> +
> +/*
> + * We use a 0 size array to stash the type we're storing without taking any
> + * space at runtime - then the various accessor macros can use typeof() to get
> + * to it for casts/sizeof - we also force the alignment so that storing a type
> + * with a ridiculous alignment doesn't blow up the alignment or size of the
> + * genradix.
> + */
> +
> +#define GENRADIX(_type)                                                \
> +struct {                                                       \
> +       struct __genradix       tree;                           \
> +       _type                   type[0] __aligned(1);           \
> +}
> +
> +#define DEFINE_GENRADIX(_name, _type)                          \
> +       GENRADIX(_type) _name = __GENRADIX_INITIALIZER
> +
> +/**
> + * genradix_init - initialize a genradix
> + * @_radix:    genradix to initialize
> + *
> + * Does not fail
> + */
> +#define genradix_init(_radix)                                  \
> +do {                                                           \
> +       *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER;   \
> +} while (0)
> +
> +void __genradix_free(struct __genradix *);
> +
> +/**
> + * genradix_free: free all memory owned by a genradix
> + *
> + * After freeing, @_radix will be reinitialized and empty
> + */
> +#define genradix_free(_radix)  __genradix_free(&(_radix)->tree)
> +
> +static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
> +{
> +       if (__builtin_constant_p(obj_size))
> +               BUILD_BUG_ON(obj_size > PAGE_SIZE);
> +       else
> +               BUG_ON(obj_size > PAGE_SIZE);
> +
> +       if (!is_power_of_2(obj_size)) {
> +               size_t objs_per_page = PAGE_SIZE / obj_size;
> +
> +               return (idx / objs_per_page) * PAGE_SIZE +
> +                       (idx % objs_per_page) * obj_size;
> +       } else {
> +               return idx * obj_size;
> +       }
> +}
> +
> +#define __genradix_cast(_radix)                (typeof((_radix)->type[0]) *)
> +#define __genradix_obj_size(_radix)    sizeof((_radix)->type[0])
> +#define __genradix_idx_to_offset(_radix, _idx)                 \
> +       __idx_to_offset(_idx, __genradix_obj_size(_radix))
> +
> +void *__genradix_ptr(struct __genradix *, size_t);
> +
> +/**
> + * genradix_ptr - get a pointer to a genradix entry
> + * @_radix:    genradix to access
> + * @_idx:      index to fetch
> + *
> + * Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
> + */
> +#define genradix_ptr(_radix, _idx)                             \
> +       (__genradix_cast(_radix)                                \
> +        __genradix_ptr(&(_radix)->tree,                        \
> +                       __genradix_idx_to_offset(_radix, _idx)))
> +
> +void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
> +
> +/**
> + * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it if necessary
> + * @_radix:    genradix to access
> + * @_idx:      index to fetch
> + * @_gfp:      gfp mask
> + *
> + * Returns a pointer to entry at @_idx, or NULL on allocation failure
> + */
> +#define genradix_ptr_alloc(_radix, _idx, _gfp)                 \
> +       (__genradix_cast(_radix)                                \
> +        __genradix_ptr_alloc(&(_radix)->tree,                  \
> +                       __genradix_idx_to_offset(_radix, _idx), \
> +                       _gfp))
> +
> +struct genradix_iter {
> +       size_t                  offset;
> +       size_t                  pos;
> +};
> +
> +/**
> + * genradix_iter_init - initialize a genradix_iter
> + * @_radix:    genradix that will be iterated over
> + * @_idx:      index to start iterating from
> + */
> +#define genradix_iter_init(_radix, _idx)                       \
> +       ((struct genradix_iter) {                               \
> +               .pos    = (_idx),                               \
> +               .offset = __genradix_idx_to_offset((_radix), (_idx)),\
> +       })
> +
> +void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
> +
> +/**
> + * genradix_iter_peek - get first entry at or above iterator's current
> + *                     position
> + * @_iter:     a genradix_iter
> + * @_radix:    genradix being iterated over
> + *
> + * If no more entries exist at or above @_iter's current position, returns NULL
> + */
> +#define genradix_iter_peek(_iter, _radix)                      \
> +       (__genradix_cast(_radix)                                \
> +        __genradix_iter_peek(_iter, &(_radix)->tree,           \
> +                             PAGE_SIZE / __genradix_obj_size(_radix)))
> +
> +static inline void __genradix_iter_advance(struct genradix_iter *iter,
> +                                          size_t obj_size)
> +{
> +       iter->offset += obj_size;
> +
> +       if (!is_power_of_2(obj_size) &&
> +           (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
> +               iter->offset = round_up(iter->offset, PAGE_SIZE);
> +
> +       iter->pos++;
> +}
> +
> +#define genradix_iter_advance(_iter, _radix)                   \
> +       __genradix_iter_advance(_iter, __genradix_obj_size(_radix))
> +
> +/**
> + * genradix_for_each - iterate over each entry in a genradix
> + * @_radix:    genradix to iterate over
> + * @_iter:     a genradix_iter to track current position
> + * @_p:                pointer to genradix entry type
> + *
> + * On every iteration, @_p will point to the current entry, and @_iter.pos
> + * will be the current entry's index.
> + */
> +#define genradix_for_each(_radix, _iter, _p)                   \
> +       for (_iter = genradix_iter_init(_radix, 0);             \
> +            _p = genradix_iter_peek(&(_iter), _uradix);        \
> +            genradix_iter_advance(&(_iter), _uradix))
> +
> +int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
> +
> +/**
> + * genradix_prealloc - preallocate entries in a generic radix tree
> + * @_radix:    genradix to preallocate
> + * @_nr:       number of entries to preallocate
> + * @_gfp:      gfp mask
> + *
> + * Returns 0 on success, -ENOMEM on failure
> + */
> +#define genradix_prealloc(_radix, _nr, _gfp)                   \
> +        __genradix_prealloc(&(_radix)->tree,                   \
> +                       __genradix_idx_to_offset(_radix, _nr + 1),\
> +                       _gfp)
> +
> +
> +#endif /* _LINUX_GENERIC_RADIX_TREE_H */
> diff --git a/lib/Makefile b/lib/Makefile
> index a90d4fcd74..5db5a7fb1e 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -39,7 +39,8 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
>          gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
>          bsearch.o find_bit.o llist.o memweight.o kfifo.o \
>          percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
> -        once.o refcount.o usercopy.o errseq.o bucket_locks.o
> +        once.o refcount.o usercopy.o errseq.o bucket_locks.o \
> +        generic-radix-tree.o
>  obj-$(CONFIG_STRING_SELFTEST) += test_string.o
>  obj-y += string_helpers.o
>  obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
> diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c
> new file mode 100644
> index 0000000000..4537c7c62c
> --- /dev/null
> +++ b/lib/generic-radix-tree.c
> @@ -0,0 +1,180 @@
> +
> +#include <linux/export.h>
> +#include <linux/generic-radix-tree.h>
> +#include <linux/gfp.h>
> +
> +#define GENRADIX_ARY           (PAGE_SIZE / sizeof(struct genradix_node *))
> +#define GENRADIX_ARY_SHIFT     ilog2(GENRADIX_ARY)
> +
> +struct genradix_node {
> +       union {
> +               /* Interior node: */
> +               struct genradix_node    *children[GENRADIX_ARY];
> +
> +               /* Leaf: */
> +               u8                      data[PAGE_SIZE];
> +       };
> +};
> +
> +static inline unsigned genradix_depth_shift(unsigned depth)
> +{
> +       return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
> +}
> +
> +/*
> + * Returns size (of data, in bytes) that a tree of a given depth holds:
> + */
> +static inline size_t genradix_depth_size(unsigned depth)
> +{
> +       return 1UL << genradix_depth_shift(depth);
> +}
> +
> +/*
> + * Returns pointer to the specified byte @offset within @radix, or NULL if not
> + * allocated
> + */
> +void *__genradix_ptr(struct __genradix *radix, size_t offset)
> +{
> +       size_t level = radix->depth;
> +       struct genradix_node *n = radix->root;
> +
> +       if (offset >= genradix_depth_size(radix->depth))
> +               return NULL;
> +
> +       while (1) {
> +               if (!n)
> +                       return NULL;
> +               if (!level)
> +                       break;
> +
> +               level--;
> +
> +               n = n->children[offset >> genradix_depth_shift(level)];
> +               offset &= genradix_depth_size(level) - 1;
> +       }
> +
> +       return &n->data[offset];
> +}
> +EXPORT_SYMBOL(__genradix_ptr);
> +
> +/*
> + * Returns pointer to the specified byte @offset within @radix, allocating it if
> + * necessary - newly allocated slots are always zeroed out:
> + */
> +void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
> +                          gfp_t gfp_mask)
> +{
> +       struct genradix_node **n;

Is there any reason that "struct genradix_node **" is used here instead
of "struct genradix_node *"?

It looks like this function only manipulates *n; am I missing something?

thanks,
liubo

> +       size_t level;
> +
> +       /* Increase tree depth if necessary: */
> +
> +       while (offset >= genradix_depth_size(radix->depth)) {
> +               struct genradix_node *new_root =
> +                       (void *) __get_free_page(gfp_mask|__GFP_ZERO);
> +
> +               if (!new_root)
> +                       return NULL;
> +
> +               new_root->children[0] = radix->root;
> +               radix->root = new_root;
> +               radix->depth++;
> +       }
> +
> +       n = &radix->root;
> +       level = radix->depth;
> +
> +       while (1) {
> +               if (!*n) {
> +                       *n = (void *) __get_free_page(gfp_mask|__GFP_ZERO);
> +                       if (!*n)
> +                               return NULL;
> +               }
> +
> +               if (!level)
> +                       break;
> +
> +               level--;
> +
> +               n = &(*n)->children[offset >> genradix_depth_shift(level)];
> +               offset &= genradix_depth_size(level) - 1;
> +       }
> +
> +       return &(*n)->data[offset];
> +}
> +EXPORT_SYMBOL(__genradix_ptr_alloc);
> +
> +void *__genradix_iter_peek(struct genradix_iter *iter,
> +                          struct __genradix *radix,
> +                          size_t objs_per_page)
> +{
> +       struct genradix_node *n;
> +       size_t level, i;
> +
> +       if (!radix->root)
> +               return NULL;
> +restart:
> +       if (iter->offset >= genradix_depth_size(radix->depth))
> +               return NULL;
> +
> +       n       = radix->root;
> +       level   = radix->depth;
> +
> +       while (level) {
> +               level--;
> +
> +               i = (iter->offset >> genradix_depth_shift(level)) &
> +                       (GENRADIX_ARY - 1);
> +
> +               while (!n->children[i]) {
> +                       i++;
> +                       iter->offset = round_down(iter->offset +
> +                                          genradix_depth_size(level),
> +                                          genradix_depth_size(level));
> +                       iter->pos = (iter->offset >> PAGE_SHIFT) *
> +                               objs_per_page;
> +                       if (i == GENRADIX_ARY)
> +                               goto restart;
> +               }
> +
> +               n = n->children[i];
> +       }
> +
> +       return &n->data[iter->offset & (PAGE_SIZE - 1)];
> +}
> +EXPORT_SYMBOL(__genradix_iter_peek);
> +
> +static void genradix_free_recurse(struct genradix_node *n, unsigned level)
> +{
> +       if (level) {
> +               unsigned i;
> +
> +               for (i = 0; i < GENRADIX_ARY; i++)
> +                       if (n->children[i])
> +                               genradix_free_recurse(n->children[i], level - 1);
> +       }
> +
> +       free_page((unsigned long) n);
> +}
> +
> +int __genradix_prealloc(struct __genradix *radix, size_t size,
> +                       gfp_t gfp_mask)
> +{
> +       size_t offset;
> +
> +       for (offset = 0; offset < size; offset += PAGE_SIZE)
> +               if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
> +                       return -ENOMEM;
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL(__genradix_prealloc);
> +
> +void __genradix_free(struct __genradix *radix)
> +{
> +       genradix_free_recurse(radix->root, radix->depth);
> +
> +       radix->root = NULL;
> +       radix->depth = 0;
> +}
> +EXPORT_SYMBOL(__genradix_free);
> --
> 2.17.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo at vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html



More information about the Linux-security-module-archive mailing list