[PATCH 1/1] apparmor: cache buffers on percpu list if there is lock contention

John Johansen john.johansen at canonical.com
Thu Feb 16 23:42:12 UTC 2023


On 2/16/23 13:46, Anil Altinay wrote:
> On a heavily loaded machine there can be lock contention on the
> global buffers lock. Add a percpu list on which to cache buffers
> when lock contention is encountered.
> 
> When allocating buffers attempt to use cached buffers first,
> before taking the global buffers lock. When freeing buffers
> try to put them back to the global list but if contention is
> encountered, put the buffer on the percpu list.
> 
> The length of time a buffer is held on the percpu list is dynamically
> adjusted based on lock contention. The amount of hold time is rapidly
> increased and slowly ramped down.
> 
> Fixes: df323337e507 ("apparmor: Use a memory pool instead per-CPU caches")
> Link: https://lore.kernel.org/lkml/cfd5cc6f-5943-2e06-1dbe-f4b4ad5c1fa1@canonical.com/
> Signed-off-by: John Johansen <john.johansen at canonical.com>
> Reported-by: Sergey Senozhatsky <senozhatsky at chromium.org>
> Signed-off-by: Anil Altinay <aaltinay at google.com>
> Cc: stable at vger.kernel.org

NAK, this version of the patch has an issue that prevented it from
being pushed upstream.

I can send out the revised version for people to look at, but I
haven't been able to get proper testing on it yet, which is why
I haven't pushed it either.


> ---
>   security/apparmor/lsm.c | 73 ++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 68 insertions(+), 5 deletions(-)
> 
> diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
> index c6728a629437..56b22e2def4c 100644
> --- a/security/apparmor/lsm.c
> +++ b/security/apparmor/lsm.c
> @@ -49,12 +49,19 @@ union aa_buffer {
>   	char buffer[1];
>   };
>   
> +struct aa_local_cache {
> +	unsigned int contention;
> +	unsigned int hold;
> +	struct list_head head;
> +};
> +
>   #define RESERVE_COUNT 2
>   static int reserve_count = RESERVE_COUNT;
>   static int buffer_count;
>   
>   static LIST_HEAD(aa_global_buffers);
>   static DEFINE_SPINLOCK(aa_buffers_lock);
> +static DEFINE_PER_CPU(struct aa_local_cache, aa_local_buffers);
>   
>   /*
>    * LSM hook functions
> @@ -1634,14 +1641,43 @@ static int param_set_mode(const char *val, const struct kernel_param *kp)
>   	return 0;
>   }
>   
> +static void update_contention(struct aa_local_cache *cache)
> +{
> +	cache->contention += 3;
> +	if (cache->contention > 9)
> +		cache->contention = 9;
> +	cache->hold += 1 << cache->contention;		/* 8, 64, 512 */
> +}
> +
>   char *aa_get_buffer(bool in_atomic)
>   {
>   	union aa_buffer *aa_buf;
> +	struct aa_local_cache *cache;
>   	bool try_again = true;
>   	gfp_t flags = (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
> +	/* use per cpu cached buffers first */
> +	cache = get_cpu_ptr(&aa_local_buffers);
> +	if (!list_empty(&cache->head)) {
> +		aa_buf = list_first_entry(&cache->head, union aa_buffer, list);
> +		list_del(&aa_buf->list);
> +		cache->hold--;
> +		put_cpu_ptr(&aa_local_buffers);
> +		return &aa_buf->buffer[0];
> +	}
> +	put_cpu_ptr(&aa_local_buffers);
>   
> +	if (!spin_trylock(&aa_buffers_lock)) {
> +		cache = get_cpu_ptr(&aa_local_buffers);
> +		update_contention(cache);
> +		put_cpu_ptr(&aa_local_buffers);
> +		spin_lock(&aa_buffers_lock);
> +	} else {
> +		cache = get_cpu_ptr(&aa_local_buffers);
> +		if (cache->contention)
> +			cache->contention--;
> +		put_cpu_ptr(&aa_local_buffers);
> +	}
>   retry:
> -	spin_lock(&aa_buffers_lock);
>   	if (buffer_count > reserve_count ||
>   	    (in_atomic && !list_empty(&aa_global_buffers))) {
>   		aa_buf = list_first_entry(&aa_global_buffers, union aa_buffer,
> @@ -1667,6 +1703,7 @@ char *aa_get_buffer(bool in_atomic)
>   	if (!aa_buf) {
>   		if (try_again) {
>   			try_again = false;
> +			spin_lock(&aa_buffers_lock);
>   			goto retry;
>   		}
>   		pr_warn_once("AppArmor: Failed to allocate a memory buffer.\n");
> @@ -1678,15 +1715,32 @@ char *aa_get_buffer(bool in_atomic)
>   void aa_put_buffer(char *buf)
>   {
>   	union aa_buffer *aa_buf;
> +	struct aa_local_cache *cache;
>   
>   	if (!buf)
>   		return;
>   	aa_buf = container_of(buf, union aa_buffer, buffer[0]);
>   
> -	spin_lock(&aa_buffers_lock);
> -	list_add(&aa_buf->list, &aa_global_buffers);
> -	buffer_count++;
> -	spin_unlock(&aa_buffers_lock);
> +	cache = get_cpu_ptr(&aa_local_buffers);
> +	if (!cache->hold) {
> +		put_cpu_ptr(&aa_local_buffers);
> +		if (spin_trylock(&aa_buffers_lock)) {
> +			list_add(&aa_buf->list, &aa_global_buffers);
> +			buffer_count++;
> +			spin_unlock(&aa_buffers_lock);
> +			cache = get_cpu_ptr(&aa_local_buffers);
> +			if (cache->contention)
> +				cache->contention--;
> +			put_cpu_ptr(&aa_local_buffers);
> +			return;
> +		}
> +		cache = get_cpu_ptr(&aa_local_buffers);
> +		update_contention(cache);
> +	}
> +
> +	/* cache in percpu list */
> +	list_add(&aa_buf->list, &cache->head);
> +	put_cpu_ptr(&aa_local_buffers);
>   }
>   
>   /*
> @@ -1728,6 +1782,15 @@ static int __init alloc_buffers(void)
>   	union aa_buffer *aa_buf;
>   	int i, num;
>   
> +	/*
> +	 * per cpu set of cached allocated buffers used to help reduce
> +	 * lock contention
> +	 */
> +	for_each_possible_cpu(i) {
> +		per_cpu(aa_local_buffers, i).contention = 0;
> +		per_cpu(aa_local_buffers, i).hold = 0;
> +		INIT_LIST_HEAD(&per_cpu(aa_local_buffers, i).head);
> +	}
>   	/*
>   	 * A function may require two buffers at once. Usually the buffers are
>   	 * used for a short period of time and are shared. On UP kernel buffers
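
To put numbers on the back-off in update_contention() above (my
arithmetic from the constants in the hunk, not anything stated in the
changelog): each trylock failure adds 3 to contention, capped at 9, so
the first three contended operations on a CPU add 1 << 3, 1 << 6, and
1 << 9 to hold:

        contention:  0 -> 3 -> 6 -> 9  -> 9   ...
        hold +=           8   64   512   512  ...  (cumulative: 8, 72, 584, ...)

Meanwhile each uncontended lock acquisition drops contention by only 1,
and each buffer pulled from the percpu cache drops hold by only 1,
which is the "rapidly increased and slowly ramped down" behavior
described in the changelog.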


