[PATCH] kernel: Introduce a write lock/unlock wrapper for tasklist_lock

Jarkko Sakkinen jarkko at kernel.org
Wed Jan 3 14:04:54 UTC 2024


On Mon Dec 25, 2023 at 10:19 AM EET, Maria Yu wrote:
> tasklist_lock is an rwlock, and there are multiple scenarios in which
> readers hold it long enough to keep writers waiting. In
> freeze_processes()/thaw_processes(), walking all tasks to freeze or
> thaw them can hold the read lock of tasklist_lock for 200+ms on
> commercial devices. Meanwhile, write_lock_irq() spins with preemption
> and local irqs disabled until the lock can be acquired, which badly
> hurts the responsiveness of the system.
> For example:
> 1. cpu0 holds the read lock of tasklist_lock in thaw_processes().
> 2. cpu1 waits for the write lock of tasklist_lock to exec a new
>    thread, with preemption and local irqs disabled.
> 3. cpu2 waits for the write lock of tasklist_lock in do_exit(), with
>    preemption and local irqs disabled.
> 4. cpu3 waits for the write lock of tasklist_lock in do_exit(), with
>    preemption and local irqs disabled.
> So introduce a dedicated write lock/unlock wrapper for tasklist_lock.
> The current tasklist_lock writers all take the lock with
> write_lock_irq() and release it with write_unlock_irq(), which means
> no writer needs to wait on tasklist_lock in an irq-disabled context.
> The wrappers therefore follow the current design and use
> local_irq_disable() and local_irq_enable() directly, without taking
> callers that already have irqs disabled into account. Use
> write_trylock() in a loop, re-enabling irqs between attempts so the
> cpu can respond to interrupts while the lock cannot be taken.
>
> Signed-off-by: Maria Yu <quic_aiquny at quicinc.com>
> ---
>  fs/exec.c                  | 10 +++++-----
>  include/linux/sched/task.h | 29 +++++++++++++++++++++++++++++
>  kernel/exit.c              | 16 ++++++++--------
>  kernel/fork.c              |  6 +++---
>  kernel/ptrace.c            | 12 ++++++------
>  kernel/sys.c               |  8 ++++----
>  security/keys/keyctl.c     |  4 ++--
>  7 files changed, 57 insertions(+), 28 deletions(-)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index 4aa19b24f281..030eef6852eb 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1086,7 +1086,7 @@ static int de_thread(struct task_struct *tsk)
>  
>  		for (;;) {
>  			cgroup_threadgroup_change_begin(tsk);
> -			write_lock_irq(&tasklist_lock);
> +			write_lock_tasklist_lock();
>  			/*
>  			 * Do this under tasklist_lock to ensure that
>  			 * exit_notify() can't miss ->group_exec_task
> @@ -1095,7 +1095,7 @@ static int de_thread(struct task_struct *tsk)
>  			if (likely(leader->exit_state))
>  				break;
>  			__set_current_state(TASK_KILLABLE);
> -			write_unlock_irq(&tasklist_lock);
> +			write_unlock_tasklist_lock();
>  			cgroup_threadgroup_change_end(tsk);
>  			schedule();
>  			if (__fatal_signal_pending(tsk))
> @@ -1150,7 +1150,7 @@ static int de_thread(struct task_struct *tsk)
>  		 */
>  		if (unlikely(leader->ptrace))
>  			__wake_up_parent(leader, leader->parent);
> -		write_unlock_irq(&tasklist_lock);
> +		write_unlock_tasklist_lock();
>  		cgroup_threadgroup_change_end(tsk);
>  
>  		release_task(leader);
> @@ -1198,13 +1198,13 @@ static int unshare_sighand(struct task_struct *me)
>  
>  		refcount_set(&newsighand->count, 1);
>  
> -		write_lock_irq(&tasklist_lock);
> +		write_lock_tasklist_lock();
>  		spin_lock(&oldsighand->siglock);
>  		memcpy(newsighand->action, oldsighand->action,
>  		       sizeof(newsighand->action));
>  		rcu_assign_pointer(me->sighand, newsighand);
>  		spin_unlock(&oldsighand->siglock);
> -		write_unlock_irq(&tasklist_lock);
> +		write_unlock_tasklist_lock();
>  
>  		__cleanup_sighand(oldsighand);
>  	}
> diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
> index a23af225c898..6f69d9a3c868 100644
> --- a/include/linux/sched/task.h
> +++ b/include/linux/sched/task.h
> @@ -50,6 +50,35 @@ struct kernel_clone_args {
>   * a separate lock).
>   */
>  extern rwlock_t tasklist_lock;
> +
> +/*
> + * tasklist_lock is a special lock: it can take a good amount of
> + * time for tasklist_lock readers to finish, and the plain
> + * write_lock_irq() API disables local irqs up front and then leaves
> + * the current cpu spinning for the lock, unresponsive to interrupts.
> + *
> + * The current tasklist_lock writers all take the lock with
> + * write_lock_irq() and release it with write_unlock_irq(), which
> + * means no writer needs to wait on tasklist_lock in an irq-disabled
> + * context. So the write lock/unlock wrappers here simply follow the
> + * current design and use local_irq_disable() and local_irq_enable()
> + * directly.
> + */
> +static inline void write_lock_tasklist_lock(void)
> +{
> +	while (1) {
> +		local_irq_disable();
> +		if (write_trylock(&tasklist_lock))
> +			break;
> +		local_irq_enable();
> +		cpu_relax();
> +	}

Maybe:

	local_irq_disable();
	while (!write_trylock(&tasklist_lock)) {
		local_irq_enable();
		cpu_relax();
		local_irq_disable();
	}

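For completeness, the whole pair would then look something like this
(untested sketch; the unlock side is trimmed from the quote above, but
per the commit message it would just open-code write_unlock_irq()):

	static inline void write_lock_tasklist_lock(void)
	{
		/*
		 * Spin with irqs enabled between attempts so this cpu
		 * keeps servicing interrupts while the lock is read-held.
		 */
		local_irq_disable();
		while (!write_trylock(&tasklist_lock)) {
			local_irq_enable();
			cpu_relax();
			local_irq_disable();
		}
	}

	static inline void write_unlock_tasklist_lock(void)
	{
		write_unlock(&tasklist_lock);
		local_irq_enable();
	}
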
BR, Jarkko