[RFC 2/6] percpu-refcount: Add torture test for percpu refcount

Neeraj Upadhyay <Neeraj.Upadhyay at amd.com>
Mon Sep 16 05:08:07 UTC 2024


Add a torture test to exercise percpu ref managed mode operations.
The test verifies that a percpu ref's count drops to zero once all
users have dropped their references, and that a ref is not released
early while users still hold references to it.
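
The test creates nusers user threads, each of which repeatedly
takes and drops a reference on one of nrefs managed percpu refs,
plus a manager thread which flushes the release worker and then
checks that each ref ended up released and in atomic mode. A
sample invocation (illustrative only):

  modprobe percpu-refcount-torture nusers=4 nrefs=2 \
          niterations=1000 busted_early_ref_release=1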

Signed-off-by: Neeraj Upadhyay <Neeraj.Upadhyay at amd.com>
---
 .../admin-guide/kernel-parameters.txt         |  57 +++
 lib/Kconfig.debug                             |   9 +
 lib/Makefile                                  |   1 +
 lib/percpu-refcount-torture.c                 | 367 ++++++++++++++++++
 lib/percpu-refcount.c                         |  49 ++-
 lib/percpu-refcount.h                         |   6 +
 6 files changed, 483 insertions(+), 6 deletions(-)
 create mode 100644 lib/percpu-refcount-torture.c
 create mode 100644 lib/percpu-refcount.h
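
For reviewers unfamiliar with managed mode, below is a minimal
sketch of the lifecycle the test exercises. It assumes the
PERCPU_REF_REL_MANAGED flag and the managed-mode semantics
introduced earlier in this series; all names are illustrative:

  #include <linux/percpu-refcount.h>

  static struct percpu_ref example_ref;

  static void example_release(struct percpu_ref *ref)
  {
  	/* All references are gone; free the protected object here. */
  }

  static int example_init(void)
  {
  	int err;

  	/*
  	 * Managed mode: the percpu_ref release worker periodically
  	 * scans the ref, so no explicit percpu_ref_kill() is needed
  	 * before the final put.
  	 */
  	err = percpu_ref_init(&example_ref, example_release,
  			      PERCPU_REF_REL_MANAGED, GFP_KERNEL);
  	if (err)
  		return err;

  	percpu_ref_get(&example_ref);	/* a user takes a reference */
  	percpu_ref_put(&example_ref);	/* ... and drops it */

  	/*
  	 * Drop the initial reference; the release worker invokes
  	 * example_release() once the count reaches zero.
  	 */
  	percpu_ref_put(&example_ref);
  	return 0;
  }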

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0f02a1b04fe9..225f2dac294d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4677,6 +4677,63 @@
 
 			Default: 5000 ms
 
+	percpu_refcount_torture.busted_early_ref_release= [KNL]
+			Enable testing of buggy early release of a percpu
+			ref while there are active users. Used to exercise
+			the test's failure scenarios.
+
+			Default: 0 (disabled)
+
+	percpu_refcount_torture.busted_late_ref_release= [KNL]
+			Enable testing of a buggy non-zero reference count
+			after all ref users have dropped their references.
+			Used to exercise the test's failure scenarios.
+
+			Default: 0 (disabled)
+
+	percpu_refcount_torture.delay_us= [KNL]
+			Delay (in us) between the reference increment and
+			decrement operations performed by ref users.
+
+			Default: 10
+
+	percpu_refcount_torture.niterations= [KNL]
+			Number of iterations of ref increment and decrement by
+			ref users.
+
+			Default: 100
+
+	percpu_refcount_torture.nrefs= [KNL]
+			Number of percpu ref instances.
+
+			Default: 2
+
+	percpu_refcount_torture.nusers= [KNL]
+			Number of percpu ref user threads which increment and
+			decrement a percpu ref.
+
+			Default: 2
+
+	percpu_refcount_torture.onoff_holdoff= [KNL]
+			Set time (s) after boot for CPU-hotplug testing.
+
+			Default: 0
+
+	percpu_refcount_torture.onoff_interval= [KNL]
+			Set time (jiffies) between CPU-hotplug operations,
+			or zero to disable CPU-hotplug testing.
+
+	percpu_refcount_torture.stutter= [KNL]
+			Set wait time (jiffies) between two iterations of
+			percpu ref operations.
+
+			Default: 0
+
+	percpu_refcount_torture.verbose= [KNL]
+			Enable additional printk() statements.
+
+			Default: 1 (enabled)
+
 	pirq=		[SMP,APIC] Manual mp-table setup
 			See Documentation/arch/x86/i386/IO-APIC.rst.
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a30c03a66172..7e0117e01f05 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1611,6 +1611,15 @@ config SCF_TORTURE_TEST
 	  module may be built after the fact on the running kernel to
 	  be tested, if desired.
 
+config PERCPU_REFCOUNT_TORTURE_TEST
+	tristate "torture tests for percpu refcount"
+	select TORTURE_TEST
+	help
+	  This option provides a kernel module that runs percpu
+	  refcount torture tests for managed percpu refs. The kernel
+	  module may be built after the fact on the running kernel
+	  to be tested, if desired.
+
 config CSD_LOCK_WAIT_DEBUG
 	bool "Debugging for csd_lock_wait(), called from smp_call_function*()"
 	depends on DEBUG_KERNEL
diff --git a/lib/Makefile b/lib/Makefile
index 322bb127b4dc..d0286f7dfb37 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -50,6 +50,7 @@ obj-y += bcd.o sort.o parser.o debug_locks.o random32.o \
 	 once.o refcount.o rcuref.o usercopy.o errseq.o bucket_locks.o \
 	 generic-radix-tree.o bitmap-str.o
 obj-$(CONFIG_STRING_KUNIT_TEST) += string_kunit.o
+obj-$(CONFIG_PERCPU_REFCOUNT_TORTURE_TEST) += percpu-refcount-torture.o
 obj-y += string_helpers.o
 obj-$(CONFIG_STRING_HELPERS_KUNIT_TEST) += string_helpers_kunit.o
 obj-y += hexdump.o
diff --git a/lib/percpu-refcount-torture.c b/lib/percpu-refcount-torture.c
new file mode 100644
index 000000000000..686f5a228b40
--- /dev/null
+++ b/lib/percpu-refcount-torture.c
@@ -0,0 +1,367 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/percpu-refcount.h>
+#include <linux/torture.h>
+
+#include "percpu-refcount.h"
+
+static int busted_early_ref_release;
+module_param(busted_early_ref_release, int, 0444);
+MODULE_PARM_DESC(busted_early_ref_release,
+		 "Enable busted premature release of ref (default = 0), 0 = disable");
+
+static int busted_late_ref_release;
+module_param(busted_late_ref_release, int, 0444);
+MODULE_PARM_DESC(busted_late_ref_release,
+		 "Enable busted late release of ref (default = 0), 0 = disable");
+
+static long delay_us = 10;
+module_param(delay_us, long, 0444);
+MODULE_PARM_DESC(delay_us,
+		 "Delay between reader refcount operations in microseconds (default = 10)");
+
+static long nrefs = 2;
+module_param(nrefs, long, 0444);
+MODULE_PARM_DESC(nrefs, "Number of percpu refs (default = 2)");
+
+static long niterations = 100;
+module_param(niterations, long, 0444);
+MODULE_PARM_DESC(niterations,
+		 "Number of iterations of ref increment and decrement (default = 100)");
+
+static long nusers = 2;
+module_param(nusers, long, 0444);
+MODULE_PARM_DESC(nusers, "Number of refcount users (default = 2)");
+
+static int onoff_holdoff;
+module_param(onoff_holdoff, int, 0444);
+MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (seconds)");
+
+static int onoff_interval;
+module_param(onoff_interval, int, 0444);
+MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (jiffies), 0=disable");
+
+static int stutter;
+module_param(stutter, int, 0444);
+MODULE_PARM_DESC(stutter, "Stutter period in jiffies (default = 0), 0 = disable");
+
+static int verbose = 1;
+module_param(verbose, int, 0444);
+MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
+
+static struct task_struct **ref_user_tasks;
+static struct task_struct *ref_manager_task;
+static struct task_struct **busted_early_release_tasks;
+static struct task_struct **busted_late_release_tasks;
+
+static struct percpu_ref *refs;
+static long *num_per_ref_users;
+
+static atomic_t running;
+static atomic_t *ref_running;
+
+static char *torture_type = "percpu-refcount";
+
+static int percpu_ref_manager_thread(void *data)
+{
+	int i;
+
+	while (atomic_read(&running) != 0) {
+		percpu_ref_test_flush_release_work();
+		stutter_wait("percpu_ref_manager_thread");
+	}
+	/* Ensure ordering with ref users */
+	smp_mb();
+
+	percpu_ref_test_flush_release_work();
+
+	for (i = 0; i < nrefs; i++) {
+		WARN(percpu_ref_test_is_percpu(&refs[i]),
+			"!!! released ref %d should be in atomic mode", i);
+		WARN(!percpu_ref_is_zero(&refs[i]),
+			"!!! released ref %d should have 0 refcount", i);
+	}
+
+	do {
+		stutter_wait("percpu_ref_manager_thread");
+	} while (!torture_must_stop());
+
+	torture_kthread_stopping("percpu_ref_manager_thread");
+
+	return 0;
+}
+
+static int percpu_ref_test_thread(void *data)
+{
+	struct percpu_ref *ref = (struct percpu_ref *)data;
+	int i = 0;
+
+	percpu_ref_get(ref);
+
+	do {
+		percpu_ref_get(ref);
+		udelay(delay_us);
+		percpu_ref_put(ref);
+		stutter_wait("percpu_ref_test_thread");
+		i++;
+	} while (i < niterations);
+
+	atomic_dec(&ref_running[ref - refs]);
+	/* Order ref release with ref_running[ref_idx] == 0 */
+	smp_mb();
+	percpu_ref_put(ref);
+	/* Order ref decrement with running == 0 */
+	smp_mb();
+	atomic_dec(&running);
+
+	do {
+		stutter_wait("percpu_ref_test_thread");
+	} while (!torture_must_stop());
+
+	torture_kthread_stopping("percpu_ref_test_thread");
+
+	return 0;
+}
+
+static int percpu_ref_busted_early_thread(void *data)
+{
+	struct percpu_ref *ref = (struct percpu_ref *)data;
+	int ref_idx = ref - refs;
+	int i = 0, j;
+
+	do {
+		/* Momentarily drop the users' references to force an early release */
+		for (j = 0; j < num_per_ref_users[ref_idx]; j++)
+			percpu_ref_put(ref);
+		stutter_wait("percpu_ref_busted_early_thread");
+		for (j = 0; j < num_per_ref_users[ref_idx]; j++)
+			percpu_ref_get(ref);
+		i++;
+		stutter_wait("percpu_ref_busted_early_thread");
+	} while (i < niterations * 10);
+
+	do {
+		stutter_wait("percpu_ref_busted_early_thread");
+	} while (!torture_must_stop());
+
+	torture_kthread_stopping("percpu_ref_busted_early_thread");
+
+	return 0;
+}
+
+static int percpu_ref_busted_late_thread(void *data)
+{
+	struct percpu_ref *ref = (struct percpu_ref *)data;
+	int i = 0;
+
+	do {
+		/* Momentarily hold an extra reference */
+		percpu_ref_get(ref);
+		stutter_wait("percpu_ref_busted_late_thread");
+		percpu_ref_put(ref);
+		i++;
+	} while (i < niterations);
+
+	do {
+		stutter_wait("percpu_ref_busted_late_thread");
+	} while (!torture_must_stop());
+
+	torture_kthread_stopping("percpu_ref_busted_late_thread");
+
+	return 0;
+}
+
+static void percpu_ref_test_cleanup(void)
+{
+	int i;
+
+	if (torture_cleanup_begin())
+		return;
+
+	if (busted_late_release_tasks) {
+		for (i = 0; i < nrefs; i++)
+			torture_stop_kthread(busted_late_task, busted_late_release_tasks[i]);
+		kfree(busted_late_release_tasks);
+		busted_late_release_tasks = NULL;
+	}
+
+	if (busted_early_release_tasks) {
+		for (i = 0; i < nrefs; i++)
+			torture_stop_kthread(busted_early_task, busted_early_release_tasks[i]);
+		kfree(busted_early_release_tasks);
+		busted_early_release_tasks = NULL;
+	}
+
+	if (ref_manager_task) {
+		torture_stop_kthread(ref_manager, ref_manager_task);
+		ref_manager_task = NULL;
+	}
+
+	if (ref_user_tasks) {
+		for (i = 0; i < nusers; i++)
+			torture_stop_kthread(ref_user, ref_user_tasks[i]);
+		kfree(ref_user_tasks);
+		ref_user_tasks = NULL;
+	}
+
+	kfree(ref_running);
+	ref_running = NULL;
+
+	kfree(num_per_ref_users);
+	num_per_ref_users = NULL;
+
+	if (refs) {
+		for (i = 0; i < nrefs; i++)
+			percpu_ref_exit(&refs[i]);
+		kfree(refs);
+		refs = NULL;
+	}
+
+	torture_cleanup_end();
+}
+
+static void percpu_ref_test_release(struct percpu_ref *ref)
+{
+	WARN(!!atomic_add_return(0, &ref_running[ref - refs]), "!!! Premature ref release");
+}
+
+static int __init percpu_ref_torture_init(void)
+{
+	DEFINE_TORTURE_RANDOM(rand);
+	struct torture_random_state *trsp = &rand;
+	int flags;
+	int err;
+	int ref_idx;
+	int i;
+
+	if (!torture_init_begin(torture_type, verbose))
+		return -EBUSY;
+
+	atomic_set(&running, nusers);
+	/* Order @running with later increment and decrement operations */
+	smp_mb();
+
+	refs = kcalloc(nrefs, sizeof(refs[0]), GFP_KERNEL);
+	if (!refs) {
+		TOROUT_ERRSTRING("out of memory");
+		err = -ENOMEM;
+		goto init_err;
+	}
+	for (i = 0; i < nrefs; i++) {
+		flags = torture_random(trsp) & 1 ? PERCPU_REF_INIT_ATOMIC : PERCPU_REF_REL_MANAGED;
+		err = percpu_ref_init(&refs[i], percpu_ref_test_release,
+				      flags, GFP_KERNEL);
+		if (err)
+			goto init_err;
+		if (!(flags & PERCPU_REF_REL_MANAGED))
+			percpu_ref_switch_to_managed(&refs[i]);
+	}
+
+	num_per_ref_users = kcalloc(nrefs, sizeof(num_per_ref_users[0]), GFP_KERNEL);
+	if (!num_per_ref_users) {
+		TOROUT_ERRSTRING("out of memory");
+		err = -ENOMEM;
+		goto init_err;
+	}
+	for (i = 0; i < nrefs; i++)
+		num_per_ref_users[i] = 0;
+
+	ref_user_tasks = kcalloc(nusers, sizeof(ref_user_tasks[0]), GFP_KERNEL);
+	if (!ref_user_tasks) {
+		TOROUT_ERRSTRING("out of memory");
+		err = -ENOMEM;
+		goto init_err;
+	}
+
+	ref_running = kcalloc(nrefs, sizeof(ref_running[0]), GFP_KERNEL);
+	if (!ref_running) {
+		TOROUT_ERRSTRING("out of memory");
+		err = -ENOMEM;
+		goto init_err;
+	}
+
+	for (i = 0; i < nusers; i++) {
+		ref_idx = torture_random(trsp) % nrefs;
+		atomic_inc(&ref_running[ref_idx]);
+		num_per_ref_users[ref_idx]++;
+		/* Order increments with subsequent reads */
+		smp_mb();
+		err = torture_create_kthread(percpu_ref_test_thread,
+					     &refs[ref_idx], ref_user_tasks[i]);
+		if (torture_init_error(err))
+			goto init_err;
+	}
+
+	err = torture_create_kthread(percpu_ref_manager_thread, NULL, ref_manager_task);
+	if (torture_init_error(err))
+		goto init_err;
+
+	/* Drop initial references after test threads have started running */
+	udelay(1);
+	for (i = 0; i < nrefs; i++)
+		percpu_ref_put(&refs[i]);
+
+
+	if (busted_early_ref_release) {
+		busted_early_release_tasks = kcalloc(nrefs,
+						     sizeof(busted_early_release_tasks[0]),
+						     GFP_KERNEL);
+		if (!busted_early_release_tasks) {
+			TOROUT_ERRSTRING("out of memory");
+			err = -ENOMEM;
+			goto init_err;
+		}
+		for (i = 0; i < nrefs; i++) {
+			err = torture_create_kthread(percpu_ref_busted_early_thread,
+					     &refs[i], busted_early_release_tasks[i]);
+			if (torture_init_error(err))
+				goto init_err;
+		}
+	}
+
+	if (busted_late_ref_release) {
+		busted_late_release_tasks = kcalloc(nrefs, sizeof(busted_late_release_tasks[0]),
+						    GFP_KERNEL);
+		if (!busted_late_release_tasks) {
+			TOROUT_ERRSTRING("out of memory");
+			err = -ENOMEM;
+			goto init_err;
+		}
+		for (i = 0; i < nrefs; i++) {
+			err = torture_create_kthread(percpu_ref_busted_late_thread,
+					     &refs[i], busted_late_release_tasks[i]);
+			if (torture_init_error(err))
+				goto init_err;
+		}
+	}
+	if (stutter) {
+		err = torture_stutter_init(stutter, stutter);
+		if (torture_init_error(err))
+			goto init_err;
+	}
+
+	err = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, NULL);
+	if (torture_init_error(err))
+		goto init_err;
+
+	torture_init_end();
+	return 0;
+init_err:
+	torture_init_end();
+	percpu_ref_test_cleanup();
+	return err;
+}
+
+static void __exit percpu_ref_torture_exit(void)
+{
+	percpu_ref_test_cleanup();
+}
+
+module_init(percpu_ref_torture_init);
+module_exit(percpu_ref_torture_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("percpu refcount torture test");
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 7b97f9728c5b..7d0c85c7ce57 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -11,6 +11,8 @@
 #include <linux/mm.h>
 #include <linux/percpu-refcount.h>
 
+#include "percpu-refcount.h"
+
 /*
  * Initially, a percpu refcount is just a set of percpu counters. Initially, we
  * don't try to detect the ref hitting 0 - which means that get/put can just
@@ -677,6 +679,7 @@ static void percpu_ref_release_work_fn(struct work_struct *work)
 	struct percpu_ref *ref;
 	int count = 0;
 	bool held;
+	struct llist_node *last_node = READ_ONCE(last_percpu_ref_node);
 
 	first = READ_ONCE(percpu_ref_manage_head.first);
 	if (!first)
@@ -711,7 +714,7 @@ static void percpu_ref_release_work_fn(struct work_struct *work)
 	 * +----------+  +------+  +------+    +------+    +------+
 	 *
 	 */
-	if (last_percpu_ref_node == NULL || last_percpu_ref_node->next == NULL) {
+	if (last_node == NULL || last_node->next == NULL) {
 retry_sentinel_get:
 		sen_node = percpu_ref_get_sen_node();
 		/*
@@ -741,11 +744,10 @@ static void percpu_ref_release_work_fn(struct work_struct *work)
 			head = prev->next;
 		}
 	} else {
-		prev = last_percpu_ref_node;
+		prev = last_node;
 		head = prev->next;
 	}
 
-	last_percpu_ref_node = NULL;
 	llist_for_each_safe(pos, next, head) {
 		/* Free sentinel node which is present in the list */
 		if (percpu_ref_is_sen_node(pos)) {
@@ -773,18 +775,53 @@ static void percpu_ref_release_work_fn(struct work_struct *work)
 			continue;
 		__percpu_ref_switch_to_percpu_checked(ref, false);
 		count++;
-		if (count == max_scan_count) {
-			last_percpu_ref_node = pos;
-			break;
+		if (count == READ_ONCE(max_scan_count)) {
+			WRITE_ONCE(last_percpu_ref_node, pos);
+			goto queue_release_work;
 		}
 		prev = pos;
 	}
 
+	WRITE_ONCE(last_percpu_ref_node, NULL);
 queue_release_work:
 	queue_delayed_work(percpu_ref_release_wq, &percpu_ref_release_work,
 			   scan_interval);
 }
 
+bool percpu_ref_test_is_percpu(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count;
+
+	return __ref_is_percpu(ref, &percpu_count);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_test_is_percpu);
+
+void percpu_ref_test_flush_release_work(void)
+{
+	int max_flush = READ_ONCE(max_scan_count);
+	int max_count = 1000;
+
+	/* Complete any executing release work */
+	flush_delayed_work(&percpu_ref_release_work);
+	/* Scan till the end of the llist */
+	WRITE_ONCE(max_scan_count, INT_MAX);
+	/* max scan count update visible to release work */
+	smp_mb();
+	flush_delayed_work(&percpu_ref_release_work);
+	/* max scan count update visible to release work */
+	smp_mb();
+	WRITE_ONCE(max_scan_count, 1);
+	/* max scan count update visible to work */
+	smp_mb();
+	flush_delayed_work(&percpu_ref_release_work);
+	while (READ_ONCE(last_percpu_ref_node) != NULL && max_count--)
+		flush_delayed_work(&percpu_ref_release_work);
+	/* max scan count update visible to work */
+	smp_mb();
+	WRITE_ONCE(max_scan_count, max_flush);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_test_flush_release_work);
+
 static __init int percpu_ref_setup(void)
 {
 	percpu_ref_release_wq = alloc_workqueue("percpu_ref_release_wq",
diff --git a/lib/percpu-refcount.h b/lib/percpu-refcount.h
new file mode 100644
index 000000000000..be2ac0411194
--- /dev/null
+++ b/lib/percpu-refcount.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#ifndef __LIB_PERCPU_REFCOUNT_H
+#define __LIB_PERCPU_REFCOUNT_H
+bool percpu_ref_test_is_percpu(struct percpu_ref *ref);
+void percpu_ref_test_flush_release_work(void);
+#endif
-- 
2.34.1